From: Jonathan Peyton
Date: Fri, 12 May 2017 18:01:32 +0000 (+0000)
Subject: Clang-format and whitespace cleanup of source code
X-Git-Tag: llvmorg-5.0.0-rc1~5203
X-Git-Url: http://review.tizen.org/git/?a=commitdiff_plain;h=3041982dd18f1877fc44ad76e45d7d93957e9212;p=platform%2Fupstream%2Fllvm.git

Clang-format and whitespace cleanup of source code

This patch contains the clang-format and whitespace cleanup of the entire code
base. Some of clang-format's changes made the code look worse in places. A best
effort was made to resolve the bulk of these problems, but many remain. Most of
the problems involved mangled line breaks and the tabbing of comments.

Patch by Terry Wilmarth

Differential Revision: https://reviews.llvm.org/D32659

llvm-svn: 302929
---
diff --git a/openmp/runtime/src/extractExternal.cpp b/openmp/runtime/src/extractExternal.cpp index 7a6fdb7..d8c4901 100644 --- a/openmp/runtime/src/extractExternal.cpp +++ b/openmp/runtime/src/extractExternal.cpp @@ -13,13 +13,13 @@ //===----------------------------------------------------------------------===// -#include <stdlib.h> -#include <iostream> -#include <strstream> #include <fstream> -#include <string> -#include <set> +#include <iostream> #include <map> +#include <set> +#include <stdlib.h> +#include <string> +#include <strstream> /* Given a set of n object files h ('external' object files) and a set of m object files o ('internal' object files), @@ -30,468 +30,457 @@ Usage: hide.exe - Thus, the prefixed symbols become hidden in the sense that they now have a special - prefix. + Thus, the prefixed symbols become hidden in the sense that they now have a + special prefix. */ using namespace std; -void stop(char* errorMsg) { - printf("%s\n", errorMsg); - exit(1); +void stop(char *errorMsg) { + printf("%s\n", errorMsg); + exit(1); } // an entry in the symbol table of a .OBJ file class Symbol { public: - __int64 name; - unsigned value; - unsigned short sectionNum, type; - char storageClass, nAux; + __int64 name; + unsigned value; + unsigned short sectionNum, type; + char storageClass, nAux; }; class _rstream : public istrstream { private: - const char *buf; + const char *buf; + protected: - _rstream(pair<const char*, streamsize> p):istrstream(p.first,p.second),buf(p.first){} - ~_rstream() { - delete[]buf; - } + _rstream(pair<const char *, streamsize> p) + : istrstream(p.first, p.second), buf(p.first) {} + ~_rstream() { delete[] buf; } }; -/* A stream encapuslating the content of a file or the content of a string, overriding the - >> operator to read various integer types in binary form, as well as a symbol table - entry. -*/ +// A stream encapuslating the content of a file or the content of a string, +// overriding the >> operator to read various integer types in binary form, +// as well as a symbol table entry.
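// Illustrative usage sketch ("foo.obj" is a hypothetical file name; the reads
// mirror how computeExternalSymbols() below uses this class):
//   rstream f("foo.obj");            // slurp the whole object file into memory
//   unsigned symTabStart, symNEntries;
//   f.seekg(8);                      // COFF header: symbol-table pointer lives at offset 8
//   f >> symTabStart >> symNEntries; // binary reads via the operator>> overloads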
class rstream : public _rstream { private: - template - inline rstream& doRead(T &x) { - read((char*)&x, sizeof(T)); - return *this; - } - static pair getBuf(const char *fileName) { - ifstream raw(fileName,ios::binary | ios::in); - if(!raw.is_open()) - stop("rstream.getBuf: Error opening file"); - raw.seekg(0,ios::end); - streampos fileSize = raw.tellg(); - if(fileSize < 0) - stop("rstream.getBuf: Error reading file"); - char *buf = new char[fileSize]; - raw.seekg(0,ios::beg); - raw.read(buf, fileSize); - return pair(buf,fileSize); - } + template inline rstream &doRead(T &x) { + read((char *)&x, sizeof(T)); + return *this; + } + static pair getBuf(const char *fileName) { + ifstream raw(fileName, ios::binary | ios::in); + if (!raw.is_open()) + stop("rstream.getBuf: Error opening file"); + raw.seekg(0, ios::end); + streampos fileSize = raw.tellg(); + if (fileSize < 0) + stop("rstream.getBuf: Error reading file"); + char *buf = new char[fileSize]; + raw.seekg(0, ios::beg); + raw.read(buf, fileSize); + return pair(buf, fileSize); + } + public: - // construct from a string - rstream(const char *buf,streamsize size):_rstream(pair(buf, size)){} - /* construct from a file whole content is fully read once to initialize the content of - this stream - */ - rstream(const char *fileName):_rstream(getBuf(fileName)){} - rstream& operator>>(int &x) { - return doRead(x); - } - rstream& operator>>(unsigned &x) { - return doRead(x); - } - rstream& operator>>(short &x) { - return doRead(x); - } - rstream& operator>>(unsigned short &x) { - return doRead(x); - } - rstream& operator>>(Symbol &e) { - read((char*)&e, 18); - return *this; - } + // construct from a string + rstream(const char *buf, streamsize size) + : _rstream(pair(buf, size)) {} + // construct from a file whole content is fully read once to initialize the + // content of this stream + rstream(const char *fileName) : _rstream(getBuf(fileName)) {} + rstream &operator>>(int &x) { return doRead(x); } + rstream &operator>>(unsigned &x) { return doRead(x); } + rstream &operator>>(short &x) { return doRead(x); } + rstream &operator>>(unsigned short &x) { return doRead(x); } + rstream &operator>>(Symbol &e) { + read((char *)&e, 18); + return *this; + } }; // string table in a .OBJ file class StringTable { private: - map directory; - size_t length; - char *data; - - // make from bytes in - void makeDirectory(void) { - unsigned i = 4; - while(i < length) { - string s = string(data + i); - directory.insert(make_pair(s, i)); - i += s.size() + 1; - } + map directory; + size_t length; + char *data; + + // make from bytes in + void makeDirectory(void) { + unsigned i = 4; + while (i < length) { + string s = string(data + i); + directory.insert(make_pair(s, i)); + i += s.size() + 1; } - // initialize and with contents specified by the arguments - void init(const char *_data) { - unsigned _length = *(unsigned*)_data; - - if(_length < sizeof(unsigned) || _length != *(unsigned*)_data) - stop("StringTable.init: Invalid symbol table"); - if(_data[_length - 1]) { - // to prevent runaway strings, make sure the data ends with a zero - data = new char[length = _length + 1]; - data[_length] = 0; - } else { - data = new char[length = _length]; - } - *(unsigned*)data = length; - KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), - length - sizeof(unsigned)); - makeDirectory(); + } + // initialize and with contents specified by the arguments + void init(const char *_data) { + unsigned _length = *(unsigned *)_data; + + if (_length < sizeof(unsigned) || _length != 
*(unsigned *)_data) + stop("StringTable.init: Invalid symbol table"); + if (_data[_length - 1]) { + // to prevent runaway strings, make sure the data ends with a zero + data = new char[length = _length + 1]; + data[_length] = 0; + } else { + data = new char[length = _length]; } + *(unsigned *)data = length; + KMP_MEMCPY(data + sizeof(unsigned), _data + sizeof(unsigned), + length - sizeof(unsigned)); + makeDirectory(); + } + public: - StringTable(rstream &f) { - /* Construct string table by reading from f. - */ - streampos s; - unsigned strSize; - char *strData; - - s = f.tellg(); - f>>strSize; - if(strSize < sizeof(unsigned)) - stop("StringTable: Invalid string table"); - strData = new char[strSize]; - *(unsigned*)strData = strSize; - // read the raw data into - f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); - s = f.tellg() - s; - if(s < strSize) - stop("StringTable: Unexpected EOF"); - init(strData); - delete[]strData; + StringTable(rstream &f) { + // Construct string table by reading from f. + streampos s; + unsigned strSize; + char *strData; + + s = f.tellg(); + f >> strSize; + if (strSize < sizeof(unsigned)) + stop("StringTable: Invalid string table"); + strData = new char[strSize]; + *(unsigned *)strData = strSize; + // read the raw data into + f.read(strData + sizeof(unsigned), strSize - sizeof(unsigned)); + s = f.tellg() - s; + if (s < strSize) + stop("StringTable: Unexpected EOF"); + init(strData); + delete[] strData; + } + StringTable(const set &strings) { + // Construct string table from given strings. + char *p; + set::const_iterator it; + size_t s; + + // count required size for data + for (length = sizeof(unsigned), it = strings.begin(); it != strings.end(); + ++it) { + size_t l = (*it).size(); + + if (l > (unsigned)0xFFFFFFFF) + stop("StringTable: String too long"); + if (l > 8) { + length += l + 1; + if (length > (unsigned)0xFFFFFFFF) + stop("StringTable: Symbol table too long"); + } } - StringTable(const set &strings) { - /* Construct string table from given strings. - */ - char *p; - set::const_iterator it; - size_t s; - - // count required size for data - for(length = sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { - size_t l = (*it).size(); - - if(l > (unsigned) 0xFFFFFFFF) - stop("StringTable: String too long"); - if(l > 8) { - length += l + 1; - if(length > (unsigned) 0xFFFFFFFF) - stop("StringTable: Symbol table too long"); - } - } - data = new char[length]; - *(unsigned*)data = length; - // populate data and directory - for(p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); ++it) { - const string &str = *it; - size_t l = str.size(); - if(l > 8) { - directory.insert(make_pair(str, p - data)); - KMP_MEMCPY(p, str.c_str(), l); - p[l] = 0; - p += l + 1; - } - } + data = new char[length]; + *(unsigned *)data = length; + // populate data and directory + for (p = data + sizeof(unsigned), it = strings.begin(); it != strings.end(); + ++it) { + const string &str = *it; + size_t l = str.size(); + if (l > 8) { + directory.insert(make_pair(str, p - data)); + KMP_MEMCPY(p, str.c_str(), l); + p[l] = 0; + p += l + 1; + } } - ~StringTable() { - delete[] data; + } + ~StringTable() { delete[] data; } + // Returns encoding for given string based on this string table. Error if + // string length is greater than 8 but string is not in the string table + // -- returns 0. 
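  // Illustration of the 8-byte encoding produced below (values hypothetical):
  // a short name such as "abcdef" is stored inline as the bytes
  // 'a','b','c','d','e','f',0,0, while a 20-character name that sits at byte
  // offset 12 of this string table is encoded as the two 32-bit words {0, 12}.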
+ __int64 encode(const string &str) { + __int64 r; + + if (str.size() <= 8) { + // encoded directly + ((char *)&r)[7] = 0; + KMP_STRNCPY_S((char *)&r, sizeof(r), str.c_str(), 8); + return r; + } else { + // represented as index into table + map::const_iterator it = directory.find(str); + if (it == directory.end()) + stop("StringTable::encode: String now found in string table"); + ((unsigned *)&r)[0] = 0; + ((unsigned *)&r)[1] = (*it).second; + return r; } - /* Returns encoding for given string based on this string table. - Error if string length is greater than 8 but string is not in - the string table--returns 0. - */ - __int64 encode(const string &str) { - __int64 r; - - if(str.size() <= 8) { - // encoded directly - ((char*)&r)[7] = 0; - KMP_STRNCPY_S((char*)&r, sizeof(r), str.c_str(), 8); - return r; - } else { - // represented as index into table - map::const_iterator it = directory.find(str); - if(it == directory.end()) - stop("StringTable::encode: String now found in string table"); - ((unsigned*)&r)[0] = 0; - ((unsigned*)&r)[1] = (*it).second; - return r; - } - } - /* Returns string represented by x based on this string table. - Error if x references an invalid position in the table--returns - the empty string. - */ - string decode(__int64 x) const { - if(*(unsigned*)&x == 0) { - // represented as index into table - unsigned &p = ((unsigned*)&x)[1]; - if(p >= length) - stop("StringTable::decode: Invalid string table lookup"); - return string(data + p); - } else { - // encoded directly - char *p = (char*)&x; - int i; - - for(i = 0; i < 8 && p[i]; ++i); - return string(p, i); - } - } - void write(ostream &os) { - os.write(data, length); + } + // Returns string represented by x based on this string table. Error if x + // references an invalid position in the table--returns the empty string. 
+ string decode(__int64 x) const { + if (*(unsigned *)&x == 0) { + // represented as index into table + unsigned &p = ((unsigned *)&x)[1]; + if (p >= length) + stop("StringTable::decode: Invalid string table lookup"); + return string(data + p); + } else { + // encoded directly + char *p = (char *)&x; + int i; + + for (i = 0; i < 8 && p[i]; ++i) + ; + return string(p, i); } + } + void write(ostream &os) { os.write(data, length); } }; -/* for the named object file, determines the set of defined symbols and the set of undefined external symbols - and writes them to and respectively -*/ -void computeExternalSymbols(const char *fileName, set *defined, set *undefined){ - streampos fileSize; - size_t strTabStart; - unsigned symTabStart, symNEntries; - rstream f(fileName); - - f.seekg(0,ios::end); - fileSize = f.tellg(); - - f.seekg(8); - f >> symTabStart >> symNEntries; - // seek to the string table - f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); - if(f.eof()) { - printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart = %u, symNEntries = %u\n", - fileName, (unsigned long) fileSize, symTabStart, symNEntries); - stop("computeExternalSymbols: Unexpected EOF 1"); - } - StringTable stringTable(f); // read the string table - if(f.tellg() != fileSize) - stop("computeExternalSymbols: Unexpected data after string table"); - - f.clear(); - f.seekg(symTabStart); // seek to the symbol table - - defined->clear(); undefined->clear(); - for(int i = 0; i < symNEntries; ++i) { - // process each entry - Symbol e; - - if(f.eof()) - stop("computeExternalSymbols: Unexpected EOF 2"); - f>>e; - if(f.fail()) - stop("computeExternalSymbols: File read error"); - if(e.nAux) { // auxiliary entry: skip - f.seekg(e.nAux * 18, ios::cur); - i += e.nAux; - } - // if symbol is extern and defined in the current file, insert it - if(e.storageClass == 2) - if(e.sectionNum) - defined->insert(stringTable.decode(e.name)); - else - undefined->insert(stringTable.decode(e.name)); +// for the named object file, determines the set of defined symbols and the set +// of undefined external symbols and writes them to and +// respectively +void computeExternalSymbols(const char *fileName, set *defined, + set *undefined) { + streampos fileSize; + size_t strTabStart; + unsigned symTabStart, symNEntries; + rstream f(fileName); + + f.seekg(0, ios::end); + fileSize = f.tellg(); + + f.seekg(8); + f >> symTabStart >> symNEntries; + // seek to the string table + f.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); + if (f.eof()) { + printf("computeExternalSymbols: fileName='%s', fileSize = %lu, symTabStart " + "= %u, symNEntries = %u\n", + fileName, (unsigned long)fileSize, symTabStart, symNEntries); + stop("computeExternalSymbols: Unexpected EOF 1"); + } + StringTable stringTable(f); // read the string table + if (f.tellg() != fileSize) + stop("computeExternalSymbols: Unexpected data after string table"); + + f.clear(); + f.seekg(symTabStart); // seek to the symbol table + + defined->clear(); + undefined->clear(); + for (int i = 0; i < symNEntries; ++i) { + // process each entry + Symbol e; + + if (f.eof()) + stop("computeExternalSymbols: Unexpected EOF 2"); + f >> e; + if (f.fail()) + stop("computeExternalSymbols: File read error"); + if (e.nAux) { // auxiliary entry: skip + f.seekg(e.nAux * 18, ios::cur); + i += e.nAux; } + // if symbol is extern and defined in the current file, insert it + if (e.storageClass == 2) + if (e.sectionNum) + defined->insert(stringTable.decode(e.name)); + else + 
undefined->insert(stringTable.decode(e.name)); + } } -/* For each occurrence of an external symbol in the object file named by - by that is a member of , renames it by prefixing - with "__kmp_external_", writing back the file in-place -*/ +// For each occurrence of an external symbol in the object file named by +// by that is a member of , renames it by prefixing +// with "__kmp_external_", writing back the file in-place void hideSymbols(char *fileName, const set &hide) { - static const string prefix("__kmp_external_"); - set strings; // set of all occurring symbols, appropriately prefixed - streampos fileSize; - size_t strTabStart; - unsigned symTabStart, symNEntries; - int i; - rstream in(fileName); - - in.seekg(0,ios::end); - fileSize = in.tellg(); - - in.seekg(8); - in >> symTabStart >> symNEntries; - in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - StringTable stringTableOld(in); // read original string table - - if(in.tellg() != fileSize) - stop("hideSymbols: Unexpected data after string table"); - - // compute set of occurring strings with prefix added - for(i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i * 18); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if(in.fail()) - stop("hideSymbols: File read error"); - if(e.nAux) - i += e.nAux; - const string &s = stringTableOld.decode(e.name); - // if symbol is extern and found in , prefix and insert into strings, - // otherwise, just insert into strings without prefix - strings.insert( (e.storageClass == 2 && hide.find(s) != hide.end()) ? - prefix + s : s); - } - - ofstream out(fileName, ios::trunc | ios::out | ios::binary); - if(!out.is_open()) - stop("hideSymbols: Error opening output file"); - - // make new string table from string set - StringTable stringTableNew = StringTable(strings); - - // copy input file to output file up to just before the symbol table - in.seekg(0); - char *buf = new char[symTabStart]; - in.read(buf, symTabStart); - out.write(buf, symTabStart); - delete []buf; - - // copy input symbol table to output symbol table with name translation - for(i = 0; i < symNEntries; ++i) { - Symbol e; - - in.seekg(symTabStart + i*18); - if(in.eof()) - stop("hideSymbols: Unexpected EOF"); - in >> e; - if(in.fail()) - stop("hideSymbols: File read error"); - const string &s = stringTableOld.decode(e.name); - out.seekp(symTabStart + i*18); - e.name = stringTableNew.encode( (e.storageClass == 2 && hide.find(s) != hide.end()) ? 
- prefix + s : s); - out.write((char*)&e, 18); - if(out.fail()) - stop("hideSymbols: File write error"); - if(e.nAux) { - // copy auxiliary symbol table entries - int nAux = e.nAux; - for(int j = 1; j <= nAux; ++j) { - in >> e; - out.seekp(symTabStart + (i + j) * 18); - out.write((char*)&e, 18); - } - i += nAux; - } + static const string prefix("__kmp_external_"); + set strings; // set of all occurring symbols, appropriately prefixed + streampos fileSize; + size_t strTabStart; + unsigned symTabStart, symNEntries; + int i; + rstream in(fileName); + + in.seekg(0, ios::end); + fileSize = in.tellg(); + + in.seekg(8); + in >> symTabStart >> symNEntries; + in.seekg(strTabStart = symTabStart + 18 * (size_t)symNEntries); + if (in.eof()) + stop("hideSymbols: Unexpected EOF"); + StringTable stringTableOld(in); // read original string table + + if (in.tellg() != fileSize) + stop("hideSymbols: Unexpected data after string table"); + + // compute set of occurring strings with prefix added + for (i = 0; i < symNEntries; ++i) { + Symbol e; + + in.seekg(symTabStart + i * 18); + if (in.eof()) + stop("hideSymbols: Unexpected EOF"); + in >> e; + if (in.fail()) + stop("hideSymbols: File read error"); + if (e.nAux) + i += e.nAux; + const string &s = stringTableOld.decode(e.name); + // if symbol is extern and found in , prefix and insert into strings, + // otherwise, just insert into strings without prefix + strings.insert( + (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); + } + + ofstream out(fileName, ios::trunc | ios::out | ios::binary); + if (!out.is_open()) + stop("hideSymbols: Error opening output file"); + + // make new string table from string set + StringTable stringTableNew = StringTable(strings); + + // copy input file to output file up to just before the symbol table + in.seekg(0); + char *buf = new char[symTabStart]; + in.read(buf, symTabStart); + out.write(buf, symTabStart); + delete[] buf; + + // copy input symbol table to output symbol table with name translation + for (i = 0; i < symNEntries; ++i) { + Symbol e; + + in.seekg(symTabStart + i * 18); + if (in.eof()) + stop("hideSymbols: Unexpected EOF"); + in >> e; + if (in.fail()) + stop("hideSymbols: File read error"); + const string &s = stringTableOld.decode(e.name); + out.seekp(symTabStart + i * 18); + e.name = stringTableNew.encode( + (e.storageClass == 2 && hide.find(s) != hide.end()) ? prefix + s : s); + out.write((char *)&e, 18); + if (out.fail()) + stop("hideSymbols: File write error"); + if (e.nAux) { + // copy auxiliary symbol table entries + int nAux = e.nAux; + for (int j = 1; j <= nAux; ++j) { + in >> e; + out.seekp(symTabStart + (i + j) * 18); + out.write((char *)&e, 18); + } + i += nAux; } - // output string table - stringTableNew.write(out); + } + // output string table + stringTableNew.write(out); } // returns true iff and have no common element -template -bool isDisjoint(const set &a, const set &b) { - set::const_iterator ita, itb; - - for(ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { - const T &ta = *ita, &tb = *itb; - if(ta < tb) - ++ita; - else if (tb < ta) - ++itb; - else - return false; - } - return true; +template bool isDisjoint(const set &a, const set &b) { + set::const_iterator ita, itb; + + for (ita = a.begin(), itb = b.begin(); ita != a.end() && itb != b.end();) { + const T &ta = *ita, &tb = *itb; + if (ta < tb) + ++ita; + else if (tb < ta) + ++itb; + else + return false; + } + return true; } -/* precondition: and are arrays with elements where - >= . 
The first elements correspond to the external object - files and the rest correspond to the internal object files. - postcondition: file x is said to depend on file y if undefined[x] and defined[y] are not - disjoint. Returns the transitive closure of the set of internal object files, as a set of - file indexes, under the 'depends on' relation, minus the set of internal object files. -*/ -set *findRequiredExternal(int nExternal, int nTotal, set *defined, set *undefined) { - set *required = new set; - set fresh[2]; - int i, cur = 0; - bool changed; - - for(i = nTotal - 1; i >= nExternal; --i) - fresh[cur].insert(i); - do { - changed = false; - for(set::iterator it = fresh[cur].begin(); it != fresh[cur].end(); ++it) { - set &s = undefined[*it]; - - for(i = 0; i < nExternal; ++i) { - if(required->find(i) == required->end()) { - if(!isDisjoint(defined[i], s)) { - // found a new qualifying element - required->insert(i); - fresh[1 - cur].insert(i); - changed = true; - } - } - } - } - fresh[cur].clear(); - cur = 1 - cur; - } while(changed); - return required; +// PRE: and are arrays with elements where +// >= . The first elements correspond to the +// external object files and the rest correspond to the internal object files. +// POST: file x is said to depend on file y if undefined[x] and defined[y] are +// not disjoint. Returns the transitive closure of the set of internal object +// files, as a set of file indexes, under the 'depends on' relation, minus the +// set of internal object files. +set *findRequiredExternal(int nExternal, int nTotal, set *defined, + set *undefined) { + set *required = new set; + set fresh[2]; + int i, cur = 0; + bool changed; + + for (i = nTotal - 1; i >= nExternal; --i) + fresh[cur].insert(i); + do { + changed = false; + for (set::iterator it = fresh[cur].begin(); it != fresh[cur].end(); + ++it) { + set &s = undefined[*it]; + + for (i = 0; i < nExternal; ++i) { + if (required->find(i) == required->end()) { + if (!isDisjoint(defined[i], s)) { + // found a new qualifying element + required->insert(i); + fresh[1 - cur].insert(i); + changed = true; + } + } + } + } + fresh[cur].clear(); + cur = 1 - cur; + } while (changed); + return required; } int main(int argc, char **argv) { - int nExternal, nInternal, i; - set *defined, *undefined; - set::iterator it; - - if(argc < 3) - stop("Please specify a positive integer followed by a list of object filenames"); - nExternal = atoi(argv[1]); - if(nExternal <= 0) - stop("Please specify a positive integer followed by a list of object filenames"); - if(nExternal + 2 > argc) - stop("Too few external objects"); - nInternal = argc - nExternal - 2; - defined = new set[argc - 2]; - undefined = new set[argc - 2]; - - // determine the set of defined and undefined external symbols - for(i = 2; i < argc; ++i) - computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); - - // determine the set of required external files - set *requiredExternal = findRequiredExternal(nExternal, argc - 2, defined, undefined); - set hide; - - /* determine the set of symbols to hide--namely defined external symbols of the - required external files - */ - for(it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { - int idx = *it; - set::iterator it2; - /* We have to insert one element at a time instead of inserting a range because - the insert member function taking a range doesn't exist on Windows* OS, at least - at the time of this writing. 
- */ - for(it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) - hide.insert(*it2); - } - - /* process the external files--removing those that are not required and hiding - the appropriate symbols in the others - */ - for(i = 0; i < nExternal; ++i) - if(requiredExternal->find(i) != requiredExternal->end()) - hideSymbols(argv[2 + i], hide); - else - remove(argv[2 + i]); - // hide the appropriate symbols in the internal files - for(i = nExternal + 2; i < argc; ++i) - hideSymbols(argv[i], hide); - return 0; + int nExternal, nInternal, i; + set *defined, *undefined; + set::iterator it; + + if (argc < 3) + stop("Please specify a positive integer followed by a list of object " + "filenames"); + nExternal = atoi(argv[1]); + if (nExternal <= 0) + stop("Please specify a positive integer followed by a list of object " + "filenames"); + if (nExternal + 2 > argc) + stop("Too few external objects"); + nInternal = argc - nExternal - 2; + defined = new set[argc - 2]; + undefined = new set[argc - 2]; + + // determine the set of defined and undefined external symbols + for (i = 2; i < argc; ++i) + computeExternalSymbols(argv[i], defined + i - 2, undefined + i - 2); + + // determine the set of required external files + set *requiredExternal = + findRequiredExternal(nExternal, argc - 2, defined, undefined); + set hide; + + // determine the set of symbols to hide--namely defined external symbols of + // the required external files + for (it = requiredExternal->begin(); it != requiredExternal->end(); ++it) { + int idx = *it; + set::iterator it2; + // We have to insert one element at a time instead of inserting a range + // because the insert member function taking a range doesn't exist on + // Windows* OS, at least at the time of this writing. + for (it2 = defined[idx].begin(); it2 != defined[idx].end(); ++it2) + hide.insert(*it2); + } + + // process the external files--removing those that are not required and hiding + // the appropriate symbols in the others + for (i = 0; i < nExternal; ++i) + if (requiredExternal->find(i) != requiredExternal->end()) + hideSymbols(argv[2 + i], hide); + else + remove(argv[2 + i]); + // hide the appropriate symbols in the internal files + for (i = nExternal + 2; i < argc; ++i) + hideSymbols(argv[i], hide); + return 0; } diff --git a/openmp/runtime/src/kmp.h b/openmp/runtime/src/kmp.h index 85b661b..657a685 100644 --- a/openmp/runtime/src/kmp.h +++ b/openmp/runtime/src/kmp.h @@ -22,36 +22,35 @@ /* #define BUILD_PARALLEL_ORDERED 1 */ /* This fix replaces gettimeofday with clock_gettime for better scalability on - the Altix. Requires user code to be linked with -lrt. -*/ + the Altix. Requires user code to be linked with -lrt. 
*/ //#define FIX_SGI_CLOCK /* Defines for OpenMP 3.0 tasking and auto scheduling */ -# ifndef KMP_STATIC_STEAL_ENABLED -# define KMP_STATIC_STEAL_ENABLED 1 -# endif +#ifndef KMP_STATIC_STEAL_ENABLED +#define KMP_STATIC_STEAL_ENABLED 1 +#endif -#define TASK_CURRENT_NOT_QUEUED 0 -#define TASK_CURRENT_QUEUED 1 +#define TASK_CURRENT_NOT_QUEUED 0 +#define TASK_CURRENT_QUEUED 1 #ifdef BUILD_TIED_TASK_STACK -#define TASK_STACK_EMPTY 0 // entries when the stack is empty - -// Used to define TASK_STACK_SIZE and TASK_STACK_MASK -#define TASK_STACK_BLOCK_BITS 5 -#define TASK_STACK_BLOCK_SIZE ( 1 << TASK_STACK_BLOCK_BITS ) // Number of entries in each task stack array -#define TASK_STACK_INDEX_MASK ( TASK_STACK_BLOCK_SIZE - 1 ) // Mask for determining index into stack block +#define TASK_STACK_EMPTY 0 // entries when the stack is empty +#define TASK_STACK_BLOCK_BITS 5 // Used in TASK_STACK_SIZE and TASK_STACK_MASK +// Number of entries in each task stack array +#define TASK_STACK_BLOCK_SIZE (1 << TASK_STACK_BLOCK_BITS) +// Mask for determining index into stack block +#define TASK_STACK_INDEX_MASK (TASK_STACK_BLOCK_SIZE - 1) #endif // BUILD_TIED_TASK_STACK -#define TASK_NOT_PUSHED 1 +#define TASK_NOT_PUSHED 1 #define TASK_SUCCESSFULLY_PUSHED 0 -#define TASK_TIED 1 -#define TASK_UNTIED 0 -#define TASK_EXPLICIT 1 -#define TASK_IMPLICIT 0 -#define TASK_PROXY 1 -#define TASK_FULL 0 +#define TASK_TIED 1 +#define TASK_UNTIED 0 +#define TASK_EXPLICIT 1 +#define TASK_IMPLICIT 0 +#define TASK_PROXY 1 +#define TASK_FULL 0 #define KMP_CANCEL_THREADS #define KMP_THREAD_ATTR @@ -62,14 +61,14 @@ #undef KMP_CANCEL_THREADS #endif +#include +#include +#include #include #include -#include -#include #include -#include -/* include don't use; problems with /MD on Windows* OS NT due to bad Microsoft library */ -/* some macros provided below to replace some of these functions */ +/* include don't use; problems with /MD on Windows* OS NT due to bad + Microsoft library. Some macros provided below to replace these functions */ #ifndef __ABSOFT_WIN #include #endif @@ -100,9 +99,9 @@ class kmp_stats_list; #include #endif -#include "kmp_version.h" #include "kmp_debug.h" #include "kmp_lock.h" +#include "kmp_version.h" #if USE_DEBUGGER #include "kmp_debugger.h" #endif @@ -112,14 +111,14 @@ class kmp_stats_list; #include "kmp_wrapper_malloc.h" #if KMP_OS_UNIX -# include -# if !defined NSIG && defined _NSIG -# define NSIG _NSIG -# endif +#include +#if !defined NSIG && defined _NSIG +#define NSIG _NSIG +#endif #endif #if KMP_OS_LINUX -# pragma weak clock_gettime +#pragma weak clock_gettime #endif #if OMPT_SUPPORT @@ -128,7 +127,7 @@ class kmp_stats_list; /*Select data placement in NUMA memory */ #define NO_FIRST_TOUCH 0 -#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ +#define FIRST_TOUCH 1 /* Exploit SGI's first touch page placement algo */ /* If not specified on compile command line, assume no first touch */ #ifndef BUILD_MEMORY @@ -136,27 +135,28 @@ class kmp_stats_list; #endif // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. -// 3 - fast allocation using sync, non-sync free lists of any size, non-self free lists of limited size. +// 3 - fast allocation using sync, non-sync free lists of any size, non-self +// free lists of limited size. 
#ifndef USE_FAST_MEMORY #define USE_FAST_MEMORY 3 #endif #ifndef KMP_NESTED_HOT_TEAMS -# define KMP_NESTED_HOT_TEAMS 0 -# define USE_NESTED_HOT_ARG(x) +#define KMP_NESTED_HOT_TEAMS 0 +#define USE_NESTED_HOT_ARG(x) +#else +#if KMP_NESTED_HOT_TEAMS +#if OMP_40_ENABLED +#define USE_NESTED_HOT_ARG(x) , x #else -# if KMP_NESTED_HOT_TEAMS -# if OMP_40_ENABLED -# define USE_NESTED_HOT_ARG(x) ,x -# else // Nested hot teams feature depends on omp 4.0, disable it for earlier versions -# undef KMP_NESTED_HOT_TEAMS -# define KMP_NESTED_HOT_TEAMS 0 -# define USE_NESTED_HOT_ARG(x) -# endif -# else -# define USE_NESTED_HOT_ARG(x) -# endif +#undef KMP_NESTED_HOT_TEAMS +#define KMP_NESTED_HOT_TEAMS 0 +#define USE_NESTED_HOT_ARG(x) +#endif +#else +#define USE_NESTED_HOT_ARG(x) +#endif #endif // Assume using BGET compare_exchange instruction instead of lock by default. @@ -177,129 +177,134 @@ class kmp_stats_list; @{ */ -// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous enum would do it...) +// FIXME DOXYGEN... need to group these flags somehow (Making them an anonymous +// enum would do it...) /*! Values for bit flags used in the ident_t to describe the fields. */ /*! Use trampoline for internal microtasks */ -#define KMP_IDENT_IMB 0x01 +#define KMP_IDENT_IMB 0x01 /*! Use c-style ident structure */ -#define KMP_IDENT_KMPC 0x02 +#define KMP_IDENT_KMPC 0x02 /* 0x04 is no longer used */ /*! Entry point generated by auto-parallelization */ -#define KMP_IDENT_AUTOPAR 0x08 +#define KMP_IDENT_AUTOPAR 0x08 /*! Compiler generates atomic reduction option for kmpc_reduce* */ -#define KMP_IDENT_ATOMIC_REDUCE 0x10 +#define KMP_IDENT_ATOMIC_REDUCE 0x10 /*! To mark a 'barrier' directive in user code */ -#define KMP_IDENT_BARRIER_EXPL 0x20 +#define KMP_IDENT_BARRIER_EXPL 0x20 /*! To Mark implicit barriers. */ -#define KMP_IDENT_BARRIER_IMPL 0x0040 -#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0 -#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040 -#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0 +#define KMP_IDENT_BARRIER_IMPL 0x0040 +#define KMP_IDENT_BARRIER_IMPL_MASK 0x01C0 +#define KMP_IDENT_BARRIER_IMPL_FOR 0x0040 +#define KMP_IDENT_BARRIER_IMPL_SECTIONS 0x00C0 -#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 +#define KMP_IDENT_BARRIER_IMPL_SINGLE 0x0140 #define KMP_IDENT_BARRIER_IMPL_WORKSHARE 0x01C0 /*! * The ident structure that describes a source location. */ typedef struct ident { - kmp_int32 reserved_1; /**< might be used in Fortran; see above */ - kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC identifies this union member */ - kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ + kmp_int32 reserved_1; /**< might be used in Fortran; see above */ + kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC + identifies this union member */ + kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ #if USE_ITT_BUILD - /* but currently used for storing region-specific ITT */ - /* contextual information. */ +/* but currently used for storing region-specific ITT */ +/* contextual information. */ #endif /* USE_ITT_BUILD */ - kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ - char const *psource; /**< String describing the source location. - The string is composed of semi-colon separated fields which describe the source file, - the function and a pair of line numbers that delimit the construct. 
- */ + kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ + char const *psource; /**< String describing the source location. + The string is composed of semi-colon separated fields + which describe the source file, the function and a pair + of line numbers that delimit the construct. */ } ident_t; /*! @} */ // Some forward declarations. - -typedef union kmp_team kmp_team_t; -typedef struct kmp_taskdata kmp_taskdata_t; -typedef union kmp_task_team kmp_task_team_t; -typedef union kmp_team kmp_team_p; -typedef union kmp_info kmp_info_p; -typedef union kmp_root kmp_root_p; +typedef union kmp_team kmp_team_t; +typedef struct kmp_taskdata kmp_taskdata_t; +typedef union kmp_task_team kmp_task_team_t; +typedef union kmp_team kmp_team_p; +typedef union kmp_info kmp_info_p; +typedef union kmp_root kmp_root_p; #ifdef __cplusplus extern "C" { #endif /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ /* Pack two 32-bit signed integers into a 64-bit signed integer */ /* ToDo: Fix word ordering for big-endian machines. */ -#define KMP_PACK_64(HIGH_32,LOW_32) \ - ( (kmp_int64) ((((kmp_uint64)(HIGH_32))<<32) | (kmp_uint64)(LOW_32)) ) +#define KMP_PACK_64(HIGH_32, LOW_32) \ + ((kmp_int64)((((kmp_uint64)(HIGH_32)) << 32) | (kmp_uint64)(LOW_32))) + +// Generic string manipulation macros. Assume that _x is of type char * +#define SKIP_WS(_x) \ + { \ + while (*(_x) == ' ' || *(_x) == '\t') \ + (_x)++; \ + } +#define SKIP_DIGITS(_x) \ + { \ + while (*(_x) >= '0' && *(_x) <= '9') \ + (_x)++; \ + } +#define SKIP_TO(_x, _c) \ + { \ + while (*(_x) != '\0' && *(_x) != (_c)) \ + (_x)++; \ + } - -/* - * Generic string manipulation macros. - * Assume that _x is of type char * - */ -#define SKIP_WS(_x) { while (*(_x) == ' ' || *(_x) == '\t') (_x)++; } -#define SKIP_DIGITS(_x) { while (*(_x) >= '0' && *(_x) <= '9') (_x)++; } -#define SKIP_TO(_x,_c) { while (*(_x) != '\0' && *(_x) != (_c)) (_x)++; } - -/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -#define KMP_MAX( x, y ) ( (x) > (y) ? (x) : (y) ) -#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) +#define KMP_MAX(x, y) ((x) > (y) ? (x) : (y)) +#define KMP_MIN(x, y) ((x) < (y) ? 
(x) : (y)) /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - /* Enumeration types */ enum kmp_state_timer { - ts_stop, - ts_start, - ts_pause, + ts_stop, + ts_start, + ts_pause, - ts_last_state + ts_last_state }; enum dynamic_mode { - dynamic_default, + dynamic_default, #ifdef USE_LOAD_BALANCE - dynamic_load_balance, + dynamic_load_balance, #endif /* USE_LOAD_BALANCE */ - dynamic_random, - dynamic_thread_limit, - dynamic_max + dynamic_random, + dynamic_thread_limit, + dynamic_max }; -/* external schedule constants, duplicate enum omp_sched in omp.h in order to not include it here */ +/* external schedule constants, duplicate enum omp_sched in omp.h in order to + * not include it here */ #ifndef KMP_SCHED_TYPE_DEFINED #define KMP_SCHED_TYPE_DEFINED typedef enum kmp_sched { - kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check - // Note: need to adjust __kmp_sch_map global array in case this enum is changed - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_upper_std = 5, // upper bound for standard schedules - kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules - kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) + kmp_sched_lower = 0, // lower and upper bounds are for routine parameter check + // Note: need to adjust __kmp_sch_map global array in case enum is changed + kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) + kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) + kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) + kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) + kmp_sched_upper_std = 5, // upper bound for standard schedules + kmp_sched_lower_ext = 100, // lower bound of Intel extension schedules + kmp_sched_trapezoidal = 101, // mapped to kmp_sch_trapezoidal (39) #if KMP_STATIC_STEAL_ENABLED - kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) + kmp_sched_static_steal = 102, // mapped to kmp_sch_static_steal (44) #endif - kmp_sched_upper, - kmp_sched_default = kmp_sched_static // default scheduling + kmp_sched_upper, + kmp_sched_default = kmp_sched_static // default scheduling } kmp_sched_t; #endif @@ -308,149 +313,148 @@ typedef enum kmp_sched { * Describes the loop schedule to be used for a parallel for loop. 
*/ enum sched_type { - kmp_sch_lower = 32, /**< lower bound for unordered values */ - kmp_sch_static_chunked = 33, - kmp_sch_static = 34, /**< static unspecialized */ - kmp_sch_dynamic_chunked = 35, - kmp_sch_guided_chunked = 36, /**< guided unspecialized */ - kmp_sch_runtime = 37, - kmp_sch_auto = 38, /**< auto */ - kmp_sch_trapezoidal = 39, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_static_greedy = 40, - kmp_sch_static_balanced = 41, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_guided_iterative_chunked = 42, - kmp_sch_guided_analytical_chunked = 43, - - kmp_sch_static_steal = 44, /**< accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_lower = 32, /**< lower bound for unordered values */ + kmp_sch_static_chunked = 33, + kmp_sch_static = 34, /**< static unspecialized */ + kmp_sch_dynamic_chunked = 35, + kmp_sch_guided_chunked = 36, /**< guided unspecialized */ + kmp_sch_runtime = 37, + kmp_sch_auto = 38, /**< auto */ + kmp_sch_trapezoidal = 39, + + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_static_greedy = 40, + kmp_sch_static_balanced = 41, + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_guided_iterative_chunked = 42, + kmp_sch_guided_analytical_chunked = 43, + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_static_steal = 44, #if OMP_45_ENABLED - kmp_sch_static_balanced_chunked = 45, /**< static with chunk adjustment (e.g., simd) */ + /* static with chunk adjustment (e.g., simd) */ + kmp_sch_static_balanced_chunked = 45, #endif - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_sch_upper = 46, /**< upper bound for unordered values */ + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_sch_upper = 46, /**< upper bound for unordered values */ - kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ - kmp_ord_static_chunked = 65, - kmp_ord_static = 66, /**< ordered static unspecialized */ - kmp_ord_dynamic_chunked = 67, - kmp_ord_guided_chunked = 68, - kmp_ord_runtime = 69, - kmp_ord_auto = 70, /**< ordered auto */ - kmp_ord_trapezoidal = 71, - kmp_ord_upper = 72, /**< upper bound for ordered values */ + kmp_ord_lower = 64, /**< lower bound for ordered values, must be power of 2 */ + kmp_ord_static_chunked = 65, + kmp_ord_static = 66, /**< ordered static unspecialized */ + kmp_ord_dynamic_chunked = 67, + kmp_ord_guided_chunked = 68, + kmp_ord_runtime = 69, + kmp_ord_auto = 70, /**< ordered auto */ + kmp_ord_trapezoidal = 71, + kmp_ord_upper = 72, /**< upper bound for ordered values */ #if OMP_40_ENABLED - /* Schedules for Distribute construct */ - kmp_distribute_static_chunked = 91, /**< distribute static chunked */ - kmp_distribute_static = 92, /**< distribute static unspecialized */ -#endif - - /* - * For the "nomerge" versions, kmp_dispatch_next*() will always return - * a single iteration/chunk, even if the loop is serialized. For the - * schedule types listed above, the entire iteration vector is returned - * if the loop is serialized. This doesn't work for gcc/gcomp sections. 
- */ - kmp_nm_lower = 160, /**< lower bound for nomerge values */ - - kmp_nm_static_chunked = (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), - kmp_nm_static = 162, /**< static unspecialized */ - kmp_nm_dynamic_chunked = 163, - kmp_nm_guided_chunked = 164, /**< guided unspecialized */ - kmp_nm_runtime = 165, - kmp_nm_auto = 166, /**< auto */ - kmp_nm_trapezoidal = 167, - - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_static_greedy = 168, - kmp_nm_static_balanced = 169, - /* accessible only through KMP_SCHEDULE environment variable */ - kmp_nm_guided_iterative_chunked = 170, - kmp_nm_guided_analytical_chunked = 171, - kmp_nm_static_steal = 172, /* accessible only through OMP_SCHEDULE environment variable */ - - kmp_nm_ord_static_chunked = 193, - kmp_nm_ord_static = 194, /**< ordered static unspecialized */ - kmp_nm_ord_dynamic_chunked = 195, - kmp_nm_ord_guided_chunked = 196, - kmp_nm_ord_runtime = 197, - kmp_nm_ord_auto = 198, /**< auto */ - kmp_nm_ord_trapezoidal = 199, - kmp_nm_upper = 200, /**< upper bound for nomerge values */ + /* Schedules for Distribute construct */ + kmp_distribute_static_chunked = 91, /**< distribute static chunked */ + kmp_distribute_static = 92, /**< distribute static unspecialized */ +#endif + + /* For the "nomerge" versions, kmp_dispatch_next*() will always return a + single iteration/chunk, even if the loop is serialized. For the schedule + types listed above, the entire iteration vector is returned if the loop is + serialized. This doesn't work for gcc/gcomp sections. */ + kmp_nm_lower = 160, /**< lower bound for nomerge values */ + + kmp_nm_static_chunked = + (kmp_sch_static_chunked - kmp_sch_lower + kmp_nm_lower), + kmp_nm_static = 162, /**< static unspecialized */ + kmp_nm_dynamic_chunked = 163, + kmp_nm_guided_chunked = 164, /**< guided unspecialized */ + kmp_nm_runtime = 165, + kmp_nm_auto = 166, /**< auto */ + kmp_nm_trapezoidal = 167, + + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_nm_static_greedy = 168, + kmp_nm_static_balanced = 169, + /* accessible only through KMP_SCHEDULE environment variable */ + kmp_nm_guided_iterative_chunked = 170, + kmp_nm_guided_analytical_chunked = 171, + kmp_nm_static_steal = + 172, /* accessible only through OMP_SCHEDULE environment variable */ + + kmp_nm_ord_static_chunked = 193, + kmp_nm_ord_static = 194, /**< ordered static unspecialized */ + kmp_nm_ord_dynamic_chunked = 195, + kmp_nm_ord_guided_chunked = 196, + kmp_nm_ord_runtime = 197, + kmp_nm_ord_auto = 198, /**< auto */ + kmp_nm_ord_trapezoidal = 199, + kmp_nm_upper = 200, /**< upper bound for nomerge values */ #if OMP_45_ENABLED - /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. - * Since we need to distinguish the three possible cases (no modifier, monotonic modifier, - * nonmonotonic modifier), we need separate bits for each modifier. - * The absence of monotonic does not imply nonmonotonic, especially since 4.5 says - * that the behaviour of the "no modifier" case is implementation defined in 4.5, - * but will become "nonmonotonic" in 5.0. - * - * Since we're passing a full 32 bit value, we can use a couple of high bits for these - * flags; out of paranoia we avoid the sign bit. - * - * These modifiers can be or-ed into non-static schedules by the compiler to pass - * the additional information. - * They will be stripped early in the processing in __kmp_dispatch_init when setting up schedules, so - * most of the code won't ever see schedules with these bits set. 
- */ - kmp_sch_modifier_monotonic = (1<<29), /**< Set if the monotonic schedule modifier was present */ - kmp_sch_modifier_nonmonotonic = (1<<30), /**< Set if the nonmonotonic schedule modifier was present */ - -# define SCHEDULE_WITHOUT_MODIFIERS(s) (enum sched_type)((s) & ~ (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) -# define SCHEDULE_HAS_MONOTONIC(s) (((s) & kmp_sch_modifier_monotonic) != 0) -# define SCHEDULE_HAS_NONMONOTONIC(s) (((s) & kmp_sch_modifier_nonmonotonic) != 0) -# define SCHEDULE_HAS_NO_MODIFIERS(s) (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) + /* Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers. Since + we need to distinguish the three possible cases (no modifier, monotonic + modifier, nonmonotonic modifier), we need separate bits for each modifier. + The absence of monotonic does not imply nonmonotonic, especially since 4.5 + says that the behaviour of the "no modifier" case is implementation defined + in 4.5, but will become "nonmonotonic" in 5.0. + + Since we're passing a full 32 bit value, we can use a couple of high bits + for these flags; out of paranoia we avoid the sign bit. + + These modifiers can be or-ed into non-static schedules by the compiler to + pass the additional information. They will be stripped early in the + processing in __kmp_dispatch_init when setting up schedules, so most of the + code won't ever see schedules with these bits set. */ + kmp_sch_modifier_monotonic = + (1 << 29), /**< Set if the monotonic schedule modifier was present */ + kmp_sch_modifier_nonmonotonic = + (1 << 30), /**< Set if the nonmonotonic schedule modifier was present */ + +#define SCHEDULE_WITHOUT_MODIFIERS(s) \ + (enum sched_type)( \ + (s) & ~(kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) +#define SCHEDULE_HAS_MONOTONIC(s) (((s)&kmp_sch_modifier_monotonic) != 0) +#define SCHEDULE_HAS_NONMONOTONIC(s) (((s)&kmp_sch_modifier_nonmonotonic) != 0) +#define SCHEDULE_HAS_NO_MODIFIERS(s) \ + (((s) & (kmp_sch_modifier_nonmonotonic | kmp_sch_modifier_monotonic)) == 0) #else - /* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers can now eliminate tests on compile time - * constants and dead code that results from them, so we can leave code guarded by such an if in place. - */ -# define SCHEDULE_WITHOUT_MODIFIERS(s) (s) -# define SCHEDULE_HAS_MONOTONIC(s) false -# define SCHEDULE_HAS_NONMONOTONIC(s) false -# define SCHEDULE_HAS_NO_MODIFIERS(s) true +/* By doing this we hope to avoid multiple tests on OMP_45_ENABLED. Compilers + can now eliminate tests on compile time constants and dead code that results + from them, so we can leave code guarded by such an if in place. 
*/ +#define SCHEDULE_WITHOUT_MODIFIERS(s) (s) +#define SCHEDULE_HAS_MONOTONIC(s) false +#define SCHEDULE_HAS_NONMONOTONIC(s) false +#define SCHEDULE_HAS_NO_MODIFIERS(s) true #endif - kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ + kmp_sch_default = kmp_sch_static /**< default scheduling algorithm */ }; /* Type to keep runtime schedule set via OMP_SCHEDULE or omp_set_schedule() */ typedef struct kmp_r_sched { - enum sched_type r_sched_type; - int chunk; + enum sched_type r_sched_type; + int chunk; } kmp_r_sched_t; -extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our internal schedule types +extern enum sched_type __kmp_sch_map[]; // map OMP 3.0 schedule types with our +// internal schedule types enum library_type { - library_none, - library_serial, - library_turnaround, - library_throughput + library_none, + library_serial, + library_turnaround, + library_throughput }; #if KMP_OS_LINUX enum clock_function_type { - clock_function_gettimeofday, - clock_function_clock_gettime + clock_function_gettimeofday, + clock_function_clock_gettime }; #endif /* KMP_OS_LINUX */ #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) -enum mic_type { - non_mic, - mic1, - mic2, - mic3, - dummy -}; +enum mic_type { non_mic, mic1, mic2, mic3, dummy }; #endif -/* ------------------------------------------------------------------------ */ /* -- fast reduction stuff ------------------------------------------------ */ #undef KMP_FAST_REDUCTION_BARRIER @@ -458,97 +462,94 @@ enum mic_type { #undef KMP_FAST_REDUCTION_CORE_DUO #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - #define KMP_FAST_REDUCTION_CORE_DUO 1 +#define KMP_FAST_REDUCTION_CORE_DUO 1 #endif enum _reduction_method { - reduction_method_not_defined = 0, - critical_reduce_block = ( 1 << 8 ), - atomic_reduce_block = ( 2 << 8 ), - tree_reduce_block = ( 3 << 8 ), - empty_reduce_block = ( 4 << 8 ) + reduction_method_not_defined = 0, + critical_reduce_block = (1 << 8), + atomic_reduce_block = (2 << 8), + tree_reduce_block = (3 << 8), + empty_reduce_block = (4 << 8) }; -// description of the packed_reduction_method variable -// the packed_reduction_method variable consists of two enum types variables that are packed together into 0-th byte and 1-st byte: -// 0: ( packed_reduction_method & 0x000000FF ) is a 'enum barrier_type' value of barrier that will be used in fast reduction: bs_plain_barrier or bs_reduction_barrier -// 1: ( packed_reduction_method & 0x0000FF00 ) is a reduction method that will be used in fast reduction; -// reduction method is of 'enum _reduction_method' type and it's defined the way so that the bits of 0-th byte are empty, -// so no need to execute a shift instruction while packing/unpacking +// Description of the packed_reduction_method variable: +// The packed_reduction_method variable consists of two enum types variables +// that are packed together into 0-th byte and 1-st byte: +// 0: (packed_reduction_method & 0x000000FF) is a 'enum barrier_type' value of +// barrier that will be used in fast reduction: bs_plain_barrier or +// bs_reduction_barrier +// 1: (packed_reduction_method & 0x0000FF00) is a reduction method that will +// be used in fast reduction; +// Reduction method is of 'enum _reduction_method' type and it's defined the way +// so that the bits of 0-th byte are empty, so no need to execute a shift +// instruction while packing/unpacking #if KMP_FAST_REDUCTION_BARRIER - #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ - ( ( reduction_method ) | ( barrier_type 
) ) +#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ + ((reduction_method) | (barrier_type)) - #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - ( ( enum _reduction_method )( ( packed_reduction_method ) & ( 0x0000FF00 ) ) ) +#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ + ((enum _reduction_method)((packed_reduction_method) & (0x0000FF00))) - #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ - ( ( enum barrier_type )( ( packed_reduction_method ) & ( 0x000000FF ) ) ) +#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ + ((enum barrier_type)((packed_reduction_method) & (0x000000FF))) #else - #define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method,barrier_type) \ - ( reduction_method ) +#define PACK_REDUCTION_METHOD_AND_BARRIER(reduction_method, barrier_type) \ + (reduction_method) - #define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ - ( packed_reduction_method ) +#define UNPACK_REDUCTION_METHOD(packed_reduction_method) \ + (packed_reduction_method) - #define UNPACK_REDUCTION_BARRIER(packed_reduction_method) \ - ( bs_plain_barrier ) +#define UNPACK_REDUCTION_BARRIER(packed_reduction_method) (bs_plain_barrier) #endif -#define TEST_REDUCTION_METHOD(packed_reduction_method,which_reduction_block) \ - ( ( UNPACK_REDUCTION_METHOD( packed_reduction_method ) ) == ( which_reduction_block ) ) +#define TEST_REDUCTION_METHOD(packed_reduction_method, which_reduction_block) \ + ((UNPACK_REDUCTION_METHOD(packed_reduction_method)) == \ + (which_reduction_block)) #if KMP_FAST_REDUCTION_BARRIER - #define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ - ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_reduction_barrier ) ) +#define TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER \ + (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_reduction_barrier)) - #define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ - ( PACK_REDUCTION_METHOD_AND_BARRIER( tree_reduce_block, bs_plain_barrier ) ) +#define TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER \ + (PACK_REDUCTION_METHOD_AND_BARRIER(tree_reduce_block, bs_plain_barrier)) #endif typedef int PACKED_REDUCTION_METHOD_T; /* -- end of fast reduction stuff ----------------------------------------- */ -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #if KMP_OS_WINDOWS -# define USE_CBLKDATA -# pragma warning( push ) -# pragma warning( disable: 271 310 ) -# include -# pragma warning( pop ) +#define USE_CBLKDATA +#pragma warning(push) +#pragma warning(disable : 271 310) +#include +#pragma warning(pop) #endif #if KMP_OS_UNIX -# include -# include +#include +#include #endif -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* - * Only Linux* OS and Windows* OS support thread affinity. - */ +/* Only Linux* OS and Windows* OS support thread affinity. */ #if KMP_AFFINITY_SUPPORTED // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). 
-# if KMP_OS_WINDOWS -# if _MSC_VER < 1600 +#if KMP_OS_WINDOWS +#if _MSC_VER < 1600 typedef struct GROUP_AFFINITY { - KAFFINITY Mask; - WORD Group; - WORD Reserved[3]; + KAFFINITY Mask; + WORD Group; + WORD Reserved[3]; } GROUP_AFFINITY; -# endif /* _MSC_VER < 1600 */ -# if KMP_GROUP_AFFINITY +#endif /* _MSC_VER < 1600 */ +#if KMP_GROUP_AFFINITY extern int __kmp_num_proc_groups; -# else +#else static const int __kmp_num_proc_groups = 1; -# endif /* KMP_GROUP_AFFINITY */ +#endif /* KMP_GROUP_AFFINITY */ typedef DWORD (*kmp_GetActiveProcessorCount_t)(WORD); extern kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount; @@ -558,164 +559,170 @@ extern kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount; typedef BOOL (*kmp_GetThreadGroupAffinity_t)(HANDLE, GROUP_AFFINITY *); extern kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity; -typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, GROUP_AFFINITY *); +typedef BOOL (*kmp_SetThreadGroupAffinity_t)(HANDLE, const GROUP_AFFINITY *, + GROUP_AFFINITY *); extern kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity; -# endif /* KMP_OS_WINDOWS */ +#endif /* KMP_OS_WINDOWS */ -# if KMP_USE_HWLOC +#if KMP_USE_HWLOC extern hwloc_topology_t __kmp_hwloc_topology; extern int __kmp_hwloc_error; -# endif +#endif extern size_t __kmp_affin_mask_size; -# define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) -# define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) -# define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) -# define KMP_CPU_SET_ITERATE(i,mask) \ - for (i = (mask)->begin(); i != (mask)->end() ; i = (mask)->next(i)) -# define KMP_CPU_SET(i,mask) (mask)->set(i) -# define KMP_CPU_ISSET(i,mask) (mask)->is_set(i) -# define KMP_CPU_CLR(i,mask) (mask)->clear(i) -# define KMP_CPU_ZERO(mask) (mask)->zero() -# define KMP_CPU_COPY(dest, src) (dest)->copy(src) -# define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) -# define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() -# define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) -# define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) -# define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) -# define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) -# define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) -# define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) -# define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) -# define KMP_CPU_INDEX(arr,i) __kmp_affinity_dispatch->index_mask_array(arr, i) -# define KMP_CPU_ALLOC_ARRAY(arr, n) (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) -# define KMP_CPU_FREE_ARRAY(arr, n) __kmp_affinity_dispatch->deallocate_mask_array(arr) -# define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) -# define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) -# define __kmp_get_system_affinity(mask, abort_bool) (mask)->get_system_affinity(abort_bool) -# define __kmp_set_system_affinity(mask, abort_bool) (mask)->set_system_affinity(abort_bool) -# define __kmp_get_proc_group(mask) (mask)->get_proc_group() +#define KMP_AFFINITY_CAPABLE() (__kmp_affin_mask_size > 0) +#define KMP_AFFINITY_DISABLE() (__kmp_affin_mask_size = 0) +#define KMP_AFFINITY_ENABLE(mask_size) (__kmp_affin_mask_size = mask_size) +#define KMP_CPU_SET_ITERATE(i, mask) \ + for (i = (mask)->begin(); i != (mask)->end(); i = (mask)->next(i)) +#define KMP_CPU_SET(i, mask) (mask)->set(i) +#define KMP_CPU_ISSET(i, mask) (mask)->is_set(i) +#define 
KMP_CPU_CLR(i, mask) (mask)->clear(i) +#define KMP_CPU_ZERO(mask) (mask)->zero() +#define KMP_CPU_COPY(dest, src) (dest)->copy(src) +#define KMP_CPU_AND(dest, src) (dest)->bitwise_and(src) +#define KMP_CPU_COMPLEMENT(max_bit_number, mask) (mask)->bitwise_not() +#define KMP_CPU_UNION(dest, src) (dest)->bitwise_or(src) +#define KMP_CPU_ALLOC(ptr) (ptr = __kmp_affinity_dispatch->allocate_mask()) +#define KMP_CPU_FREE(ptr) __kmp_affinity_dispatch->deallocate_mask(ptr) +#define KMP_CPU_ALLOC_ON_STACK(ptr) KMP_CPU_ALLOC(ptr) +#define KMP_CPU_FREE_FROM_STACK(ptr) KMP_CPU_FREE(ptr) +#define KMP_CPU_INTERNAL_ALLOC(ptr) KMP_CPU_ALLOC(ptr) +#define KMP_CPU_INTERNAL_FREE(ptr) KMP_CPU_FREE(ptr) +#define KMP_CPU_INDEX(arr, i) __kmp_affinity_dispatch->index_mask_array(arr, i) +#define KMP_CPU_ALLOC_ARRAY(arr, n) \ + (arr = __kmp_affinity_dispatch->allocate_mask_array(n)) +#define KMP_CPU_FREE_ARRAY(arr, n) \ + __kmp_affinity_dispatch->deallocate_mask_array(arr) +#define KMP_CPU_INTERNAL_ALLOC_ARRAY(arr, n) KMP_CPU_ALLOC_ARRAY(arr, n) +#define KMP_CPU_INTERNAL_FREE_ARRAY(arr, n) KMP_CPU_FREE_ARRAY(arr, n) +#define __kmp_get_system_affinity(mask, abort_bool) \ + (mask)->get_system_affinity(abort_bool) +#define __kmp_set_system_affinity(mask, abort_bool) \ + (mask)->set_system_affinity(abort_bool) +#define __kmp_get_proc_group(mask) (mask)->get_proc_group() class KMPAffinity { public: - class Mask { - public: - void* operator new(size_t n); - void operator delete(void* p); - void* operator new[](size_t n); - void operator delete[](void* p); - virtual ~Mask() {} - // Set bit i to 1 - virtual void set(int i) {} - // Return bit i - virtual bool is_set(int i) const { return false; } - // Set bit i to 0 - virtual void clear(int i) {} - // Zero out entire mask - virtual void zero() {} - // Copy src into this mask - virtual void copy(const Mask* src) {} - // this &= rhs - virtual void bitwise_and(const Mask* rhs) {} - // this |= rhs - virtual void bitwise_or(const Mask* rhs) {} - // this = ~this - virtual void bitwise_not() {} - // API for iterating over an affinity mask - // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) - virtual int begin() const { return 0; } - virtual int end() const { return 0; } - virtual int next(int previous) const { return 0; } - // Set the system's affinity to this affinity mask's value - virtual int set_system_affinity(bool abort_on_error) const { return -1; } - // Set this affinity mask to the current system affinity - virtual int get_system_affinity(bool abort_on_error) { return -1; } - // Only 1 DWORD in the mask should have any procs set. - // Return the appropriate index, or -1 for an invalid mask. 
- virtual int get_proc_group() const { return -1; } - }; - void* operator new(size_t n); - void operator delete(void* p); - // Need virtual destructor - virtual ~KMPAffinity() = default; - // Determine if affinity is capable - virtual void determine_capable(const char* env_var) {} - // Bind the current thread to os proc - virtual void bind_thread(int proc) {} - // Factory functions to allocate/deallocate a mask - virtual Mask* allocate_mask() { return nullptr; } - virtual void deallocate_mask(Mask* m) { } - virtual Mask* allocate_mask_array(int num) { return nullptr; } - virtual void deallocate_mask_array(Mask* m) { } - virtual Mask* index_mask_array(Mask* m, int index) { return nullptr; } - static void pick_api(); - static void destroy_api(); - enum api_type { - NATIVE_OS + class Mask { + public: + void *operator new(size_t n); + void operator delete(void *p); + void *operator new[](size_t n); + void operator delete[](void *p); + virtual ~Mask() {} + // Set bit i to 1 + virtual void set(int i) {} + // Return bit i + virtual bool is_set(int i) const { return false; } + // Set bit i to 0 + virtual void clear(int i) {} + // Zero out entire mask + virtual void zero() {} + // Copy src into this mask + virtual void copy(const Mask *src) {} + // this &= rhs + virtual void bitwise_and(const Mask *rhs) {} + // this |= rhs + virtual void bitwise_or(const Mask *rhs) {} + // this = ~this + virtual void bitwise_not() {} + // API for iterating over an affinity mask + // for (int i = mask->begin(); i != mask->end(); i = mask->next(i)) + virtual int begin() const { return 0; } + virtual int end() const { return 0; } + virtual int next(int previous) const { return 0; } + // Set the system's affinity to this affinity mask's value + virtual int set_system_affinity(bool abort_on_error) const { return -1; } + // Set this affinity mask to the current system affinity + virtual int get_system_affinity(bool abort_on_error) { return -1; } + // Only 1 DWORD in the mask should have any procs set. + // Return the appropriate index, or -1 for an invalid mask. + virtual int get_proc_group() const { return -1; } + }; + void *operator new(size_t n); + void operator delete(void *p); + // Need virtual destructor + virtual ~KMPAffinity() = default; + // Determine if affinity is capable + virtual void determine_capable(const char *env_var) {} + // Bind the current thread to os proc + virtual void bind_thread(int proc) {} + // Factory functions to allocate/deallocate a mask + virtual Mask *allocate_mask() { return nullptr; } + virtual void deallocate_mask(Mask *m) {} + virtual Mask *allocate_mask_array(int num) { return nullptr; } + virtual void deallocate_mask_array(Mask *m) {} + virtual Mask *index_mask_array(Mask *m, int index) { return nullptr; } + static void pick_api(); + static void destroy_api(); + enum api_type { + NATIVE_OS #if KMP_USE_HWLOC - , HWLOC + , + HWLOC #endif - }; - virtual api_type get_api_type() const { KMP_ASSERT(0); return NATIVE_OS; }; + }; + virtual api_type get_api_type() const { + KMP_ASSERT(0); + return NATIVE_OS; + }; + private: - static bool picked_api; + static bool picked_api; }; typedef KMPAffinity::Mask kmp_affin_mask_t; -extern KMPAffinity* __kmp_affinity_dispatch; +extern KMPAffinity *__kmp_affinity_dispatch; -// // Declare local char buffers with this size for printing debug and info // messages, using __kmp_affinity_print_mask(). 
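The KMP_CPU_* macros and the abstract Mask interface above are used in an alloc / set / iterate pattern. The following self-contained toy (a std::bitset stand-in with invented TOY_* names, not the runtime's KMPAffinity implementation) shows the begin()/next()/end() iteration idiom that KMP_CPU_SET_ITERATE expands to.

#include <bitset>
#include <cstdio>

struct ToyMask { // toy stand-in for KMPAffinity::Mask, for illustration only
  std::bitset<64> bits;
  void set(int i) { bits.set(i); }
  bool is_set(int i) const { return bits.test(i); }
  int begin() const { return next(-1); }
  int end() const { return 64; }
  int next(int previous) const {
    for (int i = previous + 1; i < 64; ++i)
      if (bits.test(i))
        return i;
    return end();
  }
};

#define TOY_CPU_SET(i, mask) (mask)->set(i)
#define TOY_CPU_SET_ITERATE(i, mask) \
  for (i = (mask)->begin(); i != (mask)->end(); i = (mask)->next(i))

int main() {
  ToyMask m; // in the runtime, KMP_CPU_ALLOC would obtain this from the dispatcher
  TOY_CPU_SET(2, &m);
  TOY_CPU_SET(5, &m);
  int i;
  TOY_CPU_SET_ITERATE(i, &m) // visits 2, then 5
    printf("proc %d is in the mask\n", i);
  return 0;
}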
-// -#define KMP_AFFIN_MASK_PRINT_LEN 1024 +#define KMP_AFFIN_MASK_PRINT_LEN 1024 enum affinity_type { - affinity_none = 0, - affinity_physical, - affinity_logical, - affinity_compact, - affinity_scatter, - affinity_explicit, - affinity_balanced, - affinity_disabled, // not used outsize the env var parser - affinity_default + affinity_none = 0, + affinity_physical, + affinity_logical, + affinity_compact, + affinity_scatter, + affinity_explicit, + affinity_balanced, + affinity_disabled, // not used outsize the env var parser + affinity_default }; enum affinity_gran { - affinity_gran_fine = 0, - affinity_gran_thread, - affinity_gran_core, - affinity_gran_package, - affinity_gran_node, + affinity_gran_fine = 0, + affinity_gran_thread, + affinity_gran_core, + affinity_gran_package, + affinity_gran_node, #if KMP_GROUP_AFFINITY - // - // The "group" granularity isn't necesssarily coarser than all of the - // other levels, but we put it last in the enum. - // - affinity_gran_group, + // The "group" granularity isn't necesssarily coarser than all of the + // other levels, but we put it last in the enum. + affinity_gran_group, #endif /* KMP_GROUP_AFFINITY */ - affinity_gran_default + affinity_gran_default }; enum affinity_top_method { - affinity_top_method_all = 0, // try all (supported) methods, in order + affinity_top_method_all = 0, // try all (supported) methods, in order #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - affinity_top_method_apicid, - affinity_top_method_x2apicid, + affinity_top_method_apicid, + affinity_top_method_x2apicid, #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too + affinity_top_method_cpuinfo, // KMP_CPUINFO_FILE is usable on Windows* OS, too #if KMP_GROUP_AFFINITY - affinity_top_method_group, + affinity_top_method_group, #endif /* KMP_GROUP_AFFINITY */ - affinity_top_method_flat, + affinity_top_method_flat, #if KMP_USE_HWLOC - affinity_top_method_hwloc, + affinity_top_method_hwloc, #endif - affinity_top_method_default + affinity_top_method_default }; -#define affinity_respect_mask_default (-1) +#define affinity_respect_mask_default (-1) extern enum affinity_type __kmp_affinity_type; /* Affinity type */ extern enum affinity_gran __kmp_affinity_gran; /* Affinity granularity */ @@ -726,57 +733,54 @@ extern int __kmp_affinity_compact; /* Affinity 'compact' value */ extern int __kmp_affinity_offset; /* Affinity offset value */ extern int __kmp_affinity_verbose; /* Was verbose specified for KMP_AFFINITY? */ extern int __kmp_affinity_warnings; /* KMP_AFFINITY warnings enabled ? */ -extern int __kmp_affinity_respect_mask; /* Respect process' initial affinity mask? */ -extern char * __kmp_affinity_proclist; /* proc ID list */ +extern int __kmp_affinity_respect_mask; // Respect process' init affinity mask? +extern char *__kmp_affinity_proclist; /* proc ID list */ extern kmp_affin_mask_t *__kmp_affinity_masks; extern unsigned __kmp_affinity_num_masks; extern void __kmp_affinity_bind_thread(int which); extern kmp_affin_mask_t *__kmp_affin_fullMask; -extern char const * __kmp_cpuinfo_file; +extern char const *__kmp_cpuinfo_file; #endif /* KMP_AFFINITY_SUPPORTED */ #if OMP_40_ENABLED -// // This needs to be kept in sync with the values in omp.h !!! 
-// typedef enum kmp_proc_bind_t { - proc_bind_false = 0, - proc_bind_true, - proc_bind_master, - proc_bind_close, - proc_bind_spread, - proc_bind_intel, // use KMP_AFFINITY interface - proc_bind_default + proc_bind_false = 0, + proc_bind_true, + proc_bind_master, + proc_bind_close, + proc_bind_spread, + proc_bind_intel, // use KMP_AFFINITY interface + proc_bind_default } kmp_proc_bind_t; typedef struct kmp_nested_proc_bind_t { - kmp_proc_bind_t *bind_types; - int size; - int used; + kmp_proc_bind_t *bind_types; + int size; + int used; } kmp_nested_proc_bind_t; extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; #endif /* OMP_40_ENABLED */ -# if KMP_AFFINITY_SUPPORTED -# define KMP_PLACE_ALL (-1) -# define KMP_PLACE_UNDEFINED (-2) -# endif /* KMP_AFFINITY_SUPPORTED */ +#if KMP_AFFINITY_SUPPORTED +#define KMP_PLACE_ALL (-1) +#define KMP_PLACE_UNDEFINED (-2) +#endif /* KMP_AFFINITY_SUPPORTED */ extern int __kmp_affinity_num_places; - #if OMP_40_ENABLED typedef enum kmp_cancel_kind_t { - cancel_noreq = 0, - cancel_parallel = 1, - cancel_loop = 2, - cancel_sections = 3, - cancel_taskgroup = 4 + cancel_noreq = 0, + cancel_parallel = 1, + cancel_loop = 2, + cancel_sections = 3, + cancel_taskgroup = 4 } kmp_cancel_kind_t; #endif // OMP_40_ENABLED @@ -795,167 +799,176 @@ extern int __kmp_hws_requested; extern int __kmp_hws_abs_flag; // absolute or per-item number requested /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) +#define KMP_PAD(type, sz) \ + (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) -// // We need to avoid using -1 as a GTID as +1 is added to the gtid // when storing it in a lock, and the value 0 is reserved. 
-// -#define KMP_GTID_DNE (-2) /* Does not exist */ -#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ -#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ -#define KMP_GTID_UNKNOWN (-5) /* Is not known */ -#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ +#define KMP_GTID_DNE (-2) /* Does not exist */ +#define KMP_GTID_SHUTDOWN (-3) /* Library is shutting down */ +#define KMP_GTID_MONITOR (-4) /* Monitor thread ID */ +#define KMP_GTID_UNKNOWN (-5) /* Is not known */ +#define KMP_GTID_MIN (-6) /* Minimal gtid for low bound check in DEBUG */ -#define __kmp_get_gtid() __kmp_get_global_thread_id() -#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() +#define __kmp_get_gtid() __kmp_get_global_thread_id() +#define __kmp_entry_gtid() __kmp_get_global_thread_id_reg() -#define __kmp_tid_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ - __kmp_threads[ (gtid) ]->th.th_info.ds.ds_tid ) +#define __kmp_tid_from_gtid(gtid) \ + (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)]->th.th_info.ds.ds_tid) -#define __kmp_get_tid() ( __kmp_tid_from_gtid( __kmp_get_gtid() ) ) -#define __kmp_gtid_from_tid(tid,team) ( KMP_DEBUG_ASSERT( (tid) >= 0 && (team) != NULL ), \ - team -> t.t_threads[ (tid) ] -> th.th_info .ds.ds_gtid ) +#define __kmp_get_tid() (__kmp_tid_from_gtid(__kmp_get_gtid())) +#define __kmp_gtid_from_tid(tid, team) \ + (KMP_DEBUG_ASSERT((tid) >= 0 && (team) != NULL), \ + team->t.t_threads[(tid)]->th.th_info.ds.ds_gtid) -#define __kmp_get_team() ( __kmp_threads[ (__kmp_get_gtid()) ]-> th.th_team ) -#define __kmp_team_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), \ - __kmp_threads[ (gtid) ]-> th.th_team ) +#define __kmp_get_team() (__kmp_threads[(__kmp_get_gtid())]->th.th_team) +#define __kmp_team_from_gtid(gtid) \ + (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)]->th.th_team) -#define __kmp_thread_from_gtid(gtid) ( KMP_DEBUG_ASSERT( (gtid) >= 0 ), __kmp_threads[ (gtid) ] ) -#define __kmp_get_thread() ( __kmp_thread_from_gtid( __kmp_get_gtid() ) ) +#define __kmp_thread_from_gtid(gtid) \ + (KMP_DEBUG_ASSERT((gtid) >= 0), __kmp_threads[(gtid)]) +#define __kmp_get_thread() (__kmp_thread_from_gtid(__kmp_get_gtid())) - // Returns current thread (pointer to kmp_info_t). In contrast to __kmp_get_thread(), it works - // with registered and not-yet-registered threads. -#define __kmp_gtid_from_thread(thr) ( KMP_DEBUG_ASSERT( (thr) != NULL ), \ - (thr)->th.th_info.ds.ds_gtid ) +// Returns current thread (pointer to kmp_info_t). In contrast to +// __kmp_get_thread(), it works with registered and not-yet-registered threads. +#define __kmp_gtid_from_thread(thr) \ + (KMP_DEBUG_ASSERT((thr) != NULL), (thr)->th.th_info.ds.ds_gtid) // AT: Which way is correct? // AT: 1. nproc = __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc; // AT: 2. 
nproc = __kmp_threads[ ( gtid ) ] -> th.th_team_nproc; -#define __kmp_get_team_num_threads(gtid) ( __kmp_threads[ ( gtid ) ] -> th.th_team -> t.t_nproc ) +#define __kmp_get_team_num_threads(gtid) \ + (__kmp_threads[(gtid)]->th.th_team->t.t_nproc) - -/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -#define KMP_UINT64_MAX (~((kmp_uint64)1<<((sizeof(kmp_uint64)*(1<<3))-1))) +#define KMP_UINT64_MAX \ + (~((kmp_uint64)1 << ((sizeof(kmp_uint64) * (1 << 3)) - 1))) -#define KMP_MIN_NTH 1 +#define KMP_MIN_NTH 1 #ifndef KMP_MAX_NTH -# if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX -# define KMP_MAX_NTH PTHREAD_THREADS_MAX -# else -# define KMP_MAX_NTH INT_MAX -# endif +#if defined(PTHREAD_THREADS_MAX) && PTHREAD_THREADS_MAX < INT_MAX +#define KMP_MAX_NTH PTHREAD_THREADS_MAX +#else +#define KMP_MAX_NTH INT_MAX +#endif #endif /* KMP_MAX_NTH */ #ifdef PTHREAD_STACK_MIN -# define KMP_MIN_STKSIZE PTHREAD_STACK_MIN +#define KMP_MIN_STKSIZE PTHREAD_STACK_MIN #else -# define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) +#define KMP_MIN_STKSIZE ((size_t)(32 * 1024)) #endif -#define KMP_MAX_STKSIZE (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) +#define KMP_MAX_STKSIZE (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) #if KMP_ARCH_X86 -# define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) +#define KMP_DEFAULT_STKSIZE ((size_t)(2 * 1024 * 1024)) #elif KMP_ARCH_X86_64 -# define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) -# define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) +#define KMP_DEFAULT_STKSIZE ((size_t)(4 * 1024 * 1024)) +#define KMP_BACKUP_STKSIZE ((size_t)(2 * 1024 * 1024)) #else -# define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) +#define KMP_DEFAULT_STKSIZE ((size_t)(1024 * 1024)) #endif -#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t) (1024 * 1024)) -#define KMP_MIN_MALLOC_POOL_INCR ((size_t) (4 * 1024)) -#define KMP_MAX_MALLOC_POOL_INCR (~((size_t)1<<((sizeof(size_t)*(1<<3))-1))) +#define KMP_DEFAULT_MALLOC_POOL_INCR ((size_t)(1024 * 1024)) +#define KMP_MIN_MALLOC_POOL_INCR ((size_t)(4 * 1024)) +#define KMP_MAX_MALLOC_POOL_INCR \ + (~((size_t)1 << ((sizeof(size_t) * (1 << 3)) - 1))) -#define KMP_MIN_STKOFFSET (0) -#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE +#define KMP_MIN_STKOFFSET (0) +#define KMP_MAX_STKOFFSET KMP_MAX_STKSIZE #if KMP_OS_DARWIN -# define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET +#define KMP_DEFAULT_STKOFFSET KMP_MIN_STKOFFSET #else -# define KMP_DEFAULT_STKOFFSET CACHE_LINE +#define KMP_DEFAULT_STKOFFSET CACHE_LINE #endif -#define KMP_MIN_STKPADDING (0) -#define KMP_MAX_STKPADDING (2 * 1024 * 1024) +#define KMP_MIN_STKPADDING (0) +#define KMP_MAX_STKPADDING (2 * 1024 * 1024) -#define KMP_BLOCKTIME_MULTIPLIER (1000) /* number of blocktime units per second */ -#define KMP_MIN_BLOCKTIME (0) -#define KMP_MAX_BLOCKTIME (INT_MAX) /* Must be this for "infinite" setting the work */ -#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ +#define KMP_BLOCKTIME_MULTIPLIER \ + (1000) /* number of blocktime units per second */ +#define KMP_MIN_BLOCKTIME (0) +#define KMP_MAX_BLOCKTIME \ + (INT_MAX) /* Must be this for "infinite" setting the work */ +#define KMP_DEFAULT_BLOCKTIME (200) /* __kmp_blocktime is in milliseconds */ #if KMP_USE_MONITOR -#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) -#define KMP_MIN_MONITOR_WAKEUPS (1) /* min number of times monitor wakes up per second */ -#define KMP_MAX_MONITOR_WAKEUPS (1000) /* maximum 
number of times monitor can wake up per second */ - -/* Calculate new number of monitor wakeups for a specific block time based on previous monitor_wakeups */ -/* Only allow increasing number of wakeups */ -#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - ( ((blocktime) == KMP_MAX_BLOCKTIME) ? (monitor_wakeups) : \ - ((blocktime) == KMP_MIN_BLOCKTIME) ? KMP_MAX_MONITOR_WAKEUPS : \ - ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) ? (monitor_wakeups) : \ - (KMP_BLOCKTIME_MULTIPLIER) / (blocktime) ) - -/* Calculate number of intervals for a specific block time based on monitor_wakeups */ -#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ - ( ( (blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1 ) / \ - (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) ) +#define KMP_DEFAULT_MONITOR_STKSIZE ((size_t)(64 * 1024)) +#define KMP_MIN_MONITOR_WAKEUPS (1) // min times monitor wakes up per second +#define KMP_MAX_MONITOR_WAKEUPS (1000) // max times monitor can wake up per sec + +/* Calculate new number of monitor wakeups for a specific block time based on + previous monitor_wakeups. Only allow increasing number of wakeups */ +#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ + (((blocktime) == KMP_MAX_BLOCKTIME) \ + ? (monitor_wakeups) \ + : ((blocktime) == KMP_MIN_BLOCKTIME) \ + ? KMP_MAX_MONITOR_WAKEUPS \ + : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime))) \ + ? (monitor_wakeups) \ + : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime)) + +/* Calculate number of intervals for a specific block time based on + monitor_wakeups */ +#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups) \ + (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) / \ + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups))) +#else +#if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) +// HW TSC is used to reduce overhead (clock tick instead of nanosecond). +extern kmp_uint64 __kmp_ticks_per_msec; +#if KMP_COMPILER_ICC +#define KMP_NOW() _rdtsc() +#else +#define KMP_NOW() __kmp_hardware_timestamp() +#endif +#define KMP_NOW_MSEC() (KMP_NOW() / __kmp_ticks_per_msec) +#define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * __kmp_ticks_per_msec) +#define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW()) #else -# if KMP_OS_UNIX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) - // HW TSC is used to reduce overhead (clock tick instead of nanosecond). - extern kmp_uint64 __kmp_ticks_per_msec; -# if KMP_COMPILER_ICC -# define KMP_NOW() _rdtsc() -# else -# define KMP_NOW() __kmp_hardware_timestamp() -# endif -# define KMP_NOW_MSEC() (KMP_NOW()/__kmp_ticks_per_msec) -# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * __kmp_ticks_per_msec) -# define KMP_BLOCKING(goal, count) ((goal) > KMP_NOW()) -# else - // System time is retrieved sporadically while blocking. - extern kmp_uint64 __kmp_now_nsec(); -# define KMP_NOW() __kmp_now_nsec() -# define KMP_NOW_MSEC() (KMP_NOW()/KMP_USEC_PER_SEC) -# define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC) -# define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW()) -# endif -# define KMP_YIELD_NOW() (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) \ - % (__kmp_yield_on_count + __kmp_yield_off_count) < (kmp_uint32)__kmp_yield_on_count) +// System time is retrieved sporadically while blocking. 
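A worked, self-contained check of the monitor-wakeup macros above (illustration only; the constants and macro bodies are copied from this header): with the default 200 ms blocktime the monitor is raised to 1000/200 = 5 wakeups per second, and a 200 ms blocktime then spans a single 200 ms monitor interval.

#include <cassert>
#include <climits>

#define KMP_BLOCKTIME_MULTIPLIER (1000)
#define KMP_MIN_BLOCKTIME (0)
#define KMP_MAX_BLOCKTIME (INT_MAX)
#define KMP_DEFAULT_BLOCKTIME (200)
#define KMP_MIN_MONITOR_WAKEUPS (1)
#define KMP_MAX_MONITOR_WAKEUPS (1000)

#define KMP_WAKEUPS_FROM_BLOCKTIME(blocktime, monitor_wakeups)                 \
  (((blocktime) == KMP_MAX_BLOCKTIME)                                          \
       ? (monitor_wakeups)                                                     \
       : ((blocktime) == KMP_MIN_BLOCKTIME)                                    \
             ? KMP_MAX_MONITOR_WAKEUPS                                         \
             : ((monitor_wakeups) > (KMP_BLOCKTIME_MULTIPLIER / (blocktime)))  \
                   ? (monitor_wakeups)                                         \
                   : (KMP_BLOCKTIME_MULTIPLIER) / (blocktime))

#define KMP_INTERVALS_FROM_BLOCKTIME(blocktime, monitor_wakeups)               \
  (((blocktime) + (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)) - 1) /        \
   (KMP_BLOCKTIME_MULTIPLIER / (monitor_wakeups)))

int main() {
  // Starting from the minimum wakeup rate, a 200 ms blocktime raises the
  // monitor to 1000/200 = 5 wakeups per second (wakeups can only increase).
  int wakeups = KMP_WAKEUPS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME,
                                           KMP_MIN_MONITOR_WAKEUPS);
  assert(wakeups == 5);
  // (200 + 200 - 1) / 200 == 1: the blocktime covers one monitor interval.
  assert(KMP_INTERVALS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME, wakeups) == 1);
  // A zero blocktime pins the monitor at its maximum wakeup rate.
  assert(KMP_WAKEUPS_FROM_BLOCKTIME(KMP_MIN_BLOCKTIME, wakeups) ==
         KMP_MAX_MONITOR_WAKEUPS);
  return 0;
}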
+extern kmp_uint64 __kmp_now_nsec(); +#define KMP_NOW() __kmp_now_nsec() +#define KMP_NOW_MSEC() (KMP_NOW() / KMP_USEC_PER_SEC) +#define KMP_BLOCKTIME_INTERVAL() (__kmp_dflt_blocktime * KMP_USEC_PER_SEC) +#define KMP_BLOCKING(goal, count) ((count) % 1000 != 0 || (goal) > KMP_NOW()) +#endif +#define KMP_YIELD_NOW() \ + (KMP_NOW_MSEC() / KMP_MAX(__kmp_dflt_blocktime, 1) % \ + (__kmp_yield_on_count + __kmp_yield_off_count) < \ + (kmp_uint32)__kmp_yield_on_count) #endif // KMP_USE_MONITOR -#define KMP_MIN_STATSCOLS 40 -#define KMP_MAX_STATSCOLS 4096 -#define KMP_DEFAULT_STATSCOLS 80 +#define KMP_MIN_STATSCOLS 40 +#define KMP_MAX_STATSCOLS 4096 +#define KMP_DEFAULT_STATSCOLS 80 -#define KMP_MIN_INTERVAL 0 -#define KMP_MAX_INTERVAL (INT_MAX-1) -#define KMP_DEFAULT_INTERVAL 0 +#define KMP_MIN_INTERVAL 0 +#define KMP_MAX_INTERVAL (INT_MAX - 1) +#define KMP_DEFAULT_INTERVAL 0 -#define KMP_MIN_CHUNK 1 -#define KMP_MAX_CHUNK (INT_MAX-1) -#define KMP_DEFAULT_CHUNK 1 +#define KMP_MIN_CHUNK 1 +#define KMP_MAX_CHUNK (INT_MAX - 1) +#define KMP_DEFAULT_CHUNK 1 -#define KMP_MIN_INIT_WAIT 1 -#define KMP_MAX_INIT_WAIT (INT_MAX/2) -#define KMP_DEFAULT_INIT_WAIT 2048U +#define KMP_MIN_INIT_WAIT 1 +#define KMP_MAX_INIT_WAIT (INT_MAX / 2) +#define KMP_DEFAULT_INIT_WAIT 2048U -#define KMP_MIN_NEXT_WAIT 1 -#define KMP_MAX_NEXT_WAIT (INT_MAX/2) -#define KMP_DEFAULT_NEXT_WAIT 1024U +#define KMP_MIN_NEXT_WAIT 1 +#define KMP_MAX_NEXT_WAIT (INT_MAX / 2) +#define KMP_DEFAULT_NEXT_WAIT 1024U -#define KMP_DFLT_DISP_NUM_BUFF 7 -#define KMP_MAX_ORDERED 8 +#define KMP_DFLT_DISP_NUM_BUFF 7 +#define KMP_MAX_ORDERED 8 -#define KMP_MAX_FIELDS 32 +#define KMP_MAX_FIELDS 32 -#define KMP_MAX_BRANCH_BITS 31 +#define KMP_MAX_BRANCH_BITS 31 #define KMP_MAX_ACTIVE_LEVELS_LIMIT INT_MAX @@ -963,204 +976,231 @@ extern int __kmp_hws_abs_flag; // absolute or per-item number requested #define KMP_MAX_TASK_PRIORITY_LIMIT INT_MAX -/* Minimum number of threads before switch to TLS gtid (experimentally determined) */ +/* Minimum number of threads before switch to TLS gtid (experimentally + determined) */ /* josh TODO: what about OS X* tuning? */ -#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define KMP_TLS_GTID_MIN 5 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#define KMP_TLS_GTID_MIN 5 #else -# define KMP_TLS_GTID_MIN INT_MAX +#define KMP_TLS_GTID_MIN INT_MAX #endif -#define KMP_MASTER_TID(tid) ( (tid) == 0 ) -#define KMP_WORKER_TID(tid) ( (tid) != 0 ) +#define KMP_MASTER_TID(tid) ((tid) == 0) +#define KMP_WORKER_TID(tid) ((tid) != 0) -#define KMP_MASTER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) == 0 ) -#define KMP_WORKER_GTID(gtid) ( __kmp_tid_from_gtid((gtid)) != 0 ) -#define KMP_UBER_GTID(gtid) \ - ( \ - KMP_DEBUG_ASSERT( (gtid) >= KMP_GTID_MIN ), \ - KMP_DEBUG_ASSERT( (gtid) < __kmp_threads_capacity ), \ - (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \ - (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)\ - ) -#define KMP_INITIAL_GTID(gtid) ( (gtid) == 0 ) +#define KMP_MASTER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) == 0) +#define KMP_WORKER_GTID(gtid) (__kmp_tid_from_gtid((gtid)) != 0) +#define KMP_UBER_GTID(gtid) \ + (KMP_DEBUG_ASSERT((gtid) >= KMP_GTID_MIN), \ + KMP_DEBUG_ASSERT((gtid) < __kmp_threads_capacity), \ + (gtid) >= 0 && __kmp_root[(gtid)] && __kmp_threads[(gtid)] && \ + (__kmp_threads[(gtid)] == __kmp_root[(gtid)]->r.r_uber_thread)) +#define KMP_INITIAL_GTID(gtid) ((gtid) == 0) #ifndef TRUE -#define FALSE 0 -#define TRUE (! 
FALSE) +#define FALSE 0 +#define TRUE (!FALSE) #endif /* NOTE: all of the following constants must be even */ #if KMP_OS_WINDOWS -# define KMP_INIT_WAIT 64U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 64U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */ #elif KMP_OS_CNK -# define KMP_INIT_WAIT 16U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 16U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */ #elif KMP_OS_LINUX -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #elif KMP_OS_DARWIN /* TODO: tune for KMP_OS_DARWIN */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #elif KMP_OS_FREEBSD /* TODO: tune for KMP_OS_FREEBSD */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #elif KMP_OS_NETBSD /* TODO: tune for KMP_OS_NETBSD */ -# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ -# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 typedef struct kmp_cpuid { - kmp_uint32 eax; - kmp_uint32 ebx; - kmp_uint32 ecx; - kmp_uint32 edx; + kmp_uint32 eax; + kmp_uint32 ebx; + kmp_uint32 ecx; + kmp_uint32 edx; } kmp_cpuid_t; -extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -# if KMP_ARCH_X86 - extern void __kmp_x86_pause( void ); -# elif KMP_MIC - static void __kmp_x86_pause( void ) { _mm_delay_32( 100 ); } -# else - static void __kmp_x86_pause( void ) { _mm_pause(); } -# endif -# define KMP_CPU_PAUSE() __kmp_x86_pause() -#elif KMP_ARCH_PPC64 -# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1") -# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2") -# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory") -# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0) +extern void __kmp_x86_cpuid(int mode, int mode2, struct kmp_cpuid *p); +#if KMP_ARCH_X86 +extern void __kmp_x86_pause(void); +#elif KMP_MIC +static void __kmp_x86_pause(void) { _mm_delay_32(100); } #else -# define KMP_CPU_PAUSE() /* nothing to do */ +static void __kmp_x86_pause(void) { _mm_pause(); } #endif - -#define KMP_INIT_YIELD(count) { (count) = __kmp_yield_init; } - -#define KMP_YIELD(cond) { KMP_CPU_PAUSE(); __kmp_yield( (cond) ); } - -// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, -// there should be no yielding since the starting value from KMP_INIT_YIELD() is odd. 
- -#define KMP_YIELD_WHEN(cond,count) { KMP_CPU_PAUSE(); (count) -= 2; \ - if (!(count)) { KMP_YIELD(cond); (count) = __kmp_yield_next; } } -#define KMP_YIELD_SPIN(count) { KMP_CPU_PAUSE(); (count) -=2; \ - if (!(count)) { KMP_YIELD(1); (count) = __kmp_yield_next; } } +#define KMP_CPU_PAUSE() __kmp_x86_pause() +#elif KMP_ARCH_PPC64 +#define KMP_PPC64_PRI_LOW() __asm__ volatile("or 1, 1, 1") +#define KMP_PPC64_PRI_MED() __asm__ volatile("or 2, 2, 2") +#define KMP_PPC64_PRI_LOC_MB() __asm__ volatile("" : : : "memory") +#define KMP_CPU_PAUSE() \ + do { \ + KMP_PPC64_PRI_LOW(); \ + KMP_PPC64_PRI_MED(); \ + KMP_PPC64_PRI_LOC_MB(); \ + } while (0) +#else +#define KMP_CPU_PAUSE() /* nothing to do */ +#endif + +#define KMP_INIT_YIELD(count) \ + { (count) = __kmp_yield_init; } + +#define KMP_YIELD(cond) \ + { \ + KMP_CPU_PAUSE(); \ + __kmp_yield((cond)); \ + } + +// Note the decrement of 2 in the following Macros. With KMP_LIBRARY=turnaround, +// there should be no yielding since initial value from KMP_INIT_YIELD() is odd. + +#define KMP_YIELD_WHEN(cond, count) \ + { \ + KMP_CPU_PAUSE(); \ + (count) -= 2; \ + if (!(count)) { \ + KMP_YIELD(cond); \ + (count) = __kmp_yield_next; \ + } \ + } +#define KMP_YIELD_SPIN(count) \ + { \ + KMP_CPU_PAUSE(); \ + (count) -= 2; \ + if (!(count)) { \ + KMP_YIELD(1); \ + (count) = __kmp_yield_next; \ + } \ + } /* ------------------------------------------------------------------------ */ /* Support datatypes for the orphaned construct nesting checks. */ /* ------------------------------------------------------------------------ */ enum cons_type { - ct_none, - ct_parallel, - ct_pdo, - ct_pdo_ordered, - ct_psections, - ct_psingle, - - /* the following must be left in order and not split up */ - ct_taskq, - ct_task, /* really task inside non-ordered taskq, considered a worksharing type */ - ct_task_ordered, /* really task inside ordered taskq, considered a worksharing type */ - /* the preceding must be left in order and not split up */ - - ct_critical, - ct_ordered_in_parallel, - ct_ordered_in_pdo, - ct_ordered_in_taskq, - ct_master, - ct_reduce, - ct_barrier + ct_none, + ct_parallel, + ct_pdo, + ct_pdo_ordered, + ct_psections, + ct_psingle, + + /* the following must be left in order and not split up */ + ct_taskq, + ct_task, // really task inside non-ordered taskq, considered worksharing type + ct_task_ordered, /* really task inside ordered taskq, considered a worksharing + type */ + /* the preceding must be left in order and not split up */ + + ct_critical, + ct_ordered_in_parallel, + ct_ordered_in_pdo, + ct_ordered_in_taskq, + ct_master, + ct_reduce, + ct_barrier }; /* test to see if we are in a taskq construct */ -# define IS_CONS_TYPE_TASKQ( ct ) ( ((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered) ) -# define IS_CONS_TYPE_ORDERED( ct ) ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) +#define IS_CONS_TYPE_TASKQ(ct) \ + (((int)(ct)) >= ((int)ct_taskq) && ((int)(ct)) <= ((int)ct_task_ordered)) +#define IS_CONS_TYPE_ORDERED(ct) \ + ((ct) == ct_pdo_ordered || (ct) == ct_task_ordered) struct cons_data { - ident_t const *ident; - enum cons_type type; - int prev; - kmp_user_lock_p name; /* address exclusively for critical section name comparison */ + ident_t const *ident; + enum cons_type type; + int prev; + kmp_user_lock_p + name; /* address exclusively for critical section name comparison */ }; struct cons_header { - int p_top, w_top, s_top; - int stack_size, stack_top; - struct cons_data *stack_data; + int p_top, w_top, s_top; + int 
stack_size, stack_top; + struct cons_data *stack_data; }; struct kmp_region_info { - char *text; - int offset[KMP_MAX_FIELDS]; - int length[KMP_MAX_FIELDS]; + char *text; + int offset[KMP_MAX_FIELDS]; + int length[KMP_MAX_FIELDS]; }; - /* ---------------------------------------------------------------------- */ /* ---------------------------------------------------------------------- */ #if KMP_OS_WINDOWS - typedef HANDLE kmp_thread_t; - typedef DWORD kmp_key_t; +typedef HANDLE kmp_thread_t; +typedef DWORD kmp_key_t; #endif /* KMP_OS_WINDOWS */ #if KMP_OS_UNIX - typedef pthread_t kmp_thread_t; - typedef pthread_key_t kmp_key_t; +typedef pthread_t kmp_thread_t; +typedef pthread_key_t kmp_key_t; #endif -extern kmp_key_t __kmp_gtid_threadprivate_key; +extern kmp_key_t __kmp_gtid_threadprivate_key; typedef struct kmp_sys_info { - long maxrss; /* the maximum resident set size utilized (in kilobytes) */ - long minflt; /* the number of page faults serviced without any I/O */ - long majflt; /* the number of page faults serviced that required I/O */ - long nswap; /* the number of times a process was "swapped" out of memory */ - long inblock; /* the number of times the file system had to perform input */ - long oublock; /* the number of times the file system had to perform output */ - long nvcsw; /* the number of times a context switch was voluntarily */ - long nivcsw; /* the number of times a context switch was forced */ + long maxrss; /* the maximum resident set size utilized (in kilobytes) */ + long minflt; /* the number of page faults serviced without any I/O */ + long majflt; /* the number of page faults serviced that required I/O */ + long nswap; /* the number of times a process was "swapped" out of memory */ + long inblock; /* the number of times the file system had to perform input */ + long oublock; /* the number of times the file system had to perform output */ + long nvcsw; /* the number of times a context switch was voluntarily */ + long nivcsw; /* the number of times a context switch was forced */ } kmp_sys_info_t; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 typedef struct kmp_cpuinfo { - int initialized; // If 0, other fields are not initialized. - int signature; // CPUID(1).EAX - int family; // CPUID(1).EAX[27:20] + CPUID(1).EAX[11:8] ( Extended Family + Family ) - int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended Model << 4 ) + Model) - int stepping; // CPUID(1).EAX[3:0] ( Stepping ) - int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. - int rtm; // 0 if RTM instructions are not supported, 1 otherwise. - int cpu_stackoffset; - int apic_id; - int physical_id; - int logical_id; - kmp_uint64 frequency; // Nominal CPU frequency in Hz. - char name [3*sizeof (kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) + int initialized; // If 0, other fields are not initialized. + int signature; // CPUID(1).EAX + int family; // CPUID(1).EAX[27:20]+CPUID(1).EAX[11:8] (Extended Family+Family) + int model; // ( CPUID(1).EAX[19:16] << 4 ) + CPUID(1).EAX[7:4] ( ( Extended + // Model << 4 ) + Model) + int stepping; // CPUID(1).EAX[3:0] ( Stepping ) + int sse2; // 0 if SSE2 instructions are not supported, 1 otherwise. + int rtm; // 0 if RTM instructions are not supported, 1 otherwise. + int cpu_stackoffset; + int apic_id; + int physical_id; + int logical_id; + kmp_uint64 frequency; // Nominal CPU frequency in Hz. 
+ char name[3 * sizeof(kmp_cpuid_t)]; // CPUID(0x80000002,0x80000003,0x80000004) } kmp_cpuinfo_t; #endif #ifdef BUILD_TV struct tv_threadprivate { - /* Record type #1 */ - void *global_addr; - void *thread_addr; + /* Record type #1 */ + void *global_addr; + void *thread_addr; }; struct tv_data { - struct tv_data *next; - void *type; - union tv_union { - struct tv_threadprivate tp; - } u; + struct tv_data *next; + void *type; + union tv_union { + struct tv_threadprivate tp; + } u; }; extern kmp_key_t __kmp_tv_key; @@ -1170,137 +1210,168 @@ extern kmp_key_t __kmp_tv_key; /* ------------------------------------------------------------------------ */ #if USE_ITT_BUILD -// We cannot include "kmp_itt.h" due to circular dependency. Declare the only required type here. -// Later we will check the type meets requirements. +// We cannot include "kmp_itt.h" due to circular dependency. Declare the only +// required type here. Later we will check the type meets requirements. typedef int kmp_itt_mark_t; #define KMP_ITT_DEBUG 0 #endif /* USE_ITT_BUILD */ -/* ------------------------------------------------------------------------ */ - -/* - * Taskq data structures - */ - -#define HIGH_WATER_MARK(nslots) (((nslots) * 3) / 4) -#define __KMP_TASKQ_THUNKS_PER_TH 1 /* num thunks that each thread can simultaneously execute from a task queue */ - -/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t th_flags */ - -#define TQF_IS_ORDERED 0x0001 /* __kmpc_taskq interface, taskq ordered */ -#define TQF_IS_LASTPRIVATE 0x0002 /* __kmpc_taskq interface, taskq with lastprivate list */ -#define TQF_IS_NOWAIT 0x0004 /* __kmpc_taskq interface, end taskq nowait */ -#define TQF_HEURISTICS 0x0008 /* __kmpc_taskq interface, use heuristics to decide task queue size */ -#define TQF_INTERFACE_RESERVED1 0x0010 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED2 0x0020 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED3 0x0040 /* __kmpc_taskq interface, reserved for future use */ -#define TQF_INTERFACE_RESERVED4 0x0080 /* __kmpc_taskq interface, reserved for future use */ - -#define TQF_INTERFACE_FLAGS 0x00ff /* all the __kmpc_taskq interface flags */ - -#define TQF_IS_LAST_TASK 0x0100 /* internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE */ -#define TQF_TASKQ_TASK 0x0200 /* internal use only; this thunk->th_task is the taskq_task */ -#define TQF_RELEASE_WORKERS 0x0400 /* internal use only; must release worker threads once ANY queued task exists (global) */ -#define TQF_ALL_TASKS_QUEUED 0x0800 /* internal use only; notify workers that master has finished enqueuing tasks */ -#define TQF_PARALLEL_CONTEXT 0x1000 /* internal use only: this queue encountered in a parallel context: not serialized */ -#define TQF_DEALLOCATED 0x2000 /* internal use only; this queue is on the freelist and not in use */ - -#define TQF_INTERNAL_FLAGS 0x3f00 /* all the internal use only flags */ +/* Taskq data structures */ + +#define HIGH_WATER_MARK(nslots) (((nslots)*3) / 4) +// num thunks that each thread can simultaneously execute from a task queue +#define __KMP_TASKQ_THUNKS_PER_TH 1 + +/* flags for taskq_global_flags, kmp_task_queue_t tq_flags, kmpc_thunk_t + th_flags */ + +#define TQF_IS_ORDERED 0x0001 // __kmpc_taskq interface, taskq ordered +// __kmpc_taskq interface, taskq with lastprivate list +#define TQF_IS_LASTPRIVATE 0x0002 +#define TQF_IS_NOWAIT 0x0004 // __kmpc_taskq interface, end taskq nowait +// __kmpc_taskq interface, use heuristics 
to decide task queue size +#define TQF_HEURISTICS 0x0008 + +// __kmpc_taskq interface, reserved for future use +#define TQF_INTERFACE_RESERVED1 0x0010 +// __kmpc_taskq interface, reserved for future use +#define TQF_INTERFACE_RESERVED2 0x0020 +// __kmpc_taskq interface, reserved for future use +#define TQF_INTERFACE_RESERVED3 0x0040 +// __kmpc_taskq interface, reserved for future use +#define TQF_INTERFACE_RESERVED4 0x0080 + +#define TQF_INTERFACE_FLAGS 0x00ff // all the __kmpc_taskq interface flags +// internal/read by instrumentation; only used with TQF_IS_LASTPRIVATE +#define TQF_IS_LAST_TASK 0x0100 +// internal use only; this thunk->th_task is the taskq_task +#define TQF_TASKQ_TASK 0x0200 +// internal use only; must release worker threads once ANY queued task +// exists (global) +#define TQF_RELEASE_WORKERS 0x0400 +// internal use only; notify workers that master has finished enqueuing tasks +#define TQF_ALL_TASKS_QUEUED 0x0800 +// internal use only: this queue encountered in parallel context: not serialized +#define TQF_PARALLEL_CONTEXT 0x1000 +// internal use only; this queue is on the freelist and not in use +#define TQF_DEALLOCATED 0x2000 + +#define TQF_INTERNAL_FLAGS 0x3f00 // all the internal use only flags typedef struct KMP_ALIGN_CACHE kmpc_aligned_int32_t { - kmp_int32 ai_data; + kmp_int32 ai_data; } kmpc_aligned_int32_t; typedef struct KMP_ALIGN_CACHE kmpc_aligned_queue_slot_t { - struct kmpc_thunk_t *qs_thunk; + struct kmpc_thunk_t *qs_thunk; } kmpc_aligned_queue_slot_t; typedef struct kmpc_task_queue_t { - /* task queue linkage fields for n-ary tree of queues (locked with global taskq_tree_lck) */ - kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and child ref counts */ - union { - struct kmpc_task_queue_t *tq_parent; /* pointer to parent taskq, not locked */ - struct kmpc_task_queue_t *tq_next_free; /* for taskq internal freelists, locked with global taskq_freelist_lck */ - } tq; - volatile struct kmpc_task_queue_t *tq_first_child; /* pointer to linked-list of children, locked by tq's tq_link_lck */ - struct kmpc_task_queue_t *tq_next_child; /* next child in linked-list, locked by parent tq's tq_link_lck */ - struct kmpc_task_queue_t *tq_prev_child; /* previous child in linked-list, locked by parent tq's tq_link_lck */ - volatile kmp_int32 tq_ref_count; /* reference count of threads with access to this task queue */ - /* (other than the thread executing the kmpc_end_taskq call) */ - /* locked by parent tq's tq_link_lck */ - - /* shared data for task queue */ - struct kmpc_aligned_shared_vars_t *tq_shareds; /* per-thread array of pointers to shared variable structures */ - /* only one array element exists for all but outermost taskq */ - - /* bookkeeping for ordered task queue */ - kmp_uint32 tq_tasknum_queuing; /* ordered task number assigned while queuing tasks */ - volatile kmp_uint32 tq_tasknum_serving; /* ordered number of next task to be served (executed) */ - - /* thunk storage management for task queue */ - kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ - struct kmpc_thunk_t *tq_free_thunks; /* thunk freelist, chained via th.th_next_free */ - struct kmpc_thunk_t *tq_thunk_space; /* space allocated for thunks for this task queue */ - - /* data fields for queue itself */ - kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, tq_head, tq_tail, tq_nfull */ - kmpc_aligned_queue_slot_t *tq_queue; /* array of queue slots to hold thunks for tasks */ - volatile struct kmpc_thunk_t *tq_taskq_slot; /* special 
slot for taskq task thunk, occupied if not NULL */ - kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. tq_taskq_slot space) */ - kmp_int32 tq_head; /* enqueue puts next item in here (index into tq_queue array) */ - kmp_int32 tq_tail; /* dequeue takes next item out of here (index into tq_queue array) */ - volatile kmp_int32 tq_nfull; /* # of occupied entries in task queue right now */ - kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ - volatile kmp_int32 tq_flags; /* TQF_xxx */ - - /* bookkeeping for outstanding thunks */ - struct kmpc_aligned_int32_t *tq_th_thunks; /* per-thread array for # of regular thunks currently being executed */ - kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ - - /* statistics library bookkeeping */ - ident_t *tq_loc; /* source location information for taskq directive */ + /* task queue linkage fields for n-ary tree of queues (locked with global + taskq_tree_lck) */ + kmp_lock_t tq_link_lck; /* lock for child link, child next/prev links and + child ref counts */ + union { + struct kmpc_task_queue_t *tq_parent; // pointer to parent taskq, not locked + // for taskq internal freelists, locked with global taskq_freelist_lck + struct kmpc_task_queue_t *tq_next_free; + } tq; + // pointer to linked-list of children, locked by tq's tq_link_lck + volatile struct kmpc_task_queue_t *tq_first_child; + // next child in linked-list, locked by parent tq's tq_link_lck + struct kmpc_task_queue_t *tq_next_child; + // previous child in linked-list, locked by parent tq's tq_link_lck + struct kmpc_task_queue_t *tq_prev_child; + // reference count of threads with access to this task queue + volatile kmp_int32 tq_ref_count; + /* (other than the thread executing the kmpc_end_taskq call) */ + /* locked by parent tq's tq_link_lck */ + + /* shared data for task queue */ + /* per-thread array of pointers to shared variable structures */ + struct kmpc_aligned_shared_vars_t *tq_shareds; + /* only one array element exists for all but outermost taskq */ + + /* bookkeeping for ordered task queue */ + kmp_uint32 tq_tasknum_queuing; // ordered task # assigned while queuing tasks + // ordered number of next task to be served (executed) + volatile kmp_uint32 tq_tasknum_serving; + + /* thunk storage management for task queue */ + kmp_lock_t tq_free_thunks_lck; /* lock for thunk freelist manipulation */ + // thunk freelist, chained via th.th_next_free + struct kmpc_thunk_t *tq_free_thunks; + // space allocated for thunks for this task queue + struct kmpc_thunk_t *tq_thunk_space; + + /* data fields for queue itself */ + kmp_lock_t tq_queue_lck; /* lock for [de]enqueue operations: tq_queue, + tq_head, tq_tail, tq_nfull */ + /* array of queue slots to hold thunks for tasks */ + kmpc_aligned_queue_slot_t *tq_queue; + volatile struct kmpc_thunk_t *tq_taskq_slot; /* special slot for taskq task + thunk, occupied if not NULL */ + kmp_int32 tq_nslots; /* # of tq_thunk_space thunks alloc'd (not incl. 
+ tq_taskq_slot space) */ + kmp_int32 tq_head; // enqueue puts item here (index into tq_queue array) + kmp_int32 tq_tail; // dequeue takes item from here (index into tq_queue array) + volatile kmp_int32 tq_nfull; // # of occupied entries in task queue right now + kmp_int32 tq_hiwat; /* high-water mark for tq_nfull and queue scheduling */ + volatile kmp_int32 tq_flags; /* TQF_xxx */ + + /* bookkeeping for outstanding thunks */ + + /* per-thread array for # of regular thunks currently being executed */ + struct kmpc_aligned_int32_t *tq_th_thunks; + kmp_int32 tq_nproc; /* number of thunks in the th_thunks array */ + + /* statistics library bookkeeping */ + ident_t *tq_loc; /* source location information for taskq directive */ } kmpc_task_queue_t; -typedef void (*kmpc_task_t) (kmp_int32 global_tid, struct kmpc_thunk_t *thunk); +typedef void (*kmpc_task_t)(kmp_int32 global_tid, struct kmpc_thunk_t *thunk); /* sizeof_shareds passed as arg to __kmpc_taskq call */ -typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ - kmpc_task_queue_t *sv_queue; - /* (pointers to) shared vars */ +typedef struct kmpc_shared_vars_t { /* aligned during dynamic allocation */ + kmpc_task_queue_t *sv_queue; /* (pointers to) shared vars */ } kmpc_shared_vars_t; typedef struct KMP_ALIGN_CACHE kmpc_aligned_shared_vars_t { - volatile struct kmpc_shared_vars_t *ai_data; + volatile struct kmpc_shared_vars_t *ai_data; } kmpc_aligned_shared_vars_t; -/* sizeof_thunk passed as arg to kmpc_taskq call */ -typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ - union { /* field used for internal freelists too */ - kmpc_shared_vars_t *th_shareds; - struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within queue, head at tq_free_thunks */ - } th; - kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */ - struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk on this thread's call stack */ - kmp_int32 th_flags; /* TQF_xxx (tq_flags interface plus possible internal flags) */ - kmp_int32 th_status; - kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for ordered sections */ - /* private vars */ +/* sizeof_thunk passed as arg to kmpc_taskq call */ +typedef struct kmpc_thunk_t { /* aligned during dynamic allocation */ + union { /* field used for internal freelists too */ + kmpc_shared_vars_t *th_shareds; + struct kmpc_thunk_t *th_next_free; /* freelist of individual thunks within + queue, head at tq_free_thunks */ + } th; + kmpc_task_t th_task; /* taskq_task if flags & TQF_TASKQ_TASK */ + struct kmpc_thunk_t *th_encl_thunk; /* pointer to dynamically enclosing thunk + on this thread's call stack */ + // TQF_xxx(tq_flags interface plus possible internal flags) + kmp_int32 th_flags; + + kmp_int32 th_status; + kmp_uint32 th_tasknum; /* task number assigned in order of queuing, used for + ordered sections */ + /* private vars */ } kmpc_thunk_t; typedef struct KMP_ALIGN_CACHE kmp_taskq { - int tq_curr_thunk_capacity; + int tq_curr_thunk_capacity; - kmpc_task_queue_t *tq_root; - kmp_int32 tq_global_flags; + kmpc_task_queue_t *tq_root; + kmp_int32 tq_global_flags; - kmp_lock_t tq_freelist_lck; - kmpc_task_queue_t *tq_freelist; + kmp_lock_t tq_freelist_lck; + kmpc_task_queue_t *tq_freelist; - kmpc_thunk_t **tq_curr_thunk; + kmpc_thunk_t **tq_curr_thunk; } kmp_taskq_t; /* END Taskq data structures */ -/* --------------------------------------------------------------------------- */ typedef kmp_int32 kmp_critical_name[8]; @@ -1308,18 
+1379,21 @@ typedef kmp_int32 kmp_critical_name[8]; @ingroup PARALLEL The type for a microtask which gets passed to @ref __kmpc_fork_call(). The arguments to the outlined function are -@param global_tid the global thread identity of the thread executing the function. +@param global_tid the global thread identity of the thread executing the +function. @param bound_tid the local identitiy of the thread executing the function @param ... pointers to shared variables accessed by the function. */ -typedef void (*kmpc_micro) ( kmp_int32 * global_tid, kmp_int32 * bound_tid, ... ); -typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bound_nth, ... ); +typedef void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid, ...); +typedef void (*kmpc_micro_bound)(kmp_int32 *bound_tid, kmp_int32 *bound_nth, + ...); /*! @ingroup THREADPRIVATE @{ */ -/* --------------------------------------------------------------------------- */ +/* --------------------------------------------------------------------------- + */ /* Threadprivate initialization/finalization function declarations */ /* for non-array objects: __kmpc_threadprivate_register() */ @@ -1328,487 +1402,505 @@ typedef void (*kmpc_micro_bound) ( kmp_int32 * bound_tid, kmp_int32 * bou Pointer to the constructor function. The first argument is the this pointer */ -typedef void *(*kmpc_ctor) (void *); +typedef void *(*kmpc_ctor)(void *); /*! Pointer to the destructor function. The first argument is the this pointer */ -typedef void (*kmpc_dtor) (void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel compiler */ +typedef void (*kmpc_dtor)( + void * /*, size_t */); /* 2nd arg: magic number for KCC unused by Intel + compiler */ /*! Pointer to an alternate constructor. The first argument is the this pointer. */ -typedef void *(*kmpc_cctor) (void *, void *); +typedef void *(*kmpc_cctor)(void *, void *); -/* for array objects: __kmpc_threadprivate_register_vec() */ - /* First arg: "this" pointer */ - /* Last arg: number of array elements */ +/* for array objects: __kmpc_threadprivate_register_vec() */ +/* First arg: "this" pointer */ +/* Last arg: number of array elements */ /*! Array constructor. First argument is the this pointer Second argument the number of array elements. */ -typedef void *(*kmpc_ctor_vec) (void *, size_t); +typedef void *(*kmpc_ctor_vec)(void *, size_t); /*! Pointer to the array destructor function. The first argument is the this pointer Second argument the number of array elements. */ -typedef void (*kmpc_dtor_vec) (void *, size_t); +typedef void (*kmpc_dtor_vec)(void *, size_t); /*! Array constructor. First argument is the this pointer Third argument the number of array elements. */ -typedef void *(*kmpc_cctor_vec) (void *, void *, size_t); /* function unused by compiler */ +typedef void *(*kmpc_cctor_vec)(void *, void *, + size_t); /* function unused by compiler */ /*! 
@} */ - -/* ------------------------------------------------------------------------ */ - /* keeps tracked of threadprivate cache allocations for cleanup later */ typedef struct kmp_cached_addr { - void **addr; /* address of allocated cache */ - struct kmp_cached_addr *next; /* pointer to next cached address */ + void **addr; /* address of allocated cache */ + struct kmp_cached_addr *next; /* pointer to next cached address */ } kmp_cached_addr_t; struct private_data { - struct private_data *next; /* The next descriptor in the list */ - void *data; /* The data buffer for this descriptor */ - int more; /* The repeat count for this descriptor */ - size_t size; /* The data size for this descriptor */ + struct private_data *next; /* The next descriptor in the list */ + void *data; /* The data buffer for this descriptor */ + int more; /* The repeat count for this descriptor */ + size_t size; /* The data size for this descriptor */ }; struct private_common { - struct private_common *next; - struct private_common *link; - void *gbl_addr; - void *par_addr; /* par_addr == gbl_addr for MASTER thread */ - size_t cmn_size; + struct private_common *next; + struct private_common *link; + void *gbl_addr; + void *par_addr; /* par_addr == gbl_addr for MASTER thread */ + size_t cmn_size; }; -struct shared_common -{ - struct shared_common *next; - struct private_data *pod_init; - void *obj_init; - void *gbl_addr; - union { - kmpc_ctor ctor; - kmpc_ctor_vec ctorv; - } ct; - union { - kmpc_cctor cctor; - kmpc_cctor_vec cctorv; - } cct; - union { - kmpc_dtor dtor; - kmpc_dtor_vec dtorv; - } dt; - size_t vec_len; - int is_vec; - size_t cmn_size; +struct shared_common { + struct shared_common *next; + struct private_data *pod_init; + void *obj_init; + void *gbl_addr; + union { + kmpc_ctor ctor; + kmpc_ctor_vec ctorv; + } ct; + union { + kmpc_cctor cctor; + kmpc_cctor_vec cctorv; + } cct; + union { + kmpc_dtor dtor; + kmpc_dtor_vec dtorv; + } dt; + size_t vec_len; + int is_vec; + size_t cmn_size; }; -#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ -#define KMP_HASH_TABLE_SIZE (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ -#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ -#define KMP_HASH(x) ((((kmp_uintptr_t) x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE-1)) +#define KMP_HASH_TABLE_LOG2 9 /* log2 of the hash table size */ +#define KMP_HASH_TABLE_SIZE \ + (1 << KMP_HASH_TABLE_LOG2) /* size of the hash table */ +#define KMP_HASH_SHIFT 3 /* throw away this many low bits from the address */ +#define KMP_HASH(x) \ + ((((kmp_uintptr_t)x) >> KMP_HASH_SHIFT) & (KMP_HASH_TABLE_SIZE - 1)) struct common_table { - struct private_common *data[ KMP_HASH_TABLE_SIZE ]; + struct private_common *data[KMP_HASH_TABLE_SIZE]; }; struct shared_table { - struct shared_common *data[ KMP_HASH_TABLE_SIZE ]; + struct shared_common *data[KMP_HASH_TABLE_SIZE]; }; -/* ------------------------------------------------------------------------ */ + /* ------------------------------------------------------------------------ */ #if KMP_STATIC_STEAL_ENABLED typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 count; - kmp_int32 ub; - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int32 lb; - kmp_int32 st; - kmp_int32 tc; - kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put after ub */ - - // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same 
cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { // AC: changed 16 to 32 in order to simplify template - kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should - kmp_int32 parm2; // make no real change at least while padding is off. - kmp_int32 parm3; - kmp_int32 parm4; - }; - - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; + kmp_int32 count; + kmp_int32 ub; + /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ + kmp_int32 lb; + kmp_int32 st; + kmp_int32 tc; + kmp_int32 static_steal_counter; /* for static_steal only; maybe better to put + after ub */ + + // KMP_ALIGN( 16 ) ensures ( if the KMP_ALIGN macro is turned on ) + // a) parm3 is properly aligned and + // b) all parm1-4 are in the same cache line. + // Because of parm1-4 are used together, performance seems to be better + // if they are in the same line (not measured though). + + struct KMP_ALIGN(32) { // AC: changed 16 to 32 in order to simplify template + kmp_int32 parm1; // structures in kmp_dispatch.cpp. This should + kmp_int32 parm2; // make no real change at least while padding is off. + kmp_int32 parm3; + kmp_int32 parm4; + }; + + kmp_uint32 ordered_lower; + kmp_uint32 ordered_upper; #if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. - // It would be nice to measure execution times. - // Conditional if/endif can be removed at all. - kmp_int32 last_upper; +// This var can be placed in the hole between 'tc' and 'parm1', instead of +// 'static_steal_counter'. It would be nice to measure execution times. +// Conditional if/endif can be removed at all. + kmp_int32 last_upper; #endif /* KMP_OS_WINDOWS */ } dispatch_private_info32_t; typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 count; /* current chunk number for static and static-steal scheduling*/ - kmp_int64 ub; /* upper-bound */ - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - kmp_int64 lb; /* lower-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ - kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put after ub */ - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - - // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; - }; - - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; + kmp_int64 count; // current chunk number for static & static-steal scheduling + kmp_int64 ub; /* upper-bound */ + /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ + kmp_int64 lb; /* lower-bound */ + kmp_int64 st; /* stride */ + kmp_int64 tc; /* trip count (number of iterations) */ + kmp_int64 static_steal_counter; /* for static_steal only; maybe better to put + after ub */ + + /* parm[1-4] are used in different ways by different scheduling algorithms */ + + // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) + // a) parm3 is properly aligned and + // b) all parm1-4 are in the same cache line. 
+ // Because of parm1-4 are used together, performance seems to be better + // if they are in the same line (not measured though). + + struct KMP_ALIGN(32) { + kmp_int64 parm1; + kmp_int64 parm2; + kmp_int64 parm3; + kmp_int64 parm4; + }; + + kmp_uint64 ordered_lower; + kmp_uint64 ordered_upper; #if KMP_OS_WINDOWS - // This var can be placed in the hole between 'tc' and 'parm1', instead of 'static_steal_counter'. - // It would be nice to measure execution times. - // Conditional if/endif can be removed at all. - kmp_int64 last_upper; +// This var can be placed in the hole between 'tc' and 'parm1', instead of +// 'static_steal_counter'. It would be nice to measure execution times. +// Conditional if/endif can be removed at all. + kmp_int64 last_upper; #endif /* KMP_OS_WINDOWS */ } dispatch_private_info64_t; #else /* KMP_STATIC_STEAL_ENABLED */ typedef struct KMP_ALIGN_CACHE dispatch_private_info32 { - kmp_int32 lb; - kmp_int32 ub; - kmp_int32 st; - kmp_int32 tc; + kmp_int32 lb; + kmp_int32 ub; + kmp_int32 st; + kmp_int32 tc; - kmp_int32 parm1; - kmp_int32 parm2; - kmp_int32 parm3; - kmp_int32 parm4; + kmp_int32 parm1; + kmp_int32 parm2; + kmp_int32 parm3; + kmp_int32 parm4; - kmp_int32 count; + kmp_int32 count; - kmp_uint32 ordered_lower; - kmp_uint32 ordered_upper; + kmp_uint32 ordered_lower; + kmp_uint32 ordered_upper; #if KMP_OS_WINDOWS - kmp_int32 last_upper; + kmp_int32 last_upper; #endif /* KMP_OS_WINDOWS */ } dispatch_private_info32_t; typedef struct KMP_ALIGN_CACHE dispatch_private_info64 { - kmp_int64 lb; /* lower-bound */ - kmp_int64 ub; /* upper-bound */ - kmp_int64 st; /* stride */ - kmp_int64 tc; /* trip count (number of iterations) */ + kmp_int64 lb; /* lower-bound */ + kmp_int64 ub; /* upper-bound */ + kmp_int64 st; /* stride */ + kmp_int64 tc; /* trip count (number of iterations) */ - /* parm[1-4] are used in different ways by different scheduling algorithms */ - kmp_int64 parm1; - kmp_int64 parm2; - kmp_int64 parm3; - kmp_int64 parm4; + /* parm[1-4] are used in different ways by different scheduling algorithms */ + kmp_int64 parm1; + kmp_int64 parm2; + kmp_int64 parm3; + kmp_int64 parm4; - kmp_int64 count; /* current chunk number for static scheduling */ + kmp_int64 count; /* current chunk number for static scheduling */ - kmp_uint64 ordered_lower; - kmp_uint64 ordered_upper; + kmp_uint64 ordered_lower; + kmp_uint64 ordered_upper; #if KMP_OS_WINDOWS - kmp_int64 last_upper; + kmp_int64 last_upper; #endif /* KMP_OS_WINDOWS */ } dispatch_private_info64_t; #endif /* KMP_STATIC_STEAL_ENABLED */ typedef struct KMP_ALIGN_CACHE dispatch_private_info { - union private_info { - dispatch_private_info32_t p32; - dispatch_private_info64_t p64; - } u; - enum sched_type schedule; /* scheduling algorithm */ - kmp_int32 ordered; /* ordered clause specified */ - kmp_int32 ordered_bumped; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar - struct dispatch_private_info * next; /* stack of buffers for nest of serial regions */ - kmp_int32 nomerge; /* don't merge iters if serialized */ - kmp_int32 type_size; /* the size of types in private_info */ - enum cons_type pushed_ws; + union private_info { + dispatch_private_info32_t p32; + dispatch_private_info64_t p64; + } u; + enum sched_type schedule; /* scheduling algorithm */ + kmp_int32 ordered; /* ordered clause specified */ + kmp_int32 ordered_bumped; + // To retain the structure size after making ordered_iteration scalar + kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; + // Stack 
of buffers for nest of serial regions + struct dispatch_private_info *next; + kmp_int32 nomerge; /* don't merge iters if serialized */ + kmp_int32 type_size; /* the size of types in private_info */ + enum cons_type pushed_ws; } dispatch_private_info_t; typedef struct dispatch_shared_info32 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint32 iteration; - volatile kmp_uint32 num_done; - volatile kmp_uint32 ordered_iteration; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-1]; // to retain the structure size after making ordered_iteration scalar + /* chunk index under dynamic, number of idle threads under static-steal; + iteration index otherwise */ + volatile kmp_uint32 iteration; + volatile kmp_uint32 num_done; + volatile kmp_uint32 ordered_iteration; + // Dummy to retain the structure size after making ordered_iteration scalar + kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 1]; } dispatch_shared_info32_t; typedef struct dispatch_shared_info64 { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile kmp_uint64 iteration; - volatile kmp_uint64 num_done; - volatile kmp_uint64 ordered_iteration; - kmp_int64 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making ordered_iteration scalar + /* chunk index under dynamic, number of idle threads under static-steal; + iteration index otherwise */ + volatile kmp_uint64 iteration; + volatile kmp_uint64 num_done; + volatile kmp_uint64 ordered_iteration; + // Dummy to retain the structure size after making ordered_iteration scalar + kmp_int64 ordered_dummy[KMP_MAX_ORDERED - 3]; } dispatch_shared_info64_t; typedef struct dispatch_shared_info { - union shared_info { - dispatch_shared_info32_t s32; - dispatch_shared_info64_t s64; - } u; - volatile kmp_uint32 buffer_index; + union shared_info { + dispatch_shared_info32_t s32; + dispatch_shared_info64_t s64; + } u; + volatile kmp_uint32 buffer_index; #if OMP_45_ENABLED - volatile kmp_int32 doacross_buf_idx; // teamwise index - volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) - kmp_int32 doacross_num_done; // count finished threads + volatile kmp_int32 doacross_buf_idx; // teamwise index + volatile kmp_uint32 *doacross_flags; // shared array of iteration flags (0/1) + kmp_int32 doacross_num_done; // count finished threads #endif #if KMP_USE_HWLOC - // When linking with libhwloc, the ORDERED EPCC test slows down on big - // machines (> 48 cores). Performance analysis showed that a cache thrash - // was occurring and this padding helps alleviate the problem. - char padding[64]; + // When linking with libhwloc, the ORDERED EPCC test slows down on big + // machines (> 48 cores). Performance analysis showed that a cache thrash + // was occurring and this padding helps alleviate the problem. 
+ char padding[64]; #endif } dispatch_shared_info_t; typedef struct kmp_disp { - /* Vector for ORDERED SECTION */ - void (*th_deo_fcn)( int * gtid, int * cid, ident_t *); - /* Vector for END ORDERED SECTION */ - void (*th_dxo_fcn)( int * gtid, int * cid, ident_t *); + /* Vector for ORDERED SECTION */ + void (*th_deo_fcn)(int *gtid, int *cid, ident_t *); + /* Vector for END ORDERED SECTION */ + void (*th_dxo_fcn)(int *gtid, int *cid, ident_t *); - dispatch_shared_info_t *th_dispatch_sh_current; - dispatch_private_info_t *th_dispatch_pr_current; + dispatch_shared_info_t *th_dispatch_sh_current; + dispatch_private_info_t *th_dispatch_pr_current; - dispatch_private_info_t *th_disp_buffer; - kmp_int32 th_disp_index; + dispatch_private_info_t *th_disp_buffer; + kmp_int32 th_disp_index; #if OMP_45_ENABLED - kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index - volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags - union { // we can use union here because doacross cannot be used in nonmonotonic loops - kmp_int64 *th_doacross_info; // info on loop bounds - kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) - }; + kmp_int32 th_doacross_buf_idx; // thread's doacross buffer index + volatile kmp_uint32 *th_doacross_flags; // pointer to shared array of flags + union { // we can use union here because doacross cannot be used in + // nonmonotonic loops + kmp_int64 *th_doacross_info; // info on loop bounds + kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) + }; #else #if KMP_STATIC_STEAL_ENABLED - kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) - void* dummy_padding[1]; // make it 64 bytes on Intel(R) 64 + kmp_lock_t *th_steal_lock; // lock used for chunk stealing (8-byte variable) + void *dummy_padding[1]; // make it 64 bytes on Intel(R) 64 #else - void* dummy_padding[2]; // make it 64 bytes on Intel(R) 64 + void *dummy_padding[2]; // make it 64 bytes on Intel(R) 64 #endif #endif #if KMP_USE_INTERNODE_ALIGNMENT - char more_padding[INTERNODE_CACHE_LINE]; + char more_padding[INTERNODE_CACHE_LINE]; #endif } kmp_disp_t; /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* Barrier stuff */ /* constants for barrier state update */ -#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ -#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ -#define KMP_BARRIER_UNUSED_BIT 1 /* bit that must never be set for valid state */ -#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ +#define KMP_INIT_BARRIER_STATE 0 /* should probably start from zero */ +#define KMP_BARRIER_SLEEP_BIT 0 /* bit used for suspend/sleep part of state */ +#define KMP_BARRIER_UNUSED_BIT 1 // bit that must never be set for valid state +#define KMP_BARRIER_BUMP_BIT 2 /* lsb used for bump of go/arrived state */ -#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) -#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) -#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) +#define KMP_BARRIER_SLEEP_STATE (1 << KMP_BARRIER_SLEEP_BIT) +#define KMP_BARRIER_UNUSED_STATE (1 << KMP_BARRIER_UNUSED_BIT) +#define KMP_BARRIER_STATE_BUMP (1 << KMP_BARRIER_BUMP_BIT) #if (KMP_BARRIER_SLEEP_BIT >= KMP_BARRIER_BUMP_BIT) -# error "Barrier sleep bit must be smaller than barrier bump bit" +#error "Barrier sleep bit must be smaller than barrier bump bit" 
#endif #if (KMP_BARRIER_UNUSED_BIT >= KMP_BARRIER_BUMP_BIT) -# error "Barrier unused bit must be smaller than barrier bump bit" +#error "Barrier unused bit must be smaller than barrier bump bit" #endif // Constants for release barrier wait state: currently, hierarchical only -#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep -#define KMP_BARRIER_OWN_FLAG 1 // Normal state; worker waiting on own b_go flag in release -#define KMP_BARRIER_PARENT_FLAG 2 // Special state; worker waiting on parent's b_go flag in release -#define KMP_BARRIER_SWITCH_TO_OWN_FLAG 3 // Special state; tells worker to shift from parent to own b_go -#define KMP_BARRIER_SWITCHING 4 // Special state; worker resets appropriate flag on wake-up - -#define KMP_NOT_SAFE_TO_REAP 0 // Thread th_reap_state: not safe to reap (tasking) -#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) +#define KMP_BARRIER_NOT_WAITING 0 // Normal state; worker not in wait_sleep +#define KMP_BARRIER_OWN_FLAG \ + 1 // Normal state; worker waiting on own b_go flag in release +#define KMP_BARRIER_PARENT_FLAG \ + 2 // Special state; worker waiting on parent's b_go flag in release +#define KMP_BARRIER_SWITCH_TO_OWN_FLAG \ + 3 // Special state; tells worker to shift from parent to own b_go +#define KMP_BARRIER_SWITCHING \ + 4 // Special state; worker resets appropriate flag on wake-up + +#define KMP_NOT_SAFE_TO_REAP \ + 0 // Thread th_reap_state: not safe to reap (tasking) +#define KMP_SAFE_TO_REAP 1 // Thread th_reap_state: safe to reap (not tasking) enum barrier_type { - bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction barriers if enabled) */ - bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ - #if KMP_FAST_REDUCTION_BARRIER - bs_reduction_barrier, /* 2, All barriers that are used in reduction */ - #endif // KMP_FAST_REDUCTION_BARRIER - bs_last_barrier /* Just a placeholder to mark the end */ + bs_plain_barrier = 0, /* 0, All non-fork/join barriers (except reduction + barriers if enabled) */ + bs_forkjoin_barrier, /* 1, All fork/join (parallel region) barriers */ +#if KMP_FAST_REDUCTION_BARRIER + bs_reduction_barrier, /* 2, All barriers that are used in reduction */ +#endif // KMP_FAST_REDUCTION_BARRIER + bs_last_barrier /* Just a placeholder to mark the end */ }; // to work with reduction barriers just like with plain barriers #if !KMP_FAST_REDUCTION_BARRIER - #define bs_reduction_barrier bs_plain_barrier +#define bs_reduction_barrier bs_plain_barrier #endif // KMP_FAST_REDUCTION_BARRIER -typedef enum kmp_bar_pat { /* Barrier communication patterns */ - bp_linear_bar = 0, /* Single level (degenerate) tree */ - bp_tree_bar = 1, /* Balanced tree with branching factor 2^n */ - bp_hyper_bar = 2, /* Hypercube-embedded tree with min branching factor 2^n */ - bp_hierarchical_bar = 3, /* Machine hierarchy tree */ - bp_last_bar = 4 /* Placeholder to mark the end */ +typedef enum kmp_bar_pat { /* Barrier communication patterns */ + bp_linear_bar = + 0, /* Single level (degenerate) tree */ + bp_tree_bar = + 1, /* Balanced tree with branching factor 2^n */ + bp_hyper_bar = + 2, /* Hypercube-embedded tree with min branching + factor 2^n */ + bp_hierarchical_bar = 3, /* Machine hierarchy tree */ + bp_last_bar = 4 /* Placeholder to mark the end */ } kmp_bar_pat_e; -# define KMP_BARRIER_ICV_PUSH 1 +#define KMP_BARRIER_ICV_PUSH 1 /* Record for holding the values of the internal controls stack records */ typedef struct kmp_internal_control { - int serial_nesting_level; 
/* corresponds to the value of the th_team_serialized field */ - kmp_int8 nested; /* internal control for nested parallelism (per thread) */ - kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per thread) */ - kmp_int8 bt_set; /* internal control for whether blocktime is explicitly set */ - int blocktime; /* internal control for blocktime */ + int serial_nesting_level; /* corresponds to the value of the + th_team_serialized field */ + kmp_int8 nested; /* internal control for nested parallelism (per thread) */ + kmp_int8 dynamic; /* internal control for dynamic adjustment of threads (per + thread) */ + kmp_int8 + bt_set; /* internal control for whether blocktime is explicitly set */ + int blocktime; /* internal control for blocktime */ #if KMP_USE_MONITOR - int bt_intervals; /* internal control for blocktime intervals */ + int bt_intervals; /* internal control for blocktime intervals */ #endif - int nproc; /* internal control for #threads for next parallel region (per thread) */ - int max_active_levels; /* internal control for max_active_levels */ - kmp_r_sched_t sched; /* internal control for runtime schedule {sched,chunk} pair */ + int nproc; /* internal control for #threads for next parallel region (per + thread) */ + int max_active_levels; /* internal control for max_active_levels */ + kmp_r_sched_t + sched; /* internal control for runtime schedule {sched,chunk} pair */ #if OMP_40_ENABLED - kmp_proc_bind_t proc_bind; /* internal control for affinity */ - kmp_int32 default_device; /* internal control for default device */ + kmp_proc_bind_t proc_bind; /* internal control for affinity */ + kmp_int32 default_device; /* internal control for default device */ #endif // OMP_40_ENABLED - struct kmp_internal_control *next; + struct kmp_internal_control *next; } kmp_internal_control_t; -static inline void -copy_icvs( kmp_internal_control_t *dst, kmp_internal_control_t *src ) { - *dst = *src; +static inline void copy_icvs(kmp_internal_control_t *dst, + kmp_internal_control_t *src) { + *dst = *src; } /* Thread barrier needs volatile barrier fields */ typedef struct KMP_ALIGN_CACHE kmp_bstate { - // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all uses of it). - // It is not explicitly aligned below, because we *don't* want it to be padded -- instead, - // we fit b_go into the same cache line with th_fixed_icvs, enabling NGO cache lines - // stores in the hierarchical barrier. - kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread - // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with same NGO store - volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) - KMP_ALIGN_CACHE volatile kmp_uint64 b_arrived; // STATE => task reached synch point. - kmp_uint32 *skip_per_level; - kmp_uint32 my_level; - kmp_int32 parent_tid; - kmp_int32 old_tid; - kmp_uint32 depth; - struct kmp_bstate *parent_bar; - kmp_team_t *team; - kmp_uint64 leaf_state; - kmp_uint32 nproc; - kmp_uint8 base_leaf_kids; - kmp_uint8 leaf_kids; - kmp_uint8 offset; - kmp_uint8 wait_flag; - kmp_uint8 use_oncore_barrier; + // th_fixed_icvs is aligned by virtue of kmp_bstate being aligned (and all + // uses of it). It is not explicitly aligned below, because we *don't* want + // it to be padded -- instead, we fit b_go into the same cache line with + // th_fixed_icvs, enabling NGO cache lines stores in the hierarchical barrier. 
+ kmp_internal_control_t th_fixed_icvs; // Initial ICVs for the thread + // Tuck b_go into end of th_fixed_icvs cache line, so it can be stored with + // same NGO store + volatile kmp_uint64 b_go; // STATE => task should proceed (hierarchical) + KMP_ALIGN_CACHE volatile kmp_uint64 + b_arrived; // STATE => task reached synch point. + kmp_uint32 *skip_per_level; + kmp_uint32 my_level; + kmp_int32 parent_tid; + kmp_int32 old_tid; + kmp_uint32 depth; + struct kmp_bstate *parent_bar; + kmp_team_t *team; + kmp_uint64 leaf_state; + kmp_uint32 nproc; + kmp_uint8 base_leaf_kids; + kmp_uint8 leaf_kids; + kmp_uint8 offset; + kmp_uint8 wait_flag; + kmp_uint8 use_oncore_barrier; #if USE_DEBUGGER - // The following field is intended for the debugger solely. Only the worker thread itself accesses this - // field: the worker increases it by 1 when it arrives to a barrier. - KMP_ALIGN_CACHE kmp_uint b_worker_arrived; + // The following field is intended for the debugger solely. Only the worker + // thread itself accesses this field: the worker increases it by 1 when it + // arrives to a barrier. + KMP_ALIGN_CACHE kmp_uint b_worker_arrived; #endif /* USE_DEBUGGER */ } kmp_bstate_t; union KMP_ALIGN_CACHE kmp_barrier_union { - double b_align; /* use worst case alignment */ - char b_pad[ KMP_PAD(kmp_bstate_t, CACHE_LINE) ]; - kmp_bstate_t bb; + double b_align; /* use worst case alignment */ + char b_pad[KMP_PAD(kmp_bstate_t, CACHE_LINE)]; + kmp_bstate_t bb; }; typedef union kmp_barrier_union kmp_balign_t; /* Team barrier needs only non-volatile arrived counter */ union KMP_ALIGN_CACHE kmp_barrier_team_union { - double b_align; /* use worst case alignment */ - char b_pad[ CACHE_LINE ]; - struct { - kmp_uint64 b_arrived; /* STATE => task reached synch point. */ + double b_align; /* use worst case alignment */ + char b_pad[CACHE_LINE]; + struct { + kmp_uint64 b_arrived; /* STATE => task reached synch point. */ #if USE_DEBUGGER - // The following two fields are indended for the debugger solely. Only master of the team accesses - // these fields: the first one is increased by 1 when master arrives to a barrier, the - // second one is increased by one when all the threads arrived. - kmp_uint b_master_arrived; - kmp_uint b_team_arrived; -#endif - }; + // The following two fields are indended for the debugger solely. Only + // master of the team accesses these fields: the first one is increased by + // 1 when master arrives to a barrier, the second one is increased by one + // when all the threads arrived. + kmp_uint b_master_arrived; + kmp_uint b_team_arrived; +#endif + }; }; typedef union kmp_barrier_team_union kmp_balign_team_t; -/* - * Padding for Linux* OS pthreads condition variables and mutexes used to signal - * threads when a condition changes. This is to workaround an NPTL bug - * where padding was added to pthread_cond_t which caused the initialization - * routine to write outside of the structure if compiled on pre-NPTL threads. - */ - +/* Padding for Linux* OS pthreads condition variables and mutexes used to signal + threads when a condition changes. This is to workaround an NPTL bug where + padding was added to pthread_cond_t which caused the initialization routine + to write outside of the structure if compiled on pre-NPTL threads. */ #if KMP_OS_WINDOWS -typedef struct kmp_win32_mutex -{ - /* The Lock */ - CRITICAL_SECTION cs; +typedef struct kmp_win32_mutex { + /* The Lock */ + CRITICAL_SECTION cs; } kmp_win32_mutex_t; -typedef struct kmp_win32_cond -{ - /* Count of the number of waiters. 
*/ - int waiters_count_; +typedef struct kmp_win32_cond { + /* Count of the number of waiters. */ + int waiters_count_; - /* Serialize access to */ - kmp_win32_mutex_t waiters_count_lock_; + /* Serialize access to */ + kmp_win32_mutex_t waiters_count_lock_; - /* Number of threads to release via a or a */ - /* */ - int release_count_; + /* Number of threads to release via a or a */ + int release_count_; - /* Keeps track of the current "generation" so that we don't allow */ - /* one thread to steal all the "releases" from the broadcast. */ - int wait_generation_count_; + /* Keeps track of the current "generation" so that we don't allow */ + /* one thread to steal all the "releases" from the broadcast. */ + int wait_generation_count_; - /* A manual-reset event that's used to block and release waiting */ - /* threads. */ - HANDLE event_; + /* A manual-reset event that's used to block and release waiting threads. */ + HANDLE event_; } kmp_win32_cond_t; #endif #if KMP_OS_UNIX union KMP_ALIGN_CACHE kmp_cond_union { - double c_align; - char c_pad[ CACHE_LINE ]; - pthread_cond_t c_cond; + double c_align; + char c_pad[CACHE_LINE]; + pthread_cond_t c_cond; }; typedef union kmp_cond_union kmp_cond_align_t; union KMP_ALIGN_CACHE kmp_mutex_union { - double m_align; - char m_pad[ CACHE_LINE ]; - pthread_mutex_t m_mutex; + double m_align; + char m_pad[CACHE_LINE]; + pthread_mutex_t m_mutex; }; typedef union kmp_mutex_union kmp_mutex_align_t; @@ -1816,145 +1908,159 @@ typedef union kmp_mutex_union kmp_mutex_align_t; #endif /* KMP_OS_UNIX */ typedef struct kmp_desc_base { - void *ds_stackbase; - size_t ds_stacksize; - int ds_stackgrow; - kmp_thread_t ds_thread; - volatile int ds_tid; - int ds_gtid; + void *ds_stackbase; + size_t ds_stacksize; + int ds_stackgrow; + kmp_thread_t ds_thread; + volatile int ds_tid; + int ds_gtid; #if KMP_OS_WINDOWS - volatile int ds_alive; - DWORD ds_thread_id; - /* - ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. However, - debugger support (libomp_db) cannot work with handles, because they uncomparable. For - example, debugger requests info about thread with handle h. h is valid within debugger - process, and meaningless within debugee process. Even if h is duped by call to - DuplicateHandle(), so the result h' is valid within debugee process, but it is a *new* - handle which does *not* equal to any other handle in debugee... The only way to - compare handles is convert them to system-wide ids. GetThreadId() function is - available only in Longhorn and Server 2003. :-( In contrast, GetCurrentThreadId() is - available on all Windows* OS flavours (including Windows* 95). Thus, we have to get thread id by - call to GetCurrentThreadId() from within the thread and save it to let libomp_db - identify threads. - */ + volatile int ds_alive; + DWORD ds_thread_id; +/* ds_thread keeps thread handle on Windows* OS. It is enough for RTL purposes. + However, debugger support (libomp_db) cannot work with handles, because they + uncomparable. For example, debugger requests info about thread with handle h. + h is valid within debugger process, and meaningless within debugee process. + Even if h is duped by call to DuplicateHandle(), so the result h' is valid + within debugee process, but it is a *new* handle which does *not* equal to + any other handle in debugee... The only way to compare handles is convert + them to system-wide ids. GetThreadId() function is available only in + Longhorn and Server 2003. 
:-( In contrast, GetCurrentThreadId() is available + on all Windows* OS flavours (including Windows* 95). Thus, we have to get + thread id by call to GetCurrentThreadId() from within the thread and save it + to let libomp_db identify threads. */ #endif /* KMP_OS_WINDOWS */ } kmp_desc_base_t; typedef union KMP_ALIGN_CACHE kmp_desc { - double ds_align; /* use worst case alignment */ - char ds_pad[ KMP_PAD(kmp_desc_base_t, CACHE_LINE) ]; - kmp_desc_base_t ds; + double ds_align; /* use worst case alignment */ + char ds_pad[KMP_PAD(kmp_desc_base_t, CACHE_LINE)]; + kmp_desc_base_t ds; } kmp_desc_t; - typedef struct kmp_local { - volatile int this_construct; /* count of single's encountered by thread */ - void *reduce_data; + volatile int this_construct; /* count of single's encountered by thread */ + void *reduce_data; #if KMP_USE_BGET - void *bget_data; - void *bget_list; -#if ! USE_CMP_XCHG_FOR_BGET + void *bget_data; + void *bget_list; +#if !USE_CMP_XCHG_FOR_BGET #ifdef USE_QUEUING_LOCK_FOR_BGET - kmp_lock_t bget_lock; /* Lock for accessing bget free list */ + kmp_lock_t bget_lock; /* Lock for accessing bget free list */ #else - kmp_bootstrap_lock_t bget_lock; /* Lock for accessing bget free list */ - /* Must be bootstrap lock so we can use it at library shutdown */ + kmp_bootstrap_lock_t bget_lock; // Lock for accessing bget free list. Must be +// bootstrap lock so we can use it at library +// shutdown. #endif /* USE_LOCK_FOR_BGET */ #endif /* ! USE_CMP_XCHG_FOR_BGET */ #endif /* KMP_USE_BGET */ #ifdef BUILD_TV - struct tv_data *tv_data; + struct tv_data *tv_data; #endif - PACKED_REDUCTION_METHOD_T packed_reduction_method; /* stored by __kmpc_reduce*(), used by __kmpc_end_reduce*() */ + PACKED_REDUCTION_METHOD_T + packed_reduction_method; /* stored by __kmpc_reduce*(), used by + __kmpc_end_reduce*() */ } kmp_local_t; -#define KMP_CHECK_UPDATE(a, b) if ((a) != (b)) (a) = (b) -#define KMP_CHECK_UPDATE_SYNC(a, b) if ((a) != (b)) TCW_SYNC_PTR((a), (b)) - -#define get__blocktime( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) -#define get__bt_set( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) +#define KMP_CHECK_UPDATE(a, b) \ + if ((a) != (b)) \ + (a) = (b) +#define KMP_CHECK_UPDATE_SYNC(a, b) \ + if ((a) != (b)) \ + TCW_SYNC_PTR((a), (b)) + +#define get__blocktime(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) +#define get__bt_set(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) #if KMP_USE_MONITOR -#define get__bt_intervals( xteam, xtid ) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) +#define get__bt_intervals(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) #endif -#define get__nested_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) -#define get__dynamic_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) -#define get__nproc_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) -#define get__sched_2(xteam,xtid) ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) +#define get__nested_2(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nested) +#define get__dynamic_2(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.dynamic) +#define get__nproc_2(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.nproc) 
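/* Usage sketch (illustrative only; `team` and `tid` are hypothetical stand-ins
   for a valid kmp_team_t pointer and a thread index within that team): the
   get__*_2 accessors above simply dereference the ICV block cached on that
   thread's current task, and KMP_CHECK_UPDATE guards a store so a shared cache
   line is written only when the value actually changes:

     int nth = get__nproc_2(team, tid);      // nproc ICV for the next parallel region
     int bt  = get__blocktime(team, tid);    // per-thread blocktime ICV
     KMP_CHECK_UPDATE(team->t.t_nproc, nth); // t_nproc: team's cached thread count;
                                             // assign only if the value differs
*/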
+#define get__sched_2(xteam, xtid) \ + ((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.sched) -#define set__blocktime_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime ) = (xval) ) +#define set__blocktime_team(xteam, xtid, xval) \ + (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.blocktime) = \ + (xval)) #if KMP_USE_MONITOR -#define set__bt_intervals_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals ) = (xval) ) +#define set__bt_intervals_team(xteam, xtid, xval) \ + (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_intervals) = \ + (xval)) #endif -#define set__bt_set_team( xteam, xtid, xval ) \ - ( ( (xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set ) = (xval) ) +#define set__bt_set_team(xteam, xtid, xval) \ + (((xteam)->t.t_threads[(xtid)]->th.th_current_task->td_icvs.bt_set) = (xval)) +#define set__nested(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.nested) = (xval)) +#define get__nested(xthread) \ + (((xthread)->th.th_current_task->td_icvs.nested) ? (FTN_TRUE) : (FTN_FALSE)) -#define set__nested( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nested ) = (xval) ) -#define get__nested( xthread ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nested ) ? (FTN_TRUE) : (FTN_FALSE) ) +#define set__dynamic(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.dynamic) = (xval)) +#define get__dynamic(xthread) \ + (((xthread)->th.th_current_task->td_icvs.dynamic) ? (FTN_TRUE) : (FTN_FALSE)) -#define set__dynamic( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) = (xval) ) -#define get__dynamic( xthread ) \ - ( ( (xthread)->th.th_current_task->td_icvs.dynamic ) ? 
(FTN_TRUE) : (FTN_FALSE) ) +#define set__nproc(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.nproc) = (xval)) -#define set__nproc( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.nproc ) = (xval) ) +#define set__max_active_levels(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.max_active_levels) = (xval)) -#define set__max_active_levels( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.max_active_levels ) = (xval) ) - -#define set__sched( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.sched ) = (xval) ) +#define set__sched(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.sched) = (xval)) #if OMP_40_ENABLED -#define set__proc_bind( xthread, xval ) \ - ( ( (xthread)->th.th_current_task->td_icvs.proc_bind ) = (xval) ) -#define get__proc_bind( xthread ) \ - ( (xthread)->th.th_current_task->td_icvs.proc_bind ) +#define set__proc_bind(xthread, xval) \ + (((xthread)->th.th_current_task->td_icvs.proc_bind) = (xval)) +#define get__proc_bind(xthread) \ + ((xthread)->th.th_current_task->td_icvs.proc_bind) #endif /* OMP_40_ENABLED */ - -/* ------------------------------------------------------------------------ */ // OpenMP tasking data structures -// typedef enum kmp_tasking_mode { - tskm_immediate_exec = 0, - tskm_extra_barrier = 1, - tskm_task_teams = 2, - tskm_max = 2 + tskm_immediate_exec = 0, + tskm_extra_barrier = 1, + tskm_task_teams = 2, + tskm_max = 2 } kmp_tasking_mode_t; -extern kmp_tasking_mode_t __kmp_tasking_mode; /* determines how/when to execute tasks */ +extern kmp_tasking_mode_t + __kmp_tasking_mode; /* determines how/when to execute tasks */ extern kmp_int32 __kmp_task_stealing_constraint; #if OMP_40_ENABLED - extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if specified, defaults to 0 otherwise +extern kmp_int32 __kmp_default_device; // Set via OMP_DEFAULT_DEVICE if +// specified, defaults to 0 otherwise #endif #if OMP_45_ENABLED - extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if specified, defaults to 0 otherwise +extern kmp_int32 __kmp_max_task_priority; // Set via OMP_MAX_TASK_PRIORITY if +// specified, defaults to 0 otherwise #endif -/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with taskdata first */ -#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *) task) - 1) -#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *) (taskdata + 1) +/* NOTE: kmp_taskdata_t and kmp_task_t structures allocated in single block with + taskdata first */ +#define KMP_TASK_TO_TASKDATA(task) (((kmp_taskdata_t *)task) - 1) +#define KMP_TASKDATA_TO_TASK(taskdata) (kmp_task_t *)(taskdata + 1) -// The tt_found_tasks flag is a signal to all threads in the team that tasks were spawned and -// queued since the previous barrier release. -#define KMP_TASKING_ENABLED(task_team) \ - (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) +// The tt_found_tasks flag is a signal to all threads in the team that tasks +// were spawned and queued since the previous barrier release. +#define KMP_TASKING_ENABLED(task_team) \ + (TCR_SYNC_4((task_team)->tt.tt_found_tasks) == TRUE) /*! @ingroup BASIC_TYPES @{ @@ -1962,33 +2068,37 @@ extern kmp_int32 __kmp_task_stealing_constraint; /*! 
*/ -typedef kmp_int32 (* kmp_routine_entry_t)( kmp_int32, void * ); +typedef kmp_int32 (*kmp_routine_entry_t)(kmp_int32, void *); #if OMP_40_ENABLED || OMP_45_ENABLED typedef union kmp_cmplrdata { #if OMP_45_ENABLED - kmp_int32 priority; /**< priority specified by user for the task */ + kmp_int32 priority; /**< priority specified by user for the task */ #endif // OMP_45_ENABLED #if OMP_40_ENABLED - kmp_routine_entry_t destructors; /* pointer to function to invoke deconstructors of firstprivate C++ objects */ + kmp_routine_entry_t + destructors; /* pointer to function to invoke deconstructors of + firstprivate C++ objects */ #endif // OMP_40_ENABLED - /* future data */ + /* future data */ } kmp_cmplrdata_t; #endif /* sizeof_kmp_task_t passed as arg to kmpc_omp_task call */ /*! */ -typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ - void * shareds; /**< pointer to block of pointers to shared vars */ - kmp_routine_entry_t routine; /**< pointer to routine to call for executing task */ - kmp_int32 part_id; /**< part id for the task */ +typedef struct kmp_task { /* GEH: Shouldn't this be aligned somehow? */ + void *shareds; /**< pointer to block of pointers to shared vars */ + kmp_routine_entry_t + routine; /**< pointer to routine to call for executing task */ + kmp_int32 part_id; /**< part id for the task */ #if OMP_40_ENABLED || OMP_45_ENABLED - kmp_cmplrdata_t data1; /* Two known optional additions: destructors and priority */ - kmp_cmplrdata_t data2; /* Process destructors first, priority second */ - /* future data */ + kmp_cmplrdata_t + data1; /* Two known optional additions: destructors and priority */ + kmp_cmplrdata_t data2; /* Process destructors first, priority second */ +/* future data */ #endif - /* private vars */ + /* private vars */ } kmp_task_t; /*! 
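// Layout sketch (illustrative only, not part of this patch): kmp_taskdata_t and
// the compiler-visible kmp_task_t are carved out of a single allocation, with
// the taskdata bookkeeping first, so the two conversion macros above are just
// inverse pointer adjustments:
//
//   [ kmp_taskdata_t | kmp_task_t (+ private part) | ... rest of the block ]
//     ^ taskdata       ^ task == KMP_TASKDATA_TO_TASK(taskdata)
//                        taskdata == KMP_TASK_TO_TASKDATA(task)
//
// so for any task allocated this way,
// KMP_TASKDATA_TO_TASK(KMP_TASK_TO_TASKDATA(task)) == task.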
@@ -1997,69 +2107,69 @@ typedef struct kmp_task { /* GEH: Shouldn't this be aligned so #if OMP_40_ENABLED typedef struct kmp_taskgroup { - kmp_uint32 count; // number of allocated and not yet complete tasks - kmp_int32 cancel_request; // request for cancellation of this taskgroup - struct kmp_taskgroup *parent; // parent taskgroup + kmp_uint32 count; // number of allocated and not yet complete tasks + kmp_int32 cancel_request; // request for cancellation of this taskgroup + struct kmp_taskgroup *parent; // parent taskgroup // TODO: change to OMP_50_ENABLED, need to change build tools for this to work #if OMP_45_ENABLED - // Block of data to perform task reduction - void *reduce_data; // reduction related info - kmp_int32 reduce_num_data; // number of data items to reduce + // Block of data to perform task reduction + void *reduce_data; // reduction related info + kmp_int32 reduce_num_data; // number of data items to reduce #endif } kmp_taskgroup_t; // forward declarations -typedef union kmp_depnode kmp_depnode_t; -typedef struct kmp_depnode_list kmp_depnode_list_t; +typedef union kmp_depnode kmp_depnode_t; +typedef struct kmp_depnode_list kmp_depnode_list_t; typedef struct kmp_dephash_entry kmp_dephash_entry_t; typedef struct kmp_depend_info { - kmp_intptr_t base_addr; - size_t len; - struct { - bool in:1; - bool out:1; - } flags; + kmp_intptr_t base_addr; + size_t len; + struct { + bool in : 1; + bool out : 1; + } flags; } kmp_depend_info_t; struct kmp_depnode_list { - kmp_depnode_t * node; - kmp_depnode_list_t * next; + kmp_depnode_t *node; + kmp_depnode_list_t *next; }; typedef struct kmp_base_depnode { - kmp_depnode_list_t * successors; - kmp_task_t * task; + kmp_depnode_list_t *successors; + kmp_task_t *task; - kmp_lock_t lock; + kmp_lock_t lock; #if KMP_SUPPORT_GRAPH_OUTPUT - kmp_uint32 id; + kmp_uint32 id; #endif - volatile kmp_int32 npredecessors; - volatile kmp_int32 nrefs; + volatile kmp_int32 npredecessors; + volatile kmp_int32 nrefs; } kmp_base_depnode_t; union KMP_ALIGN_CACHE kmp_depnode { - double dn_align; /* use worst case alignment */ - char dn_pad[ KMP_PAD(kmp_base_depnode_t, CACHE_LINE) ]; - kmp_base_depnode_t dn; + double dn_align; /* use worst case alignment */ + char dn_pad[KMP_PAD(kmp_base_depnode_t, CACHE_LINE)]; + kmp_base_depnode_t dn; }; struct kmp_dephash_entry { - kmp_intptr_t addr; - kmp_depnode_t * last_out; - kmp_depnode_list_t * last_ins; - kmp_dephash_entry_t * next_in_bucket; + kmp_intptr_t addr; + kmp_depnode_t *last_out; + kmp_depnode_list_t *last_ins; + kmp_dephash_entry_t *next_in_bucket; }; typedef struct kmp_dephash { - kmp_dephash_entry_t ** buckets; - size_t size; + kmp_dephash_entry_t **buckets; + size_t size; #ifdef KMP_DEBUG - kmp_uint32 nelements; - kmp_uint32 nconflicts; + kmp_uint32 nelements; + kmp_uint32 nconflicts; #endif } kmp_dephash_t; @@ -2069,556 +2179,583 @@ typedef struct kmp_dephash { /* Tied Task stack definitions */ typedef struct kmp_stack_block { - kmp_taskdata_t * sb_block[ TASK_STACK_BLOCK_SIZE ]; - struct kmp_stack_block * sb_next; - struct kmp_stack_block * sb_prev; + kmp_taskdata_t *sb_block[TASK_STACK_BLOCK_SIZE]; + struct kmp_stack_block *sb_next; + struct kmp_stack_block *sb_prev; } kmp_stack_block_t; typedef struct kmp_task_stack { - kmp_stack_block_t ts_first_block; // first block of stack entries - kmp_taskdata_t ** ts_top; // pointer to the top of stack - kmp_int32 ts_entries; // number of entries on the stack + kmp_stack_block_t ts_first_block; // first block of stack entries + kmp_taskdata_t **ts_top; // pointer 
to the top of stack + kmp_int32 ts_entries; // number of entries on the stack } kmp_task_stack_t; #endif // BUILD_TIED_TASK_STACK -typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ - /* Compiler flags */ /* Total compiler flags must be 16 bits */ - unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ - unsigned final : 1; /* task is final(1) so execute immediately */ - unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 code path */ +typedef struct kmp_tasking_flags { /* Total struct must be exactly 32 bits */ + /* Compiler flags */ /* Total compiler flags must be 16 bits */ + unsigned tiedness : 1; /* task is either tied (1) or untied (0) */ + unsigned final : 1; /* task is final(1) so execute immediately */ + unsigned merged_if0 : 1; /* no __kmpc_task_{begin/complete}_if0 calls in if0 + code path */ #if OMP_40_ENABLED - unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to invoke destructors from the runtime */ + unsigned destructors_thunk : 1; /* set if the compiler creates a thunk to + invoke destructors from the runtime */ #if OMP_45_ENABLED - unsigned proxy : 1; /* task is a proxy task (it will be executed outside the context of the RTL) */ - unsigned priority_specified :1; /* set if the compiler provides priority setting for the task */ - unsigned reserved : 10; /* reserved for compiler use */ + unsigned proxy : 1; /* task is a proxy task (it will be executed outside the + context of the RTL) */ + unsigned priority_specified : 1; /* set if the compiler provides priority + setting for the task */ + unsigned reserved : 10; /* reserved for compiler use */ #else - unsigned reserved : 12; /* reserved for compiler use */ + unsigned reserved : 12; /* reserved for compiler use */ #endif #else // OMP_40_ENABLED - unsigned reserved : 13; /* reserved for compiler use */ + unsigned reserved : 13; /* reserved for compiler use */ #endif // OMP_40_ENABLED - /* Library flags */ /* Total library flags must be 16 bits */ - unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ - unsigned task_serial : 1; /* this task is executed immediately (1) or deferred (0) */ - unsigned tasking_ser : 1; /* all tasks in team are either executed immediately (1) or may be deferred (0) */ - unsigned team_serial : 1; /* entire team is serial (1) [1 thread] or parallel (0) [>= 2 threads] */ - /* If either team_serial or tasking_ser is set, task team may be NULL */ - /* Task State Flags: */ - unsigned started : 1; /* 1==started, 0==not started */ - unsigned executing : 1; /* 1==executing, 0==not executing */ - unsigned complete : 1; /* 1==complete, 0==not complete */ - unsigned freed : 1; /* 1==freed, 0==allocateed */ - unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ - unsigned reserved31 : 7; /* reserved for library use */ + /* Library flags */ /* Total library flags must be 16 bits */ + unsigned tasktype : 1; /* task is either explicit(1) or implicit (0) */ + unsigned task_serial : 1; // task is executed immediately (1) or deferred (0) + unsigned tasking_ser : 1; // all tasks in team are either executed immediately + // (1) or may be deferred (0) + unsigned team_serial : 1; // entire team is serial (1) [1 thread] or parallel + // (0) [>= 2 threads] + /* If either team_serial or tasking_ser is set, task team may be NULL */ + /* Task State Flags: */ + unsigned started : 1; /* 1==started, 0==not started */ + unsigned executing : 1; /* 1==executing, 0==not executing */ + unsigned complete : 1; /* 1==complete, 
0==not complete */ + unsigned freed : 1; /* 1==freed, 0==allocateed */ + unsigned native : 1; /* 1==gcc-compiled task, 0==intel */ + unsigned reserved31 : 7; /* reserved for library use */ } kmp_tasking_flags_t; - -struct kmp_taskdata { /* aligned during dynamic allocation */ - kmp_int32 td_task_id; /* id, assigned by debugger */ - kmp_tasking_flags_t td_flags; /* task flags */ - kmp_team_t * td_team; /* team for this task */ - kmp_info_p * td_alloc_thread; /* thread that allocated data structures */ - /* Currently not used except for perhaps IDB */ - kmp_taskdata_t * td_parent; /* parent task */ - kmp_int32 td_level; /* task nesting level */ - kmp_int32 td_untied_count; /* untied task active parts counter */ - ident_t * td_ident; /* task identifier */ - // Taskwait data. - ident_t * td_taskwait_ident; - kmp_uint32 td_taskwait_counter; - kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ - KMP_ALIGN_CACHE kmp_internal_control_t td_icvs; /* Internal control variables for the task */ - KMP_ALIGN_CACHE volatile kmp_uint32 td_allocated_child_tasks; /* Child tasks (+ current task) not yet deallocated */ - volatile kmp_uint32 td_incomplete_child_tasks; /* Child tasks not yet complete */ +struct kmp_taskdata { /* aligned during dynamic allocation */ + kmp_int32 td_task_id; /* id, assigned by debugger */ + kmp_tasking_flags_t td_flags; /* task flags */ + kmp_team_t *td_team; /* team for this task */ + kmp_info_p *td_alloc_thread; /* thread that allocated data structures */ + /* Currently not used except for perhaps IDB */ + kmp_taskdata_t *td_parent; /* parent task */ + kmp_int32 td_level; /* task nesting level */ + kmp_int32 td_untied_count; /* untied task active parts counter */ + ident_t *td_ident; /* task identifier */ + // Taskwait data. + ident_t *td_taskwait_ident; + kmp_uint32 td_taskwait_counter; + kmp_int32 td_taskwait_thread; /* gtid + 1 of thread encountered taskwait */ + KMP_ALIGN_CACHE kmp_internal_control_t + td_icvs; /* Internal control variables for the task */ + KMP_ALIGN_CACHE volatile kmp_uint32 + td_allocated_child_tasks; /* Child tasks (+ current task) not yet + deallocated */ + volatile kmp_uint32 + td_incomplete_child_tasks; /* Child tasks not yet complete */ #if OMP_40_ENABLED - kmp_taskgroup_t * td_taskgroup; // Each task keeps pointer to its current taskgroup - kmp_dephash_t * td_dephash; // Dependencies for children tasks are tracked from here - kmp_depnode_t * td_depnode; // Pointer to graph node if this task has dependencies + kmp_taskgroup_t + *td_taskgroup; // Each task keeps pointer to its current taskgroup + kmp_dephash_t + *td_dephash; // Dependencies for children tasks are tracked from here + kmp_depnode_t + *td_depnode; // Pointer to graph node if this task has dependencies #endif #if OMPT_SUPPORT - ompt_task_info_t ompt_task_info; + ompt_task_info_t ompt_task_info; #endif #if OMP_45_ENABLED - kmp_task_team_t * td_task_team; - kmp_int32 td_size_alloc; // The size of task structure, including shareds etc. + kmp_task_team_t *td_task_team; + kmp_int32 td_size_alloc; // The size of task structure, including shareds etc. #endif }; // struct kmp_taskdata // Make sure padding above worked -KMP_BUILD_ASSERT( sizeof(kmp_taskdata_t) % sizeof(void *) == 0 ); +KMP_BUILD_ASSERT(sizeof(kmp_taskdata_t) % sizeof(void *) == 0); // Data for task team but per thread typedef struct kmp_base_thread_data { - kmp_info_p * td_thr; // Pointer back to thread info - // Used only in __kmp_execute_tasks_template, maybe not avail until task is queued? 
- kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque - kmp_taskdata_t ** td_deque; // Deque of tasks encountered by td_thr, dynamically allocated - kmp_int32 td_deque_size; // Size of deck - kmp_uint32 td_deque_head; // Head of deque (will wrap) - kmp_uint32 td_deque_tail; // Tail of deque (will wrap) - kmp_int32 td_deque_ntasks; // Number of tasks in deque - // GEH: shouldn't this be volatile since used in while-spin? - kmp_int32 td_deque_last_stolen; // Thread number of last successful steal + kmp_info_p *td_thr; // Pointer back to thread info + // Used only in __kmp_execute_tasks_template, maybe not avail until task is + // queued? + kmp_bootstrap_lock_t td_deque_lock; // Lock for accessing deque + kmp_taskdata_t * + *td_deque; // Deque of tasks encountered by td_thr, dynamically allocated + kmp_int32 td_deque_size; // Size of deck + kmp_uint32 td_deque_head; // Head of deque (will wrap) + kmp_uint32 td_deque_tail; // Tail of deque (will wrap) + kmp_int32 td_deque_ntasks; // Number of tasks in deque + // GEH: shouldn't this be volatile since used in while-spin? + kmp_int32 td_deque_last_stolen; // Thread number of last successful steal #ifdef BUILD_TIED_TASK_STACK - kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task scheduling constraint + kmp_task_stack_t td_susp_tied_tasks; // Stack of suspended tied tasks for task +// scheduling constraint #endif // BUILD_TIED_TASK_STACK } kmp_base_thread_data_t; -#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE -#define INITIAL_TASK_DEQUE_SIZE ( 1 << TASK_DEQUE_BITS ) +#define TASK_DEQUE_BITS 8 // Used solely to define INITIAL_TASK_DEQUE_SIZE +#define INITIAL_TASK_DEQUE_SIZE (1 << TASK_DEQUE_BITS) -#define TASK_DEQUE_SIZE(td) ((td).td_deque_size) -#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1) +#define TASK_DEQUE_SIZE(td) ((td).td_deque_size) +#define TASK_DEQUE_MASK(td) ((td).td_deque_size - 1) typedef union KMP_ALIGN_CACHE kmp_thread_data { - kmp_base_thread_data_t td; - double td_align; /* use worst case alignment */ - char td_pad[ KMP_PAD(kmp_base_thread_data_t, CACHE_LINE) ]; + kmp_base_thread_data_t td; + double td_align; /* use worst case alignment */ + char td_pad[KMP_PAD(kmp_base_thread_data_t, CACHE_LINE)]; } kmp_thread_data_t; - // Data for task teams which are used when tasking is enabled for the team typedef struct kmp_base_task_team { - kmp_bootstrap_lock_t tt_threads_lock; /* Lock used to allocate per-thread part of task team */ - /* must be bootstrap lock since used at library shutdown*/ - kmp_task_team_t * tt_next; /* For linking the task team free list */ - kmp_thread_data_t * tt_threads_data; /* Array of per-thread structures for task team */ - /* Data survives task team deallocation */ - kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while executing this team? */ - /* TRUE means tt_threads_data is set up and initialized */ - kmp_int32 tt_nproc; /* #threads in team */ - kmp_int32 tt_max_threads; /* number of entries allocated for threads_data array */ + kmp_bootstrap_lock_t + tt_threads_lock; /* Lock used to allocate per-thread part of task team */ + /* must be bootstrap lock since used at library shutdown*/ + kmp_task_team_t *tt_next; /* For linking the task team free list */ + kmp_thread_data_t + *tt_threads_data; /* Array of per-thread structures for task team */ + /* Data survives task team deallocation */ + kmp_int32 tt_found_tasks; /* Have we found tasks and queued them while + executing this team? 
*/ + /* TRUE means tt_threads_data is set up and initialized */ + kmp_int32 tt_nproc; /* #threads in team */ + kmp_int32 + tt_max_threads; /* number of entries allocated for threads_data array */ #if OMP_45_ENABLED - kmp_int32 tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ + kmp_int32 + tt_found_proxy_tasks; /* Have we found proxy tasks since last barrier */ #endif - KMP_ALIGN_CACHE - volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */ + KMP_ALIGN_CACHE + volatile kmp_uint32 tt_unfinished_threads; /* #threads still active */ - KMP_ALIGN_CACHE - volatile kmp_uint32 tt_active; /* is the team still actively executing tasks */ + KMP_ALIGN_CACHE + volatile kmp_uint32 + tt_active; /* is the team still actively executing tasks */ } kmp_base_task_team_t; union KMP_ALIGN_CACHE kmp_task_team { - kmp_base_task_team_t tt; - double tt_align; /* use worst case alignment */ - char tt_pad[ KMP_PAD(kmp_base_task_team_t, CACHE_LINE) ]; + kmp_base_task_team_t tt; + double tt_align; /* use worst case alignment */ + char tt_pad[KMP_PAD(kmp_base_task_team_t, CACHE_LINE)]; }; -#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) -// Free lists keep same-size free memory slots for fast memory allocation routines +#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5) +// Free lists keep same-size free memory slots for fast memory allocation +// routines typedef struct kmp_free_list { - void *th_free_list_self; // Self-allocated tasks free list - void *th_free_list_sync; // Self-allocated tasks stolen/returned by other threads - void *th_free_list_other; // Non-self free list (to be returned to owner's sync list) + void *th_free_list_self; // Self-allocated tasks free list + void *th_free_list_sync; // Self-allocated tasks stolen/returned by other + // threads + void *th_free_list_other; // Non-self free list (to be returned to owner's + // sync list) } kmp_free_list_t; #endif #if KMP_NESTED_HOT_TEAMS -// Hot teams array keeps hot teams and their sizes for given thread. -// Hot teams are not put in teams pool, and they don't put threads in threads pool. +// Hot teams array keeps hot teams and their sizes for given thread. Hot teams +// are not put in teams pool, and they don't put threads in threads pool. typedef struct kmp_hot_team_ptr { - kmp_team_p *hot_team; // pointer to hot_team of given nesting level - kmp_int32 hot_team_nth; // number of threads allocated for the hot_team + kmp_team_p *hot_team; // pointer to hot_team of given nesting level + kmp_int32 hot_team_nth; // number of threads allocated for the hot_team } kmp_hot_team_ptr_t; #endif #if OMP_40_ENABLED typedef struct kmp_teams_size { - kmp_int32 nteams; // number of teams in a league - kmp_int32 nth; // number of threads in each team of the league + kmp_int32 nteams; // number of teams in a league + kmp_int32 nth; // number of threads in each team of the league } kmp_teams_size_t; #endif -/* ------------------------------------------------------------------------ */ // OpenMP thread data structures -// typedef struct KMP_ALIGN_CACHE kmp_base_info { -/* - * Start with the readonly data which is cache aligned and padded. - * this is written before the thread starts working by the master. 
- * (uber masters may update themselves later) - * (usage does not consider serialized regions) - */ - kmp_desc_t th_info; - kmp_team_p *th_team; /* team we belong to */ - kmp_root_p *th_root; /* pointer to root of task hierarchy */ - kmp_info_p *th_next_pool; /* next available thread in the pool */ - kmp_disp_t *th_dispatch; /* thread's dispatch data */ - int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ - - /* The following are cached from the team info structure */ - /* TODO use these in more places as determined to be needed via profiling */ - int th_team_nproc; /* number of threads in a team */ - kmp_info_p *th_team_master; /* the team's master thread */ - int th_team_serialized; /* team is serialized */ + /* Start with the readonly data which is cache aligned and padded. This is + written before the thread starts working by the master. Uber masters may + update themselves later. Usage does not consider serialized regions. */ + kmp_desc_t th_info; + kmp_team_p *th_team; /* team we belong to */ + kmp_root_p *th_root; /* pointer to root of task hierarchy */ + kmp_info_p *th_next_pool; /* next available thread in the pool */ + kmp_disp_t *th_dispatch; /* thread's dispatch data */ + int th_in_pool; /* in thread pool (32 bits for TCR/TCW) */ + + /* The following are cached from the team info structure */ + /* TODO use these in more places as determined to be needed via profiling */ + int th_team_nproc; /* number of threads in a team */ + kmp_info_p *th_team_master; /* the team's master thread */ + int th_team_serialized; /* team is serialized */ #if OMP_40_ENABLED - microtask_t th_teams_microtask; /* save entry address for teams construct */ - int th_teams_level; /* save initial level of teams construct */ - /* it is 0 on device but may be any on host */ + microtask_t th_teams_microtask; /* save entry address for teams construct */ + int th_teams_level; /* save initial level of teams construct */ +/* it is 0 on device but may be any on host */ #endif - /* The blocktime info is copied from the team struct to the thread sruct */ - /* at the start of a barrier, and the values stored in the team are used */ - /* at points in the code where the team struct is no longer guaranteed */ - /* to exist (from the POV of worker threads). */ +/* The blocktime info is copied from the team struct to the thread sruct */ +/* at the start of a barrier, and the values stored in the team are used */ +/* at points in the code where the team struct is no longer guaranteed */ +/* to exist (from the POV of worker threads). 
*/ #if KMP_USE_MONITOR - int th_team_bt_intervals; - int th_team_bt_set; + int th_team_bt_intervals; + int th_team_bt_set; #else - kmp_uint64 th_team_bt_intervals; + kmp_uint64 th_team_bt_intervals; #endif #if KMP_AFFINITY_SUPPORTED - kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ + kmp_affin_mask_t *th_affin_mask; /* thread's current affinity mask */ #endif -/* - * The data set by the master at reinit, then R/W by the worker - */ - KMP_ALIGN_CACHE int th_set_nproc; /* if > 0, then only use this request for the next fork */ + /* The data set by the master at reinit, then R/W by the worker */ + KMP_ALIGN_CACHE int + th_set_nproc; /* if > 0, then only use this request for the next fork */ #if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ + kmp_hot_team_ptr_t *th_hot_teams; /* array of hot teams */ #endif #if OMP_40_ENABLED - kmp_proc_bind_t th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ - kmp_teams_size_t th_teams_size; /* number of teams/threads in teams construct */ -# if KMP_AFFINITY_SUPPORTED - int th_current_place; /* place currently bound to */ - int th_new_place; /* place to bind to in par reg */ - int th_first_place; /* first place in partition */ - int th_last_place; /* last place in partition */ -# endif + kmp_proc_bind_t + th_set_proc_bind; /* if != proc_bind_default, use request for next fork */ + kmp_teams_size_t + th_teams_size; /* number of teams/threads in teams construct */ +#if KMP_AFFINITY_SUPPORTED + int th_current_place; /* place currently bound to */ + int th_new_place; /* place to bind to in par reg */ + int th_first_place; /* first place in partition */ + int th_last_place; /* last place in partition */ +#endif #endif #if USE_ITT_BUILD - kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ - kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ - kmp_uint64 th_frame_time; /* frame timestamp */ + kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ + kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ + kmp_uint64 th_frame_time; /* frame timestamp */ #endif /* USE_ITT_BUILD */ - kmp_local_t th_local; - struct private_common *th_pri_head; + kmp_local_t th_local; + struct private_common *th_pri_head; -/* - * Now the data only used by the worker (after initial allocation) - */ - /* TODO the first serial team should actually be stored in the info_t - * structure. this will help reduce initial allocation overhead */ - KMP_ALIGN_CACHE kmp_team_p *th_serial_team; /*serialized team held in reserve*/ + /* Now the data only used by the worker (after initial allocation) */ + /* TODO the first serial team should actually be stored in the info_t + structure. 
this will help reduce initial allocation overhead */ + KMP_ALIGN_CACHE kmp_team_p + *th_serial_team; /*serialized team held in reserve*/ #if OMPT_SUPPORT - ompt_thread_info_t ompt_thread_info; + ompt_thread_info_t ompt_thread_info; #endif -/* The following are also read by the master during reinit */ - struct common_table *th_pri_common; + /* The following are also read by the master during reinit */ + struct common_table *th_pri_common; - volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ - /* while awaiting queuing lock acquire */ + volatile kmp_uint32 th_spin_here; /* thread-local location for spinning */ + /* while awaiting queuing lock acquire */ - volatile void *th_sleep_loc; // this points at a kmp_flag + volatile void *th_sleep_loc; // this points at a kmp_flag - ident_t *th_ident; - unsigned th_x; // Random number generator data - unsigned th_a; // Random number generator data + ident_t *th_ident; + unsigned th_x; // Random number generator data + unsigned th_a; // Random number generator data -/* - * Tasking-related data for the thread - */ - kmp_task_team_t * th_task_team; // Task team struct - kmp_taskdata_t * th_current_task; // Innermost Task being executed - kmp_uint8 th_task_state; // alternating 0/1 for task team identification - kmp_uint8 * th_task_state_memo_stack; // Stack holding memos of th_task_state at nested levels - kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack - kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack - kmp_uint32 th_reap_state; // Non-zero indicates thread is not - // tasking, thus safe to reap - - /* - * More stuff for keeping track of active/sleeping threads - * (this part is written by the worker thread) - */ - kmp_uint8 th_active_in_pool; // included in count of - // #active threads in pool - int th_active; // ! sleeping - // 32 bits for TCR/TCW - - struct cons_header * th_cons; // used for consistency check + /* Tasking-related data for the thread */ + kmp_task_team_t *th_task_team; // Task team struct + kmp_taskdata_t *th_current_task; // Innermost Task being executed + kmp_uint8 th_task_state; // alternating 0/1 for task team identification + kmp_uint8 *th_task_state_memo_stack; // Stack holding memos of th_task_state + // at nested levels + kmp_uint32 th_task_state_top; // Top element of th_task_state_memo_stack + kmp_uint32 th_task_state_stack_sz; // Size of th_task_state_memo_stack + kmp_uint32 th_reap_state; // Non-zero indicates thread is not + // tasking, thus safe to reap -/* - * Add the syncronizing data which is cache aligned and padded. - */ - KMP_ALIGN_CACHE kmp_balign_t th_bar[ bs_last_barrier ]; + /* More stuff for keeping track of active/sleeping threads (this part is + written by the worker thread) */ + kmp_uint8 th_active_in_pool; // included in count of #active threads in pool + int th_active; // ! sleeping; 32 bits for TCR/TCW + struct cons_header *th_cons; // used for consistency check - KMP_ALIGN_CACHE volatile kmp_int32 th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ + /* Add the syncronizing data which is cache aligned and padded. 
*/ + KMP_ALIGN_CACHE kmp_balign_t th_bar[bs_last_barrier]; -#if ( USE_FAST_MEMORY == 3 ) || ( USE_FAST_MEMORY == 5 ) - #define NUM_LISTS 4 - kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory allocation routines + KMP_ALIGN_CACHE volatile kmp_int32 + th_next_waiting; /* gtid+1 of next thread on lock wait queue, 0 if none */ + +#if (USE_FAST_MEMORY == 3) || (USE_FAST_MEMORY == 5) +#define NUM_LISTS 4 + kmp_free_list_t th_free_lists[NUM_LISTS]; // Free lists for fast memory +// allocation routines #endif #if KMP_OS_WINDOWS - kmp_win32_cond_t th_suspend_cv; - kmp_win32_mutex_t th_suspend_mx; - int th_suspend_init; + kmp_win32_cond_t th_suspend_cv; + kmp_win32_mutex_t th_suspend_mx; + int th_suspend_init; #endif #if KMP_OS_UNIX - kmp_cond_align_t th_suspend_cv; - kmp_mutex_align_t th_suspend_mx; - int th_suspend_init_count; + kmp_cond_align_t th_suspend_cv; + kmp_mutex_align_t th_suspend_mx; + int th_suspend_init_count; #endif #if USE_ITT_BUILD - kmp_itt_mark_t th_itt_mark_single; - // alignment ??? + kmp_itt_mark_t th_itt_mark_single; +// alignment ??? #endif /* USE_ITT_BUILD */ #if KMP_STATS_ENABLED - kmp_stats_list* th_stats; + kmp_stats_list *th_stats; #endif } kmp_base_info_t; typedef union KMP_ALIGN_CACHE kmp_info { - double th_align; /* use worst case alignment */ - char th_pad[ KMP_PAD(kmp_base_info_t, CACHE_LINE) ]; - kmp_base_info_t th; + double th_align; /* use worst case alignment */ + char th_pad[KMP_PAD(kmp_base_info_t, CACHE_LINE)]; + kmp_base_info_t th; } kmp_info_t; -/* ------------------------------------------------------------------------ */ // OpenMP thread team data structures -// -typedef struct kmp_base_data { - volatile kmp_uint32 t_value; -} kmp_base_data_t; + +typedef struct kmp_base_data { volatile kmp_uint32 t_value; } kmp_base_data_t; typedef union KMP_ALIGN_CACHE kmp_sleep_team { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; + double dt_align; /* use worst case alignment */ + char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; + kmp_base_data_t dt; } kmp_sleep_team_t; typedef union KMP_ALIGN_CACHE kmp_ordered_team { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; + double dt_align; /* use worst case alignment */ + char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; + kmp_base_data_t dt; } kmp_ordered_team_t; -typedef int (*launch_t)( int gtid ); +typedef int (*launch_t)(int gtid); /* Minimum number of ARGV entries to malloc if necessary */ -#define KMP_MIN_MALLOC_ARGV_ENTRIES 100 +#define KMP_MIN_MALLOC_ARGV_ENTRIES 100 -// Set up how many argv pointers will fit in cache lines containing t_inline_argv. Historically, we -// have supported at least 96 bytes. Using a larger value for more space between the master write/worker -// read section and read/write by all section seems to buy more performance on EPCC PARALLEL. +// Set up how many argv pointers will fit in cache lines containing +// t_inline_argv. Historically, we have supported at least 96 bytes. Using a +// larger value for more space between the master write/worker read section and +// read/write by all section seems to buy more performance on EPCC PARALLEL. 
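(Illustrative aside, not from the patch.) The comment above explains why t_inline_argv is sized in whole cache lines; the KMP_INLINE_ARGV_BYTES / KMP_INLINE_ARGV_ENTRIES macros just below do the arithmetic. As a hedged worked example, assuming CACHE_LINE == 64, KMP_PTR_SKIP == sizeof(void *) == 8, and the usual 4/1/2/4-byte widths for int, kmp_int8, kmp_int16 and kmp_uint32 on a 64-bit x86 build, the x86/x86_64 branch gives 4*64 - ((3*8 + 2*4 + 2*1 + 2 + 4) % 64) = 256 - 40 = 216 bytes, i.e. 216 / 8 = 27 inline argv entries. The standalone C program below merely reproduces that arithmetic under those assumptions; it is not runtime code.

#include <stdio.h>

int main(void) {
  /* Assumed values; the real macros pull these from the runtime headers. */
  const int cache_line = 64;                /* CACHE_LINE */
  const int ptr_skip = (int)sizeof(void *); /* KMP_PTR_SKIP; 8 on a 64-bit host */
  /* 3 pointers + 2 int + 2 kmp_int8 + 1 kmp_int16 + 1 kmp_uint32 */
  const int header = 3 * ptr_skip + 2 * 4 + 2 * 1 + 2 + 4;  /* 40 on a 64-bit host */
  const int bytes = 4 * cache_line - (header % cache_line); /* 216 */
  printf("inline argv: %d bytes, %d entries\n", bytes, bytes / ptr_skip); /* 216, 27 */
  return 0;
}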
#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define KMP_INLINE_ARGV_BYTES ( 4 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + sizeof(kmp_int16) + sizeof(kmp_uint32) ) % CACHE_LINE ) ) +#define KMP_INLINE_ARGV_BYTES \ + (4 * CACHE_LINE - \ + ((3 * KMP_PTR_SKIP + 2 * sizeof(int) + 2 * sizeof(kmp_int8) + \ + sizeof(kmp_int16) + sizeof(kmp_uint32)) % \ + CACHE_LINE)) #else -# define KMP_INLINE_ARGV_BYTES ( 2 * CACHE_LINE - ( ( 3 * KMP_PTR_SKIP + 2 * sizeof(int) ) % CACHE_LINE ) ) +#define KMP_INLINE_ARGV_BYTES \ + (2 * CACHE_LINE - ((3 * KMP_PTR_SKIP + 2 * sizeof(int)) % CACHE_LINE)) #endif -#define KMP_INLINE_ARGV_ENTRIES (int)( KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP ) +#define KMP_INLINE_ARGV_ENTRIES (int)(KMP_INLINE_ARGV_BYTES / KMP_PTR_SKIP) typedef struct KMP_ALIGN_CACHE kmp_base_team { - // Synchronization Data --------------------------------------------------------------------------------- - KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; - kmp_balign_team_t t_bar[ bs_last_barrier ]; - volatile int t_construct; // count of single directive encountered by team - kmp_lock_t t_single_lock; // team specific lock - - // Master only ----------------------------------------------------------------------------------------- - KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team - int t_master_this_cons; // "this_construct" single counter of master in parent team - ident_t *t_ident; // if volatile, have to change too much other crud to volatile too - kmp_team_p *t_parent; // parent team - kmp_team_p *t_next_pool; // next free team in the team pool - kmp_disp_t *t_dispatch; // thread's dispatch data - kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 + // Synchronization Data + // --------------------------------------------------------------------------- + KMP_ALIGN_CACHE kmp_ordered_team_t t_ordered; + kmp_balign_team_t t_bar[bs_last_barrier]; + volatile int t_construct; // count of single directive encountered by team + kmp_lock_t t_single_lock; // team specific lock + + // Master only + // --------------------------------------------------------------------------- + KMP_ALIGN_CACHE int t_master_tid; // tid of master in parent team + int t_master_this_cons; // "this_construct" single counter of master in parent + // team + ident_t *t_ident; // if volatile, have to change too much other crud to + // volatile too + kmp_team_p *t_parent; // parent team + kmp_team_p *t_next_pool; // next free team in the team pool + kmp_disp_t *t_dispatch; // thread's dispatch data + kmp_task_team_t *t_task_team[2]; // Task team struct; switch between 2 #if OMP_40_ENABLED - kmp_proc_bind_t t_proc_bind; // bind type for par region + kmp_proc_bind_t t_proc_bind; // bind type for par region #endif // OMP_40_ENABLED #if USE_ITT_BUILD - kmp_uint64 t_region_time; // region begin timestamp + kmp_uint64 t_region_time; // region begin timestamp #endif /* USE_ITT_BUILD */ - // Master write, workers read -------------------------------------------------------------------------- - KMP_ALIGN_CACHE void **t_argv; - int t_argc; - int t_nproc; // number of threads in team - microtask_t t_pkfn; - launch_t t_invoke; // procedure to launch the microtask + // Master write, workers read + // -------------------------------------------------------------------------- + KMP_ALIGN_CACHE void **t_argv; + int t_argc; + int t_nproc; // number of threads in team + microtask_t t_pkfn; + launch_t t_invoke; // procedure to launch the microtask #if OMPT_SUPPORT - ompt_team_info_t 
ompt_team_info; - ompt_lw_taskteam_t *ompt_serialized_team_info; + ompt_team_info_t ompt_team_info; + ompt_lw_taskteam_t *ompt_serialized_team_info; #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - kmp_int8 t_fp_control_saved; - kmp_int8 t_pad2b; - kmp_int16 t_x87_fpu_control_word; // FP control regs - kmp_uint32 t_mxcsr; + kmp_int8 t_fp_control_saved; + kmp_int8 t_pad2b; + kmp_int16 t_x87_fpu_control_word; // FP control regs + kmp_uint32 t_mxcsr; #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - void *t_inline_argv[ KMP_INLINE_ARGV_ENTRIES ]; + void *t_inline_argv[KMP_INLINE_ARGV_ENTRIES]; - KMP_ALIGN_CACHE kmp_info_t **t_threads; - kmp_taskdata_t *t_implicit_task_taskdata; // Taskdata for the thread's implicit task - int t_level; // nested parallel level + KMP_ALIGN_CACHE kmp_info_t **t_threads; + kmp_taskdata_t + *t_implicit_task_taskdata; // Taskdata for the thread's implicit task + int t_level; // nested parallel level - KMP_ALIGN_CACHE int t_max_argc; - int t_max_nproc; // maximum threads this team can handle (dynamicly expandable) - int t_serialized; // levels deep of serialized teams - dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system - int t_id; // team's id, assigned by debugger. - int t_active_level; // nested active parallel level - kmp_r_sched_t t_sched; // run-time schedule for the team + KMP_ALIGN_CACHE int t_max_argc; + int t_max_nproc; // max threads this team can handle (dynamicly expandable) + int t_serialized; // levels deep of serialized teams + dispatch_shared_info_t *t_disp_buffer; // buffers for dispatch system + int t_id; // team's id, assigned by debugger. + int t_active_level; // nested active parallel level + kmp_r_sched_t t_sched; // run-time schedule for the team #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - int t_first_place; // first & last place in parent thread's partition. - int t_last_place; // Restore these values to master after par region. + int t_first_place; // first & last place in parent thread's partition. + int t_last_place; // Restore these values to master after par region. #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via omp_set_num_threads() call + int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via +// omp_set_num_threads() call - // Read/write by workers as well ----------------------------------------------------------------------- +// Read/write by workers as well #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) - // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf regression of epcc 'parallel' - // and 'barrier' on fxe256lin01. This extra padding serves to fix the performance of epcc 'parallel' - // and 'barrier' when CACHE_LINE=64. TODO: investigate more and get rid if this padding. - char dummy_padding[1024]; -#endif - KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; // internal control stack for additional nested teams. - // for SERIALIZED teams nested 2 or more levels deep + // Using CACHE_LINE=64 reduces memory footprint, but causes a big perf + // regression of epcc 'parallel' and 'barrier' on fxe256lin01. This extra + // padding serves to fix the performance of epcc 'parallel' and 'barrier' when + // CACHE_LINE=64. TODO: investigate more and get rid if this padding. + char dummy_padding[1024]; +#endif + // Internal control stack for additional nested teams. 
+ KMP_ALIGN_CACHE kmp_internal_control_t *t_control_stack_top; +// for SERIALIZED teams nested 2 or more levels deep #if OMP_40_ENABLED - kmp_int32 t_cancel_request; // typed flag to store request state of cancellation + // typed flag to store request state of cancellation + kmp_int32 t_cancel_request; #endif - int t_master_active; // save on fork, restore on join - kmp_taskq_t t_taskq; // this team's task queue - void *t_copypriv_data; // team specific pointer to copyprivate data array - kmp_uint32 t_copyin_counter; + int t_master_active; // save on fork, restore on join + kmp_taskq_t t_taskq; // this team's task queue + void *t_copypriv_data; // team specific pointer to copyprivate data array + kmp_uint32 t_copyin_counter; #if USE_ITT_BUILD - void *t_stack_id; // team specific stack stitching id (for ittnotify) + void *t_stack_id; // team specific stack stitching id (for ittnotify) #endif /* USE_ITT_BUILD */ } kmp_base_team_t; union KMP_ALIGN_CACHE kmp_team { - kmp_base_team_t t; - double t_align; /* use worst case alignment */ - char t_pad[ KMP_PAD(kmp_base_team_t, CACHE_LINE) ]; + kmp_base_team_t t; + double t_align; /* use worst case alignment */ + char t_pad[KMP_PAD(kmp_base_team_t, CACHE_LINE)]; }; - typedef union KMP_ALIGN_CACHE kmp_time_global { - double dt_align; /* use worst case alignment */ - char dt_pad[ KMP_PAD(kmp_base_data_t, CACHE_LINE) ]; - kmp_base_data_t dt; + double dt_align; /* use worst case alignment */ + char dt_pad[KMP_PAD(kmp_base_data_t, CACHE_LINE)]; + kmp_base_data_t dt; } kmp_time_global_t; typedef struct kmp_base_global { - /* cache-aligned */ - kmp_time_global_t g_time; + /* cache-aligned */ + kmp_time_global_t g_time; - /* non cache-aligned */ - volatile int g_abort; - volatile int g_done; + /* non cache-aligned */ + volatile int g_abort; + volatile int g_done; - int g_dynamic; - enum dynamic_mode g_dynamic_mode; + int g_dynamic; + enum dynamic_mode g_dynamic_mode; } kmp_base_global_t; typedef union KMP_ALIGN_CACHE kmp_global { - kmp_base_global_t g; - double g_align; /* use worst case alignment */ - char g_pad[ KMP_PAD(kmp_base_global_t, CACHE_LINE) ]; + kmp_base_global_t g; + double g_align; /* use worst case alignment */ + char g_pad[KMP_PAD(kmp_base_global_t, CACHE_LINE)]; } kmp_global_t; - typedef struct kmp_base_root { - // TODO: GEH - combine r_active with r_in_parallel then r_active == (r_in_parallel>= 0) - // TODO: GEH - then replace r_active with t_active_levels if we can to reduce the synch - // overhead or keeping r_active - - volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ - // GEH: This is misnamed, should be r_in_parallel - volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. - int r_in_parallel; /* keeps a count of active parallel regions per root */ - // GEH: This is misnamed, should be r_active_levels - kmp_team_t *r_root_team; - kmp_team_t *r_hot_team; - kmp_info_t *r_uber_thread; - kmp_lock_t r_begin_lock; - volatile int r_begin; - int r_blocktime; /* blocktime for this root and descendants */ + // TODO: GEH - combine r_active with r_in_parallel then r_active == + // (r_in_parallel>= 0) + // TODO: GEH - then replace r_active with t_active_levels if we can to reduce + // the synch overhead or keeping r_active + volatile int r_active; /* TRUE if some region in a nest has > 1 thread */ + // GEH: This is misnamed, should be r_in_parallel + volatile int r_nested; // TODO: GEH - This is unused, just remove it entirely. 
+ int r_in_parallel; /* keeps a count of active parallel regions per root */ + // GEH: This is misnamed, should be r_active_levels + kmp_team_t *r_root_team; + kmp_team_t *r_hot_team; + kmp_info_t *r_uber_thread; + kmp_lock_t r_begin_lock; + volatile int r_begin; + int r_blocktime; /* blocktime for this root and descendants */ } kmp_base_root_t; typedef union KMP_ALIGN_CACHE kmp_root { - kmp_base_root_t r; - double r_align; /* use worst case alignment */ - char r_pad[ KMP_PAD(kmp_base_root_t, CACHE_LINE) ]; + kmp_base_root_t r; + double r_align; /* use worst case alignment */ + char r_pad[KMP_PAD(kmp_base_root_t, CACHE_LINE)]; } kmp_root_t; struct fortran_inx_info { - kmp_int32 data; + kmp_int32 data; }; /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -extern int __kmp_settings; -extern int __kmp_duplicate_library_ok; +extern int __kmp_settings; +extern int __kmp_duplicate_library_ok; #if USE_ITT_BUILD -extern int __kmp_forkjoin_frames; -extern int __kmp_forkjoin_frames_mode; +extern int __kmp_forkjoin_frames; +extern int __kmp_forkjoin_frames_mode; #endif extern PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method; -extern int __kmp_determ_red; +extern int __kmp_determ_red; #ifdef KMP_DEBUG -extern int kmp_a_debug; -extern int kmp_b_debug; -extern int kmp_c_debug; -extern int kmp_d_debug; -extern int kmp_e_debug; -extern int kmp_f_debug; +extern int kmp_a_debug; +extern int kmp_b_debug; +extern int kmp_c_debug; +extern int kmp_d_debug; +extern int kmp_e_debug; +extern int kmp_f_debug; #endif /* KMP_DEBUG */ /* For debug information logging using rotating buffer */ -#define KMP_DEBUG_BUF_LINES_INIT 512 -#define KMP_DEBUG_BUF_LINES_MIN 1 - -#define KMP_DEBUG_BUF_CHARS_INIT 128 -#define KMP_DEBUG_BUF_CHARS_MIN 2 - -extern int __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */ -extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ -extern int __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ -extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer entry pointer */ - -extern char *__kmp_debug_buffer; /* Debug buffer itself */ -extern int __kmp_debug_count; /* Counter for number of lines printed in buffer so far */ -extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase recommended in warnings */ +#define KMP_DEBUG_BUF_LINES_INIT 512 +#define KMP_DEBUG_BUF_LINES_MIN 1 + +#define KMP_DEBUG_BUF_CHARS_INIT 128 +#define KMP_DEBUG_BUF_CHARS_MIN 2 + +extern int + __kmp_debug_buf; /* TRUE means use buffer, FALSE means print to stderr */ +extern int __kmp_debug_buf_lines; /* How many lines of debug stored in buffer */ +extern int + __kmp_debug_buf_chars; /* How many characters allowed per line in buffer */ +extern int __kmp_debug_buf_atomic; /* TRUE means use atomic update of buffer + entry pointer */ + +extern char *__kmp_debug_buffer; /* Debug buffer itself */ +extern int __kmp_debug_count; /* Counter for number of lines printed in buffer + so far */ +extern int __kmp_debug_buf_warn_chars; /* Keep track of char increase + recommended in warnings */ /* end rotating debug buffer */ #ifdef KMP_DEBUG -extern int __kmp_par_range; /* +1 => only go par for constructs in range */ +extern int __kmp_par_range; /* +1 => only go par for constructs in range */ -#define KMP_PAR_RANGE_ROUTINE_LEN 1024 -extern char 
__kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; -#define KMP_PAR_RANGE_FILENAME_LEN 1024 -extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; -extern int __kmp_par_range_lb; -extern int __kmp_par_range_ub; +#define KMP_PAR_RANGE_ROUTINE_LEN 1024 +extern char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN]; +#define KMP_PAR_RANGE_FILENAME_LEN 1024 +extern char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN]; +extern int __kmp_par_range_lb; +extern int __kmp_par_range_ub; #endif /* For printing out dynamic storage map for threads and teams */ -extern int __kmp_storage_map; /* True means print storage map for threads and teams */ -extern int __kmp_storage_map_verbose; /* True means storage map includes placement info */ -extern int __kmp_storage_map_verbose_specified; +extern int + __kmp_storage_map; /* True means print storage map for threads and teams */ +extern int __kmp_storage_map_verbose; /* True means storage map includes + placement info */ +extern int __kmp_storage_map_verbose_specified; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern kmp_cpuinfo_t __kmp_cpuinfo; +extern kmp_cpuinfo_t __kmp_cpuinfo; #endif extern volatile int __kmp_init_serial; @@ -2638,65 +2775,72 @@ extern int __kmp_version; extern kmp_cached_addr_t *__kmp_threadpriv_cache_list; /* Barrier algorithm types and options */ -extern kmp_uint32 __kmp_barrier_gather_bb_dflt; -extern kmp_uint32 __kmp_barrier_release_bb_dflt; +extern kmp_uint32 __kmp_barrier_gather_bb_dflt; +extern kmp_uint32 __kmp_barrier_release_bb_dflt; extern kmp_bar_pat_e __kmp_barrier_gather_pat_dflt; extern kmp_bar_pat_e __kmp_barrier_release_pat_dflt; -extern kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ]; -extern kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ]; -extern kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ]; -extern kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ]; -extern char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ]; -extern char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ]; -extern char const *__kmp_barrier_type_name [ bs_last_barrier ]; -extern char const *__kmp_barrier_pattern_name [ bp_last_bar ]; +extern kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier]; +extern kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier]; +extern kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier]; +extern kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier]; +extern char const *__kmp_barrier_branch_bit_env_name[bs_last_barrier]; +extern char const *__kmp_barrier_pattern_env_name[bs_last_barrier]; +extern char const *__kmp_barrier_type_name[bs_last_barrier]; +extern char const *__kmp_barrier_pattern_name[bp_last_bar]; /* Global Locks */ -extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ -extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ -extern kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +extern kmp_bootstrap_lock_t __kmp_initz_lock; /* control initialization */ +extern kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ +extern kmp_bootstrap_lock_t + __kmp_exit_lock; /* exit() is not always thread-safe */ #if KMP_USE_MONITOR -extern kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ +extern kmp_bootstrap_lock_t + __kmp_monitor_lock; /* control monitor thread creation */ #endif -extern kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow 
threadprivate cache and __kmp_threads expansion to co-exist */ +extern kmp_bootstrap_lock_t + __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and + __kmp_threads expansion to co-exist */ -extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ -extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ -extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ +extern kmp_lock_t __kmp_global_lock; /* control OS/global access */ +extern kmp_queuing_lock_t __kmp_dispatch_lock; /* control dispatch access */ +extern kmp_lock_t __kmp_debug_lock; /* control I/O access for KMP_DEBUG */ /* used for yielding spin-waits */ -extern unsigned int __kmp_init_wait; /* initial number of spin-tests */ -extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */ +extern unsigned int __kmp_init_wait; /* initial number of spin-tests */ +extern unsigned int __kmp_next_wait; /* susequent number of spin-tests */ extern enum library_type __kmp_library; -extern enum sched_type __kmp_sched; /* default runtime scheduling */ -extern enum sched_type __kmp_static; /* default static scheduling method */ -extern enum sched_type __kmp_guided; /* default guided scheduling method */ -extern enum sched_type __kmp_auto; /* default auto scheduling method */ -extern int __kmp_chunk; /* default runtime chunk size */ +extern enum sched_type __kmp_sched; /* default runtime scheduling */ +extern enum sched_type __kmp_static; /* default static scheduling method */ +extern enum sched_type __kmp_guided; /* default guided scheduling method */ +extern enum sched_type __kmp_auto; /* default auto scheduling method */ +extern int __kmp_chunk; /* default runtime chunk size */ -extern size_t __kmp_stksize; /* stack size per thread */ +extern size_t __kmp_stksize; /* stack size per thread */ #if KMP_USE_MONITOR -extern size_t __kmp_monitor_stksize;/* stack size for monitor thread */ -#endif -extern size_t __kmp_stkoffset; /* stack offset per thread */ -extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ - -extern size_t __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ -extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */ -extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */ -extern int __kmp_env_omp_stksize;/* was OMP_STACKSIZE specified? */ -extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ -extern int __kmp_env_omp_all_threads;/* was OMP_THREAD_LIMIT specified? */ -extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ -extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ -extern int __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */ -extern int __kmp_generate_warnings; /* should we issue warnings? */ -extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */ +extern size_t __kmp_monitor_stksize; /* stack size for monitor thread */ +#endif +extern size_t __kmp_stkoffset; /* stack offset per thread */ +extern int __kmp_stkpadding; /* Should we pad root thread(s) stack */ + +extern size_t + __kmp_malloc_pool_incr; /* incremental size of pool for kmp_malloc() */ +extern int __kmp_env_chunk; /* was KMP_CHUNK specified? */ +extern int __kmp_env_stksize; /* was KMP_STACKSIZE specified? */ +extern int __kmp_env_omp_stksize; /* was OMP_STACKSIZE specified? */ +extern int __kmp_env_all_threads; /* was KMP_ALL_THREADS or KMP_MAX_THREADS + specified? 
*/ +extern int __kmp_env_omp_all_threads; /* was OMP_THREAD_LIMIT specified? */ +extern int __kmp_env_blocktime; /* was KMP_BLOCKTIME specified? */ +extern int __kmp_env_checks; /* was KMP_CHECKS specified? */ +extern int + __kmp_env_consistency_check; /* was KMP_CONSISTENCY_CHECK specified? */ +extern int __kmp_generate_warnings; /* should we issue warnings? */ +extern int __kmp_reserve_warn; /* have we issued reserve_threads warning? */ #ifdef DEBUG_SUSPEND -extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ +extern int __kmp_suspend_count; /* count inside __kmp_suspend_template() */ #endif extern kmp_uint32 __kmp_yield_init; @@ -2706,87 +2850,107 @@ extern kmp_uint32 __kmp_yield_next; extern kmp_uint32 __kmp_yielding_on; #endif extern kmp_uint32 __kmp_yield_cycle; -extern kmp_int32 __kmp_yield_on_count; -extern kmp_int32 __kmp_yield_off_count; +extern kmp_int32 __kmp_yield_on_count; +extern kmp_int32 __kmp_yield_off_count; /* ------------------------------------------------------------------------- */ -extern int __kmp_allThreadsSpecified; +extern int __kmp_allThreadsSpecified; -extern size_t __kmp_align_alloc; +extern size_t __kmp_align_alloc; /* following data protected by initialization routines */ -extern int __kmp_xproc; /* number of processors in the system */ -extern int __kmp_avail_proc; /* number of processors available to the process */ -extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ -extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ -extern int __kmp_max_nth; /* maximum total number of concurrently-existing threads */ -extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and __kmp_root */ -extern int __kmp_dflt_team_nth; /* default number of threads in a parallel region a la OMP_NUM_THREADS */ -extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial initialization */ -extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is used (fixed) */ -extern int __kmp_tp_cached; /* whether threadprivate cache has been created (__kmpc_threadprivate_cached()) */ -extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la OMP_NESTED */ -extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before blocking (env setting) */ +extern int __kmp_xproc; /* number of processors in the system */ +extern int __kmp_avail_proc; /* number of processors available to the process */ +extern size_t __kmp_sys_min_stksize; /* system-defined minimum stack size */ +extern int __kmp_sys_max_nth; /* system-imposed maximum number of threads */ +extern int + __kmp_max_nth; /* maximum total number of concurrently-existing threads */ +extern int __kmp_threads_capacity; /* capacity of the arrays __kmp_threads and + __kmp_root */ +extern int __kmp_dflt_team_nth; /* default number of threads in a parallel + region a la OMP_NUM_THREADS */ +extern int __kmp_dflt_team_nth_ub; /* upper bound on "" determined at serial + initialization */ +extern int __kmp_tp_capacity; /* capacity of __kmp_threads if threadprivate is + used (fixed) */ +extern int __kmp_tp_cached; /* whether threadprivate cache has been created + (__kmpc_threadprivate_cached()) */ +extern int __kmp_dflt_nested; /* nested parallelism enabled by default a la + OMP_NESTED */ +extern int __kmp_dflt_blocktime; /* number of milliseconds to wait before + blocking (env setting) */ #if KMP_USE_MONITOR -extern int __kmp_monitor_wakeups;/* number of times monitor wakes up per second */ -extern 
int __kmp_bt_intervals; /* number of monitor timestamp intervals before blocking */ +extern int + __kmp_monitor_wakeups; /* number of times monitor wakes up per second */ +extern int __kmp_bt_intervals; /* number of monitor timestamp intervals before + blocking */ #endif #ifdef KMP_ADJUST_BLOCKTIME -extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ +extern int __kmp_zero_bt; /* whether blocktime has been forced to zero */ #endif /* KMP_ADJUST_BLOCKTIME */ #ifdef KMP_DFLT_NTH_CORES -extern int __kmp_ncores; /* Total number of cores for threads placement */ -#endif -extern int __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */ - -extern int __kmp_need_register_atfork_specified; -extern int __kmp_need_register_atfork;/* At initialization, call pthread_atfork to install fork handler */ -extern int __kmp_gtid_mode; /* Method of getting gtid, values: - 0 - not set, will be set at runtime - 1 - using stack search - 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS)) - 3 - static TLS (__declspec(thread) __kmp_gtid), Linux* OS .so only. - */ -extern int __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ +extern int __kmp_ncores; /* Total number of cores for threads placement */ +#endif +extern int + __kmp_abort_delay; /* Number of millisecs to delay on abort for VTune */ + +extern int __kmp_need_register_atfork_specified; +extern int + __kmp_need_register_atfork; /* At initialization, call pthread_atfork to + install fork handler */ +extern int __kmp_gtid_mode; /* Method of getting gtid, values: + 0 - not set, will be set at runtime + 1 - using stack search + 2 - dynamic TLS (pthread_getspecific(Linux* OS/OS + X*) or TlsGetValue(Windows* OS)) + 3 - static TLS (__declspec(thread) __kmp_gtid), + Linux* OS .so only. 
*/ +extern int + __kmp_adjust_gtid_mode; /* If true, adjust method based on #threads */ #ifdef KMP_TDATA_GTID #if KMP_OS_WINDOWS -extern __declspec(thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */ +extern __declspec( + thread) int __kmp_gtid; /* This thread's gtid, if __kmp_gtid_mode == 3 */ #else extern __thread int __kmp_gtid; -#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ +#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core \ + compiler 20110316 doesn't accept __declspec */ #endif -extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ -extern int __kmp_foreign_tp; /* If true, separate TP var for each foreign thread */ +extern int __kmp_tls_gtid_min; /* #threads below which use sp search for gtid */ +extern int __kmp_foreign_tp; // If true, separate TP var for each foreign thread #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern int __kmp_inherit_fp_control; /* copy fp creg(s) parent->workers at fork */ -extern kmp_int16 __kmp_init_x87_fpu_control_word; /* init thread's FP control reg */ -extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ +extern int __kmp_inherit_fp_control; // copy fp creg(s) parent->workers at fork +extern kmp_int16 __kmp_init_x87_fpu_control_word; // init thread's FP ctrl reg +extern kmp_uint32 __kmp_init_mxcsr; /* init thread's mxscr */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested parallelism enabled by default a la OMP_MAX_ACTIVE_LEVELS */ -extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in concurrent execution per team */ +extern int __kmp_dflt_max_active_levels; /* max_active_levels for nested + parallelism enabled by default via + OMP_MAX_ACTIVE_LEVELS */ +extern int __kmp_dispatch_num_buffers; /* max possible dynamic loops in + concurrent execution per team */ #if KMP_NESTED_HOT_TEAMS -extern int __kmp_hot_teams_mode; -extern int __kmp_hot_teams_max_level; +extern int __kmp_hot_teams_mode; +extern int __kmp_hot_teams_max_level; #endif -# if KMP_OS_LINUX +#if KMP_OS_LINUX extern enum clock_function_type __kmp_clock_function; extern int __kmp_clock_function_param; -# endif /* KMP_OS_LINUX */ +#endif /* KMP_OS_LINUX */ #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) extern enum mic_type __kmp_mic_type; #endif -# ifdef USE_LOAD_BALANCE -extern double __kmp_load_balance_interval; /* Interval for the load balance algorithm */ -# endif /* USE_LOAD_BALANCE */ +#ifdef USE_LOAD_BALANCE +extern double __kmp_load_balance_interval; // load balance algorithm interval +#endif /* USE_LOAD_BALANCE */ // OpenMP 3.1 - Nested num threads array typedef struct kmp_nested_nthreads_t { - int * nth; - int size; - int used; + int *nth; + int size; + int used; } kmp_nested_nthreads_t; extern kmp_nested_nthreads_t __kmp_nested_nth; @@ -2795,290 +2959,313 @@ extern kmp_nested_nthreads_t __kmp_nested_nth; // Parameters for the speculative lock backoff system. struct kmp_adaptive_backoff_params_t { - // Number of soft retries before it counts as a hard retry. - kmp_uint32 max_soft_retries; - // Badness is a bit mask : 0,1,3,7,15,... on each hard failure we move one to the right - kmp_uint32 max_badness; + // Number of soft retries before it counts as a hard retry. + kmp_uint32 max_soft_retries; + // Badness is a bit mask : 0,1,3,7,15,... 
on each hard failure we move one to + // the right + kmp_uint32 max_badness; }; extern kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params; #if KMP_DEBUG_ADAPTIVE_LOCKS -extern char * __kmp_speculative_statsfile; +extern char *__kmp_speculative_statsfile; #endif #endif // KMP_USE_ADAPTIVE_LOCKS #if OMP_40_ENABLED -extern int __kmp_display_env; /* TRUE or FALSE */ -extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ -extern int __kmp_omp_cancellation; /* TRUE or FALSE */ +extern int __kmp_display_env; /* TRUE or FALSE */ +extern int __kmp_display_env_verbose; /* TRUE if OMP_DISPLAY_ENV=VERBOSE */ +extern int __kmp_omp_cancellation; /* TRUE or FALSE */ #endif /* ------------------------------------------------------------------------- */ -/* --------------------------------------------------------------------------- */ /* the following are protected by the fork/join lock */ /* write: lock read: anytime */ -extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ +extern kmp_info_t **__kmp_threads; /* Descriptors for the threads */ /* read/write: lock */ -extern volatile kmp_team_t * __kmp_team_pool; -extern volatile kmp_info_t * __kmp_thread_pool; +extern volatile kmp_team_t *__kmp_team_pool; +extern volatile kmp_info_t *__kmp_thread_pool; -/* total number of threads reachable from some root thread including all root threads*/ +// total num threads reachable from some root thread including all root threads extern volatile int __kmp_nth; -/* total number of threads reachable from some root thread including all root threads, - and those in the thread pool */ +/* total number of threads reachable from some root thread including all root + threads, and those in the thread pool */ extern volatile int __kmp_all_nth; extern int __kmp_thread_pool_nth; extern volatile int __kmp_thread_pool_active_nth; -extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ +extern kmp_root_t **__kmp_root; /* root of thread hierarchy */ /* end data protected by fork/join lock */ -/* --------------------------------------------------------------------------- */ +/* ------------------------------------------------------------------------- */ -extern kmp_global_t __kmp_global; /* global status */ +extern kmp_global_t __kmp_global; /* global status */ extern kmp_info_t __kmp_monitor; -extern volatile kmp_uint32 __kmp_team_counter; // Used by Debugging Support Library. -extern volatile kmp_uint32 __kmp_task_counter; // Used by Debugging Support Library. +extern volatile kmp_uint32 __kmp_team_counter; // For Debugging Support Library +extern volatile kmp_uint32 __kmp_task_counter; // For Debugging Support Library #if USE_DEBUGGER -#define _KMP_GEN_ID( counter ) \ - ( \ - __kmp_debugging \ - ? \ - KMP_TEST_THEN_INC32( (volatile kmp_int32 *) & counter ) + 1 \ - : \ - ~ 0 \ - ) +#define _KMP_GEN_ID(counter) \ + (__kmp_debugging ? 
KMP_TEST_THEN_INC32((volatile kmp_int32 *)&counter) + 1 \ + : ~0) #else -#define _KMP_GEN_ID( counter ) \ - ( \ - ~ 0 \ - ) +#define _KMP_GEN_ID(counter) (~0) #endif /* USE_DEBUGGER */ -#define KMP_GEN_TASK_ID() _KMP_GEN_ID( __kmp_task_counter ) -#define KMP_GEN_TEAM_ID() _KMP_GEN_ID( __kmp_team_counter ) +#define KMP_GEN_TASK_ID() _KMP_GEN_ID(__kmp_task_counter) +#define KMP_GEN_TEAM_ID() _KMP_GEN_ID(__kmp_team_counter) /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -extern void __kmp_print_storage_map_gtid( int gtid, void *p1, void* p2, size_t size, char const *format, ... ); +extern void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, + size_t size, char const *format, ...); -extern void __kmp_serial_initialize( void ); -extern void __kmp_middle_initialize( void ); -extern void __kmp_parallel_initialize( void ); +extern void __kmp_serial_initialize(void); +extern void __kmp_middle_initialize(void); +extern void __kmp_parallel_initialize(void); -extern void __kmp_internal_begin( void ); -extern void __kmp_internal_end_library( int gtid ); -extern void __kmp_internal_end_thread( int gtid ); -extern void __kmp_internal_end_atexit( void ); -extern void __kmp_internal_end_fini( void ); -extern void __kmp_internal_end_dtor( void ); -extern void __kmp_internal_end_dest( void* ); +extern void __kmp_internal_begin(void); +extern void __kmp_internal_end_library(int gtid); +extern void __kmp_internal_end_thread(int gtid); +extern void __kmp_internal_end_atexit(void); +extern void __kmp_internal_end_fini(void); +extern void __kmp_internal_end_dtor(void); +extern void __kmp_internal_end_dest(void *); -extern int __kmp_register_root( int initial_thread ); -extern void __kmp_unregister_root( int gtid ); +extern int __kmp_register_root(int initial_thread); +extern void __kmp_unregister_root(int gtid); -extern int __kmp_ignore_mppbeg( void ); -extern int __kmp_ignore_mppend( void ); +extern int __kmp_ignore_mppbeg(void); +extern int __kmp_ignore_mppend(void); -extern int __kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ); -extern void __kmp_exit_single( int gtid ); +extern int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws); +extern void __kmp_exit_single(int gtid); -extern void __kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); -extern void __kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ); +extern void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); +extern void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref); #ifdef USE_LOAD_BALANCE -extern int __kmp_get_load_balance( int ); +extern int __kmp_get_load_balance(int); #endif #ifdef BUILD_TV -extern void __kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ); +extern void __kmp_tv_threadprivate_store(kmp_info_t *th, void *global_addr, + void *thread_addr); #endif -extern int __kmp_get_global_thread_id( void ); -extern int __kmp_get_global_thread_id_reg( void ); -extern void __kmp_exit_thread( int exit_status ); -extern void __kmp_abort( char const * format, ... ); -extern void __kmp_abort_thread( void ); -extern void __kmp_abort_process( void ); -extern void __kmp_warn( char const * format, ... 
); +extern int __kmp_get_global_thread_id(void); +extern int __kmp_get_global_thread_id_reg(void); +extern void __kmp_exit_thread(int exit_status); +extern void __kmp_abort(char const *format, ...); +extern void __kmp_abort_thread(void); +extern void __kmp_abort_process(void); +extern void __kmp_warn(char const *format, ...); -extern void __kmp_set_num_threads( int new_nth, int gtid ); +extern void __kmp_set_num_threads(int new_nth, int gtid); -// Returns current thread (pointer to kmp_info_t). Current thread *must* be registered. -static inline kmp_info_t * __kmp_entry_thread() -{ - int gtid = __kmp_entry_gtid(); +// Returns current thread (pointer to kmp_info_t). Current thread *must* be +// registered. +static inline kmp_info_t *__kmp_entry_thread() { + int gtid = __kmp_entry_gtid(); - return __kmp_threads[gtid]; + return __kmp_threads[gtid]; } -extern void __kmp_set_max_active_levels( int gtid, int new_max_active_levels ); -extern int __kmp_get_max_active_levels( int gtid ); -extern int __kmp_get_ancestor_thread_num( int gtid, int level ); -extern int __kmp_get_team_size( int gtid, int level ); -extern void __kmp_set_schedule( int gtid, kmp_sched_t new_sched, int chunk ); -extern void __kmp_get_schedule( int gtid, kmp_sched_t * sched, int * chunk ); +extern void __kmp_set_max_active_levels(int gtid, int new_max_active_levels); +extern int __kmp_get_max_active_levels(int gtid); +extern int __kmp_get_ancestor_thread_num(int gtid, int level); +extern int __kmp_get_team_size(int gtid, int level); +extern void __kmp_set_schedule(int gtid, kmp_sched_t new_sched, int chunk); +extern void __kmp_get_schedule(int gtid, kmp_sched_t *sched, int *chunk); -extern unsigned short __kmp_get_random( kmp_info_t * thread ); -extern void __kmp_init_random( kmp_info_t * thread ); +extern unsigned short __kmp_get_random(kmp_info_t *thread); +extern void __kmp_init_random(kmp_info_t *thread); -extern kmp_r_sched_t __kmp_get_schedule_global( void ); -extern void __kmp_adjust_num_threads( int new_nproc ); +extern kmp_r_sched_t __kmp_get_schedule_global(void); +extern void __kmp_adjust_num_threads(int new_nproc); -extern void * ___kmp_allocate( size_t size KMP_SRC_LOC_DECL ); -extern void * ___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_free( void * ptr KMP_SRC_LOC_DECL ); -#define __kmp_allocate( size ) ___kmp_allocate( (size) KMP_SRC_LOC_CURR ) -#define __kmp_page_allocate( size ) ___kmp_page_allocate( (size) KMP_SRC_LOC_CURR ) -#define __kmp_free( ptr ) ___kmp_free( (ptr) KMP_SRC_LOC_CURR ) +extern void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL); +extern void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL); +extern void ___kmp_free(void *ptr KMP_SRC_LOC_DECL); +#define __kmp_allocate(size) ___kmp_allocate((size)KMP_SRC_LOC_CURR) +#define __kmp_page_allocate(size) ___kmp_page_allocate((size)KMP_SRC_LOC_CURR) +#define __kmp_free(ptr) ___kmp_free((ptr)KMP_SRC_LOC_CURR) #if USE_FAST_MEMORY -extern void * ___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_fast_free( kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL ); -extern void __kmp_free_fast_memory( kmp_info_t *this_thr ); -extern void __kmp_initialize_fast_memory( kmp_info_t *this_thr ); -#define __kmp_fast_allocate( this_thr, size ) ___kmp_fast_allocate( (this_thr), (size) KMP_SRC_LOC_CURR ) -#define __kmp_fast_free( this_thr, ptr ) ___kmp_fast_free( (this_thr), (ptr) KMP_SRC_LOC_CURR ) -#endif - -extern void * ___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ); 
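(Illustrative aside, not from the patch.) The allocation entry points above (___kmp_allocate, ___kmp_fast_allocate, ___kmp_thread_malloc, ...) all take a trailing "size KMP_SRC_LOC_DECL" parameter and are only reached through the corresponding __kmp_* macros, which append KMP_SRC_LOC_CURR at each call site. The apparent intent is to thread the caller's source location into the debug allocator without cluttering call sites; the exact expansion of KMP_SRC_LOC_DECL / KMP_SRC_LOC_CURR lives elsewhere in the runtime and is only assumed here. Below is a minimal, self-contained sketch of that pattern, using hypothetical my_* names rather than the runtime's own.

#include <stdio.h>
#include <stdlib.h>

/* Hypothetical stand-ins for KMP_SRC_LOC_DECL / KMP_SRC_LOC_CURR: in a debug
   build they add the caller's file and line, otherwise they add nothing. */
#ifdef MY_DEBUG
#define MY_SRC_LOC_DECL , char const *file_, int line_
#define MY_SRC_LOC_CURR , __FILE__, __LINE__
#else
#define MY_SRC_LOC_DECL
#define MY_SRC_LOC_CURR
#endif

/* Worker with trailing source-location parameters, analogous to ___kmp_allocate. */
static void *my_allocate_impl(size_t size MY_SRC_LOC_DECL) {
#ifdef MY_DEBUG
  fprintf(stderr, "allocating %zu bytes at %s:%d\n", size, file_, line_);
#endif
  return malloc(size);
}

/* Call-site macro, analogous to __kmp_allocate(size). */
#define my_allocate(size) my_allocate_impl((size)MY_SRC_LOC_CURR)

int main(void) {
  void *p = my_allocate(128); /* call sites never spell out __FILE__/__LINE__ */
  free(p);
  return 0;
}

Compiling with -DMY_DEBUG makes the call-site information flow through; without it the extra parameters disappear entirely, which mirrors how the __kmp_* wrappers keep release builds free of the bookkeeping.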
-extern void * ___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ); -extern void * ___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ); -extern void ___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ); -#define __kmp_thread_malloc( th, size ) ___kmp_thread_malloc( (th), (size) KMP_SRC_LOC_CURR ) -#define __kmp_thread_calloc( th, nelem, elsize ) ___kmp_thread_calloc( (th), (nelem), (elsize) KMP_SRC_LOC_CURR ) -#define __kmp_thread_realloc( th, ptr, size ) ___kmp_thread_realloc( (th), (ptr), (size) KMP_SRC_LOC_CURR ) -#define __kmp_thread_free( th, ptr ) ___kmp_thread_free( (th), (ptr) KMP_SRC_LOC_CURR ) - -#define KMP_INTERNAL_MALLOC(sz) malloc(sz) -#define KMP_INTERNAL_FREE(p) free(p) -#define KMP_INTERNAL_REALLOC(p,sz) realloc((p),(sz)) -#define KMP_INTERNAL_CALLOC(n,sz) calloc((n),(sz)) - -extern void __kmp_push_num_threads( ident_t *loc, int gtid, int num_threads ); +extern void *___kmp_fast_allocate(kmp_info_t *this_thr, + size_t size KMP_SRC_LOC_DECL); +extern void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL); +extern void __kmp_free_fast_memory(kmp_info_t *this_thr); +extern void __kmp_initialize_fast_memory(kmp_info_t *this_thr); +#define __kmp_fast_allocate(this_thr, size) \ + ___kmp_fast_allocate((this_thr), (size)KMP_SRC_LOC_CURR) +#define __kmp_fast_free(this_thr, ptr) \ + ___kmp_fast_free((this_thr), (ptr)KMP_SRC_LOC_CURR) +#endif + +extern void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL); +extern void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, + size_t elsize KMP_SRC_LOC_DECL); +extern void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, + size_t size KMP_SRC_LOC_DECL); +extern void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL); +#define __kmp_thread_malloc(th, size) \ + ___kmp_thread_malloc((th), (size)KMP_SRC_LOC_CURR) +#define __kmp_thread_calloc(th, nelem, elsize) \ + ___kmp_thread_calloc((th), (nelem), (elsize)KMP_SRC_LOC_CURR) +#define __kmp_thread_realloc(th, ptr, size) \ + ___kmp_thread_realloc((th), (ptr), (size)KMP_SRC_LOC_CURR) +#define __kmp_thread_free(th, ptr) \ + ___kmp_thread_free((th), (ptr)KMP_SRC_LOC_CURR) + +#define KMP_INTERNAL_MALLOC(sz) malloc(sz) +#define KMP_INTERNAL_FREE(p) free(p) +#define KMP_INTERNAL_REALLOC(p, sz) realloc((p), (sz)) +#define KMP_INTERNAL_CALLOC(n, sz) calloc((n), (sz)) + +extern void __kmp_push_num_threads(ident_t *loc, int gtid, int num_threads); #if OMP_40_ENABLED -extern void __kmp_push_proc_bind( ident_t *loc, int gtid, kmp_proc_bind_t proc_bind ); -extern void __kmp_push_num_teams( ident_t *loc, int gtid, int num_teams, int num_threads ); -#endif - -extern void __kmp_yield( int cond ); - -extern void __kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk ); -extern void __kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk ); -extern void __kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk ); -extern void __kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk ); - -extern int __kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ); -extern int 
__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ); -extern int __kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ); -extern int __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, - kmp_int32 *p_last, kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ); - -extern void __kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ); -extern void __kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ); - +extern void __kmp_push_proc_bind(ident_t *loc, int gtid, + kmp_proc_bind_t proc_bind); +extern void __kmp_push_num_teams(ident_t *loc, int gtid, int num_teams, + int num_threads); +#endif + +extern void __kmp_yield(int cond); + +extern void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, + kmp_int32 ub, kmp_int32 st, kmp_int32 chunk); +extern void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, + kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk); +extern void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, + kmp_int64 ub, kmp_int64 st, kmp_int64 chunk); +extern void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, + kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk); + +extern int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_int32 *p_lb, + kmp_int32 *p_ub, kmp_int32 *p_st); +extern int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_uint32 *p_lb, + kmp_uint32 *p_ub, kmp_int32 *p_st); +extern int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_int64 *p_lb, + kmp_int64 *p_ub, kmp_int64 *p_st); +extern int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, kmp_uint64 *p_lb, + kmp_uint64 *p_ub, kmp_int64 *p_st); + +extern void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid); +extern void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid); +extern void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid); +extern void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid); #ifdef KMP_GOMP_COMPAT -extern void __kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ); -extern void __kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, - enum sched_type schedule, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ); -extern void __kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ); -extern void __kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ); +extern void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, + 
kmp_int32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws); +extern void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, + kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk, int push_ws); +extern void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, + kmp_int64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws); +extern void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, + kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk, int push_ws); +extern void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid); +extern void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid); +extern void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid); +extern void __kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid); #endif /* KMP_GOMP_COMPAT */ - -extern kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker ); -extern kmp_uint32 __kmp_wait_yield_4( kmp_uint32 volatile * spinner, kmp_uint32 checker, kmp_uint32 (*pred) (kmp_uint32, kmp_uint32), void * obj ); -extern void __kmp_wait_yield_4_ptr( void * spinner, kmp_uint32 checker, kmp_uint32 (* pred)( void *, kmp_uint32 ), void * obj ); +extern kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker); +extern kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker); +extern kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker); +extern kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker); +extern kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker); +extern kmp_uint32 __kmp_wait_yield_4(kmp_uint32 volatile *spinner, + kmp_uint32 checker, + kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), + void *obj); +extern void __kmp_wait_yield_4_ptr(void *spinner, kmp_uint32 checker, + kmp_uint32 (*pred)(void *, kmp_uint32), + void *obj); class kmp_flag_32; class kmp_flag_64; class kmp_flag_oncore; -extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin +extern void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, + int final_spin #if USE_ITT_BUILD - , void * itt_sync_obj + , + void *itt_sync_obj #endif - ); + ); extern void __kmp_release_64(kmp_flag_64 *flag); -extern void __kmp_infinite_loop( void ); +extern void __kmp_infinite_loop(void); -extern void __kmp_cleanup( void ); +extern void __kmp_cleanup(void); #if KMP_HANDLE_SIGNALS - extern int __kmp_handle_signals; - extern void __kmp_install_signals( int parallel_init ); - extern void __kmp_remove_signals( void ); +extern int __kmp_handle_signals; +extern void __kmp_install_signals(int parallel_init); +extern void __kmp_remove_signals(void); #endif -extern void __kmp_clear_system_time( void ); -extern void __kmp_read_system_time( double *delta ); +extern void __kmp_clear_system_time(void); +extern void __kmp_read_system_time(double *delta); -extern void __kmp_check_stack_overlap( kmp_info_t *thr ); +extern void __kmp_check_stack_overlap(kmp_info_t *thr); -extern void __kmp_expand_host_name( char *buffer, size_t size ); -extern void __kmp_expand_file_name( char *result, size_t rlen, char *pattern ); +extern void __kmp_expand_host_name(char *buffer, size_t size); +extern void 
__kmp_expand_file_name(char *result, size_t rlen, char *pattern); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern void __kmp_initialize_system_tick( void ); /* Initialize timer tick value */ +extern void +__kmp_initialize_system_tick(void); /* Initialize timer tick value */ #endif -extern void __kmp_runtime_initialize( void ); /* machine specific initialization */ -extern void __kmp_runtime_destroy( void ); +extern void +__kmp_runtime_initialize(void); /* machine specific initialization */ +extern void __kmp_runtime_destroy(void); #if KMP_AFFINITY_SUPPORTED -extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); +extern char *__kmp_affinity_print_mask(char *buf, int buf_len, + kmp_affin_mask_t *mask); extern void __kmp_affinity_initialize(void); extern void __kmp_affinity_uninitialize(void); -extern void __kmp_affinity_set_init_mask(int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ +extern void __kmp_affinity_set_init_mask( + int gtid, int isa_root); /* set affinity according to KMP_AFFINITY */ #if OMP_40_ENABLED extern void __kmp_affinity_set_place(int gtid); #endif -extern void __kmp_affinity_determine_capable( const char *env_var ); +extern void __kmp_affinity_determine_capable(const char *env_var); extern int __kmp_aux_set_affinity(void **mask); extern int __kmp_aux_get_affinity(void **mask); extern int __kmp_aux_get_affinity_max_proc(); extern int __kmp_aux_set_affinity_mask_proc(int proc, void **mask); extern int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask); extern int __kmp_aux_get_affinity_mask_proc(int proc, void **mask); -extern void __kmp_balanced_affinity( int tid, int team_size ); +extern void __kmp_balanced_affinity(int tid, int team_size); #endif /* KMP_AFFINITY_SUPPORTED */ extern void __kmp_cleanup_hierarchy(); @@ -3086,208 +3273,226 @@ extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); #if KMP_USE_FUTEX -extern int __kmp_futex_determine_capable( void ); +extern int __kmp_futex_determine_capable(void); #endif // KMP_USE_FUTEX -extern void __kmp_gtid_set_specific( int gtid ); -extern int __kmp_gtid_get_specific( void ); +extern void __kmp_gtid_set_specific(int gtid); +extern int __kmp_gtid_get_specific(void); -extern double __kmp_read_cpu_time( void ); +extern double __kmp_read_cpu_time(void); -extern int __kmp_read_system_info( struct kmp_sys_info *info ); +extern int __kmp_read_system_info(struct kmp_sys_info *info); #if KMP_USE_MONITOR -extern void __kmp_create_monitor( kmp_info_t *th ); +extern void __kmp_create_monitor(kmp_info_t *th); #endif -extern void *__kmp_launch_thread( kmp_info_t *thr ); +extern void *__kmp_launch_thread(kmp_info_t *thr); -extern void __kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ); +extern void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size); #if KMP_OS_WINDOWS -extern int __kmp_still_running(kmp_info_t *th); -extern int __kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ); -extern void __kmp_free_handle( kmp_thread_t tHandle ); +extern int __kmp_still_running(kmp_info_t *th); +extern int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val); +extern void __kmp_free_handle(kmp_thread_t tHandle); #endif #if KMP_USE_MONITOR -extern void __kmp_reap_monitor( kmp_info_t *th ); +extern void __kmp_reap_monitor(kmp_info_t *th); #endif -extern void __kmp_reap_worker( kmp_info_t *th ); -extern void __kmp_terminate_thread( int gtid ); +extern void __kmp_reap_worker(kmp_info_t *th); +extern void __kmp_terminate_thread(int gtid); 
-extern void __kmp_suspend_32( int th_gtid, kmp_flag_32 *flag ); -extern void __kmp_suspend_64( int th_gtid, kmp_flag_64 *flag ); -extern void __kmp_suspend_oncore( int th_gtid, kmp_flag_oncore *flag ); -extern void __kmp_resume_32( int target_gtid, kmp_flag_32 *flag ); -extern void __kmp_resume_64( int target_gtid, kmp_flag_64 *flag ); -extern void __kmp_resume_oncore( int target_gtid, kmp_flag_oncore *flag ); +extern void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag); +extern void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag); +extern void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag); +extern void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag); +extern void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag); +extern void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag); -extern void __kmp_elapsed( double * ); -extern void __kmp_elapsed_tick( double * ); +extern void __kmp_elapsed(double *); +extern void __kmp_elapsed_tick(double *); -extern void __kmp_enable( int old_state ); -extern void __kmp_disable( int *old_state ); +extern void __kmp_enable(int old_state); +extern void __kmp_disable(int *old_state); -extern void __kmp_thread_sleep( int millis ); +extern void __kmp_thread_sleep(int millis); -extern void __kmp_common_initialize( void ); -extern void __kmp_common_destroy( void ); -extern void __kmp_common_destroy_gtid( int gtid ); +extern void __kmp_common_initialize(void); +extern void __kmp_common_destroy(void); +extern void __kmp_common_destroy_gtid(int gtid); #if KMP_OS_UNIX -extern void __kmp_register_atfork( void ); +extern void __kmp_register_atfork(void); #endif -extern void __kmp_suspend_initialize( void ); -extern void __kmp_suspend_uninitialize_thread( kmp_info_t *th ); +extern void __kmp_suspend_initialize(void); +extern void __kmp_suspend_uninitialize_thread(kmp_info_t *th); -extern kmp_info_t * __kmp_allocate_thread( kmp_root_t *root, - kmp_team_t *team, int tid); +extern kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, + int tid); #if OMP_40_ENABLED -extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +extern kmp_team_t * +__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_parallel_id_t ompt_parallel_id, #endif - kmp_proc_bind_t proc_bind, - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); + kmp_proc_bind_t proc_bind, kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); #else -extern kmp_team_t * __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +extern kmp_team_t * +__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_parallel_id_t ompt_parallel_id, #endif - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *thr) ); + kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *thr)); #endif // OMP_40_ENABLED -extern void __kmp_free_thread( kmp_info_t * ); -extern void __kmp_free_team( kmp_root_t *, kmp_team_t * USE_NESTED_HOT_ARG(kmp_info_t *) ); -extern kmp_team_t * __kmp_reap_team( kmp_team_t * ); +extern void __kmp_free_thread(kmp_info_t *); +extern void __kmp_free_team(kmp_root_t *, + kmp_team_t *USE_NESTED_HOT_ARG(kmp_info_t *)); +extern kmp_team_t *__kmp_reap_team(kmp_team_t *); /* ------------------------------------------------------------------------ */ -extern void __kmp_initialize_bget( 
kmp_info_t *th ); -extern void __kmp_finalize_bget( kmp_info_t *th ); +extern void __kmp_initialize_bget(kmp_info_t *th); +extern void __kmp_finalize_bget(kmp_info_t *th); -KMP_EXPORT void *kmpc_malloc( size_t size ); -KMP_EXPORT void *kmpc_aligned_malloc( size_t size, size_t alignment ); -KMP_EXPORT void *kmpc_calloc( size_t nelem, size_t elsize ); -KMP_EXPORT void *kmpc_realloc( void *ptr, size_t size ); -KMP_EXPORT void kmpc_free( void *ptr ); +KMP_EXPORT void *kmpc_malloc(size_t size); +KMP_EXPORT void *kmpc_aligned_malloc(size_t size, size_t alignment); +KMP_EXPORT void *kmpc_calloc(size_t nelem, size_t elsize); +KMP_EXPORT void *kmpc_realloc(void *ptr, size_t size); +KMP_EXPORT void kmpc_free(void *ptr); -/* ------------------------------------------------------------------------ */ /* declarations for internal use */ -extern int __kmp_barrier( enum barrier_type bt, int gtid, int is_split, - size_t reduce_size, void *reduce_data, void (*reduce)(void *, void *) ); -extern void __kmp_end_split_barrier ( enum barrier_type bt, int gtid ); +extern int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, + size_t reduce_size, void *reduce_data, + void (*reduce)(void *, void *)); +extern void __kmp_end_split_barrier(enum barrier_type bt, int gtid); /*! - * Tell the fork call which compiler generated the fork call, and therefore how to deal with the call. + * Tell the fork call which compiler generated the fork call, and therefore how + * to deal with the call. */ -enum fork_context_e -{ - fork_context_gnu, /**< Called from GNU generated code, so must not invoke the microtask internally. */ - fork_context_intel, /**< Called from Intel generated code. */ - fork_context_last +enum fork_context_e { + fork_context_gnu, /**< Called from GNU generated code, so must not invoke the + microtask internally. */ + fork_context_intel, /**< Called from Intel generated code. 
*/ + fork_context_last }; -extern int __kmp_fork_call( ident_t *loc, int gtid, enum fork_context_e fork_context, - kmp_int32 argc, +extern int __kmp_fork_call(ident_t *loc, int gtid, + enum fork_context_e fork_context, kmp_int32 argc, #if OMPT_SUPPORT - void *unwrapped_task, + void *unwrapped_task, #endif - microtask_t microtask, launch_t invoker, + microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_ARM || KMP_ARCH_X86_64 || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list *ap + va_list *ap #else - va_list ap + va_list ap #endif - ); + ); -extern void __kmp_join_call( ident_t *loc, int gtid +extern void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT - , enum fork_context_e fork_context + , + enum fork_context_e fork_context #endif #if OMP_40_ENABLED - , int exit_teams = 0 + , + int exit_teams = 0 #endif - ); + ); extern void __kmp_serialized_parallel(ident_t *id, kmp_int32 gtid); -extern void __kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ); -extern void __kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ); -extern int __kmp_invoke_task_func( int gtid ); -extern void __kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); -extern void __kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, kmp_team_t *team ); +extern void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team); +extern void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team); +extern int __kmp_invoke_task_func(int gtid); +extern void __kmp_run_before_invoked_task(int gtid, int tid, + kmp_info_t *this_thr, + kmp_team_t *team); +extern void __kmp_run_after_invoked_task(int gtid, int tid, + kmp_info_t *this_thr, + kmp_team_t *team); // should never have been exported -KMP_EXPORT int __kmpc_invoke_task_func( int gtid ); +KMP_EXPORT int __kmpc_invoke_task_func(int gtid); #if OMP_40_ENABLED -extern int __kmp_invoke_teams_master( int gtid ); -extern void __kmp_teams_master( int gtid ); +extern int __kmp_invoke_teams_master(int gtid); +extern void __kmp_teams_master(int gtid); #endif -extern void __kmp_save_internal_controls( kmp_info_t * thread ); -extern void __kmp_user_set_library (enum library_type arg); -extern void __kmp_aux_set_library (enum library_type arg); -extern void __kmp_aux_set_stacksize( size_t arg); -extern void __kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid); -extern void __kmp_aux_set_defaults( char const * str, int len ); +extern void __kmp_save_internal_controls(kmp_info_t *thread); +extern void __kmp_user_set_library(enum library_type arg); +extern void __kmp_aux_set_library(enum library_type arg); +extern void __kmp_aux_set_stacksize(size_t arg); +extern void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid); +extern void __kmp_aux_set_defaults(char const *str, int len); /* Functions called from __kmp_aux_env_initialize() in kmp_settings.cpp */ -void kmpc_set_blocktime (int arg); -void ompc_set_nested( int flag ); -void ompc_set_dynamic( int flag ); -void ompc_set_num_threads( int arg ); - -extern void __kmp_push_current_task_to_thread( kmp_info_t *this_thr, - kmp_team_t *team, int tid ); -extern void __kmp_pop_current_task_from_thread( kmp_info_t *this_thr ); -extern kmp_task_t* __kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, - kmp_tasking_flags_t *flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ); -extern void __kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, - 
kmp_team_t *team, int tid, int set_curr_task ); +void kmpc_set_blocktime(int arg); +void ompc_set_nested(int flag); +void ompc_set_dynamic(int flag); +void ompc_set_num_threads(int arg); + +extern void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, + kmp_team_t *team, int tid); +extern void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr); +extern kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_tasking_flags_t *flags, + size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry); +extern void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, + kmp_team_t *team, int tid, + int set_curr_task); extern void __kmp_finish_implicit_task(kmp_info_t *this_thr); extern void __kmp_free_implicit_task(kmp_info_t *this_thr); - -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, +int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_32 *flag, int final_spin, int *thread_finished, #if USE_ITT_BUILD - void * itt_sync_obj, + void *itt_sync_obj, #endif /* USE_ITT_BUILD */ kmp_int32 is_constrained); -int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, +int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_64 *flag, int final_spin, int *thread_finished, #if USE_ITT_BUILD - void * itt_sync_obj, + void *itt_sync_obj, #endif /* USE_ITT_BUILD */ kmp_int32 is_constrained); -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, +int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, + kmp_flag_oncore *flag, int final_spin, int *thread_finished, #if USE_ITT_BUILD - void * itt_sync_obj, + void *itt_sync_obj, #endif /* USE_ITT_BUILD */ kmp_int32 is_constrained); -extern void __kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ); -extern void __kmp_reap_task_teams( void ); -extern void __kmp_wait_to_unref_task_teams( void ); -extern void __kmp_task_team_setup ( kmp_info_t *this_thr, kmp_team_t *team, int always ); -extern void __kmp_task_team_sync ( kmp_info_t *this_thr, kmp_team_t *team ); -extern void __kmp_task_team_wait ( kmp_info_t *this_thr, kmp_team_t *team +extern void __kmp_free_task_team(kmp_info_t *thread, + kmp_task_team_t *task_team); +extern void __kmp_reap_task_teams(void); +extern void __kmp_wait_to_unref_task_teams(void); +extern void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, + int always); +extern void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team); +extern void __kmp_task_team_wait(kmp_info_t *this_thr, kmp_team_t *team #if USE_ITT_BUILD - , void * itt_sync_obj + , + void *itt_sync_obj #endif /* USE_ITT_BUILD */ - , int wait=1 -); -extern void __kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ); + , + int wait = 1); +extern void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, + int gtid); -extern int __kmp_is_address_mapped( void *addr ); +extern int __kmp_is_address_mapped(void *addr); extern kmp_uint64 __kmp_hardware_timestamp(void); #if KMP_OS_UNIX -extern int __kmp_read_from_file( char const *path, char const *format, ... ); +extern int __kmp_read_from_file(char const *path, char const *format, ...); #endif /* ------------------------------------------------------------------------ */ @@ -3297,127 +3502,145 @@ extern int __kmp_read_from_file( char const *path, char const *format, ... 
); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -extern void __kmp_query_cpuid( kmp_cpuinfo_t *p ); +extern void __kmp_query_cpuid(kmp_cpuinfo_t *p); #define __kmp_load_mxcsr(p) _mm_setcsr(*(p)) -static inline void __kmp_store_mxcsr( kmp_uint32 *p ) { *p = _mm_getcsr(); } +static inline void __kmp_store_mxcsr(kmp_uint32 *p) { *p = _mm_getcsr(); } -extern void __kmp_load_x87_fpu_control_word( kmp_int16 *p ); -extern void __kmp_store_x87_fpu_control_word( kmp_int16 *p ); +extern void __kmp_load_x87_fpu_control_word(kmp_int16 *p); +extern void __kmp_store_x87_fpu_control_word(kmp_int16 *p); extern void __kmp_clear_x87_fpu_status_word(); -# define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ +#define KMP_X86_MXCSR_MASK 0xffffffc0 /* ignore status flags (6 lsb) */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -extern int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int npr, int argc, void *argv[] +extern int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int npr, int argc, + void *argv[] #if OMPT_SUPPORT - , void **exit_frame_ptr + , + void **exit_frame_ptr #endif -); - + ); /* ------------------------------------------------------------------------ */ -KMP_EXPORT void __kmpc_begin ( ident_t *, kmp_int32 flags ); -KMP_EXPORT void __kmpc_end ( ident_t * ); - -KMP_EXPORT void __kmpc_threadprivate_register_vec ( ident_t *, void * data, kmpc_ctor_vec ctor, - kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, size_t vector_length ); -KMP_EXPORT void __kmpc_threadprivate_register ( ident_t *, void * data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor ); -KMP_EXPORT void * __kmpc_threadprivate ( ident_t *, kmp_int32 global_tid, void * data, size_t size ); - -KMP_EXPORT kmp_int32 __kmpc_global_thread_num ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_global_num_threads ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_bound_thread_num ( ident_t * ); -KMP_EXPORT kmp_int32 __kmpc_bound_num_threads ( ident_t * ); - -KMP_EXPORT kmp_int32 __kmpc_ok_to_fork ( ident_t * ); -KMP_EXPORT void __kmpc_fork_call ( ident_t *, kmp_int32 nargs, kmpc_micro microtask, ... 
); - -KMP_EXPORT void __kmpc_serialized_parallel ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_serialized_parallel ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT void __kmpc_flush ( ident_t *); -KMP_EXPORT void __kmpc_barrier ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT kmp_int32 __kmpc_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_ordered ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_ordered ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); -KMP_EXPORT void __kmpc_end_critical ( ident_t *, kmp_int32 global_tid, kmp_critical_name * ); +KMP_EXPORT void __kmpc_begin(ident_t *, kmp_int32 flags); +KMP_EXPORT void __kmpc_end(ident_t *); + +KMP_EXPORT void __kmpc_threadprivate_register_vec(ident_t *, void *data, + kmpc_ctor_vec ctor, + kmpc_cctor_vec cctor, + kmpc_dtor_vec dtor, + size_t vector_length); +KMP_EXPORT void __kmpc_threadprivate_register(ident_t *, void *data, + kmpc_ctor ctor, kmpc_cctor cctor, + kmpc_dtor dtor); +KMP_EXPORT void *__kmpc_threadprivate(ident_t *, kmp_int32 global_tid, + void *data, size_t size); + +KMP_EXPORT kmp_int32 __kmpc_global_thread_num(ident_t *); +KMP_EXPORT kmp_int32 __kmpc_global_num_threads(ident_t *); +KMP_EXPORT kmp_int32 __kmpc_bound_thread_num(ident_t *); +KMP_EXPORT kmp_int32 __kmpc_bound_num_threads(ident_t *); + +KMP_EXPORT kmp_int32 __kmpc_ok_to_fork(ident_t *); +KMP_EXPORT void __kmpc_fork_call(ident_t *, kmp_int32 nargs, + kmpc_micro microtask, ...); + +KMP_EXPORT void __kmpc_serialized_parallel(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_end_serialized_parallel(ident_t *, kmp_int32 global_tid); + +KMP_EXPORT void __kmpc_flush(ident_t *); +KMP_EXPORT void __kmpc_barrier(ident_t *, kmp_int32 global_tid); +KMP_EXPORT kmp_int32 __kmpc_master(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_end_master(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_ordered(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_end_ordered(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_critical(ident_t *, kmp_int32 global_tid, + kmp_critical_name *); +KMP_EXPORT void __kmpc_end_critical(ident_t *, kmp_int32 global_tid, + kmp_critical_name *); #if OMP_45_ENABLED -KMP_EXPORT void __kmpc_critical_with_hint ( ident_t *, kmp_int32 global_tid, kmp_critical_name *, uintptr_t hint ); -#endif - -KMP_EXPORT kmp_int32 __kmpc_barrier_master ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_barrier_master ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT kmp_int32 __kmpc_single ( ident_t *, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_end_single ( ident_t *, kmp_int32 global_tid ); - -KMP_EXPORT void KMPC_FOR_STATIC_INIT ( ident_t *loc, kmp_int32 global_tid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int *plower, kmp_int *pupper, kmp_int *pstride, kmp_int incr, kmp_int chunk ); - -KMP_EXPORT void __kmpc_for_static_fini ( ident_t *loc, kmp_int32 global_tid ); - -KMP_EXPORT void __kmpc_copyprivate( ident_t *loc, kmp_int32 global_tid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ); - -extern void KMPC_SET_NUM_THREADS ( int arg ); -extern void KMPC_SET_DYNAMIC ( int flag ); -extern void KMPC_SET_NESTED ( int flag ); - -/* 
--------------------------------------------------------------------------- */ - -/* - * Taskq interface routines - */ - -KMP_EXPORT kmpc_thunk_t * __kmpc_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, size_t sizeof_thunk, - size_t sizeof_shareds, kmp_int32 flags, kmpc_shared_vars_t **shareds); -KMP_EXPORT void __kmpc_end_taskq (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT kmp_int32 __kmpc_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT void __kmpc_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status); -KMP_EXPORT void __kmpc_end_taskq_task (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk); -KMP_EXPORT kmpc_thunk_t * __kmpc_task_buffer (ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task); - -/* ------------------------------------------------------------------------ */ - -/* - * OMP 3.0 tasking interface routines - */ - -KMP_EXPORT kmp_int32 -__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); -KMP_EXPORT kmp_task_t* -__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ); -KMP_EXPORT void -__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); -KMP_EXPORT void -__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); -KMP_EXPORT kmp_int32 -__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task ); -KMP_EXPORT kmp_int32 -__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ); - -KMP_EXPORT kmp_int32 -__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ); +KMP_EXPORT void __kmpc_critical_with_hint(ident_t *, kmp_int32 global_tid, + kmp_critical_name *, uintptr_t hint); +#endif + +KMP_EXPORT kmp_int32 __kmpc_barrier_master(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_end_barrier_master(ident_t *, kmp_int32 global_tid); + +KMP_EXPORT kmp_int32 __kmpc_barrier_master_nowait(ident_t *, + kmp_int32 global_tid); + +KMP_EXPORT kmp_int32 __kmpc_single(ident_t *, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_end_single(ident_t *, kmp_int32 global_tid); + +KMP_EXPORT void KMPC_FOR_STATIC_INIT(ident_t *loc, kmp_int32 global_tid, + kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_int *plower, kmp_int *pupper, + kmp_int *pstride, kmp_int incr, + kmp_int chunk); + +KMP_EXPORT void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid); + +KMP_EXPORT void __kmpc_copyprivate(ident_t *loc, kmp_int32 global_tid, + size_t cpy_size, void *cpy_data, + void (*cpy_func)(void *, void *), + kmp_int32 didit); + +extern void KMPC_SET_NUM_THREADS(int arg); +extern void KMPC_SET_DYNAMIC(int flag); +extern void KMPC_SET_NESTED(int flag); + +/* Taskq interface routines */ +KMP_EXPORT kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid, + kmpc_task_t taskq_task, + size_t sizeof_thunk, + size_t sizeof_shareds, kmp_int32 flags, + kmpc_shared_vars_t **shareds); +KMP_EXPORT void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *thunk); +KMP_EXPORT kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *thunk); +KMP_EXPORT void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *thunk, kmp_int32 status); +KMP_EXPORT void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *thunk); +KMP_EXPORT kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, + 
kmpc_thunk_t *taskq_thunk, + kmpc_task_t task); + +/* OMP 3.0 tasking interface routines */ +KMP_EXPORT kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task); +KMP_EXPORT kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 flags, + size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry); +KMP_EXPORT void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task); +KMP_EXPORT void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task); +KMP_EXPORT kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task); +KMP_EXPORT kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid); + +KMP_EXPORT kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, + int end_part); #if TASK_UNUSED -void __kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ); -void __kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ); +void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task); +void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task); #endif // TASK_UNUSED /* ------------------------------------------------------------------------ */ #if OMP_40_ENABLED -KMP_EXPORT void __kmpc_taskgroup( ident_t * loc, int gtid ); -KMP_EXPORT void __kmpc_end_taskgroup( ident_t * loc, int gtid ); +KMP_EXPORT void __kmpc_taskgroup(ident_t *loc, int gtid); +KMP_EXPORT void __kmpc_end_taskgroup(ident_t *loc, int gtid); KMP_EXPORT kmp_int32 __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 ndeps, @@ -3432,154 +3655,169 @@ extern void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task); extern void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h); extern void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h); -extern kmp_int32 __kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ); +extern kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, + bool serialize_immediate); -KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind); -KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t* loc_ref, kmp_int32 gtid); +KMP_EXPORT kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 cncl_kind); +KMP_EXPORT kmp_int32 __kmpc_cancel_barrier(ident_t *loc_ref, kmp_int32 gtid); KMP_EXPORT int __kmp_get_cancellation_status(int cancel_kind); #if OMP_45_ENABLED -KMP_EXPORT void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ); -KMP_EXPORT void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ); -KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, kmp_int32 if_val, - kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, - kmp_int32 nogroup, kmp_int32 sched, kmp_uint64 grainsize, void * task_dup ); +KMP_EXPORT void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask); +KMP_EXPORT void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask); +KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, + kmp_int32 if_val, kmp_uint64 *lb, + kmp_uint64 *ub, kmp_int64 st, kmp_int32 nogroup, + kmp_int32 sched, kmp_uint64 grainsize, + void *task_dup); #endif // TODO: 
change to OMP_50_ENABLED, need to change build tools for this to work #if OMP_45_ENABLED -KMP_EXPORT void* __kmpc_task_reduction_init(int gtid, int num_data, void *data); -KMP_EXPORT void* __kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); -#endif - -#endif - - -/* - * Lock interface routines (fast versions with gtid passed in) - */ -KMP_EXPORT void __kmpc_init_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_init_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_destroy_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_destroy_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_set_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_set_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT void __kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT int __kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); -KMP_EXPORT int __kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ); +KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data); +KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); +#endif + +#endif + +/* Lock interface routines (fast versions with gtid passed in) */ +KMP_EXPORT void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); +KMP_EXPORT void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); +KMP_EXPORT int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock); +KMP_EXPORT int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, + void **user_lock); #if OMP_45_ENABLED -KMP_EXPORT void __kmpc_init_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); -KMP_EXPORT void __kmpc_init_nest_lock_with_hint( ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint ); -#endif - -/* ------------------------------------------------------------------------ */ - -/* - * Interface to fast scalable reduce methods routines - */ - -KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); -KMP_EXPORT void __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); -KMP_EXPORT kmp_int32 __kmpc_reduce( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); -KMP_EXPORT void __kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ); - -/* - * internal fast reduction routines - */ - -extern PACKED_REDUCTION_METHOD_T -__kmp_determine_reduction_method( ident_t *loc, kmp_int32 
global_tid, - kmp_int32 num_vars, size_t reduce_size, - void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ); +KMP_EXPORT void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, + void **user_lock, uintptr_t hint); +KMP_EXPORT void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, + void **user_lock, + uintptr_t hint); +#endif + +/* Interface to fast scalable reduce methods routines */ + +KMP_EXPORT kmp_int32 __kmpc_reduce_nowait( + ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck); +KMP_EXPORT void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *lck); +KMP_EXPORT kmp_int32 __kmpc_reduce( + ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck); +KMP_EXPORT void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *lck); + +/* Internal fast reduction routines */ + +extern PACKED_REDUCTION_METHOD_T __kmp_determine_reduction_method( + ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck); // this function is for testing set/get/determine reduce method -KMP_EXPORT kmp_int32 __kmp_get_reduce_method( void ); +KMP_EXPORT kmp_int32 __kmp_get_reduce_method(void); KMP_EXPORT kmp_uint64 __kmpc_get_taskid(); KMP_EXPORT kmp_uint64 __kmpc_get_parent_taskid(); -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - // C++ port // missing 'extern "C"' declarations -KMP_EXPORT kmp_int32 __kmpc_in_parallel( ident_t *loc ); -KMP_EXPORT void __kmpc_pop_num_threads( ident_t *loc, kmp_int32 global_tid ); -KMP_EXPORT void __kmpc_push_num_threads( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ); +KMP_EXPORT kmp_int32 __kmpc_in_parallel(ident_t *loc); +KMP_EXPORT void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid); +KMP_EXPORT void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_threads); #if OMP_40_ENABLED -KMP_EXPORT void __kmpc_push_proc_bind( ident_t *loc, kmp_int32 global_tid, int proc_bind ); -KMP_EXPORT void __kmpc_push_num_teams( ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ); -KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...); +KMP_EXPORT void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + int proc_bind); +KMP_EXPORT void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_teams, + kmp_int32 num_threads); +KMP_EXPORT void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, + kmpc_micro microtask, ...); #endif #if OMP_45_ENABLED -struct kmp_dim { // loop bounds info casted to kmp_int64 - kmp_int64 lo; // lower - kmp_int64 up; // upper - kmp_int64 st; // stride +struct kmp_dim { // loop bounds info casted to kmp_int64 + kmp_int64 lo; // lower + kmp_int64 up; // upper + kmp_int64 st; // stride }; -KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, kmp_int32 num_dims, struct kmp_dim * dims); -KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, kmp_int64 *vec); -KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, kmp_int64 
*vec); +KMP_EXPORT void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid, + kmp_int32 num_dims, struct kmp_dim *dims); +KMP_EXPORT void __kmpc_doacross_wait(ident_t *loc, kmp_int32 gtid, + kmp_int64 *vec); +KMP_EXPORT void __kmpc_doacross_post(ident_t *loc, kmp_int32 gtid, + kmp_int64 *vec); KMP_EXPORT void __kmpc_doacross_fini(ident_t *loc, kmp_int32 gtid); #endif -KMP_EXPORT void* -__kmpc_threadprivate_cached( ident_t * loc, kmp_int32 global_tid, - void * data, size_t size, void *** cache ); +KMP_EXPORT void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 global_tid, + void *data, size_t size, + void ***cache); // Symbols for MS mutual detection. extern int _You_must_link_with_exactly_one_OpenMP_library; extern int _You_must_link_with_Intel_OpenMP_library; -#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) - extern int _You_must_link_with_Microsoft_OpenMP_library; +#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) +extern int _You_must_link_with_Microsoft_OpenMP_library; #endif // The routines below are not exported. // Consider making them 'static' in corresponding source files. -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); +void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, + void *data_addr, size_t pc_size); +struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, + void *data_addr, + size_t pc_size); -// // ompc_, kmpc_ entries moved from omp.h. -// #if KMP_OS_WINDOWS -# define KMPC_CONVENTION __cdecl +#define KMPC_CONVENTION __cdecl #else -# define KMPC_CONVENTION +#define KMPC_CONVENTION #endif #ifndef __OMP_H typedef enum omp_sched_t { - omp_sched_static = 1, - omp_sched_dynamic = 2, - omp_sched_guided = 3, - omp_sched_auto = 4 + omp_sched_static = 1, + omp_sched_dynamic = 2, + omp_sched_guided = 3, + omp_sched_auto = 4 } omp_sched_t; -typedef void * kmp_affinity_mask_t; +typedef void *kmp_affinity_mask_t; #endif KMP_EXPORT void KMPC_CONVENTION ompc_set_max_active_levels(int); KMP_EXPORT void KMPC_CONVENTION ompc_set_schedule(omp_sched_t, int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); -KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); -KMP_EXPORT int KMPC_CONVENTION kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); -KMP_EXPORT int KMPC_CONVENTION kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION ompc_get_ancestor_thread_num(int); +KMP_EXPORT int KMPC_CONVENTION ompc_get_team_size(int); +KMP_EXPORT int KMPC_CONVENTION +kmpc_set_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION +kmpc_unset_affinity_mask_proc(int, kmp_affinity_mask_t *); +KMP_EXPORT int KMPC_CONVENTION +kmpc_get_affinity_mask_proc(int, kmp_affinity_mask_t *); KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize(int); KMP_EXPORT void KMPC_CONVENTION kmpc_set_stacksize_s(size_t); @@ -3592,4 +3830,3 @@ KMP_EXPORT void KMPC_CONVENTION kmpc_set_disp_num_buffers(int); #endif #endif /* KMP_H */ - diff --git a/openmp/runtime/src/kmp_affinity.cpp b/openmp/runtime/src/kmp_affinity.cpp index f8d579a..2d7f7a3 100644 --- a/openmp/runtime/src/kmp_affinity.cpp +++ b/openmp/runtime/src/kmp_affinity.cpp @@ -14,156 +14,149 @@ #include "kmp.h" +#include "kmp_affinity.h" #include "kmp_i18n.h" #include "kmp_io.h" #include "kmp_str.h" #include 
"kmp_wrapper_getpid.h" -#include "kmp_affinity.h" // Store the real or imagined machine hierarchy here static hierarchy_info machine_hierarchy; -void __kmp_cleanup_hierarchy() { - machine_hierarchy.fini(); -} - -void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { - kmp_uint32 depth; - // The test below is true if affinity is available, but set to "none". Need to init on first use of hierarchical barrier. - if (TCR_1(machine_hierarchy.uninitialized)) - machine_hierarchy.init(NULL, nproc); - - // Adjust the hierarchy in case num threads exceeds original - if (nproc > machine_hierarchy.base_num_threads) - machine_hierarchy.resize(nproc); +void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); } - depth = machine_hierarchy.depth; - KMP_DEBUG_ASSERT(depth > 0); - thr_bar->depth = depth; - thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1; - thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; +void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) { + kmp_uint32 depth; + // The test below is true if affinity is available, but set to "none". Need to + // init on first use of hierarchical barrier. + if (TCR_1(machine_hierarchy.uninitialized)) + machine_hierarchy.init(NULL, nproc); + + // Adjust the hierarchy in case num threads exceeds original + if (nproc > machine_hierarchy.base_num_threads) + machine_hierarchy.resize(nproc); + + depth = machine_hierarchy.depth; + KMP_DEBUG_ASSERT(depth > 0); + + thr_bar->depth = depth; + thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1; + thr_bar->skip_per_level = machine_hierarchy.skipPerLevel; } #if KMP_AFFINITY_SUPPORTED bool KMPAffinity::picked_api = false; -void* KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); } -void* KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); } -void KMPAffinity::Mask::operator delete(void* p) { __kmp_free(p); } -void KMPAffinity::Mask::operator delete[](void* p) { __kmp_free(p); } -void* KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); } -void KMPAffinity::operator delete(void* p) { __kmp_free(p); } +void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); } +void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); } +void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); } +void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); } +void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); } +void KMPAffinity::operator delete(void *p) { __kmp_free(p); } void KMPAffinity::pick_api() { - KMPAffinity* affinity_dispatch; - if (picked_api) - return; + KMPAffinity *affinity_dispatch; + if (picked_api) + return; #if KMP_USE_HWLOC - if (__kmp_affinity_top_method == affinity_top_method_hwloc) { - affinity_dispatch = new KMPHwlocAffinity(); - } else + if (__kmp_affinity_top_method == affinity_top_method_hwloc) { + affinity_dispatch = new KMPHwlocAffinity(); + } else #endif - { - affinity_dispatch = new KMPNativeAffinity(); - } - __kmp_affinity_dispatch = affinity_dispatch; - picked_api = true; + { + affinity_dispatch = new KMPNativeAffinity(); + } + __kmp_affinity_dispatch = affinity_dispatch; + picked_api = true; } void KMPAffinity::destroy_api() { - if (__kmp_affinity_dispatch != NULL) { - delete __kmp_affinity_dispatch; - __kmp_affinity_dispatch = NULL; - picked_api = false; - } + if (__kmp_affinity_dispatch != NULL) { + delete __kmp_affinity_dispatch; + __kmp_affinity_dispatch = NULL; + picked_api = false; + } } -// // 
Print the affinity mask to the character array in a pretty format. -// -char * -__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) -{ - KMP_ASSERT(buf_len >= 40); - char *scan = buf; - char *end = buf + buf_len - 1; - - // - // Find first element / check for empty set. - // - size_t i; - i = mask->begin(); - if (i == mask->end()) { - KMP_SNPRINTF(scan, end-scan+1, "{}"); - while (*scan != '\0') scan++; - KMP_ASSERT(scan <= end); - return buf; - } - - KMP_SNPRINTF(scan, end-scan+1, "{%ld", (long)i); - while (*scan != '\0') scan++; - i++; - for (; i != mask->end(); i = mask->next(i)) { - if (! KMP_CPU_ISSET(i, mask)) { - continue; - } - - // - // Check for buffer overflow. A string of the form "," will have - // at most 10 characters, plus we want to leave room to print ",...}" - // if the set is too large to print for a total of 15 characters. - // We already left room for '\0' in setting end. - // - if (end - scan < 15) { - break; - } - KMP_SNPRINTF(scan, end-scan+1, ",%-ld", (long)i); - while (*scan != '\0') scan++; - } - if (i != mask->end()) { - KMP_SNPRINTF(scan, end-scan+1, ",..."); - while (*scan != '\0') scan++; - } - KMP_SNPRINTF(scan, end-scan+1, "}"); - while (*scan != '\0') scan++; +char *__kmp_affinity_print_mask(char *buf, int buf_len, + kmp_affin_mask_t *mask) { + KMP_ASSERT(buf_len >= 40); + char *scan = buf; + char *end = buf + buf_len - 1; + + // Find first element / check for empty set. + size_t i; + i = mask->begin(); + if (i == mask->end()) { + KMP_SNPRINTF(scan, end - scan + 1, "{}"); + while (*scan != '\0') + scan++; KMP_ASSERT(scan <= end); return buf; + } + + KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i); + while (*scan != '\0') + scan++; + i++; + for (; i != mask->end(); i = mask->next(i)) { + if (!KMP_CPU_ISSET(i, mask)) { + continue; + } + + // Check for buffer overflow. A string of the form "," will have at most + // 10 characters, plus we want to leave room to print ",...}" if the set is + // too large to print for a total of 15 characters. We already left room for + // '\0' in setting end. 
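// Worked check of the 15-character budget described in the comment above
// (editorial illustration; it assumes the garbled form in that comment is
// ",<os-proc-id>"): each element is emitted as ",%-ld", i.e. a comma plus at
// most roughly nine decimal digits = 10 characters; ",...}" needs another 5,
// for a total of 15. The trailing '\0' is already accounted for because end
// was set to buf + buf_len - 1.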
+ if (end - scan < 15) { + break; + } + KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i); + while (*scan != '\0') + scan++; + } + if (i != mask->end()) { + KMP_SNPRINTF(scan, end - scan + 1, ",..."); + while (*scan != '\0') + scan++; + } + KMP_SNPRINTF(scan, end - scan + 1, "}"); + while (*scan != '\0') + scan++; + KMP_ASSERT(scan <= end); + return buf; } +void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { + KMP_CPU_ZERO(mask); -void -__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) -{ - KMP_CPU_ZERO(mask); - -# if KMP_GROUP_AFFINITY +#if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - int group; - KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL); - for (group = 0; group < __kmp_num_proc_groups; group++) { - int i; - int num = __kmp_GetActiveProcessorCount(group); - for (i = 0; i < num; i++) { - KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask); - } - } + if (__kmp_num_proc_groups > 1) { + int group; + KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL); + for (group = 0; group < __kmp_num_proc_groups; group++) { + int i; + int num = __kmp_GetActiveProcessorCount(group); + for (i = 0; i < num; i++) { + KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask); + } } - else + } else -# endif /* KMP_GROUP_AFFINITY */ +#endif /* KMP_GROUP_AFFINITY */ - { - int proc; - for (proc = 0; proc < __kmp_xproc; proc++) { - KMP_CPU_SET(proc, mask); - } + { + int proc; + for (proc = 0; proc < __kmp_xproc; proc++) { + KMP_CPU_SET(proc, mask); } + } } -// // When sorting by labels, __kmp_affinity_assign_child_nums() must first be // called to renumber the labels from [0..n] and place them into the child_num // vector of the address object. This is done in case the labels used for @@ -175,59 +168,53 @@ __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) // because we are paying attention to the labels themselves, not the ordinal // child numbers. By using the child numbers in the sort, the result is // {0,0}=601, {0,1}=603, {1,0}=602, {1,1}=604. 
-// -static void -__kmp_affinity_assign_child_nums(AddrUnsPair *address2os, - int numAddrs) -{ - KMP_DEBUG_ASSERT(numAddrs > 0); - int depth = address2os->first.depth; - unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *lastLabel = (unsigned *)__kmp_allocate(depth - * sizeof(unsigned)); - int labCt; +static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os, + int numAddrs) { + KMP_DEBUG_ASSERT(numAddrs > 0); + int depth = address2os->first.depth; + unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + int labCt; + for (labCt = 0; labCt < depth; labCt++) { + address2os[0].first.childNums[labCt] = counts[labCt] = 0; + lastLabel[labCt] = address2os[0].first.labels[labCt]; + } + int i; + for (i = 1; i < numAddrs; i++) { for (labCt = 0; labCt < depth; labCt++) { - address2os[0].first.childNums[labCt] = counts[labCt] = 0; - lastLabel[labCt] = address2os[0].first.labels[labCt]; + if (address2os[i].first.labels[labCt] != lastLabel[labCt]) { + int labCt2; + for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) { + counts[labCt2] = 0; + lastLabel[labCt2] = address2os[i].first.labels[labCt2]; + } + counts[labCt]++; + lastLabel[labCt] = address2os[i].first.labels[labCt]; + break; + } } - int i; - for (i = 1; i < numAddrs; i++) { - for (labCt = 0; labCt < depth; labCt++) { - if (address2os[i].first.labels[labCt] != lastLabel[labCt]) { - int labCt2; - for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) { - counts[labCt2] = 0; - lastLabel[labCt2] = address2os[i].first.labels[labCt2]; - } - counts[labCt]++; - lastLabel[labCt] = address2os[i].first.labels[labCt]; - break; - } - } - for (labCt = 0; labCt < depth; labCt++) { - address2os[i].first.childNums[labCt] = counts[labCt]; - } - for (; labCt < (int)Address::maxDepth; labCt++) { - address2os[i].first.childNums[labCt] = 0; - } + for (labCt = 0; labCt < depth; labCt++) { + address2os[i].first.childNums[labCt] = counts[labCt]; } - __kmp_free(lastLabel); - __kmp_free(counts); + for (; labCt < (int)Address::maxDepth; labCt++) { + address2os[i].first.childNums[labCt] = 0; + } + } + __kmp_free(lastLabel); + __kmp_free(counts); } - -// // All of the __kmp_affinity_create_*_map() routines should set // __kmp_affinity_masks to a vector of affinity mask objects of length -// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and -// return the number of levels in the machine topology tree (zero if +// __kmp_affinity_num_masks, if __kmp_affinity_type != affinity_none, and return +// the number of levels in the machine topology tree (zero if // __kmp_affinity_type == affinity_none). // -// All of the __kmp_affinity_create_*_map() routines should set *__kmp_affin_fullMask -// to the affinity mask for the initialization thread. They need to save and -// restore the mask, and it could be needed later, so saving it is just an -// optimization to avoid calling kmp_get_system_affinity() again. -// +// All of the __kmp_affinity_create_*_map() routines should set +// *__kmp_affin_fullMask to the affinity mask for the initialization thread. +// They need to save and restore the mask, and it could be needed later, so +// saving it is just an optimization to avoid calling kmp_get_system_affinity() +// again. 
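// Editorial sketch of the contract just described (hypothetical routine name
// and deliberately simplified flow; the concrete map-creation routines below
// differ in detail):
//
//   static int __kmp_affinity_create_example_map(AddrUnsPair **address2os,
//                                                kmp_i18n_id_t *const msg_id) {
//     // Save the initialization thread's mask once, so later code can reuse
//     // __kmp_affin_fullMask instead of calling kmp_get_system_affinity()
//     // again.
//     __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
//     // ... build *address2os and, unless __kmp_affinity_type ==
//     // affinity_none, fill __kmp_affinity_masks with
//     // __kmp_affinity_num_masks mask objects ...
//     int depth = /* number of levels in the machine topology tree */ 3;
//     return (__kmp_affinity_type == affinity_none) ? 0 : depth;
//   }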
kmp_affin_mask_t *__kmp_affin_fullMask = NULL; static int nCoresPerPkg, nPackages; @@ -237,58 +224,45 @@ static int __kmp_ncores; #endif static int *__kmp_pu_os_idx = NULL; -// // __kmp_affinity_uniform_topology() doesn't work when called from // places which support arbitrarily many levels in the machine topology // map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map() // __kmp_affinity_create_x2apicid_map(). -// -inline static bool -__kmp_affinity_uniform_topology() -{ - return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages); +inline static bool __kmp_affinity_uniform_topology() { + return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages); } - -// // Print out the detailed machine topology map, i.e. the physical locations // of each OS proc. -// -static void -__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth, - int pkgLevel, int coreLevel, int threadLevel) -{ - int proc; +static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, + int depth, int pkgLevel, + int coreLevel, int threadLevel) { + int proc; - KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY"); - for (proc = 0; proc < len; proc++) { - int level; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - for (level = 0; level < depth; level++) { - if (level == threadLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)); - } - else if (level == coreLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)); - } - else if (level == pkgLevel) { - __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)); - } - else if (level > pkgLevel) { - __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node), - level - pkgLevel - 1); - } - else { - __kmp_str_buf_print(&buf, "L%d ", level); - } - __kmp_str_buf_print(&buf, "%d ", - address2os[proc].first.labels[level]); - } - KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second, - buf.str); - __kmp_str_buf_free(&buf); + KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY"); + for (proc = 0; proc < len; proc++) { + int level; + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + for (level = 0; level < depth; level++) { + if (level == threadLevel) { + __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread)); + } else if (level == coreLevel) { + __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core)); + } else if (level == pkgLevel) { + __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package)); + } else if (level > pkgLevel) { + __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node), + level - pkgLevel - 1); + } else { + __kmp_str_buf_print(&buf, "L%d ", level); + } + __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]); } + KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second, + buf.str); + __kmp_str_buf_free(&buf); + } } #if KMP_USE_HWLOC @@ -298,2734 +272,2423 @@ __kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth, // have one thread context per core, we don't want the extra thread context // level if it offers no unique labels. So they are removed. 
// return value: the new depth of address2os -static int -__kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, int nActiveThreads, int depth, int* pkgLevel, int* coreLevel, int* threadLevel) { - int level; - int i; - int radix1_detected; - - for (level = depth-1; level >= 0; --level) { - // Always keep the package level - if (level == *pkgLevel) - continue; - // Detect if this level is radix 1 - radix1_detected = 1; - for (i = 1; i < nActiveThreads; ++i) { - if (address2os[0].first.labels[level] != address2os[i].first.labels[level]) { - // There are differing label values for this level so it stays - radix1_detected = 0; - break; - } - } - if (!radix1_detected) - continue; - // Radix 1 was detected - if (level == *threadLevel) { - // If only one thread per core, then just decrement - // the depth which removes the threadlevel from address2os - for (i = 0; i < nActiveThreads; ++i) { - address2os[i].first.depth--; - } - *threadLevel = -1; - } else if (level == *coreLevel) { - // For core level, we move the thread labels over if they are still - // valid (*threadLevel != -1), and also reduce the depth another level - for (i = 0; i < nActiveThreads; ++i) { - if (*threadLevel != -1) { - address2os[i].first.labels[*coreLevel] = address2os[i].first.labels[*threadLevel]; - } - address2os[i].first.depth--; - } - *coreLevel = -1; - } +static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os, + int nActiveThreads, int depth, + int *pkgLevel, int *coreLevel, + int *threadLevel) { + int level; + int i; + int radix1_detected; + + for (level = depth - 1; level >= 0; --level) { + // Always keep the package level + if (level == *pkgLevel) + continue; + // Detect if this level is radix 1 + radix1_detected = 1; + for (i = 1; i < nActiveThreads; ++i) { + if (address2os[0].first.labels[level] != + address2os[i].first.labels[level]) { + // There are differing label values for this level so it stays + radix1_detected = 0; + break; + } } - return address2os[0].first.depth; -} - -// Returns the number of objects of type 'type' below 'obj' within the topology tree structure. -// e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is HWLOC_OBJ_PU, then -// this will return the number of PU's under the SOCKET object. 
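// Usage illustration for the helper documented above (the same pattern the
// hwloc map-creation code further below uses to size the topology; the
// variable names here are only an example):
//
//   hwloc_obj_t pkg0 =
//       hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
//   int cores_per_pkg =
//       __kmp_hwloc_get_nobjs_under_obj(pkg0, HWLOC_OBJ_CORE);
//   hwloc_obj_t core0 =
//       hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0);
//   int pus_per_core =
//       __kmp_hwloc_get_nobjs_under_obj(core0, HWLOC_OBJ_PU);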
-static int -__kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, hwloc_obj_type_t type) { - int retval = 0; - hwloc_obj_t first; - for(first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, obj->logical_index, type, 0); - first != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == obj; - first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, first)) - { - ++retval; + if (!radix1_detected) + continue; + // Radix 1 was detected + if (level == *threadLevel) { + // If only one thread per core, then just decrement + // the depth which removes the threadlevel from address2os + for (i = 0; i < nActiveThreads; ++i) { + address2os[i].first.depth--; + } + *threadLevel = -1; + } else if (level == *coreLevel) { + // For core level, we move the thread labels over if they are still + // valid (*threadLevel != -1), and also reduce the depth another level + for (i = 0; i < nActiveThreads; ++i) { + if (*threadLevel != -1) { + address2os[i].first.labels[*coreLevel] = + address2os[i].first.labels[*threadLevel]; + } + address2os[i].first.depth--; + } + *coreLevel = -1; } - return retval; + } + return address2os[0].first.depth; } -static int -__kmp_affinity_create_hwloc_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; +// Returns the number of objects of type 'type' below 'obj' within the topology +// tree structure. e.g., if obj is a HWLOC_OBJ_PACKAGE object, and type is +// HWLOC_OBJ_PU, then this will return the number of PU's under the SOCKET +// object. +static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj, + hwloc_obj_type_t type) { + int retval = 0; + hwloc_obj_t first; + for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type, + obj->logical_index, type, 0); + first != NULL && + hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type, first) == + obj; + first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type, + first)) { + ++retval; + } + return retval; +} - // - // Save the affinity mask for the current thread. - // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - __kmp_get_system_affinity(oldMask, TRUE); +static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) { + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // Save the affinity mask for the current thread. + kmp_affin_mask_t *oldMask; + KMP_CPU_ALLOC(oldMask); + __kmp_get_system_affinity(oldMask, TRUE); + + int depth = 3; + int pkgLevel = 0; + int coreLevel = 1; + int threadLevel = 2; + + if (!KMP_AFFINITY_CAPABLE()) { + // Hack to try and infer the machine topology using only the data + // available from cpuid on the current thread, and __kmp_xproc. 
+ KMP_ASSERT(__kmp_affinity_type == affinity_none); + + nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj( + hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0), + HWLOC_OBJ_CORE); + __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj( + hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), + HWLOC_OBJ_PU); + __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; + nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; + if (__kmp_affinity_verbose) { + KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + KMP_CPU_FREE(oldMask); + return 0; + } + + // Allocate the data structure to be returned. + AddrUnsPair *retval = + (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, + // nCoresPerPkg, & nPackages. Make sure all these vars are set + // correctly, and return if affinity is not enabled. + + hwloc_obj_t pu; + hwloc_obj_t core; + hwloc_obj_t socket; + int nActiveThreads = 0; + int socket_identifier = 0; + // re-calculate globals to count only accessible resources + __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0; + for (socket = + hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0); + socket != NULL; + socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, + HWLOC_OBJ_PACKAGE, socket), + socket_identifier++) { + int core_identifier = 0; + int num_active_cores = 0; + for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, + socket->logical_index, + HWLOC_OBJ_CORE, 0); + core != NULL && + hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, + core) == socket; + core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, + core), + core_identifier++) { + int pu_identifier = 0; + int num_active_threads = 0; + for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, + core->logical_index, HWLOC_OBJ_PU, + 0); + pu != NULL && + hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, + pu) == core; + pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, + pu), + pu_identifier++) { + Address addr(3); + if(!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)) + continue; // skip inactive (inaccessible) unit + KA_TRACE(20, + ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n", + socket->os_index, socket->logical_index, core->os_index, + core->logical_index, pu->os_index,pu->logical_index)); + addr.labels[0] = socket_identifier; // package + addr.labels[1] = core_identifier; // core + addr.labels[2] = pu_identifier; // pu + retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index); + __kmp_pu_os_idx[nActiveThreads] = + pu->os_index; // keep os index for each active pu + nActiveThreads++; + ++num_active_threads; // count active threads per core + } + if (num_active_threads) { // were there any active threads on the core? 
+ ++__kmp_ncores; // count total active cores + ++num_active_cores; // count active cores per socket + if (num_active_threads > __kmp_nThreadsPerCore) + __kmp_nThreadsPerCore = num_active_threads; // calc maximum + } + } + if (num_active_cores) { // were there any active cores on the socket? + ++nPackages; // count total active packages + if (num_active_cores > nCoresPerPkg) + nCoresPerPkg = num_active_cores; // calc maximum + } + } - int depth = 3; - int pkgLevel = 0; - int coreLevel = 1; - int threadLevel = 2; + // If there's only one thread context to bind to, return now. + KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc); + KMP_ASSERT(nActiveThreads > 0); + if (nActiveThreads == 1) { + __kmp_ncores = nPackages = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - if (! KMP_AFFINITY_CAPABLE()) - { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. - // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0), HWLOC_OBJ_CORE); - __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0), HWLOC_OBJ_PU); - __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - KMP_CPU_FREE(oldMask); - return 0; + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); } - // - // Allocate the data structure to be returned. - // - AddrUnsPair *retval = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + if (__kmp_affinity_type == affinity_none) { + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + return 0; + } - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return if affinity is not enabled. - // + // Form an Address object which only includes the package level. 
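
// A reduced model of the accounting in the traversal above, with made-up
// per-socket and per-core activity counts: only sockets and cores that still
// have accessible units are counted, and the per-package / per-core figures
// kept are maxima, not averages.
#include <cstdio>
int main() {
  // active cores found on each socket, and active PUs found on each core,
  // under some restricted affinity mask (hypothetical values)
  int activeCoresPerSocket[] = {4, 2};
  int activePusPerCore[] = {2, 2, 1, 2, 2, 2};
  int nPackages = 0, ncores = 0, nCoresPerPkg = 0, nThreadsPerCore = 0;
  for (int c : activeCoresPerSocket) {
    if (c == 0)
      continue; // a socket with no accessible core is not counted
    ++nPackages;
    ncores += c;
    if (c > nCoresPerPkg)
      nCoresPerPkg = c; // maximum active cores seen on any one socket
  }
  for (int t : activePusPerCore)
    if (t > nThreadsPerCore)
      nThreadsPerCore = t; // maximum active PUs seen on any one core
  printf("packages=%d cores=%d coresPerPkg=%d threadsPerCore=%d\n", nPackages,
         ncores, nCoresPerPkg, nThreadsPerCore);
  return 0;
}
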
+ Address addr(1); + addr.labels[0] = retval[0].first.labels[pkgLevel]; + retval[0].first = addr; - hwloc_obj_t pu; - hwloc_obj_t core; - hwloc_obj_t socket; - int nActiveThreads = 0; - int socket_identifier = 0; - // re-calculate globals to count only accessible resources - __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0; - for(socket = hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0); - socket != NULL; - socket = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, socket), - socket_identifier++) - { - int core_identifier = 0; - int num_active_cores = 0; - for(core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type, socket->logical_index, HWLOC_OBJ_CORE, 0); - core != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type, core) == socket; - core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, core), - core_identifier++) - { - int pu_identifier = 0; - int num_active_threads = 0; - for(pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type, core->logical_index, HWLOC_OBJ_PU, 0); - pu != NULL && hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type, pu) == core; - pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU, pu), - pu_identifier++) - { - Address addr(3); - if(! KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask)) - continue; // skip inactive (inaccessible) unit - KA_TRACE(20, ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n", - socket->os_index, socket->logical_index, core->os_index, core->logical_index, pu->os_index,pu->logical_index)); - addr.labels[0] = socket_identifier; // package - addr.labels[1] = core_identifier; // core - addr.labels[2] = pu_identifier; // pu - retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index); - __kmp_pu_os_idx[nActiveThreads] = pu->os_index; // keep os index for each active pu - nActiveThreads++; - ++num_active_threads; // count active threads per core - } - if (num_active_threads) { // were there any active threads on the core? - ++__kmp_ncores; // count total active cores - ++num_active_cores; // count active cores per socket - if (num_active_threads > __kmp_nThreadsPerCore) - __kmp_nThreadsPerCore = num_active_threads; // calc maximum - } - } - if (num_active_cores) { // were there any active cores on the socket? - ++nPackages; // count total active packages - if (num_active_cores > nCoresPerPkg) - nCoresPerPkg = num_active_cores; // calc maximum - } + if (__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; } - // - // If there's only one thread context to bind to, return now. 
- // - KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc); - KMP_ASSERT(nActiveThreads > 0); - if (nActiveThreads == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); + } - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } + *address2os = retval; + KMP_CPU_FREE(oldMask); + return 1; + } - // - // Form an Address object which only includes the package level. - // - Address addr(1); - addr.labels[0] = retval[0].first.labels[pkgLevel]; - retval[0].first = addr; + // Sort the table by physical Id. + qsort(retval, nActiveThreads, sizeof(*retval), + __kmp_affinity_cmp_Address_labels); - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } + // Check to see if the machine topology is uniform + unsigned uniform = + (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads); - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } + // Print the machine topology summary. + if (__kmp_affinity_verbose) { + char mask[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); } - // - // Sort the table by physical Id. - // - qsort(retval, nActiveThreads, sizeof(*retval), __kmp_affinity_cmp_Address_labels); - - // - // Check to see if the machine topology is uniform - // - unsigned uniform = (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads); + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); - // - // Print the machine topology summary. 
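
// A sketch of the sort-then-check step above, with simplified stand-in types
// (Entry and cmp_labels are not the runtime's Address/AddrUnsPair or
// __kmp_affinity_cmp_Address_labels, just the same idea): sort
// lexicographically by the (package, core, thread) label tuple, then the
// topology is uniform only if the product of the per-level counts reproduces
// the number of active threads.
#include <cstdio>
#include <cstdlib>
struct Entry {
  int labels[3];
  unsigned osId;
};
static int cmp_labels(const void *a, const void *b) {
  const Entry *x = (const Entry *)a, *y = (const Entry *)b;
  for (int i = 0; i < 3; ++i) {
    if (x->labels[i] < y->labels[i])
      return -1;
    if (x->labels[i] > y->labels[i])
      return 1;
  }
  return 0;
}
int main() {
  Entry t[] = {{{1, 0, 0}, 4}, {{0, 1, 0}, 2}, {{0, 0, 1}, 1}, {{0, 0, 0}, 0}};
  int n = 4; // active threads under the (hypothetical) mask
  qsort(t, n, sizeof(*t), cmp_labels);
  int nPackages = 2, nCoresPerPkg = 2, nThreadsPerCore = 2; // made-up totals
  bool uniform = (nPackages * nCoresPerPkg * nThreadsPerCore == n);
  printf("first osId after sort=%u uniform=%d\n", t[0].osId, (int)uniform);
  return 0;
}
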
- // - if (__kmp_affinity_verbose) { - char mask[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + __kmp_str_buf_print(&buf, "%d", nPackages); + // for (level = 1; level <= pkgLevel; level++) { + // __kmp_str_buf_print(&buf, " x %d", maxCt[level]); + // } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } + __kmp_str_buf_free(&buf); + } - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); + if (__kmp_affinity_type == affinity_none) { + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + return 0; + } - __kmp_str_buf_print(&buf, "%d", nPackages); - //for (level = 1; level <= pkgLevel; level++) { - // __kmp_str_buf_print(&buf, " x %d", maxCt[level]); - // } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); + // Find any levels with radiix 1, and remove them from the map + // (except for the package level). + depth = __kmp_affinity_remove_radix_one_levels( + retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel); - __kmp_str_buf_free(&buf); + if (__kmp_affinity_gran_levels < 0) { + // Set the granularity level based on what levels are modeled + // in the machine topology map. + __kmp_affinity_gran_levels = 0; + if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { + __kmp_affinity_gran_levels++; } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; + if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { + __kmp_affinity_gran_levels++; } - - // - // Find any levels with radiix 1, and remove them from the map - // (except for the package level). - // - depth = __kmp_affinity_remove_radix_one_levels(retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel); - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels++; } + } - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel, - coreLevel, threadLevel); - } + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel, + coreLevel, threadLevel); + } - KMP_CPU_FREE(oldMask); - *address2os = retval; - return depth; + KMP_CPU_FREE(oldMask); + *address2os = retval; + return depth; } #endif // KMP_USE_HWLOC -// // If we don't know how to retrieve the machine's processor topology, or // encounter an error in doing so, this routine is called to form a "flat" // mapping of os thread id's <-> processor id's. 
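
// A reduced sketch of the granularity rule used above (simplified names, and
// only the thread/core cases shown): __kmp_affinity_gran_levels ends up as
// the number of modeled levels finer than the requested granularity, i.e. how
// many label positions get folded together when binding masks are formed.
#include <cstdio>
enum Gran { gran_thread, gran_core, gran_package };
static int gran_levels(Gran gran, int threadLevel, int coreLevel) {
  int levels = 0;
  if (threadLevel >= 0 && gran > gran_thread)
    ++levels; // threads of a core are folded together
  if (coreLevel >= 0 && gran > gran_core)
    ++levels; // cores of a package are folded together
  return levels;
}
int main() {
  // a 3-level map (package/core/thread) with granularity=core folds one level
  printf("granularity=core -> %d level(s) folded\n",
         gran_levels(gran_core, /*threadLevel=*/2, /*coreLevel=*/1));
  return 0;
}
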
-// -static int -__kmp_affinity_create_flat_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Even if __kmp_affinity_type == affinity_none, this routine might still - // called to set __kmp_ncores, as well as - // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. - // - if (! KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(__kmp_affinity_type == affinity_none); - __kmp_ncores = nPackages = __kmp_xproc; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffFlatTopology, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = nPackages = __kmp_avail_proc; +static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) { + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // Even if __kmp_affinity_type == affinity_none, this routine might still + // called to set __kmp_ncores, as well as + // __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. + if (!KMP_AFFINITY_CAPABLE()) { + KMP_ASSERT(__kmp_affinity_type == affinity_none); + __kmp_ncores = nPackages = __kmp_xproc; __kmp_nThreadsPerCore = nCoresPerPkg = 1; if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask); - - KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); + KMP_INFORM(AffFlatTopology, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); } - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - if (__kmp_affinity_type == affinity_none) { - int avail_ct = 0; - int i; - KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { - if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) - continue; - __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat - } - return 0; - } - - // - // Contruct the data structure to be returned. - // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); + return 0; + } + + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. + // Make sure all these vars are set correctly, and return now if affinity is + // not enabled. 
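
// The flat mapping reduces to one depth-1 address per available OS proc whose
// single label is the proc id itself. A minimal sketch with a plain vector
// standing in for the runtime's mask iteration and allocator (FlatEntry is a
// hypothetical stand-in, not the runtime's Address type):
#include <cstdio>
#include <vector>
struct FlatEntry {
  int label;
  unsigned osId;
};
int main() {
  std::vector<unsigned> avail = {0, 1, 2, 5, 6, 7}; // made-up affinity mask
  std::vector<FlatEntry> address2os;
  for (unsigned os : avail)
    address2os.push_back({(int)os, os}); // labels[0] == os proc id
  printf("entries=%zu first label=%d\n", address2os.size(),
         address2os[0].label);
  return 0;
}
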
+ __kmp_ncores = nPackages = __kmp_avail_proc; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + __kmp_affin_fullMask); + + KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + if (__kmp_affinity_type == affinity_none) { int avail_ct = 0; - unsigned int i; + int i; KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { - continue; - } - __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat - Address addr(1); - addr.labels[0] = i; - (*address2os)[avail_ct++] = AddrUnsPair(addr,i); - } - if (__kmp_affinity_verbose) { - KMP_INFORM(OSProcToPackage, "KMP_AFFINITY"); + if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) + continue; + __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat } - - if (__kmp_affinity_gran_levels < 0) { - // - // Only the package level is modeled in the machine topology map, - // so the #levels of granularity is either 0 or 1. - // - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels = 1; - } - else { - __kmp_affinity_gran_levels = 0; - } + return 0; + } + + // Contruct the data structure to be returned. + *address2os = + (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); + int avail_ct = 0; + unsigned int i; + KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { + // Skip this proc if it is not included in the machine model. + if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { + continue; + } + __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat + Address addr(1); + addr.labels[0] = i; + (*address2os)[avail_ct++] = AddrUnsPair(addr, i); + } + if (__kmp_affinity_verbose) { + KMP_INFORM(OSProcToPackage, "KMP_AFFINITY"); + } + + if (__kmp_affinity_gran_levels < 0) { + // Only the package level is modeled in the machine topology map, + // so the #levels of granularity is either 0 or 1. + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels = 1; + } else { + __kmp_affinity_gran_levels = 0; } - return 1; + } + return 1; } +#if KMP_GROUP_AFFINITY -# if KMP_GROUP_AFFINITY - -// // If multiple Windows* OS processor groups exist, we can create a 2-level -// topology map with the groups at level 0 and the individual procs at -// level 1. -// +// topology map with the groups at level 0 and the individual procs at level 1. // This facilitates letting the threads float among all procs in a group, // if granularity=group (the default when there are multiple groups). -// -static int -__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // If we don't have multiple processor groups, return now. - // The flat mapping will be used. - // - if ((! 
KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) { - // FIXME set *msg_id - return -1; - } - - // - // Contruct the data structure to be returned. - // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - int avail_ct = 0; - int i; - KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { - continue; - } - __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat - Address addr(2); - addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR)); - addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR)); - (*address2os)[avail_ct++] = AddrUnsPair(addr,i); +static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) { + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // If we don't have multiple processor groups, return now. + // The flat mapping will be used. + if ((!KMP_AFFINITY_CAPABLE()) || + (__kmp_get_proc_group(__kmp_affin_fullMask) >= 0)) { + // FIXME set *msg_id + return -1; + } + + // Contruct the data structure to be returned. + *address2os = + (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc); + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + int avail_ct = 0; + int i; + KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { + // Skip this proc if it is not included in the machine model. + if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { + continue; + } + __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat + Address addr(2); + addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR)); + addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR)); + (*address2os)[avail_ct++] = AddrUnsPair(addr, i); - if (__kmp_affinity_verbose) { - KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0], - addr.labels[1]); - } + if (__kmp_affinity_verbose) { + KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0], + addr.labels[1]); } + } - if (__kmp_affinity_gran_levels < 0) { - if (__kmp_affinity_gran == affinity_gran_group) { - __kmp_affinity_gran_levels = 1; - } - else if ((__kmp_affinity_gran == affinity_gran_fine) - || (__kmp_affinity_gran == affinity_gran_thread)) { - __kmp_affinity_gran_levels = 0; - } - else { - const char *gran_str = NULL; - if (__kmp_affinity_gran == affinity_gran_core) { - gran_str = "core"; - } - else if (__kmp_affinity_gran == affinity_gran_package) { - gran_str = "package"; - } - else if (__kmp_affinity_gran == affinity_gran_node) { - gran_str = "node"; - } - else { - KMP_ASSERT(0); - } + if (__kmp_affinity_gran_levels < 0) { + if (__kmp_affinity_gran == affinity_gran_group) { + __kmp_affinity_gran_levels = 1; + } else if ((__kmp_affinity_gran == affinity_gran_fine) || + (__kmp_affinity_gran == affinity_gran_thread)) { + __kmp_affinity_gran_levels = 0; + } else { + const char *gran_str = NULL; + if (__kmp_affinity_gran == affinity_gran_core) { + gran_str = "core"; + } else if (__kmp_affinity_gran == affinity_gran_package) { + gran_str = "package"; + } else if (__kmp_affinity_gran == affinity_gran_node) { + gran_str = "node"; + } else { + KMP_ASSERT(0); + } - // Warning: can't use affinity granularity \"gran\" with group topology method, using "thread" - __kmp_affinity_gran_levels = 0; - } + // Warning: can't use affinity granularity \"gran\" with 
group topology + // method, using "thread" + __kmp_affinity_gran_levels = 0; } - return 2; + } + return 2; } -# endif /* KMP_GROUP_AFFINITY */ - +#endif /* KMP_GROUP_AFFINITY */ -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -static int -__kmp_cpuid_mask_width(int count) { - int r = 0; +static int __kmp_cpuid_mask_width(int count) { + int r = 0; - while((1<osId < bb->osId) return -1; - if (aa->osId > bb->osId) return 1; - return 0; +static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, + const void *b) { + const apicThreadInfo *aa = (const apicThreadInfo *)a; + const apicThreadInfo *bb = (const apicThreadInfo *)b; + if (aa->osId < bb->osId) + return -1; + if (aa->osId > bb->osId) + return 1; + return 0; } - -static int -__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b) -{ - const apicThreadInfo *aa = (const apicThreadInfo *)a; - const apicThreadInfo *bb = (const apicThreadInfo *)b; - if (aa->pkgId < bb->pkgId) return -1; - if (aa->pkgId > bb->pkgId) return 1; - if (aa->coreId < bb->coreId) return -1; - if (aa->coreId > bb->coreId) return 1; - if (aa->threadId < bb->threadId) return -1; - if (aa->threadId > bb->threadId) return 1; - return 0; +static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, + const void *b) { + const apicThreadInfo *aa = (const apicThreadInfo *)a; + const apicThreadInfo *bb = (const apicThreadInfo *)b; + if (aa->pkgId < bb->pkgId) + return -1; + if (aa->pkgId > bb->pkgId) + return 1; + if (aa->coreId < bb->coreId) + return -1; + if (aa->coreId > bb->coreId) + return 1; + if (aa->threadId < bb->threadId) + return -1; + if (aa->threadId > bb->threadId) + return 1; + return 0; } - -// // On IA-32 architecture and Intel(R) 64 architecture, we attempt to use // an algorithm which cycles through the available os threads, setting // the current thread's affinity mask to that thread, and then retrieves // the Apic Id for each thread context using the cpuid instruction. -// -static int -__kmp_affinity_create_apicid_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - kmp_cpuid buf; - int rc; - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Check if cpuid leaf 4 is supported. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax < 4) { - *msg_id = kmp_i18n_str_NoLeaf4Support; - return -1; - } - - // - // The algorithm used starts by setting the affinity to each available - // thread and retrieving info from the cpuid instruction, so if we are - // not capable of calling __kmp_get_system_affinity() and - // _kmp_get_system_affinity(), then we need to do something else - use - // the defaults that we calculated from issuing cpuid without binding - // to each proc. - // - if (! KMP_AFFINITY_CAPABLE()) { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. - // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - // - // Get an upper bound on the number of threads per package using - // cpuid(1). - // - // On some OS/chps combinations where HT is supported by the chip - // but is disabled, this value will be 2 on a single core chip. - // Usually, it will be 2 if HT is enabled and 1 if HT is disabled. - // - __kmp_x86_cpuid(1, 0, &buf); - int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; - if (maxThreadsPerPkg == 0) { - maxThreadsPerPkg = 1; - } - - // - // The num cores per pkg comes from cpuid(4). - // 1 must be added to the encoded value. 
- // - // The author of cpu_count.cpp treated this only an upper bound - // on the number of cores, but I haven't seen any cases where it - // was greater than the actual number of cores, so we will treat - // it as exact in this block of code. - // - // First, we need to check if cpuid(4) is supported on this chip. - // To see if cpuid(n) is supported, issue cpuid(0) and check if eax - // has the value n or greater. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax >= 4) { - __kmp_x86_cpuid(4, 0, &buf); - nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; - } - else { - nCoresPerPkg = 1; - } - - // - // There is no way to reliably tell if HT is enabled without issuing - // the cpuid instruction from every thread, can correlating the cpuid - // info, so if the machine is not affinity capable, we assume that HT - // is off. We have seen quite a few machines where maxThreadsPerPkg - // is 2, yet the machine does not support HT. - // - // - Older OSes are usually found on machines with older chips, which - // do not support HT. - // - // - The performance penalty for mistakenly identifying a machine as - // HT when it isn't (which results in blocktime being incorrecly set - // to 0) is greater than the penalty when for mistakenly identifying - // a machine as being 1 thread/core when it is really HT enabled - // (which results in blocktime being incorrectly set to a positive - // value). - // - __kmp_ncores = __kmp_xproc; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - __kmp_nThreadsPerCore = 1; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; - } - - // - // - // From here on, we can assume that it is safe to call - // __kmp_get_system_affinity() and __kmp_set_system_affinity(), - // even if __kmp_affinity_type = affinity_none. - // - - // - // Save the affinity mask for the current thread. - // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - KMP_ASSERT(oldMask != NULL); - __kmp_get_system_affinity(oldMask, TRUE); - - // - // Run through each of the available contexts, binding the current thread - // to it, and obtaining the pertinent information using the cpuid instr. - // - // The relevant information is: - // - // Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context - // has a uniqie Apic Id, which is of the form pkg# : core# : thread#. - // - // Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The - // value of this field determines the width of the core# + thread# - // fields in the Apic Id. It is also an upper bound on the number - // of threads per package, but it has been verified that situations - // happen were it is not exact. In particular, on certain OS/chip - // combinations where Intel(R) Hyper-Threading Technology is supported - // by the chip but has - // been disabled, the value of this field will be 2 (for a single core - // chip). On other OS/chip combinations supporting - // Intel(R) Hyper-Threading Technology, the value of - // this field will be 1 when Intel(R) Hyper-Threading Technology is - // disabled and 2 when it is enabled. - // - // Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). 
The - // value of this field (+1) determines the width of the core# field in - // the Apic Id. The comments in "cpucount.cpp" say that this value is - // an upper bound, but the IA-32 architecture manual says that it is - // exactly the number of cores per package, and I haven't seen any - // case where it wasn't. - // - // From this information, deduce the package Id, core Id, and thread Id, - // and set the corresponding fields in the apicThreadInfo struct. - // - unsigned i; - apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate( - __kmp_avail_proc * sizeof(apicThreadInfo)); - unsigned nApics = 0; - KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { - continue; - } - KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc); - - __kmp_affinity_dispatch->bind_thread(i); - threadInfo[nApics].osId = i; - - // - // The apic id and max threads per pkg come from cpuid(1). - // - __kmp_x86_cpuid(1, 0, &buf); - if (((buf.edx >> 9) & 1) == 0) { - __kmp_set_system_affinity(oldMask, TRUE); - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_ApicNotPresent; - return -1; - } - threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff; - threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; - if (threadInfo[nApics].maxThreadsPerPkg == 0) { - threadInfo[nApics].maxThreadsPerPkg = 1; - } - - // - // Max cores per pkg comes from cpuid(4). - // 1 must be added to the encoded value. - // - // First, we need to check if cpuid(4) is supported on this chip. - // To see if cpuid(n) is supported, issue cpuid(0) and check if eax - // has the value n or greater. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax >= 4) { - __kmp_x86_cpuid(4, 0, &buf); - threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; - } - else { - threadInfo[nApics].maxCoresPerPkg = 1; - } - - // - // Infer the pkgId / coreId / threadId using only the info - // obtained locally. - // - int widthCT = __kmp_cpuid_mask_width( - threadInfo[nApics].maxThreadsPerPkg); - threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; - - int widthC = __kmp_cpuid_mask_width( - threadInfo[nApics].maxCoresPerPkg); - int widthT = widthCT - widthC; - if (widthT < 0) { - // - // I've never seen this one happen, but I suppose it could, if - // the cpuid instruction on a chip was really screwed up. - // Make sure to restore the affinity mask before the tail call. - // - __kmp_set_system_affinity(oldMask, TRUE); - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - - int maskC = (1 << widthC) - 1; - threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) - &maskC; - - int maskT = (1 << widthT) - 1; - threadInfo[nApics].threadId = threadInfo[nApics].apicId &maskT; - - nApics++; - } - - // - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - // - __kmp_set_system_affinity(oldMask, TRUE); - - // - // If there's only one thread context to bind to, form an Address object - // with depth 1 and return immediately (or, if affinity is off, set - // address2os to NULL and return). - // - // If it is configured to omit the package level when there is only a - // single package, the logic at the end of this routine won't work if - // there is only a single thread - it would try to form an Address - // object with depth 0. 
- // - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; - } - - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0].pkgId; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 1; - } - - // - // Sort the threadInfo table by physical Id. - // - qsort(threadInfo, nApics, sizeof(*threadInfo), - __kmp_affinity_cmp_apicThreadInfo_phys_id); - - // - // The table is now sorted by pkgId / coreId / threadId, but we really - // don't know the radix of any of the fields. pkgId's may be sparsely - // assigned among the chips on a system. Although coreId's are usually - // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. - // - // For that matter, we don't know what coresPerPkg and threadsPerCore - // (or the total # packages) are at this point - we want to determine - // that now. We only have an upper bound on the first two figures. - // - // We also perform a consistency check at this point: the values returned - // by the cpuid instruction for any thread bound to a given package had - // better return the same info for maxThreadsPerPkg and maxCoresPerPkg. - // - nPackages = 1; - nCoresPerPkg = 1; +static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) { + kmp_cpuid buf; + int rc; + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // Check if cpuid leaf 4 is supported. + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax < 4) { + *msg_id = kmp_i18n_str_NoLeaf4Support; + return -1; + } + + // The algorithm used starts by setting the affinity to each available thread + // and retrieving info from the cpuid instruction, so if we are not capable of + // calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we + // need to do something else - use the defaults that we calculated from + // issuing cpuid without binding to each proc. + if (!KMP_AFFINITY_CAPABLE()) { + // Hack to try and infer the machine topology using only the data + // available from cpuid on the current thread, and __kmp_xproc. + KMP_ASSERT(__kmp_affinity_type == affinity_none); + + // Get an upper bound on the number of threads per package using cpuid(1). + // On some OS/chps combinations where HT is supported by the chip but is + // disabled, this value will be 2 on a single core chip. Usually, it will be + // 2 if HT is enabled and 1 if HT is disabled. 
+ __kmp_x86_cpuid(1, 0, &buf); + int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; + if (maxThreadsPerPkg == 0) { + maxThreadsPerPkg = 1; + } + + // The num cores per pkg comes from cpuid(4). 1 must be added to the encoded + // value. + // + // The author of cpu_count.cpp treated this only an upper bound on the + // number of cores, but I haven't seen any cases where it was greater than + // the actual number of cores, so we will treat it as exact in this block of + // code. + // + // First, we need to check if cpuid(4) is supported on this chip. To see if + // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n or + // greater. + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax >= 4) { + __kmp_x86_cpuid(4, 0, &buf); + nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; + } else { + nCoresPerPkg = 1; + } + + // There is no way to reliably tell if HT is enabled without issuing the + // cpuid instruction from every thread, can correlating the cpuid info, so + // if the machine is not affinity capable, we assume that HT is off. We have + // seen quite a few machines where maxThreadsPerPkg is 2, yet the machine + // does not support HT. + // + // - Older OSes are usually found on machines with older chips, which do not + // support HT. + // - The performance penalty for mistakenly identifying a machine as HT when + // it isn't (which results in blocktime being incorrecly set to 0) is + // greater than the penalty when for mistakenly identifying a machine as + // being 1 thread/core when it is really HT enabled (which results in + // blocktime being incorrectly set to a positive value). + __kmp_ncores = __kmp_xproc; + nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; __kmp_nThreadsPerCore = 1; - unsigned nCores = 1; - - unsigned pkgCt = 1; // to determine radii - unsigned lastPkgId = threadInfo[0].pkgId; - unsigned coreCt = 1; - unsigned lastCoreId = threadInfo[0].coreId; - unsigned threadCt = 1; - unsigned lastThreadId = threadInfo[0].threadId; - - // intra-pkg consist checks - unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg; - unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg; - - for (i = 1; i < nApics; i++) { - if (threadInfo[i].pkgId != lastPkgId) { - nCores++; - pkgCt++; - lastPkgId = threadInfo[i].pkgId; - if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; - coreCt = 1; - lastCoreId = threadInfo[i].coreId; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - threadCt = 1; - lastThreadId = threadInfo[i].threadId; - - // - // This is a different package, so go on to the next iteration - // without doing any consistency checks. Reset the consistency - // check vars, though. - // - prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg; - prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg; - continue; - } - - if (threadInfo[i].coreId != lastCoreId) { - nCores++; - coreCt++; - lastCoreId = threadInfo[i].coreId; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - threadCt = 1; - lastThreadId = threadInfo[i].threadId; - } - else if (threadInfo[i].threadId != lastThreadId) { - threadCt++; - lastThreadId = threadInfo[i].threadId; - } - else { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique; - return -1; - } - - // - // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg - // fields agree between all the threads bounds to a given package. 
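
// A sketch of the two cpuid fields read above, using the GCC/Clang <cpuid.h>
// helpers instead of the runtime's own __kmp_x86_cpuid wrapper (that choice,
// and x86-only operation, are assumptions of this sketch, not of the patch):
#include <cpuid.h>
#include <cstdio>
int main() {
  unsigned eax, ebx, ecx, edx;
  if (!__get_cpuid(1, &eax, &ebx, &ecx, &edx))
    return 1;
  unsigned maxThreadsPerPkg = (ebx >> 16) & 0xff; // cpuid(1).ebx bits 16:23
  if (maxThreadsPerPkg == 0)
    maxThreadsPerPkg = 1;
  unsigned nCoresPerPkg = 1;
  if (__get_cpuid(0, &eax, &ebx, &ecx, &edx) && eax >= 4) {
    __cpuid_count(4, 0, eax, ebx, ecx, edx);  // leaf 4, subleaf 0
    nCoresPerPkg = ((eax >> 26) & 0x3f) + 1;  // cpuid(4).eax bits 26:31, +1
  }
  printf("maxThreadsPerPkg=%u nCoresPerPkg=%u\n", maxThreadsPerPkg,
         nCoresPerPkg);
  return 0;
}
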
- // - if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) - || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } + if (__kmp_affinity_verbose) { + KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); } - nPackages = pkgCt; - if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt; - if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt; - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = nCores; + return 0; + } + + // From here on, we can assume that it is safe to call + // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if + // __kmp_affinity_type = affinity_none. + + // Save the affinity mask for the current thread. + kmp_affin_mask_t *oldMask; + KMP_CPU_ALLOC(oldMask); + KMP_ASSERT(oldMask != NULL); + __kmp_get_system_affinity(oldMask, TRUE); + + // Run through each of the available contexts, binding the current thread + // to it, and obtaining the pertinent information using the cpuid instr. + // + // The relevant information is: + // - Apic Id: Bits 24:31 of ebx after issuing cpuid(1) - each thread context + // has a uniqie Apic Id, which is of the form pkg# : core# : thread#. + // - Max Threads Per Pkg: Bits 16:23 of ebx after issuing cpuid(1). The value + // of this field determines the width of the core# + thread# fields in the + // Apic Id. It is also an upper bound on the number of threads per + // package, but it has been verified that situations happen were it is not + // exact. In particular, on certain OS/chip combinations where Intel(R) + // Hyper-Threading Technology is supported by the chip but has been + // disabled, the value of this field will be 2 (for a single core chip). + // On other OS/chip combinations supporting Intel(R) Hyper-Threading + // Technology, the value of this field will be 1 when Intel(R) + // Hyper-Threading Technology is disabled and 2 when it is enabled. + // - Max Cores Per Pkg: Bits 26:31 of eax after issuing cpuid(4). The value + // of this field (+1) determines the width of the core# field in the Apic + // Id. The comments in "cpucount.cpp" say that this value is an upper + // bound, but the IA-32 architecture manual says that it is exactly the + // number of cores per package, and I haven't seen any case where it + // wasn't. + // + // From this information, deduce the package Id, core Id, and thread Id, + // and set the corresponding fields in the apicThreadInfo struct. + unsigned i; + apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate( + __kmp_avail_proc * sizeof(apicThreadInfo)); + unsigned nApics = 0; + KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { + // Skip this proc if it is not included in the machine model. 
+ if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { + continue; + } + KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc); + + __kmp_affinity_dispatch->bind_thread(i); + threadInfo[nApics].osId = i; + + // The apic id and max threads per pkg come from cpuid(1). + __kmp_x86_cpuid(1, 0, &buf); + if (((buf.edx >> 9) & 1) == 0) { + __kmp_set_system_affinity(oldMask, TRUE); + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_ApicNotPresent; + return -1; + } + threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff; + threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff; + if (threadInfo[nApics].maxThreadsPerPkg == 0) { + threadInfo[nApics].maxThreadsPerPkg = 1; + } + + // Max cores per pkg comes from cpuid(4). 1 must be added to the encoded + // value. + // + // First, we need to check if cpuid(4) is supported on this chip. To see if + // cpuid(n) is supported, issue cpuid(0) and check if eax has the value n + // or greater. + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax >= 4) { + __kmp_x86_cpuid(4, 0, &buf); + threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1; + } else { + threadInfo[nApics].maxCoresPerPkg = 1; + } + + // Infer the pkgId / coreId / threadId using only the info obtained locally. + int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg); + threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT; + + int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg); + int widthT = widthCT - widthC; + if (widthT < 0) { + // I've never seen this one happen, but I suppose it could, if the cpuid + // instruction on a chip was really screwed up. Make sure to restore the + // affinity mask before the tail call. + __kmp_set_system_affinity(oldMask, TRUE); + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + + int maskC = (1 << widthC) - 1; + threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC; + + int maskT = (1 << widthT) - 1; + threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT; + + nApics++; + } + + // We've collected all the info we need. + // Restore the old affinity mask for this thread. + __kmp_set_system_affinity(oldMask, TRUE); + + // If there's only one thread context to bind to, form an Address object + // with depth 1 and return immediately (or, if affinity is off, set + // address2os to NULL and return). + // + // If it is configured to omit the package level when there is only a single + // package, the logic at the end of this routine won't work if there is only + // a single thread - it would try to form an Address object with depth 0. 
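
// A worked example, with made-up values, of how the legacy (leaf 1 / leaf 4)
// path splits an APIC id into pkg:core:thread once the field widths are known
// (mask_width mirrors __kmp_cpuid_mask_width):
#include <cstdio>
static int mask_width(int count) {
  int r = 0;
  while ((1 << r) < count)
    ++r;
  return r;
}
int main() {
  unsigned apicId = 0x1b;   // hypothetical value from cpuid(1).ebx bits 24:31
  int maxThreadsPerPkg = 8; // hypothetical cpuid(1) field
  int maxCoresPerPkg = 4;   // hypothetical cpuid(4) field, +1 already applied
  int widthCT = mask_width(maxThreadsPerPkg); // 3 bits for core + thread
  int widthC = mask_width(maxCoresPerPkg);    // 2 bits for core
  int widthT = widthCT - widthC;              // 1 bit for thread
  unsigned pkgId = apicId >> widthCT;                         // 0b11 -> 3
  unsigned coreId = (apicId >> widthT) & ((1 << widthC) - 1); // 0b01 -> 1
  unsigned threadId = apicId & ((1 << widthT) - 1);           // 0b1  -> 1
  printf("pkg=%u core=%u thread=%u\n", pkgId, coreId, threadId);
  return 0;
}
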
+ KMP_ASSERT(nApics > 0); + if (nApics == 1) { + __kmp_ncores = nPackages = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - - } - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - for (i = 0; i < nApics; ++i) { - __kmp_pu_os_idx[i] = threadInfo[i].osId; + KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); } + if (__kmp_affinity_type == affinity_none) { - __kmp_free(threadInfo); - KMP_CPU_FREE(oldMask); - return 0; + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return 0; } - // - // Now that we've determined the number of packages, the number of cores - // per package, and the number of threads per core, we can construct the - // data structure that is to be returned. - // - int pkgLevel = 0; - int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; - int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1); - unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); - - KMP_ASSERT(depth > 0); - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics); - - for (i = 0; i < nApics; ++i) { - Address addr(depth); - unsigned os = threadInfo[i].osId; - int d = 0; - - if (pkgLevel >= 0) { - addr.labels[d++] = threadInfo[i].pkgId; - } - if (coreLevel >= 0) { - addr.labels[d++] = threadInfo[i].coreId; - } - if (threadLevel >= 0) { - addr.labels[d++] = threadInfo[i].threadId; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } + *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair)); + Address addr(1); + addr.labels[0] = threadInfo[0].pkgId; + (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId); if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. 
- // - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) - && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { - __kmp_affinity_gran_levels++; - } + __kmp_affinity_gran_levels = 0; } if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, - coreLevel, threadLevel); + __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); } __kmp_free(threadInfo); KMP_CPU_FREE(oldMask); - return depth; -} + return 1; + } + + // Sort the threadInfo table by physical Id. + qsort(threadInfo, nApics, sizeof(*threadInfo), + __kmp_affinity_cmp_apicThreadInfo_phys_id); + + // The table is now sorted by pkgId / coreId / threadId, but we really don't + // know the radix of any of the fields. pkgId's may be sparsely assigned among + // the chips on a system. Although coreId's are usually assigned + // [0 .. coresPerPkg-1] and threadId's are usually assigned + // [0..threadsPerCore-1], we don't want to make any such assumptions. + // + // For that matter, we don't know what coresPerPkg and threadsPerCore (or the + // total # packages) are at this point - we want to determine that now. We + // only have an upper bound on the first two figures. + // + // We also perform a consistency check at this point: the values returned by + // the cpuid instruction for any thread bound to a given package had better + // return the same info for maxThreadsPerPkg and maxCoresPerPkg. + nPackages = 1; + nCoresPerPkg = 1; + __kmp_nThreadsPerCore = 1; + unsigned nCores = 1; + + unsigned pkgCt = 1; // to determine radii + unsigned lastPkgId = threadInfo[0].pkgId; + unsigned coreCt = 1; + unsigned lastCoreId = threadInfo[0].coreId; + unsigned threadCt = 1; + unsigned lastThreadId = threadInfo[0].threadId; + + // intra-pkg consist checks + unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg; + unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg; + + for (i = 1; i < nApics; i++) { + if (threadInfo[i].pkgId != lastPkgId) { + nCores++; + pkgCt++; + lastPkgId = threadInfo[i].pkgId; + if ((int)coreCt > nCoresPerPkg) + nCoresPerPkg = coreCt; + coreCt = 1; + lastCoreId = threadInfo[i].coreId; + if ((int)threadCt > __kmp_nThreadsPerCore) + __kmp_nThreadsPerCore = threadCt; + threadCt = 1; + lastThreadId = threadInfo[i].threadId; + + // This is a different package, so go on to the next iteration without + // doing any consistency checks. Reset the consistency check vars, though. + prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg; + prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg; + continue; + } + + if (threadInfo[i].coreId != lastCoreId) { + nCores++; + coreCt++; + lastCoreId = threadInfo[i].coreId; + if ((int)threadCt > __kmp_nThreadsPerCore) + __kmp_nThreadsPerCore = threadCt; + threadCt = 1; + lastThreadId = threadInfo[i].threadId; + } else if (threadInfo[i].threadId != lastThreadId) { + threadCt++; + lastThreadId = threadInfo[i].threadId; + } else { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique; + return -1; + } + + // Check to make certain that the maxCoresPerPkg and maxThreadsPerPkg + // fields agree between all the threads bounds to a given package. 
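
// A reduced model of the counting pass above, with made-up ids: walk the
// sorted (pkgId, coreId, threadId) table once, bump the core count whenever
// coreId changes within a package and the package count whenever pkgId
// changes, and keep the per-package / per-core maxima.
#include <cstdio>
struct Ids {
  unsigned pkg, core, thread;
};
int main() {
  Ids t[] = {{0, 0, 0}, {0, 0, 1}, {0, 1, 0},
             {0, 1, 1}, {3, 0, 0}, {3, 0, 1}}; // note the sparse pkg ids
  int n = 6;
  int pkgCt = 1, coreCt = 1, threadCt = 1;
  int nCoresPerPkg = 1, nThreadsPerCore = 1;
  for (int i = 1; i < n; ++i) {
    if (t[i].pkg != t[i - 1].pkg) {
      ++pkgCt;
      if (coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      if (threadCt > nThreadsPerCore)
        nThreadsPerCore = threadCt;
      coreCt = threadCt = 1;
    } else if (t[i].core != t[i - 1].core) {
      ++coreCt;
      if (threadCt > nThreadsPerCore)
        nThreadsPerCore = threadCt;
      threadCt = 1;
    } else {
      ++threadCt; // same pkg, same core: must be another thread id
    }
  }
  if (coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if (threadCt > nThreadsPerCore)
    nThreadsPerCore = threadCt;
  printf("packages=%d coresPerPkg=%d threadsPerCore=%d\n", pkgCt, nCoresPerPkg,
         nThreadsPerCore); // 2, 2, 2 for the data above
  return 0;
}
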
+ if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) || + (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; + } + } + nPackages = pkgCt; + if ((int)coreCt > nCoresPerPkg) + nCoresPerPkg = coreCt; + if ((int)threadCt > __kmp_nThreadsPerCore) + __kmp_nThreadsPerCore = threadCt; + + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. + // Make sure all these vars are set correctly, and return now if affinity is + // not enabled. + __kmp_ncores = nCores; + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (i = 0; i < nApics; ++i) { + __kmp_pu_os_idx[i] = threadInfo[i].osId; + } + if (__kmp_affinity_type == affinity_none) { + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return 0; + } + // Now that we've determined the number of packages, the number of cores per + // package, and the number of threads per core, we can construct the data + // structure that is to be returned. + int pkgLevel = 0; + int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1; + int threadLevel = + (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1); + unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0); -// -// Intel(R) microarchitecture code name Nehalem, Dunnington and later -// architectures support a newer interface for specifying the x2APIC Ids, -// based on cpuid leaf 11. -// -static int -__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, - kmp_i18n_id_t *const msg_id) -{ - kmp_cpuid buf; + KMP_ASSERT(depth > 0); + *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics); - *address2os = NULL; - *msg_id = kmp_i18n_null; + for (i = 0; i < nApics; ++i) { + Address addr(depth); + unsigned os = threadInfo[i].osId; + int d = 0; - // - // Check to see if cpuid leaf 11 is supported. - // - __kmp_x86_cpuid(0, 0, &buf); - if (buf.eax < 11) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; + if (pkgLevel >= 0) { + addr.labels[d++] = threadInfo[i].pkgId; } - __kmp_x86_cpuid(11, 0, &buf); - if (buf.ebx == 0) { - *msg_id = kmp_i18n_str_NoLeaf11Support; - return -1; + if (coreLevel >= 0) { + addr.labels[d++] = threadInfo[i].coreId; } - - // - // Find the number of levels in the machine topology. While we're at it, - // get the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will - // try to get more accurate values later by explicitly counting them, - // but get reasonable defaults now, in case we return early. 
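
// A small sketch of the level-selection step above, with hypothetical counts:
// levels that turned out to have radix 1 are simply not modeled (their index
// stays -1) and the map depth shrinks accordingly, e.g. a machine without SMT
// yields a 2-level (package, core) map.
#include <cstdio>
int main() {
  int nCoresPerPkg = 4, nThreadsPerCore = 1; // made-up counts
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel = (nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  int depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);
  printf("pkgLevel=%d coreLevel=%d threadLevel=%d depth=%d\n", pkgLevel,
         coreLevel, threadLevel, depth); // 0 1 -1 2
  return 0;
}
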
- // - int level; - int threadLevel = -1; - int coreLevel = -1; - int pkgLevel = -1; - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; - - for (level = 0;; level++) { - if (level > 31) { - // - // FIXME: Hack for DPD200163180 - // - // If level is big then something went wrong -> exiting - // - // There could actually be 32 valid levels in the machine topology, - // but so far, the only machine we have seen which does not exit - // this loop before iteration 32 has fubar x2APIC settings. - // - // For now, just reject this case based upon loop trip count. - // - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - __kmp_x86_cpuid(11, level, &buf); - if (buf.ebx == 0) { - if (pkgLevel < 0) { - // - // Will infer nPackages from __kmp_xproc - // - pkgLevel = level; - level++; - } - break; - } - int kind = (buf.ecx >> 8) & 0xff; - if (kind == 1) { - // - // SMT level - // - threadLevel = level; - coreLevel = -1; - pkgLevel = -1; - __kmp_nThreadsPerCore = buf.ebx & 0xffff; - if (__kmp_nThreadsPerCore == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } - else if (kind == 2) { - // - // core level - // - coreLevel = level; - pkgLevel = -1; - nCoresPerPkg = buf.ebx & 0xffff; - if (nCoresPerPkg == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } - else { - if (level <= 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - if (pkgLevel >= 0) { - continue; - } - pkgLevel = level; - nPackages = buf.ebx & 0xffff; - if (nPackages == 0) { - *msg_id = kmp_i18n_str_InvalidCpuidInfo; - return -1; - } - } + if (threadLevel >= 0) { + addr.labels[d++] = threadInfo[i].threadId; } - int depth = level; + (*address2os)[i] = AddrUnsPair(addr, os); + } - // - // In the above loop, "level" was counted from the finest level (usually - // thread) to the coarsest. The caller expects that we will place the - // labels in (*address2os)[].first.labels[] in the inverse order, so - // we need to invert the vars saying which level means what. - // - if (threadLevel >= 0) { - threadLevel = depth - threadLevel - 1; + if (__kmp_affinity_gran_levels < 0) { + // Set the granularity level based on what levels are modeled in the machine + // topology map. + __kmp_affinity_gran_levels = 0; + if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { + __kmp_affinity_gran_levels++; } - if (coreLevel >= 0) { - coreLevel = depth - coreLevel - 1; + if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { + __kmp_affinity_gran_levels++; } - KMP_DEBUG_ASSERT(pkgLevel >= 0); - pkgLevel = depth - pkgLevel - 1; - - // - // The algorithm used starts by setting the affinity to each available - // thread and retrieving info from the cpuid instruction, so if we are - // not capable of calling __kmp_get_system_affinity() and - // _kmp_get_system_affinity(), then we need to do something else - use - // the defaults that we calculated from issuing cpuid without binding - // to each proc. - // - if (! KMP_AFFINITY_CAPABLE()) - { - // - // Hack to try and infer the machine topology using only the data - // available from cpuid on the current thread, and __kmp_xproc. 
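
// A sketch of how each cpuid(11, level) record is decoded in the loop above,
// with made-up register values: ecx bits 8:15 give the level type (1 is SMT,
// 2 is core, anything else is taken as the package level) and ebx bits 0:15
// give the count the runtime records for that level.
#include <cstdio>
int main() {
  // hypothetical (ebx, ecx) pairs as returned for sub-leaves 0 and 1
  unsigned regs[][2] = {{2, 0x0100}, {16, 0x0201}};
  for (int level = 0; level < 2; ++level) {
    unsigned ebx = regs[level][0], ecx = regs[level][1];
    unsigned kind = (ecx >> 8) & 0xff;
    unsigned count = ebx & 0xffff;
    if (kind == 1)
      printf("level %d: SMT, count=%u (recorded as threads per core)\n", level,
             count);
    else if (kind == 2)
      printf("level %d: core, count=%u (recorded as nCoresPerPkg)\n", level,
             count);
    else
      printf("level %d: treated as the package level\n", level);
  }
  return 0;
}
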
- // - KMP_ASSERT(__kmp_affinity_type == affinity_none); - - __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; - nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; - if (__kmp_affinity_verbose) { - KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (__kmp_affinity_uniform_topology()) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - return 0; + if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) { + __kmp_affinity_gran_levels++; } + } - // - // - // From here on, we can assume that it is safe to call - // __kmp_get_system_affinity() and __kmp_set_system_affinity(), - // even if __kmp_affinity_type = affinity_none. - // - - // - // Save the affinity mask for the current thread. - // - kmp_affin_mask_t *oldMask; - KMP_CPU_ALLOC(oldMask); - __kmp_get_system_affinity(oldMask, TRUE); - - // - // Allocate the data structure to be returned. - // - AddrUnsPair *retval = (AddrUnsPair *) - __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel, + coreLevel, threadLevel); + } - // - // Run through each of the available contexts, binding the current thread - // to it, and obtaining the pertinent information using the cpuid instr. - // - unsigned int proc; - int nApics = 0; - KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) { - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - continue; - } - KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); - - __kmp_affinity_dispatch->bind_thread(proc); - - // - // Extrach the labels for each level in the machine topology map - // from the Apic ID. - // - Address addr(depth); - int prev_shift = 0; - - for (level = 0; level < depth; level++) { - __kmp_x86_cpuid(11, level, &buf); - unsigned apicId = buf.edx; - if (buf.ebx == 0) { - if (level != depth - 1) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } - addr.labels[depth - level - 1] = apicId >> prev_shift; - level++; - break; - } - int shift = buf.eax & 0x1f; - int mask = (1 << shift) - 1; - addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; - prev_shift = shift; - } - if (level != depth) { - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_InconsistentCpuidInfo; - return -1; - } + __kmp_free(threadInfo); + KMP_CPU_FREE(oldMask); + return depth; +} - retval[nApics] = AddrUnsPair(addr, proc); - nApics++; +// Intel(R) microarchitecture code name Nehalem, Dunnington and later +// architectures support a newer interface for specifying the x2APIC Ids, +// based on cpuid leaf 11. +static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os, + kmp_i18n_id_t *const msg_id) { + kmp_cpuid buf; + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // Check to see if cpuid leaf 11 is supported. + __kmp_x86_cpuid(0, 0, &buf); + if (buf.eax < 11) { + *msg_id = kmp_i18n_str_NoLeaf11Support; + return -1; + } + __kmp_x86_cpuid(11, 0, &buf); + if (buf.ebx == 0) { + *msg_id = kmp_i18n_str_NoLeaf11Support; + return -1; + } + + // Find the number of levels in the machine topology. While we're at it, get + // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. 
We will try to + // get more accurate values later by explicitly counting them, but get + // reasonable defaults now, in case we return early. + int level; + int threadLevel = -1; + int coreLevel = -1; + int pkgLevel = -1; + __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; + + for (level = 0;; level++) { + if (level > 31) { + // FIXME: Hack for DPD200163180 + // + // If level is big then something went wrong -> exiting + // + // There could actually be 32 valid levels in the machine topology, but so + // far, the only machine we have seen which does not exit this loop before + // iteration 32 has fubar x2APIC settings. + // + // For now, just reject this case based upon loop trip count. + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + __kmp_x86_cpuid(11, level, &buf); + if (buf.ebx == 0) { + if (pkgLevel < 0) { + // Will infer nPackages from __kmp_xproc + pkgLevel = level; + level++; + } + break; + } + int kind = (buf.ecx >> 8) & 0xff; + if (kind == 1) { + // SMT level + threadLevel = level; + coreLevel = -1; + pkgLevel = -1; + __kmp_nThreadsPerCore = buf.ebx & 0xffff; + if (__kmp_nThreadsPerCore == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + } else if (kind == 2) { + // core level + coreLevel = level; + pkgLevel = -1; + nCoresPerPkg = buf.ebx & 0xffff; + if (nCoresPerPkg == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + } else { + if (level <= 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } + if (pkgLevel >= 0) { + continue; + } + pkgLevel = level; + nPackages = buf.ebx & 0xffff; + if (nPackages == 0) { + *msg_id = kmp_i18n_str_InvalidCpuidInfo; + return -1; + } } + } + int depth = level; + + // In the above loop, "level" was counted from the finest level (usually + // thread) to the coarsest. The caller expects that we will place the labels + // in (*address2os)[].first.labels[] in the inverse order, so we need to + // invert the vars saying which level means what. + if (threadLevel >= 0) { + threadLevel = depth - threadLevel - 1; + } + if (coreLevel >= 0) { + coreLevel = depth - coreLevel - 1; + } + KMP_DEBUG_ASSERT(pkgLevel >= 0); + pkgLevel = depth - pkgLevel - 1; + + // The algorithm used starts by setting the affinity to each available thread + // and retrieving info from the cpuid instruction, so if we are not capable of + // calling __kmp_get_system_affinity() and _kmp_get_system_affinity(), then we + // need to do something else - use the defaults that we calculated from + // issuing cpuid without binding to each proc. + if (!KMP_AFFINITY_CAPABLE()) { + // Hack to try and infer the machine topology using only the data + // available from cpuid on the current thread, and __kmp_xproc. + KMP_ASSERT(__kmp_affinity_type == affinity_none); + + __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore; + nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg; + if (__kmp_affinity_verbose) { + KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (__kmp_affinity_uniform_topology()) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } + return 0; + } - // - // We've collected all the info we need. - // Restore the old affinity mask for this thread. - // - __kmp_set_system_affinity(oldMask, TRUE); - - // - // If there's only one thread context to bind to, return now. 
- // - KMP_ASSERT(nApics > 0); - if (nApics == 1) { - __kmp_ncores = nPackages = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = 1; - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - - KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - } - - if (__kmp_affinity_type == affinity_none) { - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - return 0; - } - - // - // Form an Address object which only includes the package level. - // - Address addr(1); - addr.labels[0] = retval[0].first.labels[pkgLevel]; - retval[0].first = addr; + // From here on, we can assume that it is safe to call + // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if + // __kmp_affinity_type = affinity_none. - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } + // Save the affinity mask for the current thread. + kmp_affin_mask_t *oldMask; + KMP_CPU_ALLOC(oldMask); + __kmp_get_system_affinity(oldMask, TRUE); - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); - } + // Allocate the data structure to be returned. + AddrUnsPair *retval = + (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc); - *address2os = retval; - KMP_CPU_FREE(oldMask); - return 1; + // Run through each of the available contexts, binding the current thread + // to it, and obtaining the pertinent information using the cpuid instr. + unsigned int proc; + int nApics = 0; + KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) { + // Skip this proc if it is not included in the machine model. + if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { + continue; } + KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc); - // - // Sort the table by physical Id. - // - qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); - - // - // Find the radix at each of the levels. - // - unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); - for (level = 0; level < depth; level++) { - totals[level] = 1; - maxCt[level] = 1; - counts[level] = 1; - last[level] = retval[0].first.labels[level]; - } + __kmp_affinity_dispatch->bind_thread(proc); - // - // From here on, the iteration variable "level" runs from the finest - // level to the coarsest, i.e. we iterate forward through - // (*address2os)[].first.labels[] - in the previous loops, we iterated - // backwards. - // - for (proc = 1; (int)proc < nApics; proc++) { - int level; - for (level = 0; level < depth; level++) { - if (retval[proc].first.labels[level] != last[level]) { - int j; - for (j = level + 1; j < depth; j++) { - totals[j]++; - counts[j] = 1; - // The line below causes printing incorrect topology information - // in case the max value for some level (maxCt[level]) is encountered earlier than - // some less value while going through the array. - // For example, let pkg0 has 4 cores and pkg1 has 2 cores. 
Then maxCt[1] == 2 - // whereas it must be 4. - // TODO!!! Check if it can be commented safely - //maxCt[j] = 1; - last[j] = retval[proc].first.labels[j]; - } - totals[level]++; - counts[level]++; - if (counts[level] > maxCt[level]) { - maxCt[level] = counts[level]; - } - last[level] = retval[proc].first.labels[level]; - break; - } - else if (level == depth - 1) { - __kmp_free(last); - __kmp_free(maxCt); - __kmp_free(counts); - __kmp_free(totals); - __kmp_free(retval); - KMP_CPU_FREE(oldMask); - *msg_id = kmp_i18n_str_x2ApicIDsNotUnique; - return -1; - } - } - } + // Extract labels for each level in the machine topology map from Apic ID. + Address addr(depth); + int prev_shift = 0; - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return if affinity is not enabled. - // - if (threadLevel >= 0) { - __kmp_nThreadsPerCore = maxCt[threadLevel]; + for (level = 0; level < depth; level++) { + __kmp_x86_cpuid(11, level, &buf); + unsigned apicId = buf.edx; + if (buf.ebx == 0) { + if (level != depth - 1) { + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; + } + addr.labels[depth - level - 1] = apicId >> prev_shift; + level++; + break; + } + int shift = buf.eax & 0x1f; + int mask = (1 << shift) - 1; + addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift; + prev_shift = shift; } - else { - __kmp_nThreadsPerCore = 1; + if (level != depth) { + KMP_CPU_FREE(oldMask); + *msg_id = kmp_i18n_str_InconsistentCpuidInfo; + return -1; } - nPackages = totals[pkgLevel]; - if (coreLevel >= 0) { - __kmp_ncores = totals[coreLevel]; - nCoresPerPkg = maxCt[coreLevel]; - } - else { - __kmp_ncores = nPackages; - nCoresPerPkg = 1; - } + retval[nApics] = AddrUnsPair(addr, proc); + nApics++; + } - // - // Check to see if the machine topology is uniform - // - unsigned prod = maxCt[0]; - for (level = 1; level < depth; level++) { - prod *= maxCt[level]; - } - bool uniform = (prod == totals[level - 1]); + // We've collected all the info we need. + // Restore the old affinity mask for this thread. + __kmp_set_system_affinity(oldMask, TRUE); - // - // Print the machine topology summary. - // + // If there's only one thread context to bind to, return now. 
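  // With a single context there is nothing to sort or count: the topology
  // globals collapse to one package, core and thread, and (unless affinity
  // is disabled) a depth-1 Address holding only the package label is
  // returned.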
+ KMP_ASSERT(nApics > 0); + if (nApics == 1) { + __kmp_ncores = nPackages = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = 1; if (__kmp_affinity_verbose) { - char mask[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask); - KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } + KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + } - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); + if (__kmp_affinity_type == affinity_none) { + __kmp_free(retval); + KMP_CPU_FREE(oldMask); + return 0; + } - __kmp_str_buf_print(&buf, "%d", totals[0]); - for (level = 1; level <= pkgLevel; level++) { - __kmp_str_buf_print(&buf, " x %d", maxCt[level]); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); + // Form an Address object which only includes the package level. + Address addr(1); + addr.labels[0] = retval[0].first.labels[pkgLevel]; + retval[0].first = addr; - __kmp_str_buf_free(&buf); + if (__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; } - KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - for (proc = 0; (int)proc < nApics; ++proc) { - __kmp_pu_os_idx[proc] = retval[proc].second; + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1); } - if (__kmp_affinity_type == affinity_none) { + + *address2os = retval; + KMP_CPU_FREE(oldMask); + return 1; + } + + // Sort the table by physical Id. + qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels); + + // Find the radix at each of the levels. + unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned)); + for (level = 0; level < depth; level++) { + totals[level] = 1; + maxCt[level] = 1; + counts[level] = 1; + last[level] = retval[0].first.labels[level]; + } + + // From here on, the iteration variable "level" runs from the finest level to + // the coarsest, i.e. we iterate forward through + // (*address2os)[].first.labels[] - in the previous loops, we iterated + // backwards. 
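  // Illustration (hypothetical labels, assuming the common case pkgLevel == 0,
  // coreLevel == 1, threadLevel == 2): for sorted label tuples
  //   {0,0,0} {0,0,1} {0,1,0} {0,1,1} {1,0,0} {1,0,1}
  // the loop below yields totals == {2, 3, 6} (2 packages, 3 cores, 6 threads)
  // and maxCt == {2, 2, 2}, so nCoresPerPkg == maxCt[1] == 2 and
  // __kmp_nThreadsPerCore == maxCt[2] == 2, while the uniformity test fails
  // (2 * 2 * 2 == 8 != totals[2] == 6) because one package has a single core.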
+ for (proc = 1; (int)proc < nApics; proc++) { + int level; + for (level = 0; level < depth; level++) { + if (retval[proc].first.labels[level] != last[level]) { + int j; + for (j = level + 1; j < depth; j++) { + totals[j]++; + counts[j] = 1; + // The line below causes printing incorrect topology information in + // case the max value for some level (maxCt[level]) is encountered + // earlier than some less value while going through the array. For + // example, let pkg0 has 4 cores and pkg1 has 2 cores. Then + // maxCt[1] == 2 + // whereas it must be 4. + // TODO!!! Check if it can be commented safely + // maxCt[j] = 1; + last[j] = retval[proc].first.labels[j]; + } + totals[level]++; + counts[level]++; + if (counts[level] > maxCt[level]) { + maxCt[level] = counts[level]; + } + last[level] = retval[proc].first.labels[level]; + break; + } else if (level == depth - 1) { __kmp_free(last); __kmp_free(maxCt); __kmp_free(counts); __kmp_free(totals); __kmp_free(retval); KMP_CPU_FREE(oldMask); - return 0; + *msg_id = kmp_i18n_str_x2ApicIDsNotUnique; + return -1; + } } + } - // - // Find any levels with radiix 1, and remove them from the map - // (except for the package level). - // - int new_depth = 0; - for (level = 0; level < depth; level++) { - if ((maxCt[level] == 1) && (level != pkgLevel)) { - continue; - } - new_depth++; + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. + // Make sure all these vars are set correctly, and return if affinity is not + // enabled. + if (threadLevel >= 0) { + __kmp_nThreadsPerCore = maxCt[threadLevel]; + } else { + __kmp_nThreadsPerCore = 1; + } + nPackages = totals[pkgLevel]; + + if (coreLevel >= 0) { + __kmp_ncores = totals[coreLevel]; + nCoresPerPkg = maxCt[coreLevel]; + } else { + __kmp_ncores = nPackages; + nCoresPerPkg = 1; + } + + // Check to see if the machine topology is uniform + unsigned prod = maxCt[0]; + for (level = 1; level < depth; level++) { + prod *= maxCt[level]; + } + bool uniform = (prod == totals[level - 1]); + + // Print the machine topology summary. + if (__kmp_affinity_verbose) { + char mask[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask); + + KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask); } - - // - // If we are removing any levels, allocate a new vector to return, - // and copy the relevant information to it. - // - if (new_depth != depth) { - AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate( - sizeof(AddrUnsPair) * nApics); - for (proc = 0; (int)proc < nApics; proc++) { - Address addr(new_depth); - new_retval[proc] = AddrUnsPair(addr, retval[proc].second); - } - int new_level = 0; - int newPkgLevel = -1; - int newCoreLevel = -1; - int newThreadLevel = -1; - int i; - for (level = 0; level < depth; level++) { - if ((maxCt[level] == 1) - && (level != pkgLevel)) { - // - // Remove this level. 
Never remove the package level - // - continue; - } - if (level == pkgLevel) { - newPkgLevel = level; - } - if (level == coreLevel) { - newCoreLevel = level; - } - if (level == threadLevel) { - newThreadLevel = level; - } - for (proc = 0; (int)proc < nApics; proc++) { - new_retval[proc].first.labels[new_level] - = retval[proc].first.labels[level]; - } - new_level++; - } - - __kmp_free(retval); - retval = new_retval; - depth = new_depth; - pkgLevel = newPkgLevel; - coreLevel = newCoreLevel; - threadLevel = newThreadLevel; + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); } - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - __kmp_affinity_gran_levels = 0; - if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { - __kmp_affinity_gran_levels++; - } - if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { - __kmp_affinity_gran_levels++; - } - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - } + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, - coreLevel, threadLevel); + __kmp_str_buf_print(&buf, "%d", totals[0]); + for (level = 1; level <= pkgLevel; level++) { + __kmp_str_buf_print(&buf, " x %d", maxCt[level]); } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + __kmp_str_buf_free(&buf); + } + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (proc = 0; (int)proc < nApics; ++proc) { + __kmp_pu_os_idx[proc] = retval[proc].second; + } + if (__kmp_affinity_type == affinity_none) { __kmp_free(last); __kmp_free(maxCt); __kmp_free(counts); __kmp_free(totals); + __kmp_free(retval); KMP_CPU_FREE(oldMask); - *address2os = retval; - return depth; + return 0; + } + + // Find any levels with radiix 1, and remove them from the map + // (except for the package level). + int new_depth = 0; + for (level = 0; level < depth; level++) { + if ((maxCt[level] == 1) && (level != pkgLevel)) { + continue; + } + new_depth++; + } + + // If we are removing any levels, allocate a new vector to return, + // and copy the relevant information to it. + if (new_depth != depth) { + AddrUnsPair *new_retval = + (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics); + for (proc = 0; (int)proc < nApics; proc++) { + Address addr(new_depth); + new_retval[proc] = AddrUnsPair(addr, retval[proc].second); + } + int new_level = 0; + int newPkgLevel = -1; + int newCoreLevel = -1; + int newThreadLevel = -1; + int i; + for (level = 0; level < depth; level++) { + if ((maxCt[level] == 1) && (level != pkgLevel)) { + // Remove this level. 
Never remove the package level + continue; + } + if (level == pkgLevel) { + newPkgLevel = level; + } + if (level == coreLevel) { + newCoreLevel = level; + } + if (level == threadLevel) { + newThreadLevel = level; + } + for (proc = 0; (int)proc < nApics; proc++) { + new_retval[proc].first.labels[new_level] = + retval[proc].first.labels[level]; + } + new_level++; + } + + __kmp_free(retval); + retval = new_retval; + depth = new_depth; + pkgLevel = newPkgLevel; + coreLevel = newCoreLevel; + threadLevel = newThreadLevel; + } + + if (__kmp_affinity_gran_levels < 0) { + // Set the granularity level based on what levels are modeled + // in the machine topology map. + __kmp_affinity_gran_levels = 0; + if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) { + __kmp_affinity_gran_levels++; + } + if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) { + __kmp_affinity_gran_levels++; + } + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels++; + } + } + + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel, + threadLevel); + } + + __kmp_free(last); + __kmp_free(maxCt); + __kmp_free(counts); + __kmp_free(totals); + KMP_CPU_FREE(oldMask); + *address2os = retval; + return depth; } +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - -#define osIdIndex 0 -#define threadIdIndex 1 -#define coreIdIndex 2 -#define pkgIdIndex 3 -#define nodeIdIndex 4 +#define osIdIndex 0 +#define threadIdIndex 1 +#define coreIdIndex 2 +#define pkgIdIndex 3 +#define nodeIdIndex 4 typedef unsigned *ProcCpuInfo; static unsigned maxIndex = pkgIdIndex; - -static int -__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) -{ - const unsigned *aa = (const unsigned *)a; - const unsigned *bb = (const unsigned *)b; - if (aa[osIdIndex] < bb[osIdIndex]) return -1; - if (aa[osIdIndex] > bb[osIdIndex]) return 1; - return 0; +static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) { + const unsigned *aa = (const unsigned *)a; + const unsigned *bb = (const unsigned *)b; + if (aa[osIdIndex] < bb[osIdIndex]) + return -1; + if (aa[osIdIndex] > bb[osIdIndex]) + return 1; + return 0; }; - -static int -__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b) -{ - unsigned i; - const unsigned *aa = *((const unsigned **)a); - const unsigned *bb = *((const unsigned **)b); - for (i = maxIndex; ; i--) { - if (aa[i] < bb[i]) return -1; - if (aa[i] > bb[i]) return 1; - if (i == osIdIndex) break; - } - return 0; +static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, + const void *b) { + unsigned i; + const unsigned *aa = *((const unsigned **)a); + const unsigned *bb = *((const unsigned **)b); + for (i = maxIndex;; i--) { + if (aa[i] < bb[i]) + return -1; + if (aa[i] > bb[i]) + return 1; + if (i == osIdIndex) + break; + } + return 0; } - -// // Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the // affinity map. -// -static int -__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line, - kmp_i18n_id_t *const msg_id, FILE *f) -{ - *address2os = NULL; - *msg_id = kmp_i18n_null; - - // - // Scan of the file, and count the number of "processor" (osId) fields, - // and find the highest value of for a node_ field. - // - char buf[256]; - unsigned num_records = 0; - while (! feof(f)) { - buf[sizeof(buf) - 1] = 1; - if (! 
fgets(buf, sizeof(buf), f)) { - // - // Read errors presumably because of EOF - // - break; - } - - char s1[] = "processor"; - if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { - num_records++; - continue; - } - - // - // FIXME - this will match "node_ " - // - unsigned level; - if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { - if (nodeIdIndex + level >= maxIndex) { - maxIndex = nodeIdIndex + level; - } - continue; - } - } - - // - // Check for empty file / no valid processor records, or too many. - // The number of records can't exceed the number of valid bits in the - // affinity mask. - // - if (num_records == 0) { - *line = 0; - *msg_id = kmp_i18n_str_NoProcRecords; - return -1; - } - if (num_records > (unsigned)__kmp_xproc) { - *line = 0; - *msg_id = kmp_i18n_str_TooManyProcRecords; - return -1; - } - - // - // Set the file pointer back to the begginning, so that we can scan the - // file again, this time performing a full parse of the data. - // Allocate a vector of ProcCpuInfo object, where we will place the data. - // Adding an extra element at the end allows us to remove a lot of extra - // checks for termination conditions. - // - if (fseek(f, 0, SEEK_SET) != 0) { - *line = 0; - *msg_id = kmp_i18n_str_CantRewindCpuinfo; - return -1; - } - - // - // Allocate the array of records to store the proc info in. The dummy - // element at the end makes the logic in filling them out easier to code. - // - unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1) - * sizeof(unsigned *)); - unsigned i; - for (i = 0; i <= num_records; i++) { - threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - } - -#define CLEANUP_THREAD_INFO \ - for (i = 0; i <= num_records; i++) { \ - __kmp_free(threadInfo[i]); \ - } \ - __kmp_free(threadInfo); - - // - // A value of UINT_MAX means that we didn't find the field - // - unsigned __index; - -#define INIT_PROC_INFO(p) \ - for (__index = 0; __index <= maxIndex; __index++) { \ - (p)[__index] = UINT_MAX; \ - } - - for (i = 0; i <= num_records; i++) { - INIT_PROC_INFO(threadInfo[i]); +static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, + int *line, + kmp_i18n_id_t *const msg_id, + FILE *f) { + *address2os = NULL; + *msg_id = kmp_i18n_null; + + // Scan of the file, and count the number of "processor" (osId) fields, + // and find the highest value of for a node_ field. + char buf[256]; + unsigned num_records = 0; + while (!feof(f)) { + buf[sizeof(buf) - 1] = 1; + if (!fgets(buf, sizeof(buf), f)) { + // Read errors presumably because of EOF + break; + } + + char s1[] = "processor"; + if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { + num_records++; + continue; + } + + // FIXME - this will match "node_ " + unsigned level; + if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { + if (nodeIdIndex + level >= maxIndex) { + maxIndex = nodeIdIndex + level; + } + continue; } + } - unsigned num_avail = 0; + // Check for empty file / no valid processor records, or too many. The number + // of records can't exceed the number of valid bits in the affinity mask. + if (num_records == 0) { *line = 0; - while (! feof(f)) { - // - // Create an inner scoping level, so that all the goto targets at the - // end of the loop appear in an outer scoping level. This avoids - // warnings about jumping past an initialization to a target in the - // same block. - // - { - buf[sizeof(buf) - 1] = 1; - bool long_line = false; - if (! 
fgets(buf, sizeof(buf), f)) { - // - // Read errors presumably because of EOF - // - // If there is valid data in threadInfo[num_avail], then fake - // a blank line in ensure that the last address gets parsed. - // - bool valid = false; - for (i = 0; i <= maxIndex; i++) { - if (threadInfo[num_avail][i] != UINT_MAX) { - valid = true; - } - } - if (! valid) { - break; - } - buf[0] = 0; - } else if (!buf[sizeof(buf) - 1]) { - // - // The line is longer than the buffer. Set a flag and don't - // emit an error if we were going to ignore the line, anyway. - // - long_line = true; - -#define CHECK_LINE \ - if (long_line) { \ - CLEANUP_THREAD_INFO; \ - *msg_id = kmp_i18n_str_LongLineCpuinfo; \ - return -1; \ - } - } - (*line)++; - - char s1[] = "processor"; - if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s1) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][osIdIndex] = val; + *msg_id = kmp_i18n_str_NoProcRecords; + return -1; + } + if (num_records > (unsigned)__kmp_xproc) { + *line = 0; + *msg_id = kmp_i18n_str_TooManyProcRecords; + return -1; + } + + // Set the file pointer back to the begginning, so that we can scan the file + // again, this time performing a full parse of the data. Allocate a vector of + // ProcCpuInfo object, where we will place the data. Adding an extra element + // at the end allows us to remove a lot of extra checks for termination + // conditions. + if (fseek(f, 0, SEEK_SET) != 0) { + *line = 0; + *msg_id = kmp_i18n_str_CantRewindCpuinfo; + return -1; + } + + // Allocate the array of records to store the proc info in. The dummy + // element at the end makes the logic in filling them out easier to code. + unsigned **threadInfo = + (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *)); + unsigned i; + for (i = 0; i <= num_records; i++) { + threadInfo[i] = + (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned)); + } + +#define CLEANUP_THREAD_INFO \ + for (i = 0; i <= num_records; i++) { \ + __kmp_free(threadInfo[i]); \ + } \ + __kmp_free(threadInfo); + + // A value of UINT_MAX means that we didn't find the field + unsigned __index; + +#define INIT_PROC_INFO(p) \ + for (__index = 0; __index <= maxIndex; __index++) { \ + (p)[__index] = UINT_MAX; \ + } + + for (i = 0; i <= num_records; i++) { + INIT_PROC_INFO(threadInfo[i]); + } + + unsigned num_avail = 0; + *line = 0; + while (!feof(f)) { + // Create an inner scoping level, so that all the goto targets at the end of + // the loop appear in an outer scoping level. This avoids warnings about + // jumping past an initialization to a target in the same block. + { + buf[sizeof(buf) - 1] = 1; + bool long_line = false; + if (!fgets(buf, sizeof(buf), f)) { + // Read errors presumably because of EOF + // If there is valid data in threadInfo[num_avail], then fake + // a blank line in ensure that the last address gets parsed. + bool valid = false; + for (i = 0; i <= maxIndex; i++) { + if (threadInfo[num_avail][i] != UINT_MAX) { + valid = true; + } + } + if (!valid) { + break; + } + buf[0] = 0; + } else if (!buf[sizeof(buf) - 1]) { + // The line is longer than the buffer. Set a flag and don't + // emit an error if we were going to ignore the line, anyway. 
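        // Detection relies on the sentinel byte written before the fgets()
        // call above: buf[sizeof(buf) - 1] was set to 1, and fgets() only
        // overwrites that last byte (with its terminating '\0') when the
        // input line fills the entire buffer.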
+ long_line = true; + +#define CHECK_LINE \ + if (long_line) { \ + CLEANUP_THREAD_INFO; \ + *msg_id = kmp_i18n_str_LongLineCpuinfo; \ + return -1; \ + } + } + (*line)++; + + char s1[] = "processor"; + if (strncmp(buf, s1, sizeof(s1) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s1) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) + goto no_val; + if (threadInfo[num_avail][osIdIndex] != UINT_MAX) + goto dup_field; + threadInfo[num_avail][osIdIndex] = val; #if KMP_OS_LINUX && USE_SYSFS_INFO - char path[256]; - KMP_SNPRINTF(path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); - - KMP_SNPRINTF(path, sizeof(path), - "/sys/devices/system/cpu/cpu%u/topology/core_id", - threadInfo[num_avail][osIdIndex]); - __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); - continue; + char path[256]; + KMP_SNPRINTF( + path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/physical_package_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]); + + KMP_SNPRINTF(path, sizeof(path), + "/sys/devices/system/cpu/cpu%u/topology/core_id", + threadInfo[num_avail][osIdIndex]); + __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]); + continue; #else - } - char s2[] = "physical id"; - if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s2) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][pkgIdIndex] = val; - continue; - } - char s3[] = "core id"; - if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s3) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][coreIdIndex] = val; - continue; + } + char s2[] = "physical id"; + if (strncmp(buf, s2, sizeof(s2) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s2) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) + goto no_val; + if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) + goto dup_field; + threadInfo[num_avail][pkgIdIndex] = val; + continue; + } + char s3[] = "core id"; + if (strncmp(buf, s3, sizeof(s3) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s3) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) + goto no_val; + if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) + goto dup_field; + threadInfo[num_avail][coreIdIndex] = val; + continue; #endif // KMP_OS_LINUX && USE_SYSFS_INFO - } - char s4[] = "thread id"; - if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field; - threadInfo[num_avail][threadIdIndex] = val; - continue; - } - unsigned level; - if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { - CHECK_LINE; - char *p = strchr(buf + sizeof(s4) - 1, ':'); - unsigned val; - if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val; - KMP_ASSERT(nodeIdIndex + level <= maxIndex); - if 
(threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field; - threadInfo[num_avail][nodeIdIndex + level] = val; - continue; - } - - // - // We didn't recognize the leading token on the line. - // There are lots of leading tokens that we don't recognize - - // if the line isn't empty, go on to the next line. - // - if ((*buf != 0) && (*buf != '\n')) { - // - // If the line is longer than the buffer, read characters - // until we find a newline. - // - if (long_line) { - int ch; - while (((ch = fgetc(f)) != EOF) && (ch != '\n')); - } - continue; - } - - // - // A newline has signalled the end of the processor record. - // Check that there aren't too many procs specified. - // - if ((int)num_avail == __kmp_xproc) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_TooManyEntries; - return -1; - } - - // - // Check for missing fields. The osId field must be there, and we - // currently require that the physical id field is specified, also. - // - if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingProcField; - return -1; - } - if (threadInfo[0][pkgIdIndex] == UINT_MAX) { - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingPhysicalIDField; - return -1; - } - - // - // Skip this proc if it is not included in the machine model. - // - if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], __kmp_affin_fullMask)) { - INIT_PROC_INFO(threadInfo[num_avail]); - continue; - } + } + char s4[] = "thread id"; + if (strncmp(buf, s4, sizeof(s4) - 1) == 0) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s4) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) + goto no_val; + if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) + goto dup_field; + threadInfo[num_avail][threadIdIndex] = val; + continue; + } + unsigned level; + if (KMP_SSCANF(buf, "node_%d id", &level) == 1) { + CHECK_LINE; + char *p = strchr(buf + sizeof(s4) - 1, ':'); + unsigned val; + if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) + goto no_val; + KMP_ASSERT(nodeIdIndex + level <= maxIndex); + if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) + goto dup_field; + threadInfo[num_avail][nodeIdIndex + level] = val; + continue; + } - // - // We have a successful parse of this proc's info. - // Increment the counter, and prepare for the next proc. - // - num_avail++; - KMP_ASSERT(num_avail <= num_records); - INIT_PROC_INFO(threadInfo[num_avail]); + // We didn't recognize the leading token on the line. There are lots of + // leading tokens that we don't recognize - if the line isn't empty, go on + // to the next line. + if ((*buf != 0) && (*buf != '\n')) { + // If the line is longer than the buffer, read characters + // until we find a newline. + if (long_line) { + int ch; + while (((ch = fgetc(f)) != EOF) && (ch != '\n')) + ; } continue; + } - no_val: + // A newline has signalled the end of the processor record. + // Check that there aren't too many procs specified. + if ((int)num_avail == __kmp_xproc) { CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_MissingValCpuinfo; + *msg_id = kmp_i18n_str_TooManyEntries; return -1; + } - dup_field: + // Check for missing fields. The osId field must be there, and we + // currently require that the physical id field is specified, also. 
+ if (threadInfo[num_avail][osIdIndex] == UINT_MAX) { CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; + *msg_id = kmp_i18n_str_MissingProcField; return -1; - } - *line = 0; - -# if KMP_MIC && REDUCE_TEAM_SIZE - unsigned teamSize = 0; -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - // check for num_records == __kmp_xproc ??? - - // - // If there's only one thread context to bind to, form an Address object - // with depth 1 and return immediately (or, if affinity is off, set - // address2os to NULL and return). - // - // If it is configured to omit the package level when there is only a - // single package, the logic at the end of this routine won't work if - // there is only a single thread - it would try to form an Address - // object with depth 0. - // - KMP_ASSERT(num_avail > 0); - KMP_ASSERT(num_avail <= num_records); - if (num_avail == 1) { - __kmp_ncores = 1; - __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; - if (__kmp_affinity_verbose) { - if (! KMP_AFFINITY_CAPABLE()) { - KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } - else { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - __kmp_affin_fullMask); - KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } - int index; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "1"); - for (index = maxIndex - 1; index > pkgIdIndex; index--) { - __kmp_str_buf_print(&buf, " x 1"); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1); - __kmp_str_buf_free(&buf); - } - - if (__kmp_affinity_type == affinity_none) { - CLEANUP_THREAD_INFO; - return 0; - } - - *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair)); - Address addr(1); - addr.labels[0] = threadInfo[0][pkgIdIndex]; - (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]); - - if (__kmp_affinity_gran_levels < 0) { - __kmp_affinity_gran_levels = 0; - } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); - } - + } + if (threadInfo[0][pkgIdIndex] == UINT_MAX) { CLEANUP_THREAD_INFO; - return 1; - } - - // - // Sort the threadInfo table by physical Id. - // - qsort(threadInfo, num_avail, sizeof(*threadInfo), - __kmp_affinity_cmp_ProcCpuInfo_phys_id); + *msg_id = kmp_i18n_str_MissingPhysicalIDField; + return -1; + } - // - // The table is now sorted by pkgId / coreId / threadId, but we really - // don't know the radix of any of the fields. pkgId's may be sparsely - // assigned among the chips on a system. Although coreId's are usually - // assigned [0 .. coresPerPkg-1] and threadId's are usually assigned - // [0..threadsPerCore-1], we don't want to make any such assumptions. - // - // For that matter, we don't know what coresPerPkg and threadsPerCore - // (or the total # packages) are at this point - we want to determine - // that now. We only have an upper bound on the first two figures. 
- // - unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1) - * sizeof(unsigned)); - - bool assign_thread_ids = false; - unsigned threadIdCt; - unsigned index; - - restart_radix_check: - threadIdCt = 0; + // Skip this proc if it is not included in the machine model. + if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], + __kmp_affin_fullMask)) { + INIT_PROC_INFO(threadInfo[num_avail]); + continue; + } - // - // Initialize the counter arrays with data from threadInfo[0]. - // - if (assign_thread_ids) { - if (threadInfo[0][threadIdIndex] == UINT_MAX) { - threadInfo[0][threadIdIndex] = threadIdCt++; - } - else if (threadIdCt <= threadInfo[0][threadIdIndex]) { - threadIdCt = threadInfo[0][threadIdIndex] + 1; - } + // We have a successful parse of this proc's info. + // Increment the counter, and prepare for the next proc. + num_avail++; + KMP_ASSERT(num_avail <= num_records); + INIT_PROC_INFO(threadInfo[num_avail]); } - for (index = 0; index <= maxIndex; index++) { - counts[index] = 1; - maxCt[index] = 1; - totals[index] = 1; - lastId[index] = threadInfo[0][index];; - } - - // - // Run through the rest of the OS procs. - // - for (i = 1; i < num_avail; i++) { - // - // Find the most significant index whose id differs - // from the id for the previous OS proc. - // - for (index = maxIndex; index >= threadIdIndex; index--) { - if (assign_thread_ids && (index == threadIdIndex)) { - // - // Auto-assign the thread id field if it wasn't specified. - // - if (threadInfo[i][threadIdIndex] == UINT_MAX) { - threadInfo[i][threadIdIndex] = threadIdCt++; - } + continue; - // - // Aparrently the thread id field was specified for some - // entries and not others. Start the thread id counter - // off at the next higher thread id. - // - else if (threadIdCt <= threadInfo[i][threadIdIndex]) { - threadIdCt = threadInfo[i][threadIdIndex] + 1; - } - } - if (threadInfo[i][index] != lastId[index]) { - // - // Run through all indices which are less significant, - // and reset the counts to 1. - // - // At all levels up to and including index, we need to - // increment the totals and record the last id. - // - unsigned index2; - for (index2 = threadIdIndex; index2 < index; index2++) { - totals[index2]++; - if (counts[index2] > maxCt[index2]) { - maxCt[index2] = counts[index2]; - } - counts[index2] = 1; - lastId[index2] = threadInfo[i][index2]; - } - counts[index]++; - totals[index]++; - lastId[index] = threadInfo[i][index]; - - if (assign_thread_ids && (index > threadIdIndex)) { - -# if KMP_MIC && REDUCE_TEAM_SIZE - // - // The default team size is the total #threads in the machine - // minus 1 thread for every core that has 3 or more threads. - // - teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 ); -# endif // KMP_MIC && REDUCE_TEAM_SIZE - - // - // Restart the thread counter, as we are on a new core. - // - threadIdCt = 0; - - // - // Auto-assign the thread id field if it wasn't specified. - // - if (threadInfo[i][threadIdIndex] == UINT_MAX) { - threadInfo[i][threadIdIndex] = threadIdCt++; - } - - // - // Aparrently the thread id field was specified for some - // entries and not others. Start the thread id counter - // off at the next higher thread id. 
- // - else if (threadIdCt <= threadInfo[i][threadIdIndex]) { - threadIdCt = threadInfo[i][threadIdIndex] + 1; - } - } - break; - } - } - if (index < threadIdIndex) { - // - // If thread ids were specified, it is an error if they are not - // unique. Also, check that we waven't already restarted the - // loop (to be safe - shouldn't need to). - // - if ((threadInfo[i][threadIdIndex] != UINT_MAX) - || assign_thread_ids) { - __kmp_free(lastId); - __kmp_free(totals); - __kmp_free(maxCt); - __kmp_free(counts); - CLEANUP_THREAD_INFO; - *msg_id = kmp_i18n_str_PhysicalIDsNotUnique; - return -1; - } + no_val: + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_MissingValCpuinfo; + return -1; - // - // If the thread ids were not specified and we see entries - // entries that are duplicates, start the loop over and - // assign the thread ids manually. - // - assign_thread_ids = true; - goto restart_radix_check; + dup_field: + CLEANUP_THREAD_INFO; + *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo; + return -1; + } + *line = 0; + +#if KMP_MIC && REDUCE_TEAM_SIZE + unsigned teamSize = 0; +#endif // KMP_MIC && REDUCE_TEAM_SIZE + + // check for num_records == __kmp_xproc ??? + + // If there's only one thread context to bind to, form an Address object with + // depth 1 and return immediately (or, if affinity is off, set address2os to + // NULL and return). + // + // If it is configured to omit the package level when there is only a single + // package, the logic at the end of this routine won't work if there is only a + // single thread - it would try to form an Address object with depth 0. + KMP_ASSERT(num_avail > 0); + KMP_ASSERT(num_avail <= num_records); + if (num_avail == 1) { + __kmp_ncores = 1; + __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1; + if (__kmp_affinity_verbose) { + if (!KMP_AFFINITY_CAPABLE()) { + KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + __kmp_affin_fullMask); + KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); } - } - -# if KMP_MIC && REDUCE_TEAM_SIZE - // - // The default team size is the total #threads in the machine - // minus 1 thread for every core that has 3 or more threads. - // - teamSize += ( threadIdCt <= 2 ) ? 
( threadIdCt ) : ( threadIdCt - 1 ); -# endif // KMP_MIC && REDUCE_TEAM_SIZE + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } + int index; + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_str_buf_print(&buf, "1"); + for (index = maxIndex - 1; index > pkgIdIndex; index--) { + __kmp_str_buf_print(&buf, " x 1"); + } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1); + __kmp_str_buf_free(&buf); + } - for (index = threadIdIndex; index <= maxIndex; index++) { - if (counts[index] > maxCt[index]) { - maxCt[index] = counts[index]; - } + if (__kmp_affinity_type == affinity_none) { + CLEANUP_THREAD_INFO; + return 0; } - __kmp_nThreadsPerCore = maxCt[threadIdIndex]; - nCoresPerPkg = maxCt[coreIdIndex]; - nPackages = totals[pkgIdIndex]; + *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair)); + Address addr(1); + addr.labels[0] = threadInfo[0][pkgIdIndex]; + (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]); - // - // Check to see if the machine topology is uniform - // - unsigned prod = totals[maxIndex]; - for (index = threadIdIndex; index < maxIndex; index++) { - prod *= maxCt[index]; + if (__kmp_affinity_gran_levels < 0) { + __kmp_affinity_gran_levels = 0; } - bool uniform = (prod == totals[threadIdIndex]); - - // - // When affinity is off, this routine will still be called to set - // __kmp_ncores, as well as __kmp_nThreadsPerCore, - // nCoresPerPkg, & nPackages. Make sure all these vars are set - // correctly, and return now if affinity is not enabled. - // - __kmp_ncores = totals[coreIdIndex]; if (__kmp_affinity_verbose) { - if (! KMP_AFFINITY_CAPABLE()) { - KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - } + __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1); + } + + CLEANUP_THREAD_INFO; + return 1; + } + + // Sort the threadInfo table by physical Id. + qsort(threadInfo, num_avail, sizeof(*threadInfo), + __kmp_affinity_cmp_ProcCpuInfo_phys_id); + + // The table is now sorted by pkgId / coreId / threadId, but we really don't + // know the radix of any of the fields. pkgId's may be sparsely assigned among + // the chips on a system. Although coreId's are usually assigned + // [0 .. coresPerPkg-1] and threadId's are usually assigned + // [0..threadsPerCore-1], we don't want to make any such assumptions. + // + // For that matter, we don't know what coresPerPkg and threadsPerCore (or the + // total # packages) are at this point - we want to determine that now. We + // only have an upper bound on the first two figures. + unsigned *counts = + (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned)); + unsigned *maxCt = + (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned)); + unsigned *totals = + (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned)); + unsigned *lastId = + (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned)); + + bool assign_thread_ids = false; + unsigned threadIdCt; + unsigned index; + +restart_radix_check: + threadIdCt = 0; + + // Initialize the counter arrays with data from threadInfo[0]. 
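  // This block (and the matching one in the per-proc loop below) may execute
  // twice: the first pass trusts any "thread id" fields read from the file;
  // if duplicate physical ids turn up with no thread id to tell them apart,
  // assign_thread_ids is set and control jumps back to restart_radix_check,
  // and the second pass numbers the threads on each core itself, restarting
  // threadIdCt at 0 whenever a new core is encountered.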
+ if (assign_thread_ids) { + if (threadInfo[0][threadIdIndex] == UINT_MAX) { + threadInfo[0][threadIdIndex] = threadIdCt++; + } else if (threadIdCt <= threadInfo[0][threadIdIndex]) { + threadIdCt = threadInfo[0][threadIdIndex] + 1; + } + } + for (index = 0; index <= maxIndex; index++) { + counts[index] = 1; + maxCt[index] = 1; + totals[index] = 1; + lastId[index] = threadInfo[0][index]; + ; + } + + // Run through the rest of the OS procs. + for (i = 1; i < num_avail; i++) { + // Find the most significant index whose id differs from the id for the + // previous OS proc. + for (index = maxIndex; index >= threadIdIndex; index--) { + if (assign_thread_ids && (index == threadIdIndex)) { + // Auto-assign the thread id field if it wasn't specified. + if (threadInfo[i][threadIdIndex] == UINT_MAX) { + threadInfo[i][threadIdIndex] = threadIdCt++; + } + // Apparently the thread id field was specified for some entries and not + // others. Start the thread id counter off at the next higher thread id. + else if (threadIdCt <= threadInfo[i][threadIdIndex]) { + threadIdCt = threadInfo[i][threadIdIndex] + 1; }
- else { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, __kmp_affin_fullMask); - KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); - } - KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); - if (uniform) { - KMP_INFORM(Uniform, "KMP_AFFINITY"); - } else { - KMP_INFORM(NonUniform, "KMP_AFFINITY"); - }
+ } + if (threadInfo[i][index] != lastId[index]) { + // Run through all indices which are less significant, and reset the + // counts to 1. At all levels up to and including index, we need to + // increment the totals and record the last id. + unsigned index2; + for (index2 = threadIdIndex; index2 < index; index2++) { + totals[index2]++; + if (counts[index2] > maxCt[index2]) { + maxCt[index2] = counts[index2]; + } + counts[index2] = 1; + lastId[index2] = threadInfo[i][index2]; }
- kmp_str_buf_t buf; - __kmp_str_buf_init(&buf);
+ counts[index]++; + totals[index]++; + lastId[index] = threadInfo[i][index];
- __kmp_str_buf_print(&buf, "%d", totals[maxIndex]); - for (index = maxIndex - 1; index >= pkgIdIndex; index--) { - __kmp_str_buf_print(&buf, " x %d", maxCt[index]); - } - KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex], - maxCt[threadIdIndex], __kmp_ncores); + if (assign_thread_ids && (index > threadIdIndex)) {
- __kmp_str_buf_free(&buf); - }
+#if KMP_MIC && REDUCE_TEAM_SIZE + // The default team size is the total #threads in the machine + // minus 1 thread for every core that has 3 or more threads. + teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); +#endif // KMP_MIC && REDUCE_TEAM_SIZE
-# if KMP_MIC && REDUCE_TEAM_SIZE - // - // Set the default team size. - // - if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { - __kmp_dflt_team_nth = teamSize; - KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth)); - } -# endif // KMP_MIC && REDUCE_TEAM_SIZE
+ // Restart the thread counter, as we are on a new core. 
+ threadIdCt = 0;
- KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); - KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc); - __kmp_pu_os_idx = (int*)__kmp_allocate(sizeof(int) * __kmp_avail_proc); - for (i = 0; i < num_avail; ++i) { // fill the os indices - __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex]; - }
+ // Auto-assign the thread id field if it wasn't specified. + if (threadInfo[i][threadIdIndex] == UINT_MAX) { + threadInfo[i][threadIdIndex] = threadIdCt++; + }
- if (__kmp_affinity_type == affinity_none) {
+ // Apparently the thread id field was specified for some entries and + // not others. Start the thread id counter off at the next higher + // thread id. + else if (threadIdCt <= threadInfo[i][threadIdIndex]) { + threadIdCt = threadInfo[i][threadIdIndex] + 1; + } + } + break; + } + } + if (index < threadIdIndex) { + // If thread ids were specified, it is an error if they are not unique. + // Also, check that we haven't already restarted the loop (to be safe - + // shouldn't need to). + if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) { __kmp_free(lastId); __kmp_free(totals); __kmp_free(maxCt); __kmp_free(counts); CLEANUP_THREAD_INFO; - return 0; - } - - // - // Count the number of levels which have more nodes at that level than - // at the parent's level (with there being an implicit root node of - // the top level). This is equivalent to saying that there is at least - // one node at this level which has a sibling. These levels are in the - // map, and the package level is always in the map. - // - bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool)); - int level = 0; - for (index = threadIdIndex; index < maxIndex; index++) { - KMP_ASSERT(totals[index] >= totals[index + 1]); - inMap[index] = (totals[index] > totals[index + 1]); - } - inMap[maxIndex] = (totals[maxIndex] > 1); - inMap[pkgIdIndex] = true; - - int depth = 0; - for (index = threadIdIndex; index <= maxIndex; index++) { - if (inMap[index]) { - depth++; - } - } - KMP_ASSERT(depth > 0); - - // - // Construct the data structure that is to be returned. - // - *address2os = (AddrUnsPair*) - __kmp_allocate(sizeof(AddrUnsPair) * num_avail); - int pkgLevel = -1; - int coreLevel = -1; - int threadLevel = -1; - - for (i = 0; i < num_avail; ++i) { - Address addr(depth); - unsigned os = threadInfo[i][osIdIndex]; - int src_index; - int dst_index = 0; - - for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { - if (! inMap[src_index]) { - continue; - } - addr.labels[dst_index] = threadInfo[i][src_index]; - if (src_index == pkgIdIndex) { - pkgLevel = dst_index; - } - else if (src_index == coreIdIndex) { - coreLevel = dst_index; - } - else if (src_index == threadIdIndex) { - threadLevel = dst_index; - } - dst_index++; - } - (*address2os)[i] = AddrUnsPair(addr, os); - } - - if (__kmp_affinity_gran_levels < 0) { - // - // Set the granularity level based on what levels are modeled - // in the machine topology map. - // - unsigned src_index; - __kmp_affinity_gran_levels = 0; - for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) { - if (! 
inMap[src_index]) { - continue; - } - switch (src_index) { - case threadIdIndex: - if (__kmp_affinity_gran > affinity_gran_thread) { - __kmp_affinity_gran_levels++; - } - - break; - case coreIdIndex: - if (__kmp_affinity_gran > affinity_gran_core) { - __kmp_affinity_gran_levels++; - } - break; - - case pkgIdIndex: - if (__kmp_affinity_gran > affinity_gran_package) { - __kmp_affinity_gran_levels++; - } - break; - } - } - } + *msg_id = kmp_i18n_str_PhysicalIDsNotUnique; + return -1; + } - - if (__kmp_affinity_verbose) { - __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, - coreLevel, threadLevel); + // If the thread ids were not specified and we see entries that + // are duplicates, start the loop over and assign the thread ids manually. + assign_thread_ids = true; + goto restart_radix_check; + } + } + +#if KMP_MIC && REDUCE_TEAM_SIZE + // The default team size is the total #threads in the machine + // minus 1 thread for every core that has 3 or more threads. + teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1); +#endif // KMP_MIC && REDUCE_TEAM_SIZE + + for (index = threadIdIndex; index <= maxIndex; index++) { + if (counts[index] > maxCt[index]) { + maxCt[index] = counts[index]; + } + } + + __kmp_nThreadsPerCore = maxCt[threadIdIndex]; + nCoresPerPkg = maxCt[coreIdIndex]; + nPackages = totals[pkgIdIndex]; + + // Check to see if the machine topology is uniform + unsigned prod = totals[maxIndex]; + for (index = threadIdIndex; index < maxIndex; index++) { + prod *= maxCt[index]; + } + bool uniform = (prod == totals[threadIdIndex]); + + // When affinity is off, this routine will still be called to set + // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages. + // Make sure all these vars are set correctly, and return now if affinity is + // not enabled. + __kmp_ncores = totals[coreIdIndex]; + + if (__kmp_affinity_verbose) { + if (!KMP_AFFINITY_CAPABLE()) { + KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY"); + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } + } else { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + __kmp_affin_fullMask); + KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY"); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf); + } + KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc); + if (uniform) { + KMP_INFORM(Uniform, "KMP_AFFINITY"); + } else { + KMP_INFORM(NonUniform, "KMP_AFFINITY"); + } } - - __kmp_free(inMap); + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + + __kmp_str_buf_print(&buf, "%d", totals[maxIndex]); + for (index = maxIndex - 1; index >= pkgIdIndex; index--) { + __kmp_str_buf_print(&buf, " x %d", maxCt[index]); + } + KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex], + maxCt[threadIdIndex], __kmp_ncores); + + __kmp_str_buf_free(&buf); + } + +#if KMP_MIC && REDUCE_TEAM_SIZE + // Set the default team size. 
+ if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) { + __kmp_dflt_team_nth = teamSize; + KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting " + "__kmp_dflt_team_nth = %d\n", + __kmp_dflt_team_nth)); + } +#endif // KMP_MIC && REDUCE_TEAM_SIZE + + KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL); + KMP_DEBUG_ASSERT(num_avail == __kmp_avail_proc); + __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc); + for (i = 0; i < num_avail; ++i) { // fill the os indices + __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex]; + } + + if (__kmp_affinity_type == affinity_none) { __kmp_free(lastId); __kmp_free(totals); __kmp_free(maxCt); __kmp_free(counts); CLEANUP_THREAD_INFO; - return depth; -} - - -// -// Create and return a table of affinity masks, indexed by OS thread ID. -// This routine handles OR'ing together all the affinity masks of threads -// that are sufficiently close, if granularity > fine. -// -static kmp_affin_mask_t * -__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique, - AddrUnsPair *address2os, unsigned numAddrs) -{ - // - // First form a table of affinity masks in order of OS thread id. - // - unsigned depth; - unsigned maxOsId; - unsigned i; - - KMP_ASSERT(numAddrs > 0); - depth = address2os[0].first.depth; - - maxOsId = 0; - for (i = 0; i < numAddrs; i++) { - unsigned osId = address2os[i].second; - if (osId > maxOsId) { - maxOsId = osId; + return 0; + } + + // Count the number of levels which have more nodes at that level than at the + // parent's level (with there being an implicit root node of the top level). + // This is equivalent to saying that there is at least one node at this level + // which has a sibling. These levels are in the map, and the package level is + // always in the map. + bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool)); + int level = 0; + for (index = threadIdIndex; index < maxIndex; index++) { + KMP_ASSERT(totals[index] >= totals[index + 1]); + inMap[index] = (totals[index] > totals[index + 1]); + } + inMap[maxIndex] = (totals[maxIndex] > 1); + inMap[pkgIdIndex] = true; + + int depth = 0; + for (index = threadIdIndex; index <= maxIndex; index++) { + if (inMap[index]) { + depth++; + } + } + KMP_ASSERT(depth > 0); + + // Construct the data structure that is to be returned. + *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail); + int pkgLevel = -1; + int coreLevel = -1; + int threadLevel = -1; + + for (i = 0; i < num_avail; ++i) { + Address addr(depth); + unsigned os = threadInfo[i][osIdIndex]; + int src_index; + int dst_index = 0; + + for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) { + if (!inMap[src_index]) { + continue; + } + addr.labels[dst_index] = threadInfo[i][src_index]; + if (src_index == pkgIdIndex) { + pkgLevel = dst_index; + } else if (src_index == coreIdIndex) { + coreLevel = dst_index; + } else if (src_index == threadIdIndex) { + threadLevel = dst_index; + } + dst_index++; + } + (*address2os)[i] = AddrUnsPair(addr, os); + } + + if (__kmp_affinity_gran_levels < 0) { + // Set the granularity level based on what levels are modeled + // in the machine topology map. 
+ unsigned src_index; + __kmp_affinity_gran_levels = 0; + for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) { + if (!inMap[src_index]) { + continue; + } + switch (src_index) { + case threadIdIndex: + if (__kmp_affinity_gran > affinity_gran_thread) { + __kmp_affinity_gran_levels++; } - } - kmp_affin_mask_t *osId2Mask; - KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId+1)); - // - // Sort the address2os table according to physical order. Doing so - // will put all threads on the same core/package/node in consecutive - // locations. - // - qsort(address2os, numAddrs, sizeof(*address2os), - __kmp_affinity_cmp_Address_labels); - - KMP_ASSERT(__kmp_affinity_gran_levels >= 0); - if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { - KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); - } - if (__kmp_affinity_gran_levels >= (int)depth) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffThreadsMayMigrate); + break; + case coreIdIndex: + if (__kmp_affinity_gran > affinity_gran_core) { + __kmp_affinity_gran_levels++; } - } + break; - // - // Run through the table, forming the masks for all threads on each - // core. Threads on the same core will have identical "Address" - // objects, not considering the last level, which must be the thread - // id. All threads on a core will appear consecutively. - // - unsigned unique = 0; - unsigned j = 0; // index of 1st thread on core - unsigned leader = 0; - Address *leaderAddr = &(address2os[0].first); - kmp_affin_mask_t *sum; - KMP_CPU_ALLOC_ON_STACK(sum); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[0].second, sum); - for (i = 1; i < numAddrs; i++) { - // - // If this thread is sufficiently close to the leader (within the - // granularity setting), then set the bit for this os thread in the - // affinity mask for this group, and go on to the next thread. - // - if (leaderAddr->isClose(address2os[i].first, - __kmp_affinity_gran_levels)) { - KMP_CPU_SET(address2os[i].second, sum); - continue; + case pkgIdIndex: + if (__kmp_affinity_gran > affinity_gran_package) { + __kmp_affinity_gran_levels++; } + break; + } + } + } - // - // For every thread in this group, copy the mask to the thread's - // entry in the osId2Mask table. Mark the first address as a - // leader. - // - for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); - } - unique++; + if (__kmp_affinity_verbose) { + __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel, + coreLevel, threadLevel); + } - // - // Start a new mask. - // - leader = i; - leaderAddr = &(address2os[i].first); - KMP_CPU_ZERO(sum); - KMP_CPU_SET(address2os[i].second, sum); - } + __kmp_free(inMap); + __kmp_free(lastId); + __kmp_free(totals); + __kmp_free(maxCt); + __kmp_free(counts); + CLEANUP_THREAD_INFO; + return depth; +} - // - // For every thread in last group, copy the mask to the thread's - // entry in the osId2Mask table. - // +// Create and return a table of affinity masks, indexed by OS thread ID. +// This routine handles OR'ing together all the affinity masks of threads +// that are sufficiently close, if granularity > fine. 
+static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex, + unsigned *numUnique, + AddrUnsPair *address2os, + unsigned numAddrs) { + // First form a table of affinity masks in order of OS thread id. + unsigned depth; + unsigned maxOsId; + unsigned i; + + KMP_ASSERT(numAddrs > 0); + depth = address2os[0].first.depth; + + maxOsId = 0; + for (i = 0; i < numAddrs; i++) { + unsigned osId = address2os[i].second; + if (osId > maxOsId) { + maxOsId = osId; + } + } + kmp_affin_mask_t *osId2Mask; + KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1)); + + // Sort the address2os table according to physical order. Doing so will put + // all threads on the same core/package/node in consecutive locations. + qsort(address2os, numAddrs, sizeof(*address2os), + __kmp_affinity_cmp_Address_labels); + + KMP_ASSERT(__kmp_affinity_gran_levels >= 0); + if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) { + KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels); + } + if (__kmp_affinity_gran_levels >= (int)depth) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffThreadsMayMigrate); + } + } + + // Run through the table, forming the masks for all threads on each core. + // Threads on the same core will have identical "Address" objects, not + // considering the last level, which must be the thread id. All threads on a + // core will appear consecutively. + unsigned unique = 0; + unsigned j = 0; // index of 1st thread on core + unsigned leader = 0; + Address *leaderAddr = &(address2os[0].first); + kmp_affin_mask_t *sum; + KMP_CPU_ALLOC_ON_STACK(sum); + KMP_CPU_ZERO(sum); + KMP_CPU_SET(address2os[0].second, sum); + for (i = 1; i < numAddrs; i++) { + // If this thread is sufficiently close to the leader (within the + // granularity setting), then set the bit for this os thread in the + // affinity mask for this group, and go on to the next thread. + if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) { + KMP_CPU_SET(address2os[i].second, sum); + continue; + } + + // For every thread in this group, copy the mask to the thread's entry in + // the osId2Mask table. Mark the first address as a leader. for (; j < i; j++) { - unsigned osId = address2os[j].second; - KMP_DEBUG_ASSERT(osId <= maxOsId); - kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); - KMP_CPU_COPY(mask, sum); - address2os[j].first.leader = (j == leader); + unsigned osId = address2os[j].second; + KMP_DEBUG_ASSERT(osId <= maxOsId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + KMP_CPU_COPY(mask, sum); + address2os[j].first.leader = (j == leader); } unique++; - KMP_CPU_FREE_FROM_STACK(sum); - *maxIndex = maxOsId; - *numUnique = unique; - return osId2Mask; + // Start a new mask. + leader = i; + leaderAddr = &(address2os[i].first); + KMP_CPU_ZERO(sum); + KMP_CPU_SET(address2os[i].second, sum); + } + + // For every thread in last group, copy the mask to the thread's + // entry in the osId2Mask table. + for (; j < i; j++) { + unsigned osId = address2os[j].second; + KMP_DEBUG_ASSERT(osId <= maxOsId); + kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId); + KMP_CPU_COPY(mask, sum); + address2os[j].first.leader = (j == leader); + } + unique++; + KMP_CPU_FREE_FROM_STACK(sum); + + *maxIndex = maxOsId; + *numUnique = unique; + return osId2Mask; } - -// // Stuff for the affinity proclist parsers. 
It's easier to declare these vars // as file-static than to try and pass them through the calling sequence of // the recursive-descent OMP_PLACES parser. -// static kmp_affin_mask_t *newMasks; static int numNewMasks; static int nextNewMask; -#define ADD_MASK(_mask) \ - { \ - if (nextNewMask >= numNewMasks) { \ - int i; \ - numNewMasks *= 2; \ - kmp_affin_mask_t* temp; \ - KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ - for(i=0;i _maxOsId) || \ - (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ - if (__kmp_affinity_verbose || (__kmp_affinity_warnings \ - && (__kmp_affinity_type != affinity_none))) { \ - KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ - } \ - } \ - else { \ - ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ - } \ - } - +#define ADD_MASK(_mask) \ + { \ + if (nextNewMask >= numNewMasks) { \ + int i; \ + numNewMasks *= 2; \ + kmp_affin_mask_t *temp; \ + KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks); \ + for (i = 0; i < numNewMasks / 2; i++) { \ + kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); \ + kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i); \ + KMP_CPU_COPY(dest, src); \ + } \ + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2); \ + newMasks = temp; \ + } \ + KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask)); \ + nextNewMask++; \ + } + +#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId) \ + { \ + if (((_osId) > _maxOsId) || \ + (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \ + if (__kmp_affinity_verbose || \ + (__kmp_affinity_warnings && \ + (__kmp_affinity_type != affinity_none))) { \ + KMP_WARNING(AffIgnoreInvalidProcID, _osId); \ + } \ + } else { \ + ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId))); \ + } \ + } -// // Re-parse the proclist (for the explicit affinity type), and form the list // of affinity newMasks indexed by gtid. -// -static void -__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, const char *proclist, - kmp_affin_mask_t *osId2Mask, int maxOsId) -{ - int i; - const char *scan = proclist; - const char *next = proclist; - - // - // We use malloc() for the temporary mask vector, - // so that we can use realloc() to extend it. - // - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - kmp_affin_mask_t *sumMask; - KMP_CPU_ALLOC(sumMask); - int setSize = 0; - - for (;;) { - int start, end, stride; +static void __kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks, + unsigned int *out_numMasks, + const char *proclist, + kmp_affin_mask_t *osId2Mask, + int maxOsId) { + int i; + const char *scan = proclist; + const char *next = proclist; + + // We use malloc() for the temporary mask vector, so that we can use + // realloc() to extend it. + numNewMasks = 2; + KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); + nextNewMask = 0; + kmp_affin_mask_t *sumMask; + KMP_CPU_ALLOC(sumMask); + int setSize = 0; + + for (;;) { + int start, end, stride; + + SKIP_WS(scan); + next = scan; + if (*next == '\0') { + break; + } + + if (*next == '{') { + int num; + setSize = 0; + next++; // skip '{' + SKIP_WS(next); + scan = next; + + // Read the first integer in the set. + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad proclist"); + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(num >= 0, "bad explicit proc list"); + + // Copy the mask for that osId to the sum (union) mask. 
+ if ((num > maxOsId) || + (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + KMP_CPU_ZERO(sumMask); + } else { + KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); + setSize = 1; + } - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; + for (;;) { + // Check for end of set. + SKIP_WS(next); + if (*next == '}') { + next++; // skip '}' + break; } - if (*next == '{') { - int num; - setSize = 0; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // - // Read the first integer in the set. - // - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad proclist"); - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // - // Copy the mask for that osId to the sum (union) mask. - // - if ((num > maxOsId) || - (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - KMP_CPU_ZERO(sumMask); - } - else { - KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize = 1; - } - - for (;;) { - // - // Check for end of set. - // - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // - // Read the next integer in the set. - // - scan = next; - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad explicit proc list"); - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(num >= 0, "bad explicit proc list"); - - // - // Add the mask for that osId to the sum mask. - // - if ((num > maxOsId) || - (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } - else { - KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); - setSize++; - } - } - if (setSize > 0) { - ADD_MASK(sumMask); - } - - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; + // Skip optional comma. + if (*next == ',') { + next++; } - - // - // Read the first integer. - // - KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); - SKIP_DIGITS(next); - start = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(start >= 0, "bad explicit proc list"); SKIP_WS(next); - // - // If this isn't a range, then add a mask to the list and go on. - // - if (*next != '-') { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - scan = next; - continue; - } - - // - // This is a range. Skip over the '-' and read in the 2nd int. - // - next++; // skip '-' - SKIP_WS(next); + // Read the next integer in the set. scan = next; KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); - end = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(end >= 0, "bad explicit proc list"); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(num >= 0, "bad explicit proc list"); - // - // Check for a stride parameter - // - stride = 1; - SKIP_WS(next); - if (*next == ':') { - // - // A stride is specified. Skip over the ':" and read the 3rd int. 
- // - int sign = +1; - next++; // skip ':' - SKIP_WS(next); - scan = next; - if (*next == '-') { - sign = -1; - next++; - SKIP_WS(next); - scan = next; - } - KMP_ASSERT2((*next >= '0') && (*next <= '9'), - "bad explicit proc list"); - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT2(stride >= 0, "bad explicit proc list"); - stride *= sign; + // Add the mask for that osId to the sum mask. + if ((num > maxOsId) || + (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + } else { + KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num)); + setSize++; } + } + if (setSize > 0) { + ADD_MASK(sumMask); + } - // - // Do some range checks. - // - KMP_ASSERT2(stride != 0, "bad explicit proc list"); - if (stride > 0) { - KMP_ASSERT2(start <= end, "bad explicit proc list"); - } - else { - KMP_ASSERT2(start >= end, "bad explicit proc list"); - } - KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list"); - - // - // Add the mask for each OS proc # to the list. - // - if (stride > 0) { - do { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - start += stride; - } while (start <= end); - } - else { - do { - ADD_MASK_OSID(start, osId2Mask, maxOsId); - start += stride; - } while (start >= end); - } + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + continue; + } - // - // Skip optional comma. - // + // Read the first integer. + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); + start = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(start >= 0, "bad explicit proc list"); + SKIP_WS(next); + + // If this isn't a range, then add a mask to the list and go on. + if (*next != '-') { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + + // Skip optional comma. + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // This is a range. Skip over the '-' and read in the 2nd int. + next++; // skip '-' + SKIP_WS(next); + scan = next; + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); + end = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(end >= 0, "bad explicit proc list"); + + // Check for a stride parameter + stride = 1; + SKIP_WS(next); + if (*next == ':') { + // A stride is specified. Skip over the ':" and read the 3rd int. + int sign = +1; + next++; // skip ':' + SKIP_WS(next); + scan = next; + if (*next == '-') { + sign = -1; + next++; SKIP_WS(next); - if (*next == ',') { - next++; - } scan = next; + } + KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list"); + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT2(stride >= 0, "bad explicit proc list"); + stride *= sign; } - *out_numMasks = nextNewMask; - if (nextNewMask == 0) { - *out_masks = NULL; - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - return; + // Do some range checks. 
+ KMP_ASSERT2(stride != 0, "bad explicit proc list"); + if (stride > 0) { + KMP_ASSERT2(start <= end, "bad explicit proc list"); + } else { + KMP_ASSERT2(start >= end, "bad explicit proc list"); } - KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); - for(i = 0; i < nextNewMask; i++) { - kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); - kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); - KMP_CPU_COPY(dest, src); + KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list"); + + // Add the mask for each OS proc # to the list. + if (stride > 0) { + do { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + start += stride; + } while (start <= end); + } else { + do { + ADD_MASK_OSID(start, osId2Mask, maxOsId); + start += stride; + } while (start >= end); + } + + // Skip optional comma. + SKIP_WS(next); + if (*next == ',') { + next++; } + scan = next; + } + + *out_numMasks = nextNewMask; + if (nextNewMask == 0) { + *out_masks = NULL; KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - KMP_CPU_FREE(sumMask); + return; + } + KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); + for (i = 0; i < nextNewMask; i++) { + kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); + kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i); + KMP_CPU_COPY(dest, src); + } + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + KMP_CPU_FREE(sumMask); } - -# if OMP_40_ENABLED +#if OMP_40_ENABLED /*----------------------------------------------------------------------------- - Re-parse the OMP_PLACES proc id list, forming the newMasks for the different places. Again, Here is the grammar: @@ -3044,756 +2707,574 @@ subplace := num : num : signed signed := num signed := + signed signed := - signed - -----------------------------------------------------------------------------*/ -static void -__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) -{ - const char *next; +static void __kmp_process_subplace_list(const char **scan, + kmp_affin_mask_t *osId2Mask, + int maxOsId, kmp_affin_mask_t *tempMask, + int *setSize) { + const char *next; - for (;;) { - int start, count, stride, i; - - // - // Read in the starting proc id - // - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - if ((start > maxOsId) || - (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - (*setSize)++; - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (! 
KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start++; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } - KMP_ASSERT2(**scan == ':', "bad explicit places list"); - (*scan)++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), - "bad explicit places list"); - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // - // valid follow sets are ',' and '}' - // - SKIP_WS(*scan); - if (**scan == '}' || **scan == ',') { - for (i = 0; i < count; i++) { - if ((start > maxOsId) || - (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, start); - } - break; // don't proliferate warnings for large count - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); - start += stride; - (*setSize)++; - } - } - if (**scan == '}') { - break; - } - (*scan)++; // skip ',' - continue; - } + for (;;) { + int start, count, stride, i; - KMP_ASSERT2(0, "bad explicit places list"); + // Read in the starting proc id + SKIP_WS(*scan); + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + start = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(start >= 0); + *scan = next; + + // valid follow sets are ',' ':' and '}' + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + if ((start > maxOsId) || + (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + } else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + (*setSize)++; + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; } -} - - -static void -__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, - int maxOsId, kmp_affin_mask_t *tempMask, int *setSize) -{ - const char *next; + KMP_ASSERT2(**scan == ':', "bad explicit places list"); + (*scan)++; // skip ':' - // - // valid follow sets are '{' '!' and num - // + // Read count parameter SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask, - setSize); - KMP_ASSERT2(**scan == '}', "bad explicit places list"); - (*scan)++; // skip '}' - } - else if (**scan == '!') { - (*scan)++; // skip '!' - __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); - KMP_CPU_COMPLEMENT(maxOsId, tempMask); - } - else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int num = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(num >= 0); - if ((num > maxOsId) || - (! 
KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffIgnoreInvalidProcID, num); - } - } - else { - KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); - (*setSize)++; + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(count >= 0); + *scan = next; + + // valid follow sets are ',' ':' and '}' + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + for (i = 0; i < count; i++) { + if ((start > maxOsId) || + (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + break; // don't proliferate warnings for large count + } else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + start++; + (*setSize)++; } - *scan = next; // skip num + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; } - else { - KMP_ASSERT2(0, "bad explicit places list"); + KMP_ASSERT2(**scan == ':', "bad explicit places list"); + (*scan)++; // skip ':' + + // Read stride parameter + int sign = +1; + for (;;) { + SKIP_WS(*scan); + if (**scan == '+') { + (*scan)++; // skip '+' + continue; + } + if (**scan == '-') { + sign *= -1; + (*scan)++; // skip '-' + continue; + } + break; + } + SKIP_WS(*scan); + KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list"); + next = *scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(stride >= 0); + *scan = next; + stride *= sign; + + // valid follow sets are ',' and '}' + SKIP_WS(*scan); + if (**scan == '}' || **scan == ',') { + for (i = 0; i < count; i++) { + if ((start > maxOsId) || + (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, start); + } + break; // don't proliferate warnings for large count + } else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start)); + start += stride; + (*setSize)++; + } + } + if (**scan == '}') { + break; + } + (*scan)++; // skip ',' + continue; } -} - - -//static void -void -__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, - unsigned int *out_numMasks, const char *placelist, - kmp_affin_mask_t *osId2Mask, int maxOsId) -{ - int i,j,count,stride,sign; - const char *scan = placelist; - const char *next = placelist; - - numNewMasks = 2; - KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); - nextNewMask = 0; - - // tempMask is modified based on the previous or initial - // place to form the current place - // previousMask contains the previous place - kmp_affin_mask_t *tempMask; - kmp_affin_mask_t *previousMask; - KMP_CPU_ALLOC(tempMask); - KMP_CPU_ZERO(tempMask); - KMP_CPU_ALLOC(previousMask); - KMP_CPU_ZERO(previousMask); - int setSize = 0; - - for (;;) { - __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - if (setSize > 0) { - ADD_MASK(tempMask); - } - KMP_CPU_ZERO(tempMask); - setSize = 0; - if (*scan == '\0') { - break; - } - scan++; // skip ',' - continue; - } + KMP_ASSERT2(0, "bad explicit places list"); + } +} - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - 
scan++; // skip ':' +static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask, + int maxOsId, kmp_affin_mask_t *tempMask, + int *setSize) { + const char *next; + + // valid follow sets are '{' '!' and num + SKIP_WS(*scan); + if (**scan == '{') { + (*scan)++; // skip '{' + __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize); + KMP_ASSERT2(**scan == '}', "bad explicit places list"); + (*scan)++; // skip '}' + } else if (**scan == '!') { + (*scan)++; // skip '!' + __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize); + KMP_CPU_COMPLEMENT(maxOsId, tempMask); + } else if ((**scan >= '0') && (**scan <= '9')) { + next = *scan; + SKIP_DIGITS(next); + int num = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(num >= 0); + if ((num > maxOsId) || + (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffIgnoreInvalidProcID, num); + } + } else { + KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num)); + (*setSize)++; + } + *scan = next; // skip num + } else { + KMP_ASSERT2(0, "bad explicit places list"); + } +} - // - // Read count parameter - // - SKIP_WS(scan); - KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), - "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; +// static void +void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks, + unsigned int *out_numMasks, + const char *placelist, + kmp_affin_mask_t *osId2Mask, + int maxOsId) { + int i, j, count, stride, sign; + const char *scan = placelist; + const char *next = placelist; + + numNewMasks = 2; + KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks); + nextNewMask = 0; + + // tempMask is modified based on the previous or initial + // place to form the current place + // previousMask contains the previous place + kmp_affin_mask_t *tempMask; + kmp_affin_mask_t *previousMask; + KMP_CPU_ALLOC(tempMask); + KMP_CPU_ZERO(tempMask); + KMP_CPU_ALLOC(previousMask); + KMP_CPU_ZERO(previousMask); + int setSize = 0; + + for (;;) { + __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize); + + // valid follow sets are ',' ':' and EOL + SKIP_WS(scan); + if (*scan == '\0' || *scan == ',') { + if (setSize > 0) { + ADD_MASK(tempMask); + } + KMP_CPU_ZERO(tempMask); + setSize = 0; + if (*scan == '\0') { + break; + } + scan++; // skip ',' + continue; + } + + KMP_ASSERT2(*scan == ':', "bad explicit places list"); + scan++; // skip ':' + + // Read count parameter + SKIP_WS(scan); + KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list"); + next = scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(scan, *next); + KMP_ASSERT(count >= 0); + scan = next; + + // valid follow sets are ',' ':' and EOL + SKIP_WS(scan); + if (*scan == '\0' || *scan == ',') { + stride = +1; + } else { + KMP_ASSERT2(*scan == ':', "bad explicit places list"); + scan++; // skip ':' - // - // valid follow sets are ',' ':' and EOL - // + // Read stride parameter + sign = +1; + for (;;) { SKIP_WS(scan); - if (*scan == '\0' || *scan == ',') { - stride = +1; - } - else { - KMP_ASSERT2(*scan == ':', "bad explicit places list"); - scan++; // skip ':' - - // - // Read stride parameter - // - sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - 
KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), - "bad explicit places list"); - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_DEBUG_ASSERT(stride >= 0); - scan = next; - stride *= sign; - } - - // Add places determined by initial_place : count : stride - for (i = 0; i < count; i++) { - if (setSize == 0) { - break; - } - // Add the current place, then build the next place (tempMask) from that - KMP_CPU_COPY(previousMask, tempMask); - ADD_MASK(previousMask); - KMP_CPU_ZERO(tempMask); - setSize = 0; - KMP_CPU_SET_ITERATE(j, previousMask) { - if (! KMP_CPU_ISSET(j, previousMask)) { - continue; - } - if ((j+stride > maxOsId) || (j+stride < 0) || - (! KMP_CPU_ISSET(j, __kmp_affin_fullMask)) || - (! KMP_CPU_ISSET(j+stride, KMP_CPU_INDEX(osId2Mask, j+stride)))) { - if ((__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) && i < count - 1) { - KMP_WARNING(AffIgnoreInvalidProcID, j+stride); - } - continue; - } - KMP_CPU_SET(j+stride, tempMask); - setSize++; - } + if (*scan == '+') { + scan++; // skip '+' + continue; } - KMP_CPU_ZERO(tempMask); - setSize = 0; - - // - // valid follow sets are ',' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; + if (*scan == '-') { + sign *= -1; + scan++; // skip '-' + continue; } - if (*scan == ',') { - scan++; // skip ',' - continue; + break; + } + SKIP_WS(scan); + KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list"); + next = scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_DEBUG_ASSERT(stride >= 0); + scan = next; + stride *= sign; + } + + // Add places determined by initial_place : count : stride + for (i = 0; i < count; i++) { + if (setSize == 0) { + break; + } + // Add the current place, then build the next place (tempMask) from that + KMP_CPU_COPY(previousMask, tempMask); + ADD_MASK(previousMask); + KMP_CPU_ZERO(tempMask); + setSize = 0; + KMP_CPU_SET_ITERATE(j, previousMask) { + if (!KMP_CPU_ISSET(j, previousMask)) { + continue; + } + if ((j + stride > maxOsId) || (j + stride < 0) || + (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) || + (!KMP_CPU_ISSET(j + stride, + KMP_CPU_INDEX(osId2Mask, j + stride)))) { + if ((__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) && + i < count - 1) { + KMP_WARNING(AffIgnoreInvalidProcID, j + stride); + } + continue; } - - KMP_ASSERT2(0, "bad explicit places list"); + KMP_CPU_SET(j + stride, tempMask); + setSize++; + } } + KMP_CPU_ZERO(tempMask); + setSize = 0; - *out_numMasks = nextNewMask; - if (nextNewMask == 0) { - *out_masks = NULL; - KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); - return; + // valid follow sets are ',' and EOL + SKIP_WS(scan); + if (*scan == '\0') { + break; } - KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); - KMP_CPU_FREE(tempMask); - KMP_CPU_FREE(previousMask); - for(i = 0; i < nextNewMask; i++) { - kmp_affin_mask_t* src = KMP_CPU_INDEX(newMasks, i); - kmp_affin_mask_t* dest = KMP_CPU_INDEX((*out_masks), i); - KMP_CPU_COPY(dest, src); + if (*scan == ',') { + scan++; // skip ',' + continue; } + + KMP_ASSERT2(0, "bad explicit places list"); + } + + *out_numMasks = nextNewMask; + if (nextNewMask == 0) { + *out_masks = NULL; KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); + return; + } + KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask); + KMP_CPU_FREE(tempMask); + KMP_CPU_FREE(previousMask); + for (i = 0; i < nextNewMask; i++) { + kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i); + kmp_affin_mask_t *dest = 
KMP_CPU_INDEX((*out_masks), i); + KMP_CPU_COPY(dest, src); + } + KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks); } -# endif /* OMP_40_ENABLED */ +#endif /* OMP_40_ENABLED */ #undef ADD_MASK #undef ADD_MASK_OSID #if KMP_USE_HWLOC -static int -__kmp_hwloc_count_children_by_type( - hwloc_topology_t t, hwloc_obj_t o, hwloc_obj_type_t type, hwloc_obj_t* f) -{ - if (!hwloc_compare_types(o->type, type)) { - if (*f == NULL) - *f = o; // output first descendant found - return 1; - } - int sum = 0; - for (unsigned i = 0; i < o->arity; i++) - sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f); - return sum; // will be 0 if no one found (as PU arity is 0) +static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o, + hwloc_obj_type_t type, + hwloc_obj_t* f) { + if (!hwloc_compare_types(o->type, type)) { + if (*f == NULL) + *f = o; // output first descendant found + return 1; + } + int sum = 0; + for (unsigned i = 0; i < o->arity; i++) + sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f); + return sum; // will be 0 if no one found (as PU arity is 0) } -static int -__kmp_hwloc_count_children_by_depth( - hwloc_topology_t t, hwloc_obj_t o, unsigned depth, hwloc_obj_t* f) -{ - if (o->depth == depth) { - if (*f == NULL) - *f = o; // output first descendant found - return 1; - } - int sum = 0; - for (unsigned i = 0; i < o->arity; i++) - sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f); - return sum; // will be 0 if no one found (as PU arity is 0) +static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t, + hwloc_obj_t o, unsigned depth, + hwloc_obj_t* f) { + if (o->depth == depth) { + if (*f == NULL) + *f = o; // output first descendant found + return 1; + } + int sum = 0; + for (unsigned i = 0; i < o->arity; i++) + sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f); + return sum; // will be 0 if no one found (as PU arity is 0) } -static int -__kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) -{ // skip PUs descendants of the object o - int skipped = 0; - hwloc_obj_t hT = NULL; - int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); - for (int i = 0; i < N; ++i) { - KMP_DEBUG_ASSERT(hT); - unsigned idx = hT->os_index; - if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - ++skipped; - } - hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); - } - return skipped; // count number of skipped units +static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) { + // skip PUs descendants of the object o + int skipped = 0; + hwloc_obj_t hT = NULL; + int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); + for (int i = 0; i < N; ++i) { + KMP_DEBUG_ASSERT(hT); + unsigned idx = hT->os_index; + if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { + KMP_CPU_CLR(idx, __kmp_affin_fullMask); + KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); + ++skipped; + } + hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); + } + return skipped; // count number of skipped units } -static int -__kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) -{ // check if obj has PUs present in fullMask - hwloc_obj_t hT = NULL; - int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); - for (int i = 0; i < N; ++i) { - KMP_DEBUG_ASSERT(hT); - unsigned idx = hT->os_index; - if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) - return 1; // found PU - hT = 
hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); - } - return 0; // no PUs found +static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) { + // check if obj has PUs present in fullMask + hwloc_obj_t hT = NULL; + int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT); + for (int i = 0; i < N; ++i) { + KMP_DEBUG_ASSERT(hT); + unsigned idx = hT->os_index; + if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) + return 1; // found PU + hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT); + } + return 0; // no PUs found } #endif // KMP_USE_HWLOC -static void -__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) -{ - AddrUnsPair *newAddr; - if (__kmp_hws_requested == 0) - goto _exit; // no topology limiting actions requested, exit +static void __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) { + AddrUnsPair *newAddr; + if (__kmp_hws_requested == 0) + goto _exit; // no topology limiting actions requested, exit #if KMP_USE_HWLOC - if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { - // Number of subobjects calculated dynamically, this works fine for - // any non-uniform topology. - // L2 cache objects are determined by depth, other objects - by type. - hwloc_topology_t tp = __kmp_hwloc_topology; - int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped - int nCr=0, nTr=0; // number of requested units - int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters - hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to) - int L2depth, idx; - - // check support of extensions ---------------------------------- - int numa_support = 0, tile_support = 0; - if (__kmp_pu_os_idx) - hT = hwloc_get_pu_obj_by_os_index( - tp, __kmp_pu_os_idx[__kmp_avail_proc - 1]); - else - hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1); - if (hT == NULL) { // something's gone wrong - KMP_WARNING(AffHWSubsetUnsupported); + if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { + // Number of subobjects calculated dynamically, this works fine for + // any non-uniform topology. + // L2 cache objects are determined by depth, other objects - by type. 
+ hwloc_topology_t tp = __kmp_hwloc_topology; + int nS=0, nN=0, nL=0, nC=0, nT=0; // logical index including skipped + int nCr=0, nTr=0; // number of requested units + int nPkg=0, nCo=0, n_new=0, n_old = 0, nCpP=0, nTpC=0; // counters + hwloc_obj_t hT, hC, hL, hN, hS; // hwloc objects (pointers to) + int L2depth, idx; + + // check support of extensions ---------------------------------- + int numa_support = 0, tile_support = 0; + if (__kmp_pu_os_idx) + hT = hwloc_get_pu_obj_by_os_index(tp, + __kmp_pu_os_idx[__kmp_avail_proc - 1]); + else + hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1); + if (hT == NULL) { // something's gone wrong + KMP_WARNING(AffHWSubsetUnsupported); + goto _exit; + } + // check NUMA node + hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT); + hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT); + if (hN != NULL && hN->depth > hS->depth) { + numa_support = 1; // 1 in case socket includes node(s) + } else if (__kmp_hws_node.num > 0) { + // don't support sockets inside NUMA node (no such HW found for testing) + KMP_WARNING(AffHWSubsetUnsupported); + goto _exit; + } + // check L2 cache, get object by depth because of multiple caches + L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED); + hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT); + if (hL != NULL && __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, + &hC) > 1) { + tile_support = 1; // no sense to count L2 if it includes single core + } else if (__kmp_hws_tile.num > 0) { + if (__kmp_hws_core.num == 0) { + __kmp_hws_core = __kmp_hws_tile; // replace L2 with core + __kmp_hws_tile.num = 0; + } else { + // L2 and core are both requested, but represent same object + KMP_WARNING(AffHWSubsetInvalid); goto _exit; } - // check NUMA node - hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT); - hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT); - if (hN != NULL && hN->depth > hS->depth) { - numa_support = 1; // 1 in case socket includes node(s) - } else if (__kmp_hws_node.num > 0) { - // don't support sockets inside NUMA node (no such HW found for testing) - KMP_WARNING(AffHWSubsetUnsupported); + } + // end of check of extensions ----------------------------------- + + // fill in unset items, validate settings ----------------------- + if (__kmp_hws_socket.num == 0) + __kmp_hws_socket.num = nPackages; // use all available sockets + if (__kmp_hws_socket.offset >= nPackages) { + KMP_WARNING(AffHWSubsetManySockets); + goto _exit; + } + if (numa_support) { + int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, + &hN); // num nodes in socket + if (__kmp_hws_node.num == 0) + __kmp_hws_node.num = NN; // use all available nodes + if (__kmp_hws_node.offset >= NN) { + KMP_WARNING(AffHWSubsetManyNodes); goto _exit; } - // check L2 cahce, get object by depth because of multiple caches - L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED); - hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT); - if (hL != NULL && __kmp_hwloc_count_children_by_type( - tp, hL, HWLOC_OBJ_CORE, &hC) > 1) { - tile_support = 1; // no sense to count L2 if it includes single core - } else if (__kmp_hws_tile.num > 0) { - if (__kmp_hws_core.num == 0) { - __kmp_hws_core = __kmp_hws_tile; // replace L2 with core - __kmp_hws_tile.num = 0; - } else { - // L2 and core are both requested, but represent same object - KMP_WARNING(AffHWSubsetInvalid); + if (tile_support) { + // get num tiles in node + int NL = 
__kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); + if (__kmp_hws_tile.num == 0) { + __kmp_hws_tile.num = NL + 1; + } // use all available tiles, some node may have more tiles, thus +1 + if (__kmp_hws_tile.offset >= NL) { + KMP_WARNING(AffHWSubsetManyTiles); goto _exit; } - } - // end of check of extensions ----------------------------------- - - // fill in unset items, validate settings ----------------------- - if (__kmp_hws_socket.num == 0) - __kmp_hws_socket.num = nPackages; // use all available sockets - if (__kmp_hws_socket.offset >= nPackages) { - KMP_WARNING(AffHWSubsetManySockets); + int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, + &hC); // num cores in tile + if (__kmp_hws_core.num == 0) + __kmp_hws_core.num = NC; // use all available cores + if (__kmp_hws_core.offset >= NC) { + KMP_WARNING(AffHWSubsetManyCores); goto _exit; - } - if (numa_support) { - int NN = __kmp_hwloc_count_children_by_type( - tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in socket - if (__kmp_hws_node.num == 0) - __kmp_hws_node.num = NN; // use all available nodes - if (__kmp_hws_node.offset >= NN) { - KMP_WARNING(AffHWSubsetManyNodes); + } + } else { // tile_support + int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, + &hC); // num cores in node + if (__kmp_hws_core.num == 0) + __kmp_hws_core.num = NC; // use all available cores + if (__kmp_hws_core.offset >= NC) { + KMP_WARNING(AffHWSubsetManyCores); goto _exit; } - if (tile_support) { - // get num tiles in node - int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); - if (__kmp_hws_tile.num == 0) { - __kmp_hws_tile.num = NL + 1; - } // use all available tiles, some node may have more tiles, thus +1 - if (__kmp_hws_tile.offset >= NL) { - KMP_WARNING(AffHWSubsetManyTiles); - goto _exit; - } - int NC = __kmp_hwloc_count_children_by_type( - tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = NC; // use all available cores - if (__kmp_hws_core.offset >= NC) { - KMP_WARNING(AffHWSubsetManyCores); - goto _exit; - } - } else { // tile_support - int NC = __kmp_hwloc_count_children_by_type( - tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in node - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = NC; // use all available cores - if (__kmp_hws_core.offset >= NC) { - KMP_WARNING(AffHWSubsetManyCores); - goto _exit; - } - } // tile_support - } else { // numa_support - if (tile_support) { - // get num tiles in socket - int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); - if (__kmp_hws_tile.num == 0) - __kmp_hws_tile.num = NL; // use all available tiles - if (__kmp_hws_tile.offset >= NL) { - KMP_WARNING(AffHWSubsetManyTiles); - goto _exit; - } - int NC = __kmp_hwloc_count_children_by_type( - tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in tile - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = NC; // use all available cores - if (__kmp_hws_core.offset >= NC) { - KMP_WARNING(AffHWSubsetManyCores); - goto _exit; - } - } else { // tile_support - int NC = __kmp_hwloc_count_children_by_type( - tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = NC; // use all available cores - if (__kmp_hws_core.offset >= NC) { - KMP_WARNING(AffHWSubsetManyCores); - goto _exit; - } - } // tile_support - } - if (__kmp_hws_proc.num == 0) - __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs - if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) { - KMP_WARNING(AffHWSubsetManyProcs); - goto 
_exit; - } - // end of validation -------------------------------------------- - - if (pAddr) // pAddr is NULL in case of affinity_none - newAddr = (AddrUnsPair *)__kmp_allocate( - sizeof(AddrUnsPair) * __kmp_avail_proc); // max size - // main loop to form HW subset ---------------------------------- - hS = NULL; - int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE); - for (int s = 0; s < NP; ++s) { - // Check Socket ----------------------------------------------- - hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS); - if (!__kmp_hwloc_obj_has_PUs(tp, hS)) - continue; // skip socket if all PUs are out of fullMask - ++nS; // only count objects those have PUs in affinity mask - if (nS <= __kmp_hws_socket.offset || - nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) { - n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket - continue; // move to next socket + } // tile_support + } else { // numa_support + if (tile_support) { + // get num tiles in socket + int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); + if (__kmp_hws_tile.num == 0) + __kmp_hws_tile.num = NL; // use all available tiles + if (__kmp_hws_tile.offset >= NL) { + KMP_WARNING(AffHWSubsetManyTiles); + goto _exit; } - nCr = 0; // count number of cores per socket - // socket requested, go down the topology tree - // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile) - if (numa_support) { - nN = 0; - hN = NULL; - int NN = __kmp_hwloc_count_children_by_type( - tp, hS, HWLOC_OBJ_NUMANODE, &hN); // num nodes in current socket - for (int n = 0; n < NN; ++n) { - // Check NUMA Node ---------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hN)) { - hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); - continue; // skip node if all PUs are out of fullMask - } - ++nN; - if (nN <= __kmp_hws_node.offset || - nN > __kmp_hws_node.num + __kmp_hws_node.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node - hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); - continue; // move to next node - } - // node requested, go down the topology tree - if (tile_support) { - nL = 0; - hL = NULL; - int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); - for (int l = 0; l < NL; ++l) { - // Check L2 (tile) ------------------------------------ - if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - continue; // skip tile if all PUs are out of fullMask - } - ++nL; - if (nL <= __kmp_hws_tile.offset || - nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) { - // skip tile as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - continue; // move to next tile - } - // tile requested, go down the topology tree - nC = 0; - hC = NULL; - int NC = __kmp_hwloc_count_children_by_type( - tp, hL, HWLOC_OBJ_CORE, &hC); // num cores in current tile - for (int c = 0; c < NC; ++c) { - // Check Core --------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - 
nTr = 0; - hT = NULL; - int NT = __kmp_hwloc_count_children_by_type( - tp, hC, HWLOC_OBJ_PU, &hT); // num procs in current core - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - ++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); - } // tiles loop - } else { // tile_support - // no tiles, check cores - nC = 0; - hC = NULL; - int NC = __kmp_hwloc_count_children_by_type( - tp, hN, HWLOC_OBJ_CORE, &hC); // num cores in current node - for (int c = 0; c < NC; ++c) { - // Check Core --------------------------------------- - if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // skip core if all PUs are out of fullMask - } - ++nC; - if (nC <= __kmp_hws_core.offset || - nC > __kmp_hws_core.num + __kmp_hws_core.offset) { - // skip node as not requested - n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - continue; // move to next node - } - // core requested, go down to PUs - nT = 0; - nTr = 0; - hT = NULL; - int NT = __kmp_hwloc_count_children_by_type( - tp, hC, HWLOC_OBJ_PU, &hT); - for (int t = 0; t < NT; ++t) { - // Check PU --------------------------------------- - idx = hT->os_index; - if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // skip PU if not in fullMask - } - ++nT; - if (nT <= __kmp_hws_proc.offset || - nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { - // skip PU - KMP_CPU_CLR(idx, __kmp_affin_fullMask); - ++n_old; - KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - continue; // move to next node - } - ++nTr; - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - ++n_new; - ++n_old; - hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); - } // threads loop - if (nTr > 0) { - ++nCr; // num cores per socket - ++nCo; // total num cores - if (nTr > nTpC) - nTpC = nTr; // calc max threads per core - } - hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); - } // cores loop - } // tiles support + int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, + &hC); // num cores in tile + if (__kmp_hws_core.num == 0) + __kmp_hws_core.num = NC; // use all available cores + if (__kmp_hws_core.offset >= NC) { + KMP_WARNING(AffHWSubsetManyCores); + goto _exit; + } + } else { // tile_support + int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, + &hC); // num cores in socket + if (__kmp_hws_core.num == 0) + __kmp_hws_core.num = NC; // use all available 
cores + if (__kmp_hws_core.offset >= NC) { + KMP_WARNING(AffHWSubsetManyCores); + goto _exit; + } + } // tile_support + } + if (__kmp_hws_proc.num == 0) + __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all available procs + if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) { + KMP_WARNING(AffHWSubsetManyProcs); + goto _exit; + } + // end of validation -------------------------------------------- + + if (pAddr) // pAddr is NULL in case of affinity_none + newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * + __kmp_avail_proc); // max size + // main loop to form HW subset ---------------------------------- + hS = NULL; + int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE); + for (int s = 0; s < NP; ++s) { + // Check Socket ----------------------------------------------- + hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS); + if (!__kmp_hwloc_obj_has_PUs(tp, hS)) + continue; // skip socket if all PUs are out of fullMask + ++nS; // only count objects those have PUs in affinity mask + if (nS <= __kmp_hws_socket.offset || + nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) { + n_old += __kmp_hwloc_skip_PUs_obj(tp, hS); // skip socket + continue; // move to next socket + } + nCr = 0; // count number of cores per socket + // socket requested, go down the topology tree + // check 4 cases: (+NUMA+Tile), (+NUMA-Tile), (-NUMA+Tile), (-NUMA-Tile) + if (numa_support) { + nN = 0; + hN = NULL; + // num nodes in current socket + int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, + &hN); + for (int n = 0; n < NN; ++n) { + // Check NUMA Node ---------------------------------------- + if (!__kmp_hwloc_obj_has_PUs(tp, hN)) { + hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); + continue; // skip node if all PUs are out of fullMask + } + ++nN; + if (nN <= __kmp_hws_node.offset || + nN > __kmp_hws_node.num + __kmp_hws_node.offset) { + // skip node as not requested + n_old += __kmp_hwloc_skip_PUs_obj(tp, hN); // skip node hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); - } // nodes loop - } else { // numa_support - // no NUMA support + continue; // move to next node + } + // node requested, go down the topology tree if (tile_support) { nL = 0; hL = NULL; - int NL = __kmp_hwloc_count_children_by_depth( - tp, hS, L2depth, &hL); // num tiles in current socket + int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL); for (int l = 0; l < NL; ++l) { // Check L2 (tile) ------------------------------------ if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { @@ -3811,8 +3292,9 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) // tile requested, go down the topology tree nC = 0; hC = NULL; - int NC = __kmp_hwloc_count_children_by_type( - tp, hL, HWLOC_OBJ_CORE, &hC); // num cores per tile + // num cores in current tile + int NC = __kmp_hwloc_count_children_by_type(tp, hL, + HWLOC_OBJ_CORE, &hC); for (int c = 0; c < NC; ++c) { // Check Core --------------------------------------- if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { @@ -3831,8 +3313,9 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) nT = 0; nTr = 0; hT = NULL; - int NT = __kmp_hwloc_count_children_by_type( - tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core + // num procs in current core + int NT = __kmp_hwloc_count_children_by_type(tp, hC, + HWLOC_OBJ_PU, &hT); for (int t = 0; t < NT; ++t) { // Check PU --------------------------------------- idx = hT->os_index; @@ -3871,10 +3354,11 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) // no tiles, check cores nC = 
0; hC = NULL; - int NC = __kmp_hwloc_count_children_by_type( - tp, hS, HWLOC_OBJ_CORE, &hC); // num cores in socket + // num cores in current node + int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, + &hC); for (int c = 0; c < NC; ++c) { - // Check Core ------------------------------------------- + // Check Core --------------------------------------- if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); continue; // skip core if all PUs are out of fullMask @@ -3891,8 +3375,8 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) nT = 0; nTr = 0; hT = NULL; - int NT = __kmp_hwloc_count_children_by_type( - tp, hC, HWLOC_OBJ_PU, &hT); // num procs per core + int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, + &hT); for (int t = 0; t < NT; ++t) { // Check PU --------------------------------------- idx = hT->os_index; @@ -3926,85 +3410,232 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); } // cores loop } // tiles support - } // numa_support - if (nCr > 0) { // found cores? - ++nPkg; // num sockets - if (nCr > nCpP) - nCpP = nCr; // calc max cores per socket - } - } // sockets loop - - // check the subset is valid - KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc); - KMP_DEBUG_ASSERT(nPkg > 0); - KMP_DEBUG_ASSERT(nCpP > 0); - KMP_DEBUG_ASSERT(nTpC > 0); - KMP_DEBUG_ASSERT(nCo > 0); - KMP_DEBUG_ASSERT(nPkg <= nPackages); - KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg); - KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore); - KMP_DEBUG_ASSERT(nCo <= __kmp_ncores); - - nPackages = nPkg; // correct num sockets - nCoresPerPkg = nCpP; // correct num cores per socket - __kmp_nThreadsPerCore = nTpC; // correct num threads per core - __kmp_avail_proc = n_new; // correct num procs - __kmp_ncores = nCo; // correct num cores - // hwloc topology method end - } else -#endif // KMP_USE_HWLOC - { - int n_old = 0, n_new = 0, proc_num = 0; - if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) { - KMP_WARNING(AffHWSubsetNoHWLOC); - goto _exit; + hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN); + } // nodes loop + } else { // numa_support + // no NUMA support + if (tile_support) { + nL = 0; + hL = NULL; + // num tiles in current socket + int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL); + for (int l = 0; l < NL; ++l) { + // Check L2 (tile) ------------------------------------ + if (!__kmp_hwloc_obj_has_PUs(tp, hL)) { + hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); + continue; // skip tile if all PUs are out of fullMask + } + ++nL; + if (nL <= __kmp_hws_tile.offset || + nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) { + // skip tile as not requested + n_old += __kmp_hwloc_skip_PUs_obj(tp, hL); // skip tile + hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); + continue; // move to next tile + } + // tile requested, go down the topology tree + nC = 0; + hC = NULL; + // num cores per tile + int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, + &hC); + for (int c = 0; c < NC; ++c) { + // Check Core --------------------------------------- + if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { + hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); + continue; // skip core if all PUs are out of fullMask + } + ++nC; + if (nC <= __kmp_hws_core.offset || + nC > __kmp_hws_core.num + __kmp_hws_core.offset) { + // skip node as not requested + n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core + hC = hwloc_get_next_obj_by_type(tp, 
HWLOC_OBJ_CORE, hC); + continue; // move to next node + } + // core requested, go down to PUs + nT = 0; + nTr = 0; + hT = NULL; + // num procs per core + int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, + &hT); + for (int t = 0; t < NT; ++t) { + // Check PU --------------------------------------- + idx = hT->os_index; + if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + continue; // skip PU if not in fullMask + } + ++nT; + if (nT <= __kmp_hws_proc.offset || + nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { + // skip PU + KMP_CPU_CLR(idx, __kmp_affin_fullMask); + ++n_old; + KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + continue; // move to next node + } + ++nTr; + if (pAddr) // collect requested thread's data + newAddr[n_new] = (*pAddr)[n_old]; + ++n_new; + ++n_old; + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + } // threads loop + if (nTr > 0) { + ++nCr; // num cores per socket + ++nCo; // total num cores + if (nTr > nTpC) + nTpC = nTr; // calc max threads per core + } + hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); + } // cores loop + hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL); + } // tiles loop + } else { // tile_support + // no tiles, check cores + nC = 0; + hC = NULL; + // num cores in socket + int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, + &hC); + for (int c = 0; c < NC; ++c) { + // Check Core ------------------------------------------- + if (!__kmp_hwloc_obj_has_PUs(tp, hC)) { + hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); + continue; // skip core if all PUs are out of fullMask + } + ++nC; + if (nC <= __kmp_hws_core.offset || + nC > __kmp_hws_core.num + __kmp_hws_core.offset) { + // skip node as not requested + n_old += __kmp_hwloc_skip_PUs_obj(tp, hC); // skip core + hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); + continue; // move to next node + } + // core requested, go down to PUs + nT = 0; + nTr = 0; + hT = NULL; + // num procs per core + int NT = __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, + &hT); + for (int t = 0; t < NT; ++t) { + // Check PU --------------------------------------- + idx = hT->os_index; + if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) { + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + continue; // skip PU if not in fullMask + } + ++nT; + if (nT <= __kmp_hws_proc.offset || + nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) { + // skip PU + KMP_CPU_CLR(idx, __kmp_affin_fullMask); + ++n_old; + KC_TRACE(200, ("KMP_HW_SUBSET: skipped proc %d\n", idx)); + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + continue; // move to next node + } + ++nTr; + if (pAddr) // collect requested thread's data + newAddr[n_new] = (*pAddr)[n_old]; + ++n_new; + ++n_old; + hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT); + } // threads loop + if (nTr > 0) { + ++nCr; // num cores per socket + ++nCo; // total num cores + if (nTr > nTpC) + nTpC = nTr; // calc max threads per core + } + hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC); + } // cores loop + } // tiles support + } // numa_support + if (nCr > 0) { // found cores? 
+ ++nPkg; // num sockets + if (nCr > nCpP) + nCpP = nCr; // calc max cores per socket } - if (__kmp_hws_socket.num == 0) - __kmp_hws_socket.num = nPackages; // use all available sockets - if (__kmp_hws_core.num == 0) - __kmp_hws_core.num = nCoresPerPkg; // use all available cores - if (__kmp_hws_proc.num == 0 || + } // sockets loop + + // check the subset is valid + KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc); + KMP_DEBUG_ASSERT(nPkg > 0); + KMP_DEBUG_ASSERT(nCpP > 0); + KMP_DEBUG_ASSERT(nTpC > 0); + KMP_DEBUG_ASSERT(nCo > 0); + KMP_DEBUG_ASSERT(nPkg <= nPackages); + KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg); + KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore); + KMP_DEBUG_ASSERT(nCo <= __kmp_ncores); + + nPackages = nPkg; // correct num sockets + nCoresPerPkg = nCpP; // correct num cores per socket + __kmp_nThreadsPerCore = nTpC; // correct num threads per core + __kmp_avail_proc = n_new; // correct num procs + __kmp_ncores = nCo; // correct num cores + // hwloc topology method end + } else +#endif // KMP_USE_HWLOC + { + int n_old = 0, n_new = 0, proc_num = 0; + if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) { + KMP_WARNING(AffHWSubsetNoHWLOC); + goto _exit; + } + if (__kmp_hws_socket.num == 0) + __kmp_hws_socket.num = nPackages; // use all available sockets + if (__kmp_hws_core.num == 0) + __kmp_hws_core.num = nCoresPerPkg; // use all available cores + if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore) - __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts - if ( !__kmp_affinity_uniform_topology() ) { - KMP_WARNING( AffHWSubsetNonUniform ); - goto _exit; // don't support non-uniform topology - } - if ( depth > 3 ) { - KMP_WARNING( AffHWSubsetNonThreeLevel ); - goto _exit; // don't support not-3-level topology - } - if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) { - KMP_WARNING(AffHWSubsetManySockets); - goto _exit; - } - if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) { - KMP_WARNING( AffHWSubsetManyCores ); - goto _exit; - } - // Form the requested subset - if (pAddr) // pAddr is NULL in case of affinity_none - newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) * - __kmp_hws_socket.num * __kmp_hws_core.num * __kmp_hws_proc.num); - for (int i = 0; i < nPackages; ++i) { - if (i < __kmp_hws_socket.offset || - i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) { - // skip not-requested socket - n_old += nCoresPerPkg * __kmp_nThreadsPerCore; - if (__kmp_pu_os_idx != NULL) { - // walk through skipped socket - for (int j = 0; j < nCoresPerPkg; ++j) { - for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { - KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); - ++proc_num; - } + __kmp_hws_proc.num = __kmp_nThreadsPerCore; // use all HW contexts + if ( !__kmp_affinity_uniform_topology() ) { + KMP_WARNING( AffHWSubsetNonUniform ); + goto _exit; // don't support non-uniform topology + } + if ( depth > 3 ) { + KMP_WARNING( AffHWSubsetNonThreeLevel ); + goto _exit; // don't support not-3-level topology + } + if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) { + KMP_WARNING(AffHWSubsetManySockets); + goto _exit; + } + if ( __kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg ) { + KMP_WARNING( AffHWSubsetManyCores ); + goto _exit; + } + // Form the requested subset + if (pAddr) // pAddr is NULL in case of affinity_none + newAddr = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * + __kmp_hws_socket.num * + __kmp_hws_core.num * + __kmp_hws_proc.num); + for (int i = 0; i < nPackages; ++i) { 
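At every level of the hwloc traversal above (sockets, NUMA nodes, tiles, cores, PUs), and again in the non-hwloc package loop that follows, KMP_HW_SUBSET applies the same windowing rule: count only objects that still have PUs in the full mask, skip the first `offset` of them, keep the next `num`, and clear the PUs of everything else from the mask. Below is a minimal standalone sketch of that rule; the TopoObj record and select_window helper are illustrative stand-ins, not runtime types.

// Sketch only: keep the window of objects whose 1-based usable count n
// satisfies offset < n <= offset + num, mirroring the counting style above.
#include <cstdio>
#include <vector>

struct TopoObj {   // hypothetical stand-in for an hwloc object
  int id;
  int usable_pus;  // PUs of this object still present in the full mask
};

static std::vector<TopoObj> select_window(const std::vector<TopoObj> &objs,
                                          int offset, int num) {
  std::vector<TopoObj> kept;
  int n = 0; // counts only objects with usable PUs, as the loops above do
  for (const TopoObj &o : objs) {
    if (o.usable_pus == 0)
      continue;                 // skip: all PUs already out of the mask
    ++n;
    if (n <= offset || n > offset + num)
      continue;                 // outside the requested window: skipped
    kept.push_back(o);          // inside the window: kept
  }
  return kept;
}

int main() {
  std::vector<TopoObj> cores = {{0, 2}, {1, 0}, {2, 2}, {3, 2}, {4, 2}};
  // offset=1, num=2: skip the first usable core, keep the next two.
  for (const TopoObj &c : select_window(cores, 1, 2))
    std::printf("kept core %d\n", c.id); // prints cores 2 and 3
  return 0;
}

In the real code the skipped objects additionally have their PUs cleared from __kmp_affin_fullMask and counted into n_old, which is later checked against __kmp_avail_proc.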
+ if (i < __kmp_hws_socket.offset || + i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) { + // skip not-requested socket + n_old += nCoresPerPkg * __kmp_nThreadsPerCore; + if (__kmp_pu_os_idx != NULL) { + // walk through skipped socket + for (int j = 0; j < nCoresPerPkg; ++j) { + for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { + KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); + ++proc_num; } } - } else { - // walk through requested socket - for (int j = 0; j < nCoresPerPkg; ++j) { - if (j < __kmp_hws_core.offset || - j >= __kmp_hws_core.offset + __kmp_hws_core.num) + } + } else { + // walk through requested socket + for (int j = 0; j < nCoresPerPkg; ++j) { + if (j < __kmp_hws_core.offset || + j >= __kmp_hws_core.offset + __kmp_hws_core.num) { // skip not-requested core n_old += __kmp_nThreadsPerCore; if (__kmp_pu_os_idx != NULL) { @@ -4014,1428 +3645,1350 @@ __kmp_apply_thread_places(AddrUnsPair **pAddr, int depth) } } } else { - // walk through requested core - for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { - if (k < __kmp_hws_proc.num) { - if (pAddr) // collect requested thread's data - newAddr[n_new] = (*pAddr)[n_old]; - n_new++; - } else { - if (__kmp_pu_os_idx != NULL) - KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); - } - n_old++; - ++proc_num; + // walk through requested core + for (int k = 0; k < __kmp_nThreadsPerCore; ++k) { + if (k < __kmp_hws_proc.num) { + if (pAddr) // collect requested thread's data + newAddr[n_new] = (*pAddr)[n_old]; + n_new++; + } else { + if (__kmp_pu_os_idx != NULL) + KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask); } + n_old++; + ++proc_num; } } } } - KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); - KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num * - __kmp_hws_proc.num); - nPackages = __kmp_hws_socket.num; // correct nPackages - nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg - __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore - __kmp_avail_proc = n_new; // correct avail_proc - __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores - } // non-hwloc topology method - if (pAddr) { - __kmp_free( *pAddr ); - *pAddr = newAddr; // replace old topology with new one - } - if (__kmp_affinity_verbose) { - char m[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask); - if (__kmp_affinity_respect_mask) { - KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m); - } else { - KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m); - } - KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc); - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - __kmp_str_buf_print(&buf, "%d", nPackages); - KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg, - __kmp_nThreadsPerCore, __kmp_ncores); - __kmp_str_buf_free(&buf); - } -_exit: - if (__kmp_pu_os_idx != NULL) { - __kmp_free(__kmp_pu_os_idx); - __kmp_pu_os_idx = NULL; } + KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore); + KMP_DEBUG_ASSERT(n_new == __kmp_hws_socket.num * __kmp_hws_core.num * + __kmp_hws_proc.num); + nPackages = __kmp_hws_socket.num; // correct nPackages + nCoresPerPkg = __kmp_hws_core.num; // correct nCoresPerPkg + __kmp_nThreadsPerCore = __kmp_hws_proc.num; // correct __kmp_nThreadsPerCore + __kmp_avail_proc = n_new; // correct avail_proc + __kmp_ncores = nPackages * __kmp_hws_core.num; // correct ncores + } // non-hwloc topology method + if (pAddr) { + __kmp_free( *pAddr 
); + *pAddr = newAddr; // replace old topology with new one + } + if (__kmp_affinity_verbose) { + char m[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(m,KMP_AFFIN_MASK_PRINT_LEN,__kmp_affin_fullMask); + if (__kmp_affinity_respect_mask) { + KMP_INFORM(InitOSProcSetRespect, "KMP_HW_SUBSET", m); + } else { + KMP_INFORM(InitOSProcSetNotRespect, "KMP_HW_SUBSET", m); + } + KMP_INFORM(AvailableOSProc, "KMP_HW_SUBSET", __kmp_avail_proc); + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_str_buf_print(&buf, "%d", nPackages); + KMP_INFORM(TopologyExtra, "KMP_HW_SUBSET", buf.str, nCoresPerPkg, + __kmp_nThreadsPerCore, __kmp_ncores); + __kmp_str_buf_free(&buf); + } + _exit: + if (__kmp_pu_os_idx != NULL) { + __kmp_free(__kmp_pu_os_idx); + __kmp_pu_os_idx = NULL; + } } -// -// This function figures out the deepest level at which there is at least one cluster/core -// with more than one processing unit bound to it. -// -static int -__kmp_affinity_find_core_level(const AddrUnsPair *address2os, int nprocs, int bottom_level) -{ - int core_level = 0; +// This function figures out the deepest level at which there is at least one +// cluster/core with more than one processing unit bound to it. +static int __kmp_affinity_find_core_level(const AddrUnsPair *address2os, + int nprocs, int bottom_level) { + int core_level = 0; - for( int i = 0; i < nprocs; i++ ) { - for( int j = bottom_level; j > 0; j-- ) { - if( address2os[i].first.labels[j] > 0 ) { - if( core_level < ( j - 1 ) ) { - core_level = j - 1; - } - } + for (int i = 0; i < nprocs; i++) { + for (int j = bottom_level; j > 0; j--) { + if (address2os[i].first.labels[j] > 0) { + if (core_level < (j - 1)) { + core_level = j - 1; } + } } - return core_level; + } + return core_level; } -// // This function counts number of clusters/cores at given level. -// -static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level) -{ - int ncores = 0; - int i, j; - - j = bottom_level; - for( i = 0; i < nprocs; i++ ) { - for ( j = bottom_level; j > core_level; j-- ) { - if( ( i + 1 ) < nprocs ) { - if( address2os[i + 1].first.labels[j] > 0 ) { - break; - } - } - } - if( j == core_level ) { - ncores++; +static int __kmp_affinity_compute_ncores(const AddrUnsPair *address2os, + int nprocs, int bottom_level, + int core_level) { + int ncores = 0; + int i, j; + + j = bottom_level; + for (i = 0; i < nprocs; i++) { + for (j = bottom_level; j > core_level; j--) { + if ((i + 1) < nprocs) { + if (address2os[i + 1].first.labels[j] > 0) { + break; } + } } - if( j > core_level ) { - // - // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one core. - // May occur when called from __kmp_affinity_find_core(). - // - ncores++; + if (j == core_level) { + ncores++; } - return ncores; + } + if (j > core_level) { + // In case of ( nprocs < __kmp_avail_proc ) we may end too deep and miss one + // core. May occur when called from __kmp_affinity_find_core(). + ncores++; + } + return ncores; } -// // This function finds to which cluster/core given processing unit is bound. 
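The two helpers reformatted above derive, from the address2os label table, the deepest level that still groups more than one processing unit and the number of distinct cores at that level. The sketch below reproduces the intent on a plain vector of per-proc label arrays; the Labels alias, find_core_level and count_cores are simplified stand-ins (in particular, count_cores counts distinct label prefixes rather than scanning the table in order as __kmp_affinity_compute_ncores does).

// Sketch only: labels[p][j] is the child index of processor p at topology
// level j (0 = package). Simplified illustration, not the runtime code.
#include <cstdio>
#include <set>
#include <vector>

using Labels = std::vector<int>;

// Deepest level whose children still contain more than one PU.
static int find_core_level(const std::vector<Labels> &t, int bottom_level) {
  int core_level = 0;
  for (const Labels &l : t)
    for (int j = bottom_level; j > 0; j--)
      if (l[j] > 0 && core_level < j - 1)
        core_level = j - 1;
  return core_level;
}

// Count distinct "cores", i.e. distinct label prefixes up to core_level.
static int count_cores(const std::vector<Labels> &t, int core_level) {
  std::set<Labels> prefixes;
  for (const Labels &l : t)
    prefixes.insert(Labels(l.begin(), l.begin() + core_level + 1));
  return (int)prefixes.size();
}

int main() {
  // 1 package, 2 cores, 2 hardware threads per core.
  std::vector<Labels> t = {{0, 0, 0}, {0, 0, 1}, {0, 1, 0}, {0, 1, 1}};
  int core_level = find_core_level(t, 2); // -> 1
  std::printf("core level %d, cores %d\n", core_level,
              count_cores(t, core_level)); // -> 1, 2
  return 0;
}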
-// -static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, int bottom_level, int core_level) -{ - return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, core_level) - 1; +static int __kmp_affinity_find_core(const AddrUnsPair *address2os, int proc, + int bottom_level, int core_level) { + return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level, + core_level) - 1; } -// -// This function finds maximal number of processing units bound to a cluster/core at given level. -// -static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, int nprocs, int bottom_level, int core_level) -{ - int maxprocpercore = 0; +// This function finds maximal number of processing units bound to a +// cluster/core at given level. +static int __kmp_affinity_max_proc_per_core(const AddrUnsPair *address2os, + int nprocs, int bottom_level, + int core_level) { + int maxprocpercore = 0; - if( core_level < bottom_level ) { - for( int i = 0; i < nprocs; i++ ) { - int percore = address2os[i].first.labels[core_level + 1] + 1; + if (core_level < bottom_level) { + for (int i = 0; i < nprocs; i++) { + int percore = address2os[i].first.labels[core_level + 1] + 1; - if( percore > maxprocpercore ) { - maxprocpercore = percore; - } - } - } else { - maxprocpercore = 1; + if (percore > maxprocpercore) { + maxprocpercore = percore; + } } - return maxprocpercore; + } else { + maxprocpercore = 1; + } + return maxprocpercore; } static AddrUnsPair *address2os = NULL; -static int * procarr = NULL; -static int __kmp_aff_depth = 0; - -#define KMP_EXIT_AFF_NONE \ - KMP_ASSERT(__kmp_affinity_type == affinity_none); \ - KMP_ASSERT(address2os == NULL); \ - __kmp_apply_thread_places(NULL, 0); \ - return; - -static int -__kmp_affinity_cmp_Address_child_num(const void *a, const void *b) -{ - const Address *aa = (const Address *)&(((AddrUnsPair *)a) - ->first); - const Address *bb = (const Address *)&(((AddrUnsPair *)b) - ->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); - KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); - for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { - int j = depth - i - 1; - if (aa->childNums[j] < bb->childNums[j]) return -1; - if (aa->childNums[j] > bb->childNums[j]) return 1; - } - for (; i < depth; i++) { - int j = i - __kmp_affinity_compact; - if (aa->childNums[j] < bb->childNums[j]) return -1; - if (aa->childNums[j] > bb->childNums[j]) return 1; - } - return 0; +static int *procarr = NULL; +static int __kmp_aff_depth = 0; + +#define KMP_EXIT_AFF_NONE \ + KMP_ASSERT(__kmp_affinity_type == affinity_none); \ + KMP_ASSERT(address2os == NULL); \ + __kmp_apply_thread_places(NULL, 0); \ + return; + +static int __kmp_affinity_cmp_Address_child_num(const void *a, const void *b) { + const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first); + const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first); + unsigned depth = aa->depth; + unsigned i; + KMP_DEBUG_ASSERT(depth == bb->depth); + KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth); + KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0); + for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) { + int j = depth - i - 1; + if (aa->childNums[j] < bb->childNums[j]) + return -1; + if (aa->childNums[j] > bb->childNums[j]) + return 1; + } + for (; i < depth; i++) { + int j = i - __kmp_affinity_compact; + if (aa->childNums[j] < bb->childNums[j]) + return -1; + if (aa->childNums[j] > 
bb->childNums[j]) + return 1; + } + return 0; } -static void -__kmp_aux_affinity_initialize(void) -{ - if (__kmp_affinity_masks != NULL) { - KMP_ASSERT(__kmp_affin_fullMask != NULL); - return; - } - - // - // Create the "full" mask - this defines all of the processors that we - // consider to be in the machine model. If respect is set, then it is - // the initialization thread's affinity mask. Otherwise, it is all - // processors that we know about on the machine. - // - if (__kmp_affin_fullMask == NULL) { - KMP_CPU_ALLOC(__kmp_affin_fullMask); - } - if (KMP_AFFINITY_CAPABLE()) { - if (__kmp_affinity_respect_mask) { - __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); - - // - // Count the number of available processors. - // - unsigned i; - __kmp_avail_proc = 0; - KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { - if (! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { - continue; - } - __kmp_avail_proc++; - } - if (__kmp_avail_proc > __kmp_xproc) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(ErrorInitializeAffinity); - } - __kmp_affinity_type = affinity_none; - KMP_AFFINITY_DISABLE(); - return; - } - } - else { - __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); - __kmp_avail_proc = __kmp_xproc; +static void __kmp_aux_affinity_initialize(void) { + if (__kmp_affinity_masks != NULL) { + KMP_ASSERT(__kmp_affin_fullMask != NULL); + return; + } + + // Create the "full" mask - this defines all of the processors that we + // consider to be in the machine model. If respect is set, then it is the + // initialization thread's affinity mask. Otherwise, it is all processors that + // we know about on the machine. + if (__kmp_affin_fullMask == NULL) { + KMP_CPU_ALLOC(__kmp_affin_fullMask); + } + if (KMP_AFFINITY_CAPABLE()) { + if (__kmp_affinity_respect_mask) { + __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE); + + // Count the number of available processors. + unsigned i; + __kmp_avail_proc = 0; + KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) { + if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) { + continue; + } + __kmp_avail_proc++; + } + if (__kmp_avail_proc > __kmp_xproc) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(ErrorInitializeAffinity); } + __kmp_affinity_type = affinity_none; + KMP_AFFINITY_DISABLE(); + return; + } + } else { + __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask); + __kmp_avail_proc = __kmp_xproc; } + } - int depth = -1; - kmp_i18n_id_t msg_id = kmp_i18n_null; + int depth = -1; + kmp_i18n_id_t msg_id = kmp_i18n_null; - // - // For backward compatibility, setting KMP_CPUINFO_FILE => - // KMP_TOPOLOGY_METHOD=cpuinfo - // - if ((__kmp_cpuinfo_file != NULL) && + // For backward compatibility, setting KMP_CPUINFO_FILE => + // KMP_TOPOLOGY_METHOD=cpuinfo + if ((__kmp_cpuinfo_file != NULL) && (__kmp_affinity_top_method == affinity_top_method_all)) { - __kmp_affinity_top_method = affinity_top_method_cpuinfo; - } - - if (__kmp_affinity_top_method == affinity_top_method_all) { - // - // In the default code path, errors are not fatal - we just try using - // another method. We only emit a warning message if affinity is on, - // or the verbose flag is set, an the nowarnings flag was not set. 
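When KMP_AFFINITY respects the startup mask, __kmp_aux_affinity_initialize above derives __kmp_avail_proc by counting the processors left in the inherited mask and falls back to affinity_none when that count exceeds the number of processors the runtime detected. A rough standalone sketch of that check, with std::bitset standing in for kmp_affin_mask_t and MAX_PROCS chosen arbitrarily for illustration:

// Sketch only: count usable processors from an inherited affinity mask and
// apply the same sanity check as the code above.
#include <bitset>
#include <cstdio>

constexpr int MAX_PROCS = 256;

static int count_available(const std::bitset<MAX_PROCS> &full_mask,
                           int known_procs, bool &affinity_disabled) {
  int avail = (int)full_mask.count();
  // More set bits than processors we detected: the topology information is
  // unreliable, so affinity is turned off, as the code above does.
  affinity_disabled = (avail > known_procs);
  return avail;
}

int main() {
  std::bitset<MAX_PROCS> mask;
  for (int i = 0; i < 8; ++i)
    mask.set(i);          // pretend the process inherited 8 usable CPUs
  bool disabled = false;
  int avail = count_available(mask, /*known_procs=*/16, disabled);
  std::printf("avail=%d disabled=%d\n", avail, (int)disabled);
  return 0;
}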
- // - const char *file_name = NULL; - int line = 0; -# if KMP_USE_HWLOC - if (depth < 0 && __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - } - if(!__kmp_hwloc_error) { - depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } else if(depth < 0 && __kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); - } - } else if(__kmp_affinity_verbose) { - KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); - } - } -# endif - -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - - if (depth < 0) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); - } - - file_name = NULL; - depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - - if (depth < 0) { - if (__kmp_affinity_verbose) { - if (msg_id != kmp_i18n_null) { - KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), - KMP_I18N_STR(DecodingLegacyAPIC)); - } - else { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); - } - } - - file_name = NULL; - depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - } - } - -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -# if KMP_OS_LINUX - - if (depth < 0) { - if (__kmp_affinity_verbose) { - if (msg_id != kmp_i18n_null) { - KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); - } - else { - KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); - } - } - - FILE *f = fopen("/proc/cpuinfo", "r"); - if (f == NULL) { - msg_id = kmp_i18n_str_CantOpenCpuinfo; - } - else { - file_name = "/proc/cpuinfo"; - depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); - fclose(f); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - } - } - -# endif /* KMP_OS_LINUX */ - -# if KMP_GROUP_AFFINITY - - if ((depth < 0) && (__kmp_num_proc_groups > 1)) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); - KMP_ASSERT(depth != 0); - } - -# endif /* KMP_GROUP_AFFINITY */ - - if (depth < 0) { - if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { - if (file_name == NULL) { - KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id)); - } - else if (line == 0) { - KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); - } - else { - KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id)); - } - } - // FIXME - print msg if msg_id = kmp_i18n_null ??? - - file_name = ""; - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); + __kmp_affinity_top_method = affinity_top_method_cpuinfo; + } + + if (__kmp_affinity_top_method == affinity_top_method_all) { + // In the default code path, errors are not fatal - we just try using + // another method. We only emit a warning message if affinity is on, or the + // verbose flag is set, an the nowarnings flag was not set. 
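In the affinity_top_method_all branch being reformatted here, every discovery method reports a depth: a positive value means the topology map was built, zero means affinity ends up disabled (KMP_EXIT_AFF_NONE), and a negative value means the next method should be tried. The sketch below isolates that control flow; the Detector typedef and the detect_* functions are illustrative only, not the runtime's API.

// Sketch only: a prioritized fallback over topology detectors, mirroring the
// depth convention above (>0 success, 0 -> no affinity, <0 -> try next).
#include <cstdio>
#include <vector>

using Detector = int (*)();          // hypothetical detection entry point

static int detect_hwloc()   { return -1; } // pretend hwloc is unavailable
static int detect_x2apic()  { return -1; } // pretend the CPUID leaf is missing
static int detect_cpuinfo() { return 3;  } // pretend /proc/cpuinfo worked

static int detect_topology(const std::vector<Detector> &methods) {
  for (Detector d : methods) {
    int depth = d();
    if (depth == 0)
      return 0;       // detector decided affinity should be disabled
    if (depth > 0)
      return depth;   // success: use this map
    // depth < 0: non-fatal failure in the default path, try the next method
  }
  return -1;          // nothing worked; the caller falls back to a flat map
}

int main() {
  int depth = detect_topology({detect_hwloc, detect_x2apic, detect_cpuinfo});
  std::printf("topology depth: %d\n", depth); // prints 3 in this toy setup
  return 0;
}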
+ const char *file_name = NULL; + int line = 0; +#if KMP_USE_HWLOC + if (depth < 0 && + __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); + } + if (!__kmp_hwloc_error) { + depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; + } else if (depth < 0 && __kmp_affinity_verbose) { + KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); } + } else if (__kmp_affinity_verbose) { + KMP_INFORM(AffIgnoringHwloc, "KMP_AFFINITY"); + } } +#endif - // - // If the user has specified that a paricular topology discovery method - // is to be used, then we abort if that method fails. The exception is - // group affinity, which might have been implicitly set. - // +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 + if (depth < 0) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); + } - else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffInfoStr, "KMP_AFFINITY", - KMP_I18N_STR(Decodingx2APIC)); - } + file_name = NULL; + depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; + } - depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - else if (__kmp_affinity_top_method == affinity_top_method_apicid) { + if (depth < 0) { if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { + KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", + __kmp_i18n_catgets(msg_id), + KMP_I18N_STR(DecodingLegacyAPIC)); + } else { KMP_INFORM(AffInfoStr, "KMP_AFFINITY", - KMP_I18N_STR(DecodingLegacyAPIC)); + KMP_I18N_STR(DecodingLegacyAPIC)); + } } + file_name = NULL; depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); + KMP_EXIT_AFF_NONE; } + } } -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { - const char *filename; - if (__kmp_cpuinfo_file != NULL) { - filename = __kmp_cpuinfo_file; - } - else { - filename = "/proc/cpuinfo"; - } +#if KMP_OS_LINUX - if (__kmp_affinity_verbose) { - KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); + if (depth < 0) { + if (__kmp_affinity_verbose) { + if (msg_id != kmp_i18n_null) { + KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", + __kmp_i18n_catgets(msg_id), "/proc/cpuinfo"); + } else { + KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo"); } + } - FILE *f = fopen(filename, "r"); - if (f == NULL) { - int code = errno; - if (__kmp_cpuinfo_file != NULL) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(CantOpenFileForReading, filename), - KMP_ERR(code), - KMP_HNT(NameComesFrom_CPUINFO_FILE), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG(CantOpenFileForReading, filename), - KMP_ERR(code), - __kmp_msg_null - ); - } - } - int line = 0; - depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); + FILE *f = fopen("/proc/cpuinfo", "r"); + if (f == NULL) { + msg_id = kmp_i18n_str_CantOpenCpuinfo; + } else { + file_name = "/proc/cpuinfo"; + depth = + 
__kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); fclose(f); - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - if (line > 0) { - KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id)); - } - else { - KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id)); - } - } - if (__kmp_affinity_type == affinity_none) { - KMP_ASSERT(depth == 0); - KMP_EXIT_AFF_NONE; - } - } - -# if KMP_GROUP_AFFINITY - - else if (__kmp_affinity_top_method == affinity_top_method_group) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); - KMP_ASSERT(depth != 0); - if (depth < 0) { - KMP_ASSERT(msg_id != kmp_i18n_null); - KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); - } - } - -# endif /* KMP_GROUP_AFFINITY */ - - else if (__kmp_affinity_top_method == affinity_top_method_flat) { - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY"); - } - - depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); if (depth == 0) { - KMP_EXIT_AFF_NONE; + KMP_EXIT_AFF_NONE; } - // should not fail - KMP_ASSERT(depth > 0); - KMP_ASSERT(address2os != NULL); + } } -# if KMP_USE_HWLOC - else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { - KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC); - if (__kmp_affinity_verbose) { - KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); - } - depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); - if (depth == 0) { - KMP_EXIT_AFF_NONE; - } - } -# endif // KMP_USE_HWLOC +#endif /* KMP_OS_LINUX */ - if (address2os == NULL) { - if (KMP_AFFINITY_CAPABLE() - && (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none)))) { - KMP_WARNING(ErrorInitializeAffinity); - } - __kmp_affinity_type = affinity_none; - KMP_AFFINITY_DISABLE(); - return; - } +#if KMP_GROUP_AFFINITY - __kmp_apply_thread_places(&address2os, depth); + if ((depth < 0) && (__kmp_num_proc_groups > 1)) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); + } - // - // Create the table of masks, indexed by thread Id. - // - unsigned maxIndex; - unsigned numUnique; - kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique, - address2os, __kmp_avail_proc); - if (__kmp_affinity_gran_levels == 0) { - KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); + depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); + KMP_ASSERT(depth != 0); } - // - // Set the childNums vector in all Address objects. This must be done - // before we can sort using __kmp_affinity_cmp_Address_child_num(), - // which takes into account the setting of __kmp_affinity_compact. 
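By contrast, in the branches above where KMP_TOPOLOGY_METHOD pins one specific detector, a negative depth is not recoverable: the code asserts that a message id was produced and terminates through KMP_FATAL instead of falling through to another method. A loose illustration of that strict mode follows, using an ordinary exception in place of the runtime's fatal-message machinery; create_map_from_cpuinfo is a made-up stand-in.

// Sketch only: with a user-required method, failure aborts instead of
// falling back. The runtime uses KMP_FATAL and i18n message ids, not
// exceptions; this is only the control-flow shape.
#include <cstdio>
#include <stdexcept>

static int create_map_from_cpuinfo(const char *path) {
  (void)path;
  return -1; // pretend the file could not be parsed
}

static int detect_with_required_method(const char *path) {
  int depth = create_map_from_cpuinfo(path);
  if (depth < 0)
    throw std::runtime_error("topology detection failed for required method");
  return depth;
}

int main() {
  try {
    detect_with_required_method("/proc/cpuinfo");
  } catch (const std::exception &e) {
    std::printf("fatal: %s\n", e.what()); // the runtime would abort here
  }
  return 0;
}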
- // - __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); - - switch (__kmp_affinity_type) { - - case affinity_explicit: - KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); -# if OMP_40_ENABLED - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) -# endif - { - __kmp_affinity_process_proclist(&__kmp_affinity_masks, - &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, - maxIndex); - } -# if OMP_40_ENABLED - else { - __kmp_affinity_process_placelist(&__kmp_affinity_masks, - &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask, - maxIndex); - } -# endif - if (__kmp_affinity_num_masks == 0) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none))) { - KMP_WARNING(AffNoValidProcID); - } - __kmp_affinity_type = affinity_none; - return; - } - break; - - // - // The other affinity types rely on sorting the Addresses according - // to some permutation of the machine topology tree. Set - // __kmp_affinity_compact and __kmp_affinity_offset appropriately, - // then jump to a common code fragment to do the sort and create - // the array of affinity masks. - // - - case affinity_logical: - __kmp_affinity_compact = 0; - if (__kmp_affinity_offset) { - __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset - % __kmp_avail_proc; - } - goto sortAddresses; +#endif /* KMP_GROUP_AFFINITY */ - case affinity_physical: - if (__kmp_nThreadsPerCore > 1) { - __kmp_affinity_compact = 1; - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } + if (depth < 0) { + if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) { + if (file_name == NULL) { + KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id)); + } else if (line == 0) { + KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id)); } else { - __kmp_affinity_compact = 0; - } - if (__kmp_affinity_offset) { - __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset - % __kmp_avail_proc; - } - goto sortAddresses; - - case affinity_scatter: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = 0; - } - else { - __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; - } - goto sortAddresses; - - case affinity_compact: - if (__kmp_affinity_compact >= depth) { - __kmp_affinity_compact = depth - 1; - } - goto sortAddresses; - - case affinity_balanced: - if( depth <= 1 ) { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" ); - } - __kmp_affinity_type = affinity_none; - return; - } else if( __kmp_affinity_uniform_topology() ) { - break; - } else { // Non-uniform topology - - // Save the depth for further usage - __kmp_aff_depth = depth; - - int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, depth - 1); - int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, depth - 1, core_level); - int maxprocpercore = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, depth - 1, core_level); - - int nproc = ncores * maxprocpercore; - if( ( nproc < 2 ) || ( nproc < __kmp_avail_proc ) ) { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" ); - } - __kmp_affinity_type = affinity_none; - return; - } - - procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); - for( int i = 0; i < nproc; i++ ) { - procarr[ i ] = -1; - } - - int lastcore = -1; - int inlastcore = 0; - for( int i = 0; i < __kmp_avail_proc; i++ ) { - int proc = address2os[ i ].second; - 
int core = __kmp_affinity_find_core(address2os, i, depth - 1, core_level); - - if ( core == lastcore ) { - inlastcore++; - } else { - inlastcore = 0; - } - lastcore = core; - - procarr[ core * maxprocpercore + inlastcore ] = proc; - } - - break; - } - - sortAddresses: - // - // Allocate the gtid->affinity mask table. - // - if (__kmp_affinity_dups) { - __kmp_affinity_num_masks = __kmp_avail_proc; - } - else { - __kmp_affinity_num_masks = numUnique; - } - -# if OMP_40_ENABLED - if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) - && ( __kmp_affinity_num_places > 0 ) - && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) { - __kmp_affinity_num_masks = __kmp_affinity_num_places; - } -# endif - - KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - - // - // Sort the address2os table according to the current setting of - // __kmp_affinity_compact, then fill out __kmp_affinity_masks. - // - qsort(address2os, __kmp_avail_proc, sizeof(*address2os), - __kmp_affinity_cmp_Address_child_num); - { - int i; - unsigned j; - for (i = 0, j = 0; i < __kmp_avail_proc; i++) { - if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) { - continue; - } - unsigned osId = address2os[i].second; - kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId); - kmp_affin_mask_t *dest - = KMP_CPU_INDEX(__kmp_affinity_masks, j); - KMP_ASSERT(KMP_CPU_ISSET(osId, src)); - KMP_CPU_COPY(dest, src); - if (++j >= __kmp_affinity_num_masks) { - break; - } - } - KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks); + KMP_INFORM(UsingFlatOSFileLine, file_name, line, + __kmp_i18n_catgets(msg_id)); } - break; + } + // FIXME - print msg if msg_id = kmp_i18n_null ??? - default: - KMP_ASSERT2(0, "Unexpected affinity setting"); + file_name = ""; + depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; + } + KMP_ASSERT(depth > 0); + KMP_ASSERT(address2os != NULL); } + } - KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex+1); - machine_hierarchy.init(address2os, __kmp_avail_proc); -} -#undef KMP_EXIT_AFF_NONE +// If the user has specified that a paricular topology discovery method is to be +// used, then we abort if that method fails. The exception is group affinity, +// which might have been implicitly set. +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 -void -__kmp_affinity_initialize(void) -{ - // - // Much of the code above was written assumming that if a machine was not - // affinity capable, then __kmp_affinity_type == affinity_none. We now - // explicitly represent this as __kmp_affinity_type == affinity_disabled. - // - // There are too many checks for __kmp_affinity_type == affinity_none - // in this code. Instead of trying to change them all, check if - // __kmp_affinity_type == affinity_disabled, and if so, slam it with - // affinity_none, call the real initialization routine, then restore - // __kmp_affinity_type to affinity_disabled. - // - int disabled = (__kmp_affinity_type == affinity_disabled); - if (! 
KMP_AFFINITY_CAPABLE()) { - KMP_ASSERT(disabled); - } - if (disabled) { - __kmp_affinity_type = affinity_none; - } - __kmp_aux_affinity_initialize(); - if (disabled) { - __kmp_affinity_type = affinity_disabled; + else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC)); } -} - -void -__kmp_affinity_uninitialize(void) -{ - if (__kmp_affinity_masks != NULL) { - KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - __kmp_affinity_masks = NULL; - } - if (__kmp_affin_fullMask != NULL) { - KMP_CPU_FREE(__kmp_affin_fullMask); - __kmp_affin_fullMask = NULL; - } - __kmp_affinity_num_masks = 0; - __kmp_affinity_type = affinity_default; -# if OMP_40_ENABLED - __kmp_affinity_num_places = 0; -# endif - if (__kmp_affinity_proclist != NULL) { - __kmp_free(__kmp_affinity_proclist); - __kmp_affinity_proclist = NULL; - } - if( address2os != NULL ) { - __kmp_free( address2os ); - address2os = NULL; - } - if( procarr != NULL ) { - __kmp_free( procarr ); - procarr = NULL; - } -# if KMP_USE_HWLOC - if (__kmp_hwloc_topology != NULL) { - hwloc_topology_destroy(__kmp_hwloc_topology); - __kmp_hwloc_topology = NULL; - } -# endif - KMPAffinity::destroy_api(); -} - - -void -__kmp_affinity_set_init_mask(int gtid, int isa_root) -{ - if (! KMP_AFFINITY_CAPABLE()) { - return; + depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; } - - kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); - if (th->th.th_affin_mask == NULL) { - KMP_CPU_ALLOC(th->th.th_affin_mask); + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); } - else { - KMP_CPU_ZERO(th->th.th_affin_mask); + } else if (__kmp_affinity_top_method == affinity_top_method_apicid) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC)); } - // - // Copy the thread mask to the kmp_info_t strucuture. - // If __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one - // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask - // is set, then the full mask is the same as the mask of the initialization - // thread. - // - kmp_affin_mask_t *mask; - int i; - -# if OMP_40_ENABLED - if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) -# endif - { - if ((__kmp_affinity_type == affinity_none) || (__kmp_affinity_type == affinity_balanced) - ) { -# if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -# endif - KMP_ASSERT(__kmp_affin_fullMask != NULL); - i = KMP_PLACE_ALL; - mask = __kmp_affin_fullMask; - } - else { - KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } + depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; } -# if OMP_40_ENABLED - else { - if ((! isa_root) - || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { -# if KMP_GROUP_AFFINITY - if (__kmp_num_proc_groups > 1) { - return; - } -# endif - KMP_ASSERT(__kmp_affin_fullMask != NULL); - i = KMP_PLACE_ALL; - mask = __kmp_affin_fullMask; - } - else { - // - // int i = some hash function or just a counter that doesn't - // always start at 0. Use gtid for now. 
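Both the removed and the reformatted versions of __kmp_affinity_set_init_mask resolve the "hash function or counter" mentioned in the comment above the same way: the place index is simply (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks. A minimal illustration of that round-robin assignment, with plain ints standing in for the runtime's globals:

// Sketch only: round-robin selection of an affinity mask index per thread,
// i = (gtid + offset) % num_masks, as used by __kmp_affinity_set_init_mask.
#include <cstdio>

static int place_for_thread(int gtid, int offset, int num_masks) {
  return (gtid + offset) % num_masks;
}

int main() {
  const int num_masks = 4, offset = 1;
  for (int gtid = 0; gtid < 6; ++gtid)
    std::printf("T#%d -> place %d\n", gtid,
                place_for_thread(gtid, offset, num_masks));
  // T#0 -> 1, T#1 -> 2, T#2 -> 3, T#3 -> 0, T#4 -> 1, T#5 -> 2
  return 0;
}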
- // - KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 ); - i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; - mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); - } + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); } -# endif + } + +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -# if OMP_40_ENABLED - th->th.th_current_place = i; - if (isa_root) { - th->th.th_new_place = i; - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; + else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) { + const char *filename; + if (__kmp_cpuinfo_file != NULL) { + filename = __kmp_cpuinfo_file; + } else { + filename = "/proc/cpuinfo"; } - if (i == KMP_PLACE_ALL) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", - gtid)); + if (__kmp_affinity_verbose) { + KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename); } - else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", - gtid, i)); + + FILE *f = fopen(filename, "r"); + if (f == NULL) { + int code = errno; + if (__kmp_cpuinfo_file != NULL) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename), + KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE), + __kmp_msg_null); + } else { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename), + KMP_ERR(code), __kmp_msg_null); + } } -# else - if (i == -1) { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n", - gtid)); + int line = 0; + depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f); + fclose(f); + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + if (line > 0) { + KMP_FATAL(FileLineMsgExiting, filename, line, + __kmp_i18n_catgets(msg_id)); + } else { + KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id)); + } } - else { - KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", - gtid, i)); + if (__kmp_affinity_type == affinity_none) { + KMP_ASSERT(depth == 0); + KMP_EXIT_AFF_NONE; } -# endif /* OMP_40_ENABLED */ + } - KMP_CPU_COPY(th->th.th_affin_mask, mask); +#if KMP_GROUP_AFFINITY + else if (__kmp_affinity_top_method == affinity_top_method_group) { if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),__kmp_gettid(), gtid, buf); + KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY"); } -# if KMP_OS_WINDOWS - // - // On Windows* OS, the process affinity mask might have changed. - // If the user didn't request affinity and this call fails, - // just continue silently. See CQ171393. 
- // - if ( __kmp_affinity_type == affinity_none ) { - __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); + depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id); + KMP_ASSERT(depth != 0); + if (depth < 0) { + KMP_ASSERT(msg_id != kmp_i18n_null); + KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id)); } - else -# endif - __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); -} + } +#endif /* KMP_GROUP_AFFINITY */ -# if OMP_40_ENABLED + else if (__kmp_affinity_top_method == affinity_top_method_flat) { + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY"); + } -void -__kmp_affinity_set_place(int gtid) -{ - int retval; + depth = __kmp_affinity_create_flat_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; + } + // should not fail + KMP_ASSERT(depth > 0); + KMP_ASSERT(address2os != NULL); + } - if (! KMP_AFFINITY_CAPABLE()) { - return; +#if KMP_USE_HWLOC + else if (__kmp_affinity_top_method == affinity_top_method_hwloc) { + KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC); + if (__kmp_affinity_verbose) { + KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY"); } + depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id); + if (depth == 0) { + KMP_EXIT_AFF_NONE; + } + } +#endif // KMP_USE_HWLOC - kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + if (address2os == NULL) { + if (KMP_AFFINITY_CAPABLE() && + (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) { + KMP_WARNING(ErrorInitializeAffinity); + } + __kmp_affinity_type = affinity_none; + KMP_AFFINITY_DISABLE(); + return; + } - KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n", - gtid, th->th.th_new_place, th->th.th_current_place)); + __kmp_apply_thread_places(&address2os, depth); - // - // Check that the new place is within this thread's partition. - // - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - KMP_ASSERT(th->th.th_new_place >= 0); - KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); - if (th->th.th_first_place <= th->th.th_last_place) { - KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) - && (th->th.th_new_place <= th->th.th_last_place)); + // Create the table of masks, indexed by thread Id. + unsigned maxIndex; + unsigned numUnique; + kmp_affin_mask_t *osId2Mask = + __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc); + if (__kmp_affinity_gran_levels == 0) { + KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc); + } + + // Set the childNums vector in all Address objects. This must be done before + // we can sort using __kmp_affinity_cmp_Address_child_num(), which takes into + // account the setting of __kmp_affinity_compact. 
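Before the common sort, the switch that follows translates each non-explicit affinity type into a value of __kmp_affinity_compact (how many trailing topology levels become most significant) and rescales __kmp_affinity_offset for the logical and physical types. The sketch below collects those assignments in one place; AffinityType, SortParams and sort_params are illustrative names, not runtime symbols.

// Sketch only: how the non-explicit affinity types pick the sort permutation
// and offset before the shared sortAddresses step. Plain parameters stand in
// for the runtime's globals.
#include <cstdio>

enum AffinityType { LOGICAL, PHYSICAL, SCATTER, COMPACT };

struct SortParams {
  int compact; // how many trailing topology levels become most significant
  int offset;  // offset later folded into per-thread place assignment
};

static SortParams sort_params(AffinityType type, int requested_compact,
                              int requested_offset, int depth,
                              int threads_per_core, int avail_proc) {
  SortParams p = {requested_compact, requested_offset};
  switch (type) {
  case LOGICAL:
    p.compact = 0;
    if (p.offset)
      p.offset = threads_per_core * p.offset % avail_proc;
    break;
  case PHYSICAL:
    p.compact = (threads_per_core > 1) ? 1 : 0;
    if (p.compact >= depth)
      p.compact = 0;
    if (p.offset)
      p.offset = threads_per_core * p.offset % avail_proc;
    break;
  case SCATTER:
    p.compact = (p.compact >= depth) ? 0 : depth - 1 - p.compact;
    break;
  case COMPACT:
    if (p.compact >= depth)
      p.compact = depth - 1;
    break;
  }
  return p;
}

int main() {
  // depth-3 machine, 2 threads per core, 8 usable procs.
  SortParams s = sort_params(SCATTER, /*compact=*/0, /*offset=*/0,
                             /*depth=*/3, /*threads_per_core=*/2,
                             /*avail_proc=*/8);
  std::printf("scatter: compact=%d offset=%d\n", s.compact, s.offset); // 2, 0
  return 0;
}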
+ __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc); + + switch (__kmp_affinity_type) { + + case affinity_explicit: + KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL); +#if OMP_40_ENABLED + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) +#endif + { + __kmp_affinity_process_proclist( + &__kmp_affinity_masks, &__kmp_affinity_num_masks, + __kmp_affinity_proclist, osId2Mask, maxIndex); } +#if OMP_40_ENABLED else { - KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) - || (th->th.th_new_place >= th->th.th_last_place)); + __kmp_affinity_process_placelist( + &__kmp_affinity_masks, &__kmp_affinity_num_masks, + __kmp_affinity_proclist, osId2Mask, maxIndex); + } +#endif + if (__kmp_affinity_num_masks == 0) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) { + KMP_WARNING(AffNoValidProcID); + } + __kmp_affinity_type = affinity_none; + return; } + break; - // - // Copy the thread mask to the kmp_info_t strucuture, - // and set this thread's affinity. - // - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, - th->th.th_new_place); - KMP_CPU_COPY(th->th.th_affin_mask, mask); - th->th.th_current_place = th->th.th_new_place; + // The other affinity types rely on sorting the Addresses according to some + // permutation of the machine topology tree. Set __kmp_affinity_compact and + // __kmp_affinity_offset appropriately, then jump to a common code fragment + // to do the sort and create the array of affinity masks. - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), __kmp_gettid(), gtid, buf); + case affinity_logical: + __kmp_affinity_compact = 0; + if (__kmp_affinity_offset) { + __kmp_affinity_offset = + __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; } - __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); -} - -# endif /* OMP_40_ENABLED */ + goto sortAddresses; + case affinity_physical: + if (__kmp_nThreadsPerCore > 1) { + __kmp_affinity_compact = 1; + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = 0; + } + } else { + __kmp_affinity_compact = 0; + } + if (__kmp_affinity_offset) { + __kmp_affinity_offset = + __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc; + } + goto sortAddresses; -int -__kmp_aux_set_affinity(void **mask) -{ - int gtid; - kmp_info_t *th; - int retval; + case affinity_scatter: + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = 0; + } else { + __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact; + } + goto sortAddresses; - if (! 
KMP_AFFINITY_CAPABLE()) { - return -1; + case affinity_compact: + if (__kmp_affinity_compact >= depth) { + __kmp_affinity_compact = depth - 1; } + goto sortAddresses; - gtid = __kmp_entry_gtid(); - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n", - gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - else { - unsigned proc; - int num_procs = 0; + case affinity_balanced: + if (depth <= 1) { + if (__kmp_affinity_verbose || __kmp_affinity_warnings) { + KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); + } + __kmp_affinity_type = affinity_none; + return; + } else if (__kmp_affinity_uniform_topology()) { + break; + } else { // Non-uniform topology - KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t*)(*mask))) { - if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) { - continue; - } - num_procs++; - } - if (num_procs == 0) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } + // Save the depth for further usage + __kmp_aff_depth = depth; -# if KMP_GROUP_AFFINITY - if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } -# endif /* KMP_GROUP_AFFINITY */ + int core_level = __kmp_affinity_find_core_level( + address2os, __kmp_avail_proc, depth - 1); + int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, + depth - 1, core_level); + int maxprocpercore = __kmp_affinity_max_proc_per_core( + address2os, __kmp_avail_proc, depth - 1, core_level); + int nproc = ncores * maxprocpercore; + if ((nproc < 2) || (nproc < __kmp_avail_proc)) { + if (__kmp_affinity_verbose || __kmp_affinity_warnings) { + KMP_WARNING(AffBalancedNotAvail, "KMP_AFFINITY"); } - } - - th = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); - retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); - if (retval == 0) { - KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask)); - } + __kmp_affinity_type = affinity_none; + return; + } -# if OMP_40_ENABLED - th->th.th_current_place = KMP_PLACE_UNDEFINED; - th->th.th_new_place = KMP_PLACE_UNDEFINED; - th->th.th_first_place = 0; - th->th.th_last_place = __kmp_affinity_num_masks - 1; + procarr = (int *)__kmp_allocate(sizeof(int) * nproc); + for (int i = 0; i < nproc; i++) { + procarr[i] = -1; + } - // - // Turn off 4.0 affinity for the current tread at this parallel level. - // - th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; -# endif + int lastcore = -1; + int inlastcore = 0; + for (int i = 0; i < __kmp_avail_proc; i++) { + int proc = address2os[i].second; + int core = + __kmp_affinity_find_core(address2os, i, depth - 1, core_level); - return retval; -} + if (core == lastcore) { + inlastcore++; + } else { + inlastcore = 0; + } + lastcore = core; + procarr[core * maxprocpercore + inlastcore] = proc; + } -int -__kmp_aux_get_affinity(void **mask) -{ - int gtid; - int retval; - kmp_info_t *th; + break; + } - if (! KMP_AFFINITY_CAPABLE()) { - return -1; + sortAddresses: + // Allocate the gtid->affinity mask table. 
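For affinity_balanced on a non-uniform topology, the code above flattens the machine into an ncores x maxprocpercore grid: each OS proc id is stored at (core, slot-within-core) and unused slots keep the value -1 so the balanced scheduler can skip them. Below is a standalone sketch of that layout built from a simplified (core, os id) list instead of address2os; build_procarr is a hypothetical helper.

// Sketch only: build the ncores x maxprocpercore grid used by balanced
// affinity, with -1 marking empty slots.
#include <cstdio>
#include <utility>
#include <vector>

static std::vector<int>
build_procarr(const std::vector<std::pair<int, int>> &procs, int ncores,
              int maxprocpercore) {
  std::vector<int> procarr(ncores * maxprocpercore, -1);
  int lastcore = -1, inlastcore = 0;
  for (const auto &p : procs) {    // procs must be ordered core by core
    int core = p.first, os_id = p.second;
    inlastcore = (core == lastcore) ? inlastcore + 1 : 0;
    lastcore = core;
    procarr[core * maxprocpercore + inlastcore] = os_id;
  }
  return procarr;
}

int main() {
  // Two cores: core 0 has 2 hardware threads, core 1 has only 1.
  std::vector<std::pair<int, int>> procs = {{0, 0}, {0, 4}, {1, 1}};
  std::vector<int> grid =
      build_procarr(procs, /*ncores=*/2, /*maxprocpercore=*/2);
  for (size_t i = 0; i < grid.size(); ++i)
    std::printf("slot %zu -> %d\n", i, grid[i]); // 0, 4, 1, -1
  return 0;
}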
+ if (__kmp_affinity_dups) { + __kmp_affinity_num_masks = __kmp_avail_proc; + } else { + __kmp_affinity_num_masks = numUnique; } - gtid = __kmp_entry_gtid(); - th = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); +#if OMP_40_ENABLED + if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && + (__kmp_affinity_num_places > 0) && + ((unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) { + __kmp_affinity_num_masks = __kmp_affinity_num_places; + } +#endif - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - th->th.th_affin_mask); - __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf); - }); + KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity"); + // Sort the address2os table according to the current setting of + // __kmp_affinity_compact, then fill out __kmp_affinity_masks. + qsort(address2os, __kmp_avail_proc, sizeof(*address2os), + __kmp_affinity_cmp_Address_child_num); + { + int i; + unsigned j; + for (i = 0, j = 0; i < __kmp_avail_proc; i++) { + if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) { + continue; + } + unsigned osId = address2os[i].second; + kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId); + kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j); + KMP_ASSERT(KMP_CPU_ISSET(osId, src)); + KMP_CPU_COPY(dest, src); + if (++j >= __kmp_affinity_num_masks) { + break; } + } + KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks); } + break; -# if !KMP_OS_WINDOWS - - retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); - KA_TRACE(1000, ;{ - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf); - }); - return retval; - -# else + default: + KMP_ASSERT2(0, "Unexpected affinity setting"); + } - KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); - return 0; + KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1); + machine_hierarchy.init(address2os, __kmp_avail_proc); +} +#undef KMP_EXIT_AFF_NONE -# endif /* KMP_OS_WINDOWS */ +void __kmp_affinity_initialize(void) { + // Much of the code above was written assumming that if a machine was not + // affinity capable, then __kmp_affinity_type == affinity_none. We now + // explicitly represent this as __kmp_affinity_type == affinity_disabled. + // There are too many checks for __kmp_affinity_type == affinity_none + // in this code. Instead of trying to change them all, check if + // __kmp_affinity_type == affinity_disabled, and if so, slam it with + // affinity_none, call the real initialization routine, then restore + // __kmp_affinity_type to affinity_disabled. 
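The mask table above is filled with a common idiom: qsort() the table with a comparator that encodes the chosen topology permutation, then keep one "leader" entry per group of equivalent addresses. A simplified, self-contained version of that idiom (the Item struct and cmp_items comparator are stand-ins, not the runtime's Address type or __kmp_affinity_cmp_Address_child_num, and the leader flag, which the runtime precomputes, is derived here from equal sort keys purely for illustration):

#include <cstdio>
#include <cstdlib>

struct Item {
  int key;  // stands in for the permuted topology labels being sorted on
  int osId; // stands in for address2os[i].second
};

static int cmp_items(const void *a, const void *b) {
  const Item *x = (const Item *)a;
  const Item *y = (const Item *)b;
  return (x->key > y->key) - (x->key < y->key);
}

int main() {
  Item items[] = {{2, 10}, {0, 11}, {2, 12}, {0, 13}};
  const int n = (int)(sizeof(items) / sizeof(items[0]));

  std::qsort(items, n, sizeof(Item), cmp_items);

  // One mask per leader: the first element of each run of equal keys.
  for (int i = 0; i < n; i++) {
    bool leader = (i == 0) || (items[i].key != items[i - 1].key);
    if (leader)
      std::printf("mask from osId %d (key %d)\n", items[i].osId, items[i].key);
  }
  return 0;
}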
+ int disabled = (__kmp_affinity_type == affinity_disabled); + if (!KMP_AFFINITY_CAPABLE()) { + KMP_ASSERT(disabled); + } + if (disabled) { + __kmp_affinity_type = affinity_none; + } + __kmp_aux_affinity_initialize(); + if (disabled) { + __kmp_affinity_type = affinity_disabled; + } +} +void __kmp_affinity_uninitialize(void) { + if (__kmp_affinity_masks != NULL) { + KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks); + __kmp_affinity_masks = NULL; + } + if (__kmp_affin_fullMask != NULL) { + KMP_CPU_FREE(__kmp_affin_fullMask); + __kmp_affin_fullMask = NULL; + } + __kmp_affinity_num_masks = 0; + __kmp_affinity_type = affinity_default; +#if OMP_40_ENABLED + __kmp_affinity_num_places = 0; +#endif + if (__kmp_affinity_proclist != NULL) { + __kmp_free(__kmp_affinity_proclist); + __kmp_affinity_proclist = NULL; + } + if (address2os != NULL) { + __kmp_free(address2os); + address2os = NULL; + } + if (procarr != NULL) { + __kmp_free(procarr); + procarr = NULL; + } +#if KMP_USE_HWLOC + if (__kmp_hwloc_topology != NULL) { + hwloc_topology_destroy(__kmp_hwloc_topology); + __kmp_hwloc_topology = NULL; + } +#endif + KMPAffinity::destroy_api(); } -int -__kmp_aux_get_affinity_max_proc() { - if (! KMP_AFFINITY_CAPABLE()) { - return 0; - } +void __kmp_affinity_set_init_mask(int gtid, int isa_root) { + if (!KMP_AFFINITY_CAPABLE()) { + return; + } + + kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + if (th->th.th_affin_mask == NULL) { + KMP_CPU_ALLOC(th->th.th_affin_mask); + } else { + KMP_CPU_ZERO(th->th.th_affin_mask); + } + + // Copy the thread mask to the kmp_info_t strucuture. If + // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one that + // has all of the OS proc ids set, or if __kmp_affinity_respect_mask is set, + // then the full mask is the same as the mask of the initialization thread. + kmp_affin_mask_t *mask; + int i; + +#if OMP_40_ENABLED + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) +#endif + { + if ((__kmp_affinity_type == affinity_none) || + (__kmp_affinity_type == affinity_balanced)) { +#if KMP_GROUP_AFFINITY + if (__kmp_num_proc_groups > 1) { + return; + } +#endif + KMP_ASSERT(__kmp_affin_fullMask != NULL); + i = KMP_PLACE_ALL; + mask = __kmp_affin_fullMask; + } else { + KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); + i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; + mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); + } + } +#if OMP_40_ENABLED + else { + if ((!isa_root) || + (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { #if KMP_GROUP_AFFINITY - if ( __kmp_num_proc_groups > 1 ) { - return (int)(__kmp_num_proc_groups*sizeof(DWORD_PTR)*CHAR_BIT); + if (__kmp_num_proc_groups > 1) { + return; + } +#endif + KMP_ASSERT(__kmp_affin_fullMask != NULL); + i = KMP_PLACE_ALL; + mask = __kmp_affin_fullMask; + } else { + // int i = some hash function or just a counter that doesn't + // always start at 0. Use gtid for now. + KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0); + i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks; + mask = KMP_CPU_INDEX(__kmp_affinity_masks, i); } + } #endif - return __kmp_xproc; -} - -int -__kmp_aux_set_affinity_mask_proc(int proc, void **mask) -{ - int retval; - if (! 
KMP_AFFINITY_CAPABLE()) { - return -1; - } +#if OMP_40_ENABLED + th->th.th_current_place = i; + if (isa_root) { + th->th.th_new_place = i; + th->th.th_first_place = 0; + th->th.th_last_place = __kmp_affinity_num_masks - 1; + } + + if (i == KMP_PLACE_ALL) { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", + gtid)); + } else { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", + gtid, i)); + } +#else + if (i == -1) { + KA_TRACE( + 100, + ("__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n", + gtid)); + } else { + KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", + gtid, i)); + } +#endif /* OMP_40_ENABLED */ + + KMP_CPU_COPY(th->th.th_affin_mask, mask); + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + th->th.th_affin_mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + __kmp_gettid(), gtid, buf); + } + +#if KMP_OS_WINDOWS + // On Windows* OS, the process affinity mask might have changed. If the user + // didn't request affinity and this call fails, just continue silently. + // See CQ171393. + if (__kmp_affinity_type == affinity_none) { + __kmp_set_system_affinity(th->th.th_affin_mask, FALSE); + } else +#endif + __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); +} - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc"); - } - } +#if OMP_40_ENABLED - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return -2; - } +void __kmp_affinity_set_place(int gtid) { + int retval; - KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask)); - return 0; + if (!KMP_AFFINITY_CAPABLE()) { + return; + } + + kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]); + + KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current " + "place = %d)\n", + gtid, th->th.th_new_place, th->th.th_current_place)); + + // Check that the new place is within this thread's partition. + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + KMP_ASSERT(th->th.th_new_place >= 0); + KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks); + if (th->th.th_first_place <= th->th.th_last_place) { + KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) && + (th->th.th_new_place <= th->th.th_last_place)); + } else { + KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) || + (th->th.th_new_place >= th->th.th_last_place)); + } + + // Copy the thread mask to the kmp_info_t strucuture, + // and set this thread's affinity. 
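The asserts above guard the invariant that the new place lies inside this thread's place partition, and a partition is allowed to wrap around the end of the place list (first_place > last_place). A minimal sketch of full membership in such a partition, as an illustrative helper only (the runtime expresses the check with the asserts above rather than with a function like this):

#include <cassert>

// first <= last : contiguous partition [first, last]
// first >  last : partition wraps, covering [first, end] and [0, last]
static bool place_in_partition(int place, int first, int last) {
  if (first <= last)
    return first <= place && place <= last;
  return place >= first || place <= last;
}

int main() {
  assert(place_in_partition(3, 2, 5));  // inside a contiguous [2, 5]
  assert(!place_in_partition(6, 2, 5));
  assert(place_in_partition(7, 6, 1));  // wrapped partition {6, 7, 0, 1}
  assert(!place_in_partition(3, 6, 1));
  return 0;
}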
+ kmp_affin_mask_t *mask = + KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place); + KMP_CPU_COPY(th->th.th_affin_mask, mask); + th->th.th_current_place = th->th.th_new_place; + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + th->th.th_affin_mask); + KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(), + __kmp_gettid(), gtid, buf); + } + __kmp_set_system_affinity(th->th.th_affin_mask, TRUE); } +#endif /* OMP_40_ENABLED */ + +int __kmp_aux_set_affinity(void **mask) { + int gtid; + kmp_info_t *th; + int retval; + + if (!KMP_AFFINITY_CAPABLE()) { + return -1; + } + + gtid = __kmp_entry_gtid(); + KA_TRACE(1000, ; { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf( + "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid, + buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } else { + unsigned proc; + int num_procs = 0; -int -__kmp_aux_unset_affinity_mask_proc(int proc, void **mask) -{ - int retval; + KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) { + if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) { + continue; + } + num_procs++; + } + if (num_procs == 0) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } - if (! KMP_AFFINITY_CAPABLE()) { - return -1; +#if KMP_GROUP_AFFINITY + if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } +#endif /* KMP_GROUP_AFFINITY */ } + } - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc"); - } - } + th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); + if (retval == 0) { + KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask)); + } - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (! KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return -2; - } +#if OMP_40_ENABLED + th->th.th_current_place = KMP_PLACE_UNDEFINED; + th->th.th_new_place = KMP_PLACE_UNDEFINED; + th->th.th_first_place = 0; + th->th.th_last_place = __kmp_affinity_num_masks - 1; - KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask)); - return 0; + // Turn off 4.0 affinity for the current tread at this parallel level. 
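Note that __kmp_aux_set_affinity above accepts a user-supplied mask only when it is non-empty and every set bit is also present in the full mask of usable procs. The same check reduced to a few lines, with std::bitset standing in for kmp_affin_mask_t and 64 procs assumed purely for illustration:

#include <bitset>
#include <cstdio>

static bool valid_user_mask(const std::bitset<64> &user,
                            const std::bitset<64> &full) {
  if (user.none())
    return false;               // empty masks are rejected
  return (user & ~full).none(); // no bit may fall outside the full mask
}

int main() {
  std::bitset<64> full, ok, bad;
  for (int i = 0; i < 8; i++)
    full.set(i);                // procs 0..7 are usable
  ok.set(2);
  ok.set(5);
  bad.set(2);
  bad.set(12);                  // proc 12 is outside the full mask
  std::printf("ok=%d bad=%d\n", (int)valid_user_mask(ok, full),
              (int)valid_user_mask(bad, full));
  return 0;
}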
+ th->th.th_current_task->td_icvs.proc_bind = proc_bind_false; +#endif + + return retval; } +int __kmp_aux_get_affinity(void **mask) { + int gtid; + int retval; + kmp_info_t *th; + + if (!KMP_AFFINITY_CAPABLE()) { + return -1; + } + + gtid = __kmp_entry_gtid(); + th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL); + + KA_TRACE(1000, ; { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + th->th.th_affin_mask); + __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", + gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity"); + } + } + +#if !KMP_OS_WINDOWS + + retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE); + KA_TRACE(1000, ; { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", + gtid, buf); + }); + return retval; -int -__kmp_aux_get_affinity_mask_proc(int proc, void **mask) -{ - int retval; +#else - if (! KMP_AFFINITY_CAPABLE()) { - return -1; - } + KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask); + return 0; - KA_TRACE(1000, ;{ - int gtid = __kmp_entry_gtid(); - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, - (kmp_affin_mask_t *)(*mask)); - __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n", - proc, gtid, buf); - }); - - if (__kmp_env_consistency_check) { - if ((mask == NULL) || (*mask == NULL)) { - KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc"); - } - } +#endif /* KMP_OS_WINDOWS */ +} - if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { - return -1; - } - if (! 
KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { - return 0; - } +int __kmp_aux_get_affinity_max_proc() { + if (!KMP_AFFINITY_CAPABLE()) { + return 0; + } +#if KMP_GROUP_AFFINITY + if (__kmp_num_proc_groups > 1) { + return (int)(__kmp_num_proc_groups * sizeof(DWORD_PTR) * CHAR_BIT); + } +#endif + return __kmp_xproc; +} + +int __kmp_aux_set_affinity_mask_proc(int proc, void **mask) { + int retval; + + if (!KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ; { + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in " + "affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc"); + } + } + + if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { + return -1; + } + if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { + return -2; + } + + KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask)); + return 0; +} - return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask)); +int __kmp_aux_unset_affinity_mask_proc(int proc, void **mask) { + int retval; + + if (!KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ; { + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in " + "affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc"); + } + } + + if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { + return -1; + } + if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { + return -2; + } + + KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask)); + return 0; } +int __kmp_aux_get_affinity_mask_proc(int proc, void **mask) { + int retval; + + if (!KMP_AFFINITY_CAPABLE()) { + return -1; + } + + KA_TRACE(1000, ; { + int gtid = __kmp_entry_gtid(); + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, + (kmp_affin_mask_t *)(*mask)); + __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in " + "affinity mask for thread %d = %s\n", + proc, gtid, buf); + }); + + if (__kmp_env_consistency_check) { + if ((mask == NULL) || (*mask == NULL)) { + KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc"); + } + } + + if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) { + return -1; + } + if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) { + return 0; + } + + return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask)); +} // Dynamic affinity settings - Affinity balanced -void __kmp_balanced_affinity( int tid, int nthreads ) -{ - bool fine_gran = true; +void __kmp_balanced_affinity(int tid, int nthreads) { + bool fine_gran = true; + + switch (__kmp_affinity_gran) { + case affinity_gran_fine: + case affinity_gran_thread: + break; + case affinity_gran_core: + if (__kmp_nThreadsPerCore > 1) { + fine_gran = false; + } + break; + case affinity_gran_package: + if (nCoresPerPkg > 1) { + fine_gran = false; + } + break; + default: + fine_gran = false; + } + + if (__kmp_affinity_uniform_topology()) { + int coreID; + int threadID; + // Number of hyper threads per core in HT machine + int __kmp_nth_per_core = 
__kmp_avail_proc / __kmp_ncores; + // Number of cores + int ncores = __kmp_ncores; + if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) { + __kmp_nth_per_core = __kmp_avail_proc / nPackages; + ncores = nPackages; + } + // How many threads will be bound to each core + int chunk = nthreads / ncores; + // How many cores will have an additional thread bound to it - "big cores" + int big_cores = nthreads % ncores; + // Number of threads on the big cores + int big_nth = (chunk + 1) * big_cores; + if (tid < big_nth) { + coreID = tid / (chunk + 1); + threadID = (tid % (chunk + 1)) % __kmp_nth_per_core; + } else { // tid >= big_nth + coreID = (tid - big_cores) / chunk; + threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core; + } + + KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); - switch (__kmp_affinity_gran) { - case affinity_gran_fine: - case affinity_gran_thread: - break; - case affinity_gran_core: - if( __kmp_nThreadsPerCore > 1) { - fine_gran = false; - } - break; - case affinity_gran_package: - if( nCoresPerPkg > 1) { - fine_gran = false; - } - break; - default: - fine_gran = false; - } - - if( __kmp_affinity_uniform_topology() ) { - int coreID; - int threadID; - // Number of hyper threads per core in HT machine - int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores; - // Number of cores - int ncores = __kmp_ncores; - if( ( nPackages > 1 ) && ( __kmp_nth_per_core <= 1 ) ) { - __kmp_nth_per_core = __kmp_avail_proc / nPackages; - ncores = nPackages; - } - // How many threads will be bound to each core - int chunk = nthreads / ncores; - // How many cores will have an additional thread bound to it - "big cores" - int big_cores = nthreads % ncores; - // Number of threads on the big cores - int big_nth = ( chunk + 1 ) * big_cores; - if( tid < big_nth ) { - coreID = tid / (chunk + 1 ); - threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ; - } else { //tid >= big_nth - coreID = ( tid - big_cores ) / chunk; - threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ; - } + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); - KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); + if (fine_gran) { + int osID = address2os[coreID * __kmp_nth_per_core + threadID].second; + KMP_CPU_SET(osID, mask); + } else { + for (int i = 0; i < __kmp_nth_per_core; i++) { + int osID; + osID = address2os[coreID * __kmp_nth_per_core + i].second; + KMP_CPU_SET(osID, mask); + } + } + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + __kmp_gettid(), tid, buf); + } + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } else { // Non-uniform topology - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); - if( fine_gran ) { - int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second; - KMP_CPU_SET( osID, mask); - } else { - for( int i = 0; i < __kmp_nth_per_core; i++ ) { - int osID; - osID = address2os[ coreID * __kmp_nth_per_core + i ].second; - KMP_CPU_SET( osID, mask); - } + int core_level = __kmp_affinity_find_core_level( + address2os, __kmp_avail_proc, __kmp_aff_depth - 1); + int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, + __kmp_aff_depth - 1, core_level); + int 
nth_per_core = __kmp_affinity_max_proc_per_core( + address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level); + + // For performance gain consider the special case nthreads == + // __kmp_avail_proc + if (nthreads == __kmp_avail_proc) { + if (fine_gran) { + int osID = address2os[tid].second; + KMP_CPU_SET(osID, mask); + } else { + int core = __kmp_affinity_find_core(address2os, tid, + __kmp_aff_depth - 1, core_level); + for (int i = 0; i < __kmp_avail_proc; i++) { + int osID = address2os[i].second; + if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, + core_level) == core) { + KMP_CPU_SET(osID, mask); + } } - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); + } + } else if (nthreads <= ncores) { + + int core = 0; + for (int i = 0; i < ncores; i++) { + // Check if this core from procarr[] is in the mask + int in_mask = 0; + for (int j = 0; j < nth_per_core; j++) { + if (procarr[i * nth_per_core + j] != -1) { + in_mask = 1; + break; + } } - __kmp_set_system_affinity( mask, TRUE ); - KMP_CPU_FREE_FROM_STACK(mask); - } else { // Non-uniform topology - - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - - int core_level = __kmp_affinity_find_core_level(address2os, __kmp_avail_proc, __kmp_aff_depth - 1); - int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level); - int nth_per_core = __kmp_affinity_max_proc_per_core(address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level); - - // For performance gain consider the special case nthreads == __kmp_avail_proc - if( nthreads == __kmp_avail_proc ) { - if( fine_gran ) { - int osID = address2os[ tid ].second; - KMP_CPU_SET( osID, mask); - } else { - int core = __kmp_affinity_find_core(address2os, tid, __kmp_aff_depth - 1, core_level); - for( int i = 0; i < __kmp_avail_proc; i++ ) { - int osID = address2os[ i ].second; - if( __kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1, core_level) == core ) { - KMP_CPU_SET( osID, mask); - } - } - } - } else if( nthreads <= ncores ) { - - int core = 0; - for( int i = 0; i < ncores; i++ ) { - // Check if this core from procarr[] is in the mask - int in_mask = 0; - for( int j = 0; j < nth_per_core; j++ ) { - if( procarr[ i * nth_per_core + j ] != - 1 ) { - in_mask = 1; - break; - } + if (in_mask) { + if (tid == core) { + for (int j = 0; j < nth_per_core; j++) { + int osID = procarr[i * nth_per_core + j]; + if (osID != -1) { + KMP_CPU_SET(osID, mask); + // For fine granularity it is enough to set the first available + // osID for this core + if (fine_gran) { + break; } - if( in_mask ) { - if( tid == core ) { - for( int j = 0; j < nth_per_core; j++ ) { - int osID = procarr[ i * nth_per_core + j ]; - if( osID != -1 ) { - KMP_CPU_SET( osID, mask ); - // For fine granularity it is enough to set the first available osID for this core - if( fine_gran) { - break; - } - } - } - break; - } else { - core++; - } - } - } - - } else { // nthreads > ncores - - // Array to save the number of processors at each core - int* nproc_at_core = (int*)KMP_ALLOCA(sizeof(int)*ncores); - // Array to save the number of cores with "x" available processors; - int* ncores_with_x_procs = (int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); - // Array to save the number of cores with # procs from x to nth_per_core - int* ncores_with_x_to_max_procs = 
(int*)KMP_ALLOCA(sizeof(int)*(nth_per_core+1)); - - for( int i = 0; i <= nth_per_core; i++ ) { - ncores_with_x_procs[ i ] = 0; - ncores_with_x_to_max_procs[ i ] = 0; + } } + break; + } else { + core++; + } + } + } + } else { // nthreads > ncores + // Array to save the number of processors at each core + int *nproc_at_core = (int *)KMP_ALLOCA(sizeof(int) * ncores); + // Array to save the number of cores with "x" available processors; + int *ncores_with_x_procs = + (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1)); + // Array to save the number of cores with # procs from x to nth_per_core + int *ncores_with_x_to_max_procs = + (int *)KMP_ALLOCA(sizeof(int) * (nth_per_core + 1)); + + for (int i = 0; i <= nth_per_core; i++) { + ncores_with_x_procs[i] = 0; + ncores_with_x_to_max_procs[i] = 0; + } - for( int i = 0; i < ncores; i++ ) { - int cnt = 0; - for( int j = 0; j < nth_per_core; j++ ) { - if( procarr[ i * nth_per_core + j ] != -1 ) { - cnt++; - } - } - nproc_at_core[ i ] = cnt; - ncores_with_x_procs[ cnt ]++; - } + for (int i = 0; i < ncores; i++) { + int cnt = 0; + for (int j = 0; j < nth_per_core; j++) { + if (procarr[i * nth_per_core + j] != -1) { + cnt++; + } + } + nproc_at_core[i] = cnt; + ncores_with_x_procs[cnt]++; + } - for( int i = 0; i <= nth_per_core; i++ ) { - for( int j = i; j <= nth_per_core; j++ ) { - ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ]; - } - } + for (int i = 0; i <= nth_per_core; i++) { + for (int j = i; j <= nth_per_core; j++) { + ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j]; + } + } - // Max number of processors - int nproc = nth_per_core * ncores; - // An array to keep number of threads per each context - int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc ); - for( int i = 0; i < nproc; i++ ) { - newarr[ i ] = 0; - } + // Max number of processors + int nproc = nth_per_core * ncores; + // An array to keep number of threads per each context + int *newarr = (int *)__kmp_allocate(sizeof(int) * nproc); + for (int i = 0; i < nproc; i++) { + newarr[i] = 0; + } - int nth = nthreads; - int flag = 0; - while( nth > 0 ) { - for( int j = 1; j <= nth_per_core; j++ ) { - int cnt = ncores_with_x_to_max_procs[ j ]; - for( int i = 0; i < ncores; i++ ) { - // Skip the core with 0 processors - if( nproc_at_core[ i ] == 0 ) { - continue; - } - for( int k = 0; k < nth_per_core; k++ ) { - if( procarr[ i * nth_per_core + k ] != -1 ) { - if( newarr[ i * nth_per_core + k ] == 0 ) { - newarr[ i * nth_per_core + k ] = 1; - cnt--; - nth--; - break; - } else { - if( flag != 0 ) { - newarr[ i * nth_per_core + k ] ++; - cnt--; - nth--; - break; - } - } - } - } - if( cnt == 0 || nth == 0 ) { - break; - } - } - if( nth == 0 ) { - break; - } - } - flag = 1; - } - int sum = 0; - for( int i = 0; i < nproc; i++ ) { - sum += newarr[ i ]; - if( sum > tid ) { - if( fine_gran) { - int osID = procarr[ i ]; - KMP_CPU_SET( osID, mask); - } else { - int coreID = i / nth_per_core; - for( int ii = 0; ii < nth_per_core; ii++ ) { - int osID = procarr[ coreID * nth_per_core + ii ]; - if( osID != -1 ) { - KMP_CPU_SET( osID, mask); - } - } - } + int nth = nthreads; + int flag = 0; + while (nth > 0) { + for (int j = 1; j <= nth_per_core; j++) { + int cnt = ncores_with_x_to_max_procs[j]; + for (int i = 0; i < ncores; i++) { + // Skip the core with 0 processors + if (nproc_at_core[i] == 0) { + continue; + } + for (int k = 0; k < nth_per_core; k++) { + if (procarr[i * nth_per_core + k] != -1) { + if (newarr[i * nth_per_core + k] == 0) { + newarr[i * nth_per_core + k] = 1; + 
cnt--; + nth--; + break; + } else { + if (flag != 0) { + newarr[i * nth_per_core + k]++; + cnt--; + nth--; break; + } } + } + } + if (cnt == 0 || nth == 0) { + break; } - __kmp_free( newarr ); + } + if (nth == 0) { + break; + } } - - if (__kmp_affinity_verbose) { - char buf[KMP_AFFIN_MASK_PRINT_LEN]; - __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); - KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), - __kmp_gettid(), tid, buf); + flag = 1; + } + int sum = 0; + for (int i = 0; i < nproc; i++) { + sum += newarr[i]; + if (sum > tid) { + if (fine_gran) { + int osID = procarr[i]; + KMP_CPU_SET(osID, mask); + } else { + int coreID = i / nth_per_core; + for (int ii = 0; ii < nth_per_core; ii++) { + int osID = procarr[coreID * nth_per_core + ii]; + if (osID != -1) { + KMP_CPU_SET(osID, mask); + } + } + } + break; } - __kmp_set_system_affinity( mask, TRUE ); - KMP_CPU_FREE_FROM_STACK(mask); + } + __kmp_free(newarr); } + + if (__kmp_affinity_verbose) { + char buf[KMP_AFFIN_MASK_PRINT_LEN]; + __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask); + KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), + __kmp_gettid(), tid, buf); + } + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } } #if KMP_OS_LINUX @@ -5451,28 +5004,29 @@ void __kmp_balanced_affinity( int tid, int nthreads ) #ifdef __cplusplus extern "C" #endif -int -kmp_set_thread_affinity_mask_initial() + int + kmp_set_thread_affinity_mask_initial() // the function returns 0 on success, // -1 if we cannot bind thread // >0 (errno) if an error happened during binding { - int gtid = __kmp_get_gtid(); - if (gtid < 0) { - // Do not touch non-omp threads - KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: " - "non-omp thread, returning\n")); - return -1; - } - if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) { - KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: " - "affinity not initialized, returning\n")); - return -1; - } - KA_TRACE(30, ( "kmp_set_thread_affinity_mask_initial: " - "set full mask for thread %d\n", gtid)); - KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL); - return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE); + int gtid = __kmp_get_gtid(); + if (gtid < 0) { + // Do not touch non-omp threads + KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " + "non-omp thread, returning\n")); + return -1; + } + if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) { + KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " + "affinity not initialized, returning\n")); + return -1; + } + KA_TRACE(30, ("kmp_set_thread_affinity_mask_initial: " + "set full mask for thread %d\n", + gtid)); + KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL); + return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE); } #endif diff --git a/openmp/runtime/src/kmp_affinity.h b/openmp/runtime/src/kmp_affinity.h index 142acf7..bae013e 100644 --- a/openmp/runtime/src/kmp_affinity.h +++ b/openmp/runtime/src/kmp_affinity.h @@ -12,765 +12,827 @@ // //===----------------------------------------------------------------------===// + #ifndef KMP_AFFINITY_H #define KMP_AFFINITY_H -#include "kmp_os.h" #include "kmp.h" +#include "kmp_os.h" #if KMP_AFFINITY_SUPPORTED #if KMP_USE_HWLOC -class KMPHwlocAffinity: public KMPAffinity { +class KMPHwlocAffinity : public KMPAffinity { public: - class Mask : public KMPAffinity::Mask { - hwloc_cpuset_t mask; - public: - Mask() { mask = hwloc_bitmap_alloc(); this->zero(); } - ~Mask() { hwloc_bitmap_free(mask); } - void set(int i) override { 
hwloc_bitmap_set(mask, i); } - bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } - void clear(int i) override { hwloc_bitmap_clr(mask, i); } - void zero() override { hwloc_bitmap_zero(mask); } - void copy(const KMPAffinity::Mask* src) override { - const Mask* convert = static_cast(src); - hwloc_bitmap_copy(mask, convert->mask); - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask* convert = static_cast(rhs); - hwloc_bitmap_and(mask, mask, convert->mask); - } - void bitwise_or(const KMPAffinity::Mask * rhs) override { - const Mask* convert = static_cast(rhs); - hwloc_bitmap_or(mask, mask, convert->mask); - } - void bitwise_not() override { hwloc_bitmap_not(mask, mask); } - int begin() const override { return hwloc_bitmap_first(mask); } - int end() const override { return -1; } - int next(int previous) const override { return hwloc_bitmap_next(mask, previous); } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int get_proc_group() const override { - int i; - int group = -1; -# if KMP_OS_WINDOWS - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (i = 0; i < __kmp_num_proc_groups; i++) { - // On windows, the long type is always 32 bits - unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2); - unsigned long second_32_bits = hwloc_bitmap_to_ith_ulong(mask, i*2+1); - if (first_32_bits == 0 && second_32_bits == 0) { - continue; - } - if (group >= 0) { - return -1; - } - group = i; - } -# endif /* KMP_OS_WINDOWS */ - return group; - } - }; - void determine_capable(const char* var) override { - const hwloc_topology_support* topology_support; - if(__kmp_hwloc_topology == NULL) { - if(hwloc_topology_init(&__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); - } - if(hwloc_topology_load(__kmp_hwloc_topology) < 0) { - __kmp_hwloc_error = TRUE; - if(__kmp_affinity_verbose) - KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); - } - } - topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); - // Is the system capable of setting/getting this thread's affinity? - // also, is topology discovery possible? (pu indicates ability to discover processing units) - // and finally, were there no errors when calling any hwloc_* API functions? 
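For reference, the hwloc surface this wrapper builds on, reduced to one standalone helper: create and load a topology, set a single PU in a cpuset, and bind the calling thread to it. The helper name is illustrative; error handling collapses to early returns, and the wrapper above additionally consults hwloc_topology_get_support() before enabling affinity at all.

#include <hwloc.h>

// Bind the calling thread to the PU with the given OS index.
// Returns 0 on success, -1 on any failure.
int bind_self_to_pu(unsigned pu_os_index) {
  hwloc_topology_t topo;
  if (hwloc_topology_init(&topo) < 0)
    return -1;
  if (hwloc_topology_load(topo) < 0) {
    hwloc_topology_destroy(topo);
    return -1;
  }
  hwloc_cpuset_t set = hwloc_bitmap_alloc();
  if (set == NULL) {
    hwloc_topology_destroy(topo);
    return -1;
  }
  hwloc_bitmap_set(set, pu_os_index);
  int rc = hwloc_set_cpubind(topo, set, HWLOC_CPUBIND_THREAD);
  hwloc_bitmap_free(set);
  hwloc_topology_destroy(topo);
  return rc; // hwloc returns 0 on success, -1 with errno set on failure
}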
- if(topology_support && topology_support->cpubind->set_thisthread_cpubind && - topology_support->cpubind->get_thisthread_cpubind && - topology_support->discovery->pu && - !__kmp_hwloc_error) - { - // enables affinity according to KMP_AFFINITY_CAPABLE() macro - KMP_AFFINITY_ENABLE(TRUE); - } else { - // indicate that hwloc didn't work and disable affinity - __kmp_hwloc_error = TRUE; - KMP_AFFINITY_DISABLE(); - } + class Mask : public KMPAffinity::Mask { + hwloc_cpuset_t mask; + + public: + Mask() { + mask = hwloc_bitmap_alloc(); + this->zero(); + } + ~Mask() { hwloc_bitmap_free(mask); } + void set(int i) override { hwloc_bitmap_set(mask, i); } + bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); } + void clear(int i) override { hwloc_bitmap_clr(mask, i); } + void zero() override { hwloc_bitmap_zero(mask); } + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast(src); + hwloc_bitmap_copy(mask, convert->mask); } - void bind_thread(int which) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - KMPAffinity::Mask *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); - } - KMPAffinity::Mask* allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* hwloc_array = static_cast(array); - delete[] hwloc_array; - } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* hwloc_array = static_cast(array); - return &(hwloc_array[index]); - } - api_type get_api_type() const override { return HWLOC; } + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + hwloc_bitmap_and(mask, mask, convert->mask); + } + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + hwloc_bitmap_or(mask, mask, convert->mask); + } + void bitwise_not() override { hwloc_bitmap_not(mask, mask); } + int begin() const override { return hwloc_bitmap_first(mask); } + int end() const override { return -1; } + int next(int previous) const override { + return hwloc_bitmap_next(mask, previous); + } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int get_proc_group() const override { + int i; + int group = -1; +#if KMP_OS_WINDOWS + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (i = 0; i < __kmp_num_proc_groups; i++) { + // On windows, the long type is always 32 bits + unsigned 
long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2); + unsigned long second_32_bits = + hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1); + if (first_32_bits == 0 && second_32_bits == 0) { + continue; + } + if (group >= 0) { + return -1; + } + group = i; + } +#endif /* KMP_OS_WINDOWS */ + return group; + } + }; + void determine_capable(const char *var) override { + const hwloc_topology_support *topology_support; + if (__kmp_hwloc_topology == NULL) { + if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if (__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()"); + } + if (hwloc_topology_load(__kmp_hwloc_topology) < 0) { + __kmp_hwloc_error = TRUE; + if (__kmp_affinity_verbose) + KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()"); + } + } + topology_support = hwloc_topology_get_support(__kmp_hwloc_topology); + // Is the system capable of setting/getting this thread's affinity? + // Also, is topology discovery possible? (pu indicates ability to discover + // processing units). And finally, were there no errors when calling any + // hwloc_* API functions? + if (topology_support && topology_support->cpubind->set_thisthread_cpubind && + topology_support->cpubind->get_thisthread_cpubind && + topology_support->discovery->pu && !__kmp_hwloc_error) { + // enables affinity according to KMP_AFFINITY_CAPABLE() macro + KMP_AFFINITY_ENABLE(TRUE); + } else { + // indicate that hwloc didn't work and disable affinity + __kmp_hwloc_error = TRUE; + KMP_AFFINITY_DISABLE(); + } + } + void bind_thread(int which) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); + KMPAffinity::Mask *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(which, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } + KMPAffinity::Mask *allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *hwloc_array = static_cast(array); + delete[] hwloc_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *hwloc_array = static_cast(array); + return &(hwloc_array[index]); + } + api_type get_api_type() const override { return HWLOC; } }; #endif /* KMP_USE_HWLOC */ #if KMP_OS_LINUX -/* - * On some of the older OS's that we build on, these constants aren't present - * in #included from . They must be the same on - * all systems of the same arch where they are defined, and they cannot change. - * stone forever. - */ +/* On some of the older OS's that we build on, these constants aren't present + in #included from . They must be the same on + all systems of the same arch where they are defined, and they cannot change. + stone forever. */ #include -# if KMP_ARCH_X86 || KMP_ARCH_ARM -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 241 -# elif __NR_sched_setaffinity != 241 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 242 -# elif __NR_sched_getaffinity != 242 -# error Wrong code for getaffinity system call. 
-# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_AARCH64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 122 -# elif __NR_sched_setaffinity != 122 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 123 -# elif __NR_sched_getaffinity != 123 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_X86_64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 203 -# elif __NR_sched_setaffinity != 203 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 204 -# elif __NR_sched_getaffinity != 204 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_PPC64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 222 -# elif __NR_sched_setaffinity != 222 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 223 -# elif __NR_sched_getaffinity != 223 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_MIPS -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 4239 -# elif __NR_sched_setaffinity != 4239 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 4240 -# elif __NR_sched_getaffinity != 4240 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# elif KMP_ARCH_MIPS64 -# ifndef __NR_sched_setaffinity -# define __NR_sched_setaffinity 5195 -# elif __NR_sched_setaffinity != 5195 -# error Wrong code for setaffinity system call. -# endif /* __NR_sched_setaffinity */ -# ifndef __NR_sched_getaffinity -# define __NR_sched_getaffinity 5196 -# elif __NR_sched_getaffinity != 5196 -# error Wrong code for getaffinity system call. -# endif /* __NR_sched_getaffinity */ -# error Unknown or unsupported architecture -# endif /* KMP_ARCH_* */ +#if KMP_ARCH_X86 || KMP_ARCH_ARM +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 241 +#elif __NR_sched_setaffinity != 241 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 242 +#elif __NR_sched_getaffinity != 242 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_AARCH64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 122 +#elif __NR_sched_setaffinity != 122 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 123 +#elif __NR_sched_getaffinity != 123 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_X86_64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 203 +#elif __NR_sched_setaffinity != 203 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 204 +#elif __NR_sched_getaffinity != 204 +#error Wrong code for getaffinity system call. 
+#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_PPC64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 222 +#elif __NR_sched_setaffinity != 222 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 223 +#elif __NR_sched_getaffinity != 223 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_MIPS +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 4239 +#elif __NR_sched_setaffinity != 4239 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 4240 +#elif __NR_sched_getaffinity != 4240 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#elif KMP_ARCH_MIPS64 +#ifndef __NR_sched_setaffinity +#define __NR_sched_setaffinity 5195 +#elif __NR_sched_setaffinity != 5195 +#error Wrong code for setaffinity system call. +#endif /* __NR_sched_setaffinity */ +#ifndef __NR_sched_getaffinity +#define __NR_sched_getaffinity 5196 +#elif __NR_sched_getaffinity != 5196 +#error Wrong code for getaffinity system call. +#endif /* __NR_sched_getaffinity */ +#error Unknown or unsupported architecture +#endif /* KMP_ARCH_* */ class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef unsigned char mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; - public: - mask_t* mask; - Mask() { mask = (mask_t*)__kmp_allocate(__kmp_affin_mask_size); } - ~Mask() { if (mask) __kmp_free(mask); } - void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } - bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } - void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void zero() override { - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = 0; - } - void copy(const KMPAffinity::Mask* src) override { - const Mask * convert = static_cast(src); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast(rhs); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast(rhs); - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (size_t i=0; i<__kmp_affin_mask_size; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_affin_mask_size*BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous+1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int get_system_affinity(bool abort_on_error) override { - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = syscall( __NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask ); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - int set_system_affinity(bool abort_on_error) const override { - 
KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal get affinity operation when not capable"); - int retval = syscall( __NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask ); - if (retval >= 0) { - return 0; - } - int error = errno; - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( FatalSysError ), KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - }; - void determine_capable(const char* env_var) override { - __kmp_affinity_determine_capable(env_var); + class Mask : public KMPAffinity::Mask { + typedef unsigned char mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; + + public: + mask_t *mask; + Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); } + ~Mask() { + if (mask) + __kmp_free(mask); + } + void set(int i) override { + mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); + } + bool is_set(int i) const override { + return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); + } + void clear(int i) override { + mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void bind_thread(int which) override { - __kmp_affinity_bind_thread(which); + void zero() override { + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = 0; } - KMPAffinity::Mask* allocate_mask() override { - KMPNativeAffinity::Mask* retval = new Mask(); - return retval; + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast(src); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = convert->mask[i]; } - void deallocate_mask(KMPAffinity::Mask* m) override { - KMPNativeAffinity::Mask* native_mask = static_cast(m); - delete m; + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] &= convert->mask[i]; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* linux_array = static_cast(array); - delete[] linux_array; + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] |= convert->mask[i]; } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* linux_array = static_cast(array); - return &(linux_array[index]); + void bitwise_not() override { + for (size_t i = 0; i < __kmp_affin_mask_size; ++i) + mask[i] = ~(mask[i]); } - api_type get_api_type() const override { return NATIVE_OS; } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous + 1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; + } + int get_system_affinity(bool abort_on_error) override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + int set_system_affinity(bool abort_on_error) const override { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal get affinity operation when not capable"); + int retval = + 
syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask); + if (retval >= 0) { + return 0; + } + int error = errno; + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error), + __kmp_msg_null); + } + return error; + } + }; + void determine_capable(const char *env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } + KMPAffinity::Mask *allocate_mask() override { + KMPNativeAffinity::Mask *retval = new Mask(); + return retval; + } + void deallocate_mask(KMPAffinity::Mask *m) override { + KMPNativeAffinity::Mask *native_mask = + static_cast(m); + delete m; + } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *linux_array = static_cast(array); + delete[] linux_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *linux_array = static_cast(array); + return &(linux_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } }; #endif /* KMP_OS_LINUX */ #if KMP_OS_WINDOWS class KMPNativeAffinity : public KMPAffinity { - class Mask : public KMPAffinity::Mask { - typedef ULONG_PTR mask_t; - static const int BITS_PER_MASK_T = sizeof(mask_t)*CHAR_BIT; - mask_t* mask; - public: - Mask() { mask = (mask_t*)__kmp_allocate(sizeof(mask_t)*__kmp_num_proc_groups); } - ~Mask() { if (mask) __kmp_free(mask); } - void set(int i) override { mask[i/BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); } - bool is_set(int i) const override { return (mask[i/BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); } - void clear(int i) override { mask[i/BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } - void zero() override { - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = 0; - } - void copy(const KMPAffinity::Mask* src) override { - const Mask * convert = static_cast(src); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = convert->mask[i]; - } - void bitwise_and(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast(rhs); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] &= convert->mask[i]; - } - void bitwise_or(const KMPAffinity::Mask* rhs) override { - const Mask * convert = static_cast(rhs); - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] |= convert->mask[i]; - } - void bitwise_not() override { - for (size_t i=0; i<__kmp_num_proc_groups; ++i) - mask[i] = ~(mask[i]); - } - int begin() const override { - int retval = 0; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int end() const override { return __kmp_num_proc_groups*BITS_PER_MASK_T; } - int next(int previous) const override { - int retval = previous+1; - while (retval < end() && !is_set(retval)) - ++retval; - return retval; - } - int set_system_affinity(bool abort_on_error) const override { - if (__kmp_num_proc_groups > 1) { - // Check for a valid mask. - GROUP_AFFINITY ga; - int group = get_proc_group(); - if (group < 0) { - if (abort_on_error) { - KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); - } - return -1; - } - // Transform the bit vector into a GROUP_AFFINITY struct - // and make the system call to set affinity. 
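Both native Mask implementations, the Linux one above (unsigned char words) and the Windows one here (ULONG_PTR words), store the affinity set as a flat array of words and address bit i as word i / BITS_PER_MASK_T, bit i % BITS_PER_MASK_T. That indexing in isolation, with a fixed-size array in place of the runtime's dynamically sized one:

#include <climits>
#include <cstdio>

typedef unsigned char mask_t; // Linux flavor; the Windows class uses ULONG_PTR
static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

struct FlatMask {
  mask_t words[16]; // fixed 128 bits here; the runtime sizes this at run time

  void zero() {
    for (int i = 0; i < (int)sizeof(words); ++i)
      words[i] = 0;
  }
  void set(int i) {
    words[i / BITS_PER_MASK_T] |= (mask_t)1 << (i % BITS_PER_MASK_T);
  }
  bool is_set(int i) const {
    return (words[i / BITS_PER_MASK_T] >> (i % BITS_PER_MASK_T)) & 1;
  }
  void clear(int i) {
    words[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
  }
};

int main() {
  FlatMask m;
  m.zero();
  m.set(3);
  m.set(9); // word 1, bit 1 when BITS_PER_MASK_T == 8
  std::printf("3:%d 9:%d 10:%d\n", (int)m.is_set(3), (int)m.is_set(9),
              (int)m.is_set(10));
  m.clear(9);
  std::printf("9 after clear: %d\n", (int)m.is_set(9));
  return 0;
}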
- ga.Group = group; - ga.Mask = mask[group]; - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - } else { - if (!SetThreadAffinityMask( GetCurrentThread(), *mask )) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetThreadAffMask ), - KMP_ERR( error ), __kmp_msg_null); - } - return error; - } - } - return 0; - } - int get_system_affinity(bool abort_on_error) override { - if (__kmp_num_proc_groups > 1) { - this->zero(); - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); - if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || (ga.Mask == 0)) { - return -1; - } - mask[ga.Group] = ga.Mask; - } else { - mask_t newMask, sysMask, retval; - if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetProcessAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - retval = SetThreadAffinityMask(GetCurrentThread(), newMask); - if (! retval) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - return error; - } - newMask = SetThreadAffinityMask(GetCurrentThread(), retval); - if (! 
newMask) { - DWORD error = GetLastError(); - if (abort_on_error) { - __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SetThreadAffinityMask()"), - KMP_ERR(error), __kmp_msg_null); - } - } - *mask = retval; - } - return 0; - } - int get_proc_group() const override { - int group = -1; - if (__kmp_num_proc_groups == 1) { - return 1; - } - for (int i = 0; i < __kmp_num_proc_groups; i++) { - if (mask[i] == 0) - continue; - if (group >= 0) - return -1; - group = i; - } - return group; - } - }; - void determine_capable(const char* env_var) override { - __kmp_affinity_determine_capable(env_var); + class Mask : public KMPAffinity::Mask { + typedef ULONG_PTR mask_t; + static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT; + mask_t *mask; + + public: + Mask() { + mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups); + } + ~Mask() { + if (mask) + __kmp_free(mask); + } + void set(int i) override { + mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T)); + } + bool is_set(int i) const override { + return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T))); + } + void clear(int i) override { + mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); + } + void zero() override { + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] = 0; + } + void copy(const KMPAffinity::Mask *src) override { + const Mask *convert = static_cast(src); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] = convert->mask[i]; + } + void bitwise_and(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] &= convert->mask[i]; + } + void bitwise_or(const KMPAffinity::Mask *rhs) override { + const Mask *convert = static_cast(rhs); + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] |= convert->mask[i]; + } + void bitwise_not() override { + for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + mask[i] = ~(mask[i]); + } + int begin() const override { + int retval = 0; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; } - void bind_thread(int which) override { - __kmp_affinity_bind_thread(which); + int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; } + int next(int previous) const override { + int retval = previous + 1; + while (retval < end() && !is_set(retval)) + ++retval; + return retval; } - KMPAffinity::Mask* allocate_mask() override { return new Mask(); } - void deallocate_mask(KMPAffinity::Mask* m) override { delete m; } - KMPAffinity::Mask* allocate_mask_array(int num) override { return new Mask[num]; } - void deallocate_mask_array(KMPAffinity::Mask* array) override { - Mask* windows_array = static_cast(array); - delete[] windows_array; + int set_system_affinity(bool abort_on_error) const override { + if (__kmp_num_proc_groups > 1) { + // Check for a valid mask. + GROUP_AFFINITY ga; + int group = get_proc_group(); + if (group < 0) { + if (abort_on_error) { + KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity"); + } + return -1; + } + // Transform the bit vector into a GROUP_AFFINITY struct + // and make the system call to set affinity. 
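// Illustrative sketch (not part of this patch) of the "bit vector ->
// GROUP_AFFINITY" step performed just below when __kmp_num_proc_groups > 1:
// the flat mask keeps one word per Windows processor group, so the single
// non-zero word yields both the group number and the in-group mask, and a
// mask spanning several groups is rejected (mirroring the get_proc_group()
// < 0 check above). The struct here is only a stand-in reusing the field
// names of the Win32 GROUP_AFFINITY; real code would include <windows.h> and
// call SetThreadGroupAffinity.
#include <cassert>
#include <cstdint>
#include <vector>

namespace group_affinity_sketch {
struct GroupAffinityStandIn { // stand-in, not the real Win32 definition
  std::uint64_t Mask;         // bits within the selected processor group
  std::uint16_t Group;        // which processor group the mask refers to
  std::uint16_t Reserved[3];
};

inline bool to_group_affinity(const std::vector<std::uint64_t> &mask,
                              GroupAffinityStandIn *out) {
  int group = -1;
  for (int i = 0; i < static_cast<int>(mask.size()); ++i) {
    if (mask[i] == 0)
      continue;
    if (group >= 0)
      return false; // bits in more than one group: not representable
    group = i;
  }
  if (group < 0)
    return false; // empty mask
  out->Group = static_cast<std::uint16_t>(group);
  out->Mask = mask[group];
  out->Reserved[0] = out->Reserved[1] = out->Reserved[2] = 0;
  return true;
}

inline void demo() {
  std::vector<std::uint64_t> mask(4, 0); // as if __kmp_num_proc_groups == 4
  mask[2] = 0x5;                         // CPUs 0 and 2 of group 2
  GroupAffinityStandIn ga;
  assert(to_group_affinity(mask, &ga) && ga.Group == 2 && ga.Mask == 0x5);
}
} // namespace group_affinity_sketch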
+ ga.Group = group; + ga.Mask = mask[group]; + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + } else { + if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + } + return 0; } - KMPAffinity::Mask* index_mask_array(KMPAffinity::Mask* array, int index) override { - Mask* windows_array = static_cast(array); - return &(windows_array[index]); + int get_system_affinity(bool abort_on_error) override { + if (__kmp_num_proc_groups > 1) { + this->zero(); + GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL); + if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "GetThreadGroupAffinity()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) || + (ga.Mask == 0)) { + return -1; + } + mask[ga.Group] = ga.Mask; + } else { + mask_t newMask, sysMask, retval; + if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "GetProcessAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + retval = SetThreadAffinityMask(GetCurrentThread(), newMask); + if (!retval) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + return error; + } + newMask = SetThreadAffinityMask(GetCurrentThread(), retval); + if (!newMask) { + DWORD error = GetLastError(); + if (abort_on_error) { + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "SetThreadAffinityMask()"), + KMP_ERR(error), __kmp_msg_null); + } + } + *mask = retval; + } + return 0; } - api_type get_api_type() const override { return NATIVE_OS; } + int get_proc_group() const override { + int group = -1; + if (__kmp_num_proc_groups == 1) { + return 1; + } + for (int i = 0; i < __kmp_num_proc_groups; i++) { + if (mask[i] == 0) + continue; + if (group >= 0) + return -1; + group = i; + } + return group; + } + }; + void determine_capable(const char *env_var) override { + __kmp_affinity_determine_capable(env_var); + } + void bind_thread(int which) override { __kmp_affinity_bind_thread(which); } + KMPAffinity::Mask *allocate_mask() override { return new Mask(); } + void deallocate_mask(KMPAffinity::Mask *m) override { delete m; } + KMPAffinity::Mask *allocate_mask_array(int num) override { + return new Mask[num]; + } + void deallocate_mask_array(KMPAffinity::Mask *array) override { + Mask *windows_array = static_cast(array); + delete[] windows_array; + } + KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array, + int index) override { + Mask *windows_array = static_cast(array); + return &(windows_array[index]); + } + api_type get_api_type() const override { return NATIVE_OS; } }; #endif /* KMP_OS_WINDOWS */ #endif /* KMP_AFFINITY_SUPPORTED */ class Address { public: - static const unsigned maxDepth = 32; - unsigned 
labels[maxDepth]; - unsigned childNums[maxDepth]; - unsigned depth; - unsigned leader; - Address(unsigned _depth) - : depth(_depth), leader(FALSE) { - } - Address &operator=(const Address &b) { - depth = b.depth; - for (unsigned i = 0; i < depth; i++) { - labels[i] = b.labels[i]; - childNums[i] = b.childNums[i]; - } - leader = FALSE; - return *this; - } - bool operator==(const Address &b) const { - if (depth != b.depth) - return false; - for (unsigned i = 0; i < depth; i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool isClose(const Address &b, int level) const { - if (depth != b.depth) - return false; - if ((unsigned)level >= depth) - return true; - for (unsigned i = 0; i < (depth - level); i++) - if(labels[i] != b.labels[i]) - return false; - return true; - } - bool operator!=(const Address &b) const { - return !operator==(b); - } - void print() const { - unsigned i; - printf("Depth: %u --- ", depth); - for(i=0;i= depth) + return true; + for (unsigned i = 0; i < (depth - level); i++) + if (labels[i] != b.labels[i]) + return false; + return true; + } + bool operator!=(const Address &b) const { return !operator==(b); } + void print() const { + unsigned i; + printf("Depth: %u --- ", depth); + for (i = 0; i < depth; i++) { + printf("%u ", labels[i]); + } + } }; class AddrUnsPair { public: - Address first; - unsigned second; - AddrUnsPair(Address _first, unsigned _second) - : first(_first), second(_second) { - } - AddrUnsPair &operator=(const AddrUnsPair &b) - { - first = b.first; - second = b.second; - return *this; - } - void print() const { - printf("first = "); first.print(); - printf(" --- second = %u", second); - } - bool operator==(const AddrUnsPair &b) const { - if(first != b.first) return false; - if(second != b.second) return false; - return true; - } - bool operator!=(const AddrUnsPair &b) const { - return !operator==(b); - } + Address first; + unsigned second; + AddrUnsPair(Address _first, unsigned _second) + : first(_first), second(_second) {} + AddrUnsPair &operator=(const AddrUnsPair &b) { + first = b.first; + second = b.second; + return *this; + } + void print() const { + printf("first = "); + first.print(); + printf(" --- second = %u", second); + } + bool operator==(const AddrUnsPair &b) const { + if (first != b.first) + return false; + if (second != b.second) + return false; + return true; + } + bool operator!=(const AddrUnsPair &b) const { return !operator==(b); } }; - -static int -__kmp_affinity_cmp_Address_labels(const void *a, const void *b) -{ - const Address *aa = (const Address *)&(((AddrUnsPair *)a) - ->first); - const Address *bb = (const Address *)&(((AddrUnsPair *)b) - ->first); - unsigned depth = aa->depth; - unsigned i; - KMP_DEBUG_ASSERT(depth == bb->depth); - for (i = 0; i < depth; i++) { - if (aa->labels[i] < bb->labels[i]) return -1; - if (aa->labels[i] > bb->labels[i]) return 1; - } - return 0; +static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) { + const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first); + const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first); + unsigned depth = aa->depth; + unsigned i; + KMP_DEBUG_ASSERT(depth == bb->depth); + for (i = 0; i < depth; i++) { + if (aa->labels[i] < bb->labels[i]) + return -1; + if (aa->labels[i] > bb->labels[i]) + return 1; + } + return 0; } - -/** A structure for holding machine-specific hierarchy info to be computed once at init. 
- This structure represents a mapping of threads to the actual machine hierarchy, or to - our best guess at what the hierarchy might be, for the purpose of performing an - efficient barrier. In the worst case, when there is no machine hierarchy information, - it produces a tree suitable for a barrier, similar to the tree used in the hyper barrier. */ +/* A structure for holding machine-specific hierarchy info to be computed once + at init. This structure represents a mapping of threads to the actual machine + hierarchy, or to our best guess at what the hierarchy might be, for the + purpose of performing an efficient barrier. In the worst case, when there is + no machine hierarchy information, it produces a tree suitable for a barrier, + similar to the tree used in the hyper barrier. */ class hierarchy_info { public: - /** Good default values for number of leaves and branching factor, given no affinity information. - Behaves a bit like hyper barrier. */ - static const kmp_uint32 maxLeaves=4; - static const kmp_uint32 minBranch=4; - /** Number of levels in the hierarchy. Typical levels are threads/core, cores/package - or socket, packages/node, nodes/machine, etc. We don't want to get specific with - nomenclature. When the machine is oversubscribed we add levels to duplicate the - hierarchy, doubling the thread capacity of the hierarchy each time we add a level. */ - kmp_uint32 maxLevels; - - /** This is specifically the depth of the machine configuration hierarchy, in terms of the - number of levels along the longest path from root to any leaf. It corresponds to the - number of entries in numPerLevel if we exclude all but one trailing 1. */ - kmp_uint32 depth; - kmp_uint32 base_num_threads; - enum init_status { initialized=0, not_initialized=1, initializing=2 }; - volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, 2=initialization in progress - volatile kmp_int8 resizing; // 0=not resizing, 1=resizing - - /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children the parent of a - node at level i has. For example, if we have a machine with 4 packages, 4 cores/package - and 2 HT per core, then numPerLevel = {2, 4, 4, 1, 1}. All empty levels are set to 1. */ - kmp_uint32 *numPerLevel; - kmp_uint32 *skipPerLevel; - - void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { - int hier_depth = adr2os[0].first.depth; - int level = 0; - for (int i=hier_depth-1; i>=0; --i) { - int max = -1; - for (int j=0; j max) max = next; - } - numPerLevel[level] = max+1; - ++level; - } + /* Good default values for number of leaves and branching factor, given no + affinity information. Behaves a bit like hyper barrier. */ + static const kmp_uint32 maxLeaves = 4; + static const kmp_uint32 minBranch = 4; + /** Number of levels in the hierarchy. Typical levels are threads/core, + cores/package or socket, packages/node, nodes/machine, etc. We don't want + to get specific with nomenclature. When the machine is oversubscribed we + add levels to duplicate the hierarchy, doubling the thread capacity of the + hierarchy each time we add a level. */ + kmp_uint32 maxLevels; + + /** This is specifically the depth of the machine configuration hierarchy, in + terms of the number of levels along the longest path from root to any + leaf. It corresponds to the number of entries in numPerLevel if we exclude + all but one trailing 1. 
*/ + kmp_uint32 depth; + kmp_uint32 base_num_threads; + enum init_status { initialized = 0, not_initialized = 1, initializing = 2 }; + volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized, + // 2=initialization in progress + volatile kmp_int8 resizing; // 0=not resizing, 1=resizing + + /** Level 0 corresponds to leaves. numPerLevel[i] is the number of children + the parent of a node at level i has. For example, if we have a machine + with 4 packages, 4 cores/package and 2 HT per core, then numPerLevel = + {2, 4, 4, 1, 1}. All empty levels are set to 1. */ + kmp_uint32 *numPerLevel; + kmp_uint32 *skipPerLevel; + + void deriveLevels(AddrUnsPair *adr2os, int num_addrs) { + int hier_depth = adr2os[0].first.depth; + int level = 0; + for (int i = hier_depth - 1; i >= 0; --i) { + int max = -1; + for (int j = 0; j < num_addrs; ++j) { + int next = adr2os[j].first.childNums[i]; + if (next > max) + max = next; + } + numPerLevel[level] = max + 1; + ++level; + } + } + + hierarchy_info() + : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} + + void fini() { + if (!uninitialized && numPerLevel) + __kmp_free(numPerLevel); + } + + void init(AddrUnsPair *adr2os, int num_addrs) { + kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8( + &uninitialized, not_initialized, initializing); + if (bool_result == 0) { // Wait for initialization + while (TCR_1(uninitialized) != initialized) + KMP_CPU_PAUSE(); + return; + } + KMP_DEBUG_ASSERT(bool_result == 1); + + /* Added explicit initialization of the data fields here to prevent usage of + dirty value observed when static library is re-initialized multiple times + (e.g. when non-OpenMP thread repeatedly launches/joins thread that uses + OpenMP). */ + depth = 1; + resizing = 0; + maxLevels = 7; + numPerLevel = + (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + for (kmp_uint32 i = 0; i < maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + skipPerLevel[i] = 1; } - hierarchy_info() : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {} - - void fini() { if (!uninitialized && numPerLevel) __kmp_free(numPerLevel); } - - void init(AddrUnsPair *adr2os, int num_addrs) - { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, not_initialized, initializing); - if (bool_result == 0) { // Wait for initialization - while (TCR_1(uninitialized) != initialized) KMP_CPU_PAUSE(); - return; - } - KMP_DEBUG_ASSERT(bool_result==1); - - /* Added explicit initialization of the data fields here to prevent usage of dirty value - observed when static library is re-initialized multiple times (e.g. when - non-OpenMP thread repeatedly launches/joins thread that uses OpenMP). */ - depth = 1; - resizing = 0; - maxLevels = 7; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - for (kmp_uint32 i=0; i=0; --i) // count non-empty levels to get depth - if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' - depth++; - - kmp_uint32 branch = minBranch; - if (numPerLevel[0] == 1) branch = num_addrs/maxLeaves; - if (branch branch || (d==0 && numPerLevel[d]>maxLeaves)) { // max 4 on level 0! 
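// Illustrative sketch (not part of this patch) of the numPerLevel convention
// documented above: for 4 packages, 4 cores per package and 2 hardware
// threads per core, numPerLevel = {2, 4, 4, 1, 1} (level 0 = leaves, unused
// levels padded with 1), and the product of the entries is the thread
// capacity of the tree. The names below are assumed for illustration only.
#include <cassert>
#include <cstdint>
#include <vector>

namespace hierarchy_sketch {
inline std::uint32_t capacity(const std::vector<std::uint32_t> &numPerLevel) {
  std::uint32_t threads = 1;
  for (std::uint32_t fanout : numPerLevel)
    threads *= fanout; // padded levels contribute a factor of 1
  return threads;
}

inline void demo() {
  const std::vector<std::uint32_t> numPerLevel = {2, 4, 4, 1, 1};
  assert(capacity(numPerLevel) == 32u); // 2 * 4 * 4 = 32 logical CPUs
}
} // namespace hierarchy_sketch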
- if (numPerLevel[d] & 1) numPerLevel[d]++; - numPerLevel[d] = numPerLevel[d] >> 1; - if (numPerLevel[d+1] == 1) depth++; - numPerLevel[d+1] = numPerLevel[d+1] << 1; - } - if(numPerLevel[0] == 1) { - branch = branch >> 1; - if (branch<4) branch = minBranch; - } - } - - for (kmp_uint32 i=1; i= 0; + --i) // count non-empty levels to get depth + if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1' + depth++; + + kmp_uint32 branch = minBranch; + if (numPerLevel[0] == 1) + branch = num_addrs / maxLeaves; + if (branch < minBranch) + branch = minBranch; + for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width + while (numPerLevel[d] > branch || + (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0! + if (numPerLevel[d] & 1) + numPerLevel[d]++; + numPerLevel[d] = numPerLevel[d] >> 1; + if (numPerLevel[d + 1] == 1) + depth++; + numPerLevel[d + 1] = numPerLevel[d + 1] << 1; + } + if (numPerLevel[0] == 1) { + branch = branch >> 1; + if (branch < 4) + branch = minBranch; + } } - // Resize the hierarchy if nproc changes to something larger than before - void resize(kmp_uint32 nproc) - { - kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - while (bool_result == 0) { // someone else is trying to resize - KMP_CPU_PAUSE(); - if (nproc <= base_num_threads) // happy with other thread's resize - return; - else // try to resize - bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1); - } - KMP_DEBUG_ASSERT(bool_result!=0); - if (nproc <= base_num_threads) return; // happy with other thread's resize - - // Calculate new maxLevels - kmp_uint32 old_sz = skipPerLevel[depth-1]; - kmp_uint32 incs = 0, old_maxLevels = maxLevels; - // First see if old maxLevels is enough to contain new size - for (kmp_uint32 i=depth; iold_sz; ++i) { - skipPerLevel[i] = 2*skipPerLevel[i-1]; - numPerLevel[i-1] *= 2; - old_sz *= 2; - depth++; - } - if (nproc > old_sz) { // Not enough space, need to expand hierarchy - while (nproc > old_sz) { - old_sz *=2; - incs++; - depth++; - } - maxLevels += incs; - - // Resize arrays - kmp_uint32 *old_numPerLevel = numPerLevel; - kmp_uint32 *old_skipPerLevel = skipPerLevel; - numPerLevel = skipPerLevel = NULL; - numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32)); - skipPerLevel = &(numPerLevel[maxLevels]); - - // Copy old elements from old arrays - for (kmp_uint32 i=0; i old_sz; ++i) { + skipPerLevel[i] = 2 * skipPerLevel[i - 1]; + numPerLevel[i - 1] *= 2; + old_sz *= 2; + depth++; + } + if (nproc > old_sz) { // Not enough space, need to expand hierarchy + while (nproc > old_sz) { + old_sz *= 2; + incs++; + depth++; + } + maxLevels += incs; + + // Resize arrays + kmp_uint32 *old_numPerLevel = numPerLevel; + kmp_uint32 *old_skipPerLevel = skipPerLevel; + numPerLevel = skipPerLevel = NULL; + numPerLevel = + (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32)); + skipPerLevel = &(numPerLevel[maxLevels]); + + // Copy old elements from old arrays + for (kmp_uint32 i = 0; i < old_maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = old_numPerLevel[i]; + skipPerLevel[i] = old_skipPerLevel[i]; + } + + // Init new elements in arrays to 1 + for (kmp_uint32 i = old_maxLevels; i < maxLevels; + ++i) { // init numPerLevel[*] to 1 item per level + numPerLevel[i] = 1; + skipPerLevel[i] = 1; + } + + // Free old arrays + __kmp_free(old_numPerLevel); + } - // Fill in oversubscription levels of hierarchy - for (kmp_uint32 i=old_maxLevels; i0: (common) block size for all - bpool calls made 
so far - */ - bfhead_t * last_pool; /* Last pool owned by this thread (delay dealocation) */ + /* Automatic expansion block management functions */ + bget_compact_t compfcn; + bget_acquire_t acqfcn; + bget_release_t relfcn; + + bget_mode_t mode; /* what allocation mode to use? */ + + bufsize exp_incr; /* Expansion block size */ + bufsize pool_len; /* 0: no bpool calls have been made + -1: not all pool blocks are the same size + >0: (common) block size for all bpool calls made so far + */ + bfhead_t *last_pool; /* Last pool owned by this thread (delay dealocation) */ } thr_data_t; /* Minimum allocation quantum: */ - -#define QLSize (sizeof(qlinks_t)) -#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) -#define MaxSize (bufsize)( ~ ( ( (bufsize)( 1 ) << ( sizeof( bufsize ) * CHAR_BIT - 1 ) ) | ( SizeQuant - 1 ) ) ) - // Maximun for the requested size. +#define QLSize (sizeof(qlinks_t)) +#define SizeQ ((SizeQuant > QLSize) ? SizeQuant : QLSize) +#define MaxSize \ + (bufsize)( \ + ~(((bufsize)(1) << (sizeof(bufsize) * CHAR_BIT - 1)) | (SizeQuant - 1))) +// Maximun for the requested size. /* End sentinel: value placed in bsize field of dummy block delimiting end of pool block. The most negative number which will fit in a bufsize, defined in a way that the compiler will accept. */ -#define ESent ((bufsize) (-(((((bufsize)1)<<((int)sizeof(bufsize)*8-2))-1)*2)-2)) - -/* ------------------------------------------------------------------------ */ +#define ESent \ + ((bufsize)(-(((((bufsize)1) << ((int)sizeof(bufsize) * 8 - 2)) - 1) * 2) - 2)) /* Thread Data management routines */ +static int bget_get_bin(bufsize size) { + // binary chop bins + int lo = 0, hi = MAX_BGET_BINS - 1; -static int -bget_get_bin( bufsize size ) -{ - // binary chop bins - int lo = 0, hi = MAX_BGET_BINS - 1; + KMP_DEBUG_ASSERT(size > 0); - KMP_DEBUG_ASSERT( size > 0 ); + while ((hi - lo) > 1) { + int mid = (lo + hi) >> 1; + if (size < bget_bin_size[mid]) + hi = mid - 1; + else + lo = mid; + } - while ( (hi - lo) > 1 ) { - int mid = (lo + hi) >> 1; - if (size < bget_bin_size[ mid ]) - hi = mid - 1; - else - lo = mid; - } - - KMP_DEBUG_ASSERT( (lo >= 0) && (lo < MAX_BGET_BINS) ); + KMP_DEBUG_ASSERT((lo >= 0) && (lo < MAX_BGET_BINS)); - return lo; + return lo; } -static void -set_thr_data( kmp_info_t *th ) -{ - int i; - thr_data_t *data; +static void set_thr_data(kmp_info_t *th) { + int i; + thr_data_t *data; - data = - (thr_data_t *)( - ( ! th->th.th_local.bget_data ) ? __kmp_allocate( sizeof( *data ) ) : th->th.th_local.bget_data - ); + data = (thr_data_t *)((!th->th.th_local.bget_data) + ? __kmp_allocate(sizeof(*data)) + : th->th.th_local.bget_data); - memset( data, '\0', sizeof( *data ) ); + memset(data, '\0', sizeof(*data)); - for (i = 0; i < MAX_BGET_BINS; ++i) { - data->freelist[ i ].ql.flink = & data->freelist[ i ]; - data->freelist[ i ].ql.blink = & data->freelist[ i ]; - } + for (i = 0; i < MAX_BGET_BINS; ++i) { + data->freelist[i].ql.flink = &data->freelist[i]; + data->freelist[i].ql.blink = &data->freelist[i]; + } - th->th.th_local.bget_data = data; - th->th.th_local.bget_list = 0; -#if ! USE_CMP_XCHG_FOR_BGET + th->th.th_local.bget_data = data; + th->th.th_local.bget_list = 0; +#if !USE_CMP_XCHG_FOR_BGET #ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_init_lock( & th->th.th_local.bget_lock ); + __kmp_init_lock(&th->th.th_local.bget_lock); #else - __kmp_init_bootstrap_lock( & th->th.th_local.bget_lock ); + __kmp_init_bootstrap_lock(&th->th.th_local.bget_lock); #endif /* USE_LOCK_FOR_BGET */ #endif /* ! 
USE_CMP_XCHG_FOR_BGET */ } -static thr_data_t * -get_thr_data( kmp_info_t *th ) -{ - thr_data_t *data; +static thr_data_t *get_thr_data(kmp_info_t *th) { + thr_data_t *data; - data = (thr_data_t *) th->th.th_local.bget_data; + data = (thr_data_t *)th->th.th_local.bget_data; - KMP_DEBUG_ASSERT( data != 0 ); + KMP_DEBUG_ASSERT(data != 0); - return data; + return data; } - #ifdef KMP_DEBUG -static void -__kmp_bget_validate_queue( kmp_info_t *th ) -{ - /* NOTE: assume that the global_lock is held */ +static void __kmp_bget_validate_queue(kmp_info_t *th) { + /* NOTE: assume that the global_lock is held */ - void *p = (void *) th->th.th_local.bget_list; + void *p = (void *)th->th.th_local.bget_list; - while (p != 0) { - bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); + while (p != 0) { + bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - p = (void *) b->ql.flink; - } + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + p = (void *)b->ql.flink; + } } #endif /* Walk the free list and release the enqueued buffers */ +static void __kmp_bget_dequeue(kmp_info_t *th) { + void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); -static void -__kmp_bget_dequeue( kmp_info_t *th ) -{ - void *p = TCR_SYNC_PTR(th->th.th_local.bget_list); - - if (p != 0) { - #if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - while ( ! KMP_COMPARE_AND_STORE_PTR( - & th->th.th_local.bget_list, old_value, NULL ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); - } - p = (void *) old_value; - } - #else /* ! USE_CMP_XCHG_FOR_BGET */ - #ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock( & th->th.th_local.bget_lock, - __kmp_gtid_from_thread(th) ); - #else - __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); - #endif /* USE_QUEUING_LOCK_FOR_BGET */ - - p = (void *) th->th.th_local.bget_list; - th->th.th_local.bget_list = 0; - - #ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock( & th->th.th_local.bget_lock, - __kmp_gtid_from_thread(th) ); - #else - __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); - #endif - #endif /* USE_CMP_XCHG_FOR_BGET */ - - /* Check again to make sure the list is not empty */ - - while (p != 0) { - void *buf = p; - bfhead_t *b = BFH(((char *) p) - sizeof(bhead_t)); - - KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); - KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == - (kmp_uintptr_t)th ); // clear possible mark - KMP_DEBUG_ASSERT( b->ql.blink == 0 ); - - p = (void *) b->ql.flink; - - brel( th, buf ); - } + if (p != 0) { +#if USE_CMP_XCHG_FOR_BGET + { + volatile void *old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); + while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, old_value, + NULL)) { + KMP_CPU_PAUSE(); + old_value = TCR_SYNC_PTR(th->th.th_local.bget_list); + } + p = (void *)old_value; } +#else /* ! 
USE_CMP_XCHG_FOR_BGET */ +#ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_acquire_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); +#else + __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); +#endif /* USE_QUEUING_LOCK_FOR_BGET */ + + p = (void *)th->th.th_local.bget_list; + th->th.th_local.bget_list = 0; + +#ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_release_lock(&th->th.th_local.bget_lock, __kmp_gtid_from_thread(th)); +#else + __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); +#endif +#endif /* USE_CMP_XCHG_FOR_BGET */ + + /* Check again to make sure the list is not empty */ + while (p != 0) { + void *buf = p; + bfhead_t *b = BFH(((char *)p) - sizeof(bhead_t)); + + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == + (kmp_uintptr_t)th); // clear possible mark + KMP_DEBUG_ASSERT(b->ql.blink == 0); + + p = (void *)b->ql.flink; + + brel(th, buf); + } + } } /* Chain together the free buffers by using the thread owner field */ - -static void -__kmp_bget_enqueue( kmp_info_t *th, void *buf +static void __kmp_bget_enqueue(kmp_info_t *th, void *buf #ifdef USE_QUEUING_LOCK_FOR_BGET - , kmp_int32 rel_gtid + , + kmp_int32 rel_gtid #endif - ) -{ - bfhead_t *b = BFH(((char *) buf) - sizeof(bhead_t)); + ) { + bfhead_t *b = BFH(((char *)buf) - sizeof(bhead_t)); - KMP_DEBUG_ASSERT( b->bh.bb.bsize != 0 ); - KMP_DEBUG_ASSERT( ( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ) == - (kmp_uintptr_t)th ); // clear possible mark + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + KMP_DEBUG_ASSERT(((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1) == + (kmp_uintptr_t)th); // clear possible mark - b->ql.blink = 0; + b->ql.blink = 0; - KC_TRACE( 10, ( "__kmp_bget_enqueue: moving buffer to T#%d list\n", - __kmp_gtid_from_thread( th ) ) ); + KC_TRACE(10, ("__kmp_bget_enqueue: moving buffer to T#%d list\n", + __kmp_gtid_from_thread(th))); #if USE_CMP_XCHG_FOR_BGET - { - volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH( old_value ); - - while ( ! KMP_COMPARE_AND_STORE_PTR( - & th->th.th_local.bget_list, old_value, buf ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_PTR(th->th.th_local.bget_list); - /* the next pointer must be set before setting bget_list to buf to avoid - exposing a broken list to other threads, even for an instant. */ - b->ql.flink = BFH( old_value ); - } + { + volatile void *old_value = TCR_PTR(th->th.th_local.bget_list); + /* the next pointer must be set before setting bget_list to buf to avoid + exposing a broken list to other threads, even for an instant. */ + b->ql.flink = BFH(old_value); + + while (!KMP_COMPARE_AND_STORE_PTR(&th->th.th_local.bget_list, old_value, + buf)) { + KMP_CPU_PAUSE(); + old_value = TCR_PTR(th->th.th_local.bget_list); + /* the next pointer must be set before setting bget_list to buf to avoid + exposing a broken list to other threads, even for an instant. */ + b->ql.flink = BFH(old_value); } + } #else /* ! 
USE_CMP_XCHG_FOR_BGET */ -# ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_acquire_lock( & th->th.th_local.bget_lock, rel_gtid ); -# else - __kmp_acquire_bootstrap_lock( & th->th.th_local.bget_lock ); - # endif - - b->ql.flink = BFH( th->th.th_local.bget_list ); - th->th.th_local.bget_list = (void *) buf; - -# ifdef USE_QUEUING_LOCK_FOR_BGET - __kmp_release_lock( & th->th.th_local.bget_lock, rel_gtid ); -# else - __kmp_release_bootstrap_lock( & th->th.th_local.bget_lock ); -# endif +#ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_acquire_lock(&th->th.th_local.bget_lock, rel_gtid); +#else + __kmp_acquire_bootstrap_lock(&th->th.th_local.bget_lock); +#endif + + b->ql.flink = BFH(th->th.th_local.bget_list); + th->th.th_local.bget_list = (void *)buf; + +#ifdef USE_QUEUING_LOCK_FOR_BGET + __kmp_release_lock(&th->th.th_local.bget_lock, rel_gtid); +#else + __kmp_release_bootstrap_lock(&th->th.th_local.bget_lock); +#endif #endif /* USE_CMP_XCHG_FOR_BGET */ } /* insert buffer back onto a new freelist */ +static void __kmp_bget_insert_into_freelist(thr_data_t *thr, bfhead_t *b) { + int bin; -static void -__kmp_bget_insert_into_freelist( thr_data_t *thr, bfhead_t *b ) -{ - int bin; - - KMP_DEBUG_ASSERT( ((size_t)b ) % SizeQuant == 0 ); - KMP_DEBUG_ASSERT( b->bh.bb.bsize % SizeQuant == 0 ); + KMP_DEBUG_ASSERT(((size_t)b) % SizeQuant == 0); + KMP_DEBUG_ASSERT(b->bh.bb.bsize % SizeQuant == 0); - bin = bget_get_bin( b->bh.bb.bsize ); + bin = bget_get_bin(b->bh.bb.bsize); - KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.blink->ql.flink == &thr->freelist[ bin ]); - KMP_DEBUG_ASSERT(thr->freelist[ bin ].ql.flink->ql.blink == &thr->freelist[ bin ]); + KMP_DEBUG_ASSERT(thr->freelist[bin].ql.blink->ql.flink == + &thr->freelist[bin]); + KMP_DEBUG_ASSERT(thr->freelist[bin].ql.flink->ql.blink == + &thr->freelist[bin]); - b->ql.flink = &thr->freelist[ bin ]; - b->ql.blink = thr->freelist[ bin ].ql.blink; + b->ql.flink = &thr->freelist[bin]; + b->ql.blink = thr->freelist[bin].ql.blink; - thr->freelist[ bin ].ql.blink = b; - b->ql.blink->ql.flink = b; + thr->freelist[bin].ql.blink = b; + b->ql.blink->ql.flink = b; } /* unlink the buffer from the old freelist */ +static void __kmp_bget_remove_from_freelist(bfhead_t *b) { + KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); + KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); -static void -__kmp_bget_remove_from_freelist( bfhead_t *b ) -{ - KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); - KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); - - b->ql.blink->ql.flink = b->ql.flink; - b->ql.flink->ql.blink = b->ql.blink; + b->ql.blink->ql.flink = b->ql.flink; + b->ql.flink->ql.blink = b->ql.blink; } -/* ------------------------------------------------------------------------ */ - /* GET STATS -- check info on free list */ +static void bcheck(kmp_info_t *th, bufsize *max_free, bufsize *total_free) { + thr_data_t *thr = get_thr_data(th); + int bin; -static void -bcheck( kmp_info_t *th, bufsize *max_free, bufsize *total_free ) -{ - thr_data_t *thr = get_thr_data( th ); - int bin; - - *total_free = *max_free = 0; + *total_free = *max_free = 0; - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b, *best; + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b, *best; - best = &thr->freelist[ bin ]; - b = best->ql.flink; + best = &thr->freelist[bin]; + b = best->ql.flink; - while (b != &thr->freelist[ bin ]) { - *total_free += (b->bh.bb.bsize - sizeof( bhead_t )); - if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) - best = b; + while (b != &thr->freelist[bin]) { + *total_free 
+= (b->bh.bb.bsize - sizeof(bhead_t)); + if ((best == &thr->freelist[bin]) || (b->bh.bb.bsize < best->bh.bb.bsize)) + best = b; - /* Link to next buffer */ - b = b->ql.flink; - } - - if (*max_free < best->bh.bb.bsize) - *max_free = best->bh.bb.bsize; + /* Link to next buffer */ + b = b->ql.flink; } - if (*max_free > (bufsize)sizeof( bhead_t )) - *max_free -= sizeof( bhead_t ); -} + if (*max_free < best->bh.bb.bsize) + *max_free = best->bh.bb.bsize; + } -/* ------------------------------------------------------------------------ */ + if (*max_free > (bufsize)sizeof(bhead_t)) + *max_free -= sizeof(bhead_t); +} /* BGET -- Allocate a buffer. */ +static void *bget(kmp_info_t *th, bufsize requested_size) { + thr_data_t *thr = get_thr_data(th); + bufsize size = requested_size; + bfhead_t *b; + void *buf; + int compactseq = 0; + int use_blink = 0; + /* For BestFit */ + bfhead_t *best; + + if (size < 0 || size + sizeof(bhead_t) > MaxSize) { + return NULL; + }; // if -static void * -bget( kmp_info_t *th, bufsize requested_size ) -{ - thr_data_t *thr = get_thr_data( th ); - bufsize size = requested_size; - bfhead_t *b; - void *buf; - int compactseq = 0; - int use_blink = 0; -/* For BestFit */ - bfhead_t *best; - - if ( size < 0 || size + sizeof( bhead_t ) > MaxSize ) { - return NULL; - }; // if - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - - if (size < (bufsize)SizeQ) { /* Need at least room for the */ - size = SizeQ; /* queue links. */ - } - #if defined( SizeQuant ) && ( SizeQuant > 1 ) - size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); - #endif - - size += sizeof(bhead_t); /* Add overhead in allocated buffer - to size required. */ - KMP_DEBUG_ASSERT( size >= 0 ); - KMP_DEBUG_ASSERT( size % SizeQuant == 0 ); - - use_blink = ( thr->mode == bget_mode_lifo ); - - /* If a compact function was provided in the call to bectl(), wrap - a loop around the allocation process to allow compaction to - intervene in case we don't find a suitable buffer in the chain. */ + __kmp_bget_dequeue(th); /* Release any queued buffers */ - for (;;) { - int bin; + if (size < (bufsize)SizeQ) { // Need at least room for the queue links. + size = SizeQ; + } +#if defined(SizeQuant) && (SizeQuant > 1) + size = (size + (SizeQuant - 1)) & (~(SizeQuant - 1)); +#endif - for (bin = bget_get_bin( size ); bin < MAX_BGET_BINS; ++bin) { - /* Link to next buffer */ - b = ( use_blink ? thr->freelist[ bin ].ql.blink : thr->freelist[ bin ].ql.flink ); + size += sizeof(bhead_t); // Add overhead in allocated buffer to size required. + KMP_DEBUG_ASSERT(size >= 0); + KMP_DEBUG_ASSERT(size % SizeQuant == 0); - if (thr->mode == bget_mode_best) { - best = &thr->freelist[ bin ]; + use_blink = (thr->mode == bget_mode_lifo); - /* Scan the free list searching for the first buffer big enough - to hold the requested size buffer. */ + /* If a compact function was provided in the call to bectl(), wrap + a loop around the allocation process to allow compaction to + intervene in case we don't find a suitable buffer in the chain. */ - while (b != &thr->freelist[ bin ]) { - if (b->bh.bb.bsize >= (bufsize) size) { - if ((best == &thr->freelist[ bin ]) || (b->bh.bb.bsize < best->bh.bb.bsize)) { - best = b; - } - } + for (;;) { + int bin; - /* Link to next buffer */ - b = ( use_blink ? b->ql.blink : b->ql.flink ); - } - b = best; + for (bin = bget_get_bin(size); bin < MAX_BGET_BINS; ++bin) { + /* Link to next buffer */ + b = (use_blink ? 
thr->freelist[bin].ql.blink + : thr->freelist[bin].ql.flink); + + if (thr->mode == bget_mode_best) { + best = &thr->freelist[bin]; + + /* Scan the free list searching for the first buffer big enough + to hold the requested size buffer. */ + while (b != &thr->freelist[bin]) { + if (b->bh.bb.bsize >= (bufsize)size) { + if ((best == &thr->freelist[bin]) || + (b->bh.bb.bsize < best->bh.bb.bsize)) { + best = b; } + } - while (b != &thr->freelist[ bin ]) { - if ((bufsize) b->bh.bb.bsize >= (bufsize) size) { - - /* Buffer is big enough to satisfy the request. Allocate it - to the caller. We must decide whether the buffer is large - enough to split into the part given to the caller and a - free buffer that remains on the free list, or whether the - entire buffer should be removed from the free list and - given to the caller in its entirety. We only split the - buffer if enough room remains for a header plus the minimum - quantum of allocation. */ - - if ((b->bh.bb.bsize - (bufsize) size) > (bufsize)(SizeQ + (sizeof(bhead_t)))) { - bhead_t *ba, *bn; - - ba = BH(((char *) b) + (b->bh.bb.bsize - (bufsize) size)); - bn = BH(((char *) ba) + size); - - KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); - - /* Subtract size from length of free block. */ - b->bh.bb.bsize -= (bufsize) size; - - /* Link allocated buffer to the previous free buffer. */ - ba->bb.prevfree = b->bh.bb.bsize; - - /* Plug negative size into user buffer. */ - ba->bb.bsize = -size; - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) - /* Mark buffer after this one not preceded by free block. */ - bn->bb.prevfree = 0; - - /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ - __kmp_bget_remove_from_freelist( b ); - __kmp_bget_insert_into_freelist( thr, b ); + /* Link to next buffer */ + b = (use_blink ? b->ql.blink : b->ql.flink); + } + b = best; + } + + while (b != &thr->freelist[bin]) { + if ((bufsize)b->bh.bb.bsize >= (bufsize)size) { + + // Buffer is big enough to satisfy the request. Allocate it to the + // caller. We must decide whether the buffer is large enough to split + // into the part given to the caller and a free buffer that remains + // on the free list, or whether the entire buffer should be removed + // from the free list and given to the caller in its entirety. We + // only split the buffer if enough room remains for a header plus the + // minimum quantum of allocation. + if ((b->bh.bb.bsize - (bufsize)size) > + (bufsize)(SizeQ + (sizeof(bhead_t)))) { + bhead_t *ba, *bn; + + ba = BH(((char *)b) + (b->bh.bb.bsize - (bufsize)size)); + bn = BH(((char *)ba) + size); + + KMP_DEBUG_ASSERT(bn->bb.prevfree == b->bh.bb.bsize); + + /* Subtract size from length of free block. */ + b->bh.bb.bsize -= (bufsize)size; + + /* Link allocated buffer to the previous free buffer. */ + ba->bb.prevfree = b->bh.bb.bsize; + + /* Plug negative size into user buffer. */ + ba->bb.bsize = -size; + + /* Mark this buffer as owned by this thread. */ + TCW_PTR(ba->bb.bthr, + th); // not an allocated address (do not mark it) + /* Mark buffer after this one not preceded by free block. 
*/ + bn->bb.prevfree = 0; + + // unlink buffer from old freelist, and reinsert into new freelist + __kmp_bget_remove_from_freelist(b); + __kmp_bget_insert_into_freelist(thr, b); #if BufStats - thr->totalloc += (size_t) size; - thr->numget++; /* Increment number of bget() calls */ + thr->totalloc += (size_t)size; + thr->numget++; /* Increment number of bget() calls */ #endif - buf = (void *) ((((char *) ba) + sizeof(bhead_t))); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } else { - bhead_t *ba; + buf = (void *)((((char *)ba) + sizeof(bhead_t))); + KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); + return buf; + } else { + bhead_t *ba; - ba = BH(((char *) b) + b->bh.bb.bsize); + ba = BH(((char *)b) + b->bh.bb.bsize); - KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); + KMP_DEBUG_ASSERT(ba->bb.prevfree == b->bh.bb.bsize); - /* The buffer isn't big enough to split. Give the whole - shebang to the caller and remove it from the free list. */ + /* The buffer isn't big enough to split. Give the whole + shebang to the caller and remove it from the free list. */ - __kmp_bget_remove_from_freelist( b ); + __kmp_bget_remove_from_freelist(b); #if BufStats - thr->totalloc += (size_t) b->bh.bb.bsize; - thr->numget++; /* Increment number of bget() calls */ + thr->totalloc += (size_t)b->bh.bb.bsize; + thr->numget++; /* Increment number of bget() calls */ #endif - /* Negate size to mark buffer allocated. */ - b->bh.bb.bsize = -(b->bh.bb.bsize); - - /* Mark this buffer as owned by this thread. */ - TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark it) - /* Zero the back pointer in the next buffer in memory - to indicate that this buffer is allocated. */ - ba->bb.prevfree = 0; - - /* Give user buffer starting at queue links. */ - buf = (void *) &(b->ql); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } - } - - /* Link to next buffer */ - b = ( use_blink ? b->ql.blink : b->ql.flink ); - } + /* Negate size to mark buffer allocated. */ + b->bh.bb.bsize = -(b->bh.bb.bsize); + + /* Mark this buffer as owned by this thread. */ + TCW_PTR(ba->bb.bthr, th); // not an allocated address (do not mark) + /* Zero the back pointer in the next buffer in memory + to indicate that this buffer is allocated. */ + ba->bb.prevfree = 0; + + /* Give user buffer starting at queue links. */ + buf = (void *)&(b->ql); + KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); + return buf; + } } - /* We failed to find a buffer. If there's a compact function - defined, notify it of the size requested. If it returns - TRUE, try the allocation again. */ - - if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { - break; - } + /* Link to next buffer */ + b = (use_blink ? b->ql.blink : b->ql.flink); + } } - /* No buffer available with requested size free. */ - - /* Don't give up yet -- look in the reserve supply. */ + /* We failed to find a buffer. If there's a compact function defined, + notify it of the size requested. If it returns TRUE, try the allocation + again. */ - if (thr->acqfcn != 0) { - if (size > (bufsize) (thr->exp_incr - sizeof(bhead_t))) { + if ((thr->compfcn == 0) || (!(*thr->compfcn)(size, ++compactseq))) { + break; + } + } - /* Request is too large to fit in a single expansion - block. Try to satisy it by a direct buffer acquisition. */ + /* No buffer available with requested size free. */ - bdhead_t *bdh; + /* Don't give up yet -- look in the reserve supply. 
*/ + if (thr->acqfcn != 0) { + if (size > (bufsize)(thr->exp_incr - sizeof(bhead_t))) { + /* Request is too large to fit in a single expansion block. + Try to satisy it by a direct buffer acquisition. */ + bdhead_t *bdh; - size += sizeof(bdhead_t) - sizeof(bhead_t); + size += sizeof(bdhead_t) - sizeof(bhead_t); - KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", (int) size ) ); + KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", (int)size)); - /* richryan */ - bdh = BDH((*thr->acqfcn)((bufsize) size)); - if (bdh != NULL) { + /* richryan */ + bdh = BDH((*thr->acqfcn)((bufsize)size)); + if (bdh != NULL) { - /* Mark the buffer special by setting the size field - of its header to zero. */ - bdh->bh.bb.bsize = 0; + // Mark the buffer special by setting size field of its header to zero. + bdh->bh.bb.bsize = 0; - /* Mark this buffer as owned by this thread. */ - TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, - // because direct buffer never goes to free list - bdh->bh.bb.prevfree = 0; - bdh->tsize = size; + /* Mark this buffer as owned by this thread. */ + TCW_PTR(bdh->bh.bb.bthr, th); // don't mark buffer as allocated, + // because direct buffer never goes to free list + bdh->bh.bb.prevfree = 0; + bdh->tsize = size; #if BufStats - thr->totalloc += (size_t) size; - thr->numget++; /* Increment number of bget() calls */ - thr->numdget++; /* Direct bget() call count */ + thr->totalloc += (size_t)size; + thr->numget++; /* Increment number of bget() calls */ + thr->numdget++; /* Direct bget() call count */ #endif - buf = (void *) (bdh + 1); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); - return buf; - } + buf = (void *)(bdh + 1); + KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); + return buf; + } - } else { - - /* Try to obtain a new expansion block */ + } else { - void *newpool; + /* Try to obtain a new expansion block */ + void *newpool; - KE_TRACE( 10, ("%%%%%% MALLOCB( %d )\n", (int) thr->exp_incr ) ); + KE_TRACE(10, ("%%%%%% MALLOCB( %d )\n", (int)thr->exp_incr)); - /* richryan */ - newpool = (*thr->acqfcn)((bufsize) thr->exp_incr); - KMP_DEBUG_ASSERT( ((size_t)newpool) % SizeQuant == 0 ); - if (newpool != NULL) { - bpool( th, newpool, thr->exp_incr); - buf = bget( th, requested_size); /* This can't, I say, can't get into a loop. */ - return buf; - } - } + /* richryan */ + newpool = (*thr->acqfcn)((bufsize)thr->exp_incr); + KMP_DEBUG_ASSERT(((size_t)newpool) % SizeQuant == 0); + if (newpool != NULL) { + bpool(th, newpool, thr->exp_incr); + buf = bget( + th, requested_size); /* This can't, I say, can't get into a loop. */ + return buf; + } } + } - /* Still no buffer available */ + /* Still no buffer available */ - return NULL; + return NULL; } /* BGETZ -- Allocate a buffer and clear its contents to zero. We clear the entire contents of the buffer to zero, not just the region requested by the caller. 
*/ -static void * -bgetz( kmp_info_t *th, bufsize size ) -{ - char *buf = (char *) bget( th, size); +static void *bgetz(kmp_info_t *th, bufsize size) { + char *buf = (char *)bget(th, size); - if (buf != NULL) { - bhead_t *b; - bufsize rsize; + if (buf != NULL) { + bhead_t *b; + bufsize rsize; - b = BH(buf - sizeof(bhead_t)); - rsize = -(b->bb.bsize); - if (rsize == 0) { - bdhead_t *bd; + b = BH(buf - sizeof(bhead_t)); + rsize = -(b->bb.bsize); + if (rsize == 0) { + bdhead_t *bd; - bd = BDH(buf - sizeof(bdhead_t)); - rsize = bd->tsize - (bufsize) sizeof(bdhead_t); - } else { - rsize -= sizeof(bhead_t); - } + bd = BDH(buf - sizeof(bdhead_t)); + rsize = bd->tsize - (bufsize)sizeof(bdhead_t); + } else { + rsize -= sizeof(bhead_t); + } - KMP_DEBUG_ASSERT(rsize >= size); + KMP_DEBUG_ASSERT(rsize >= size); - (void) memset(buf, 0, (bufsize) rsize); - } - return ((void *) buf); + (void)memset(buf, 0, (bufsize)rsize); + } + return ((void *)buf); } /* BGETR -- Reallocate a buffer. This is a minimal implementation, @@ -757,392 +694,372 @@ bgetz( kmp_info_t *th, bufsize size ) enhanced to allow the buffer to grow into adjacent free blocks and to avoid moving data unnecessarily. */ -static void * -bgetr( kmp_info_t *th, void *buf, bufsize size) -{ - void *nbuf; - bufsize osize; /* Old size of buffer */ - bhead_t *b; - - nbuf = bget( th, size ); - if ( nbuf == NULL ) { /* Acquire new buffer */ - return NULL; - } - if ( buf == NULL ) { - return nbuf; - } - b = BH(((char *) buf) - sizeof(bhead_t)); - osize = -b->bb.bsize; - if (osize == 0) { - /* Buffer acquired directly through acqfcn. */ - bdhead_t *bd; - - bd = BDH(((char *) buf) - sizeof(bdhead_t)); - osize = bd->tsize - (bufsize) sizeof(bdhead_t); - } else { - osize -= sizeof(bhead_t); - }; - - KMP_DEBUG_ASSERT(osize > 0); - - (void) KMP_MEMCPY((char *) nbuf, (char *) buf, /* Copy the data */ - (size_t) ((size < osize) ? size : osize)); - brel( th, buf ); +static void *bgetr(kmp_info_t *th, void *buf, bufsize size) { + void *nbuf; + bufsize osize; /* Old size of buffer */ + bhead_t *b; + nbuf = bget(th, size); + if (nbuf == NULL) { /* Acquire new buffer */ + return NULL; + } + if (buf == NULL) { return nbuf; + } + b = BH(((char *)buf) - sizeof(bhead_t)); + osize = -b->bb.bsize; + if (osize == 0) { + /* Buffer acquired directly through acqfcn. */ + bdhead_t *bd; + + bd = BDH(((char *)buf) - sizeof(bdhead_t)); + osize = bd->tsize - (bufsize)sizeof(bdhead_t); + } else { + osize -= sizeof(bhead_t); + }; + + KMP_DEBUG_ASSERT(osize > 0); + + (void)KMP_MEMCPY((char *)nbuf, (char *)buf, /* Copy the data */ + (size_t)((size < osize) ? size : osize)); + brel(th, buf); + + return nbuf; } /* BREL -- Release a buffer. */ +static void brel(kmp_info_t *th, void *buf) { + thr_data_t *thr = get_thr_data(th); + bfhead_t *b, *bn; + kmp_info_t *bth; -static void -brel( kmp_info_t *th, void *buf ) -{ - thr_data_t *thr = get_thr_data( th ); - bfhead_t *b, *bn; - kmp_info_t *bth; - - KMP_DEBUG_ASSERT(buf != NULL); - KMP_DEBUG_ASSERT( ((size_t)buf) % SizeQuant == 0 ); + KMP_DEBUG_ASSERT(buf != NULL); + KMP_DEBUG_ASSERT(((size_t)buf) % SizeQuant == 0); - b = BFH(((char *) buf) - sizeof(bhead_t)); + b = BFH(((char *)buf) - sizeof(bhead_t)); - if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? */ - bdhead_t *bdh; + if (b->bh.bb.bsize == 0) { /* Directly-acquired buffer? 
*/ + bdhead_t *bdh; - bdh = BDH(((char *) buf) - sizeof(bdhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); + bdh = BDH(((char *)buf) - sizeof(bdhead_t)); + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); #if BufStats - thr->totalloc -= (size_t) bdh->tsize; - thr->numdrel++; /* Number of direct releases */ - thr->numrel++; /* Increment number of brel() calls */ + thr->totalloc -= (size_t)bdh->tsize; + thr->numdrel++; /* Number of direct releases */ + thr->numrel++; /* Increment number of brel() calls */ #endif /* BufStats */ #ifdef FreeWipe - (void) memset((char *) buf, 0x55, - (size_t) (bdh->tsize - sizeof(bdhead_t))); + (void)memset((char *)buf, 0x55, (size_t)(bdh->tsize - sizeof(bdhead_t))); #endif /* FreeWipe */ - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) bdh ) ); + KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)bdh)); - KMP_DEBUG_ASSERT( thr->relfcn != 0 ); - (*thr->relfcn)((void *) bdh); /* Release it directly. */ - return; - } + KMP_DEBUG_ASSERT(thr->relfcn != 0); + (*thr->relfcn)((void *)bdh); /* Release it directly. */ + return; + } - bth = (kmp_info_t *)( (kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & ~1 ); // clear possible mark before comparison - if ( bth != th ) { - /* Add this buffer to be released by the owning thread later */ - __kmp_bget_enqueue( bth, buf + bth = (kmp_info_t *)((kmp_uintptr_t)TCR_PTR(b->bh.bb.bthr) & + ~1); // clear possible mark before comparison + if (bth != th) { + /* Add this buffer to be released by the owning thread later */ + __kmp_bget_enqueue(bth, buf #ifdef USE_QUEUING_LOCK_FOR_BGET - , __kmp_gtid_from_thread( th ) + , + __kmp_gtid_from_thread(th) #endif - ); - return; - } + ); + return; + } - /* Buffer size must be negative, indicating that the buffer is - allocated. */ - - if (b->bh.bb.bsize >= 0) { - bn = NULL; - } - KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); + /* Buffer size must be negative, indicating that the buffer is allocated. */ + if (b->bh.bb.bsize >= 0) { + bn = NULL; + } + KMP_DEBUG_ASSERT(b->bh.bb.bsize < 0); - /* Back pointer in next buffer must be zero, indicating the - same thing: */ + /* Back pointer in next buffer must be zero, indicating the same thing: */ - KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.bsize)->bb.prevfree == 0); + KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.bsize)->bb.prevfree == 0); #if BufStats - thr->numrel++; /* Increment number of brel() calls */ - thr->totalloc += (size_t) b->bh.bb.bsize; + thr->numrel++; /* Increment number of brel() calls */ + thr->totalloc += (size_t)b->bh.bb.bsize; #endif - /* If the back link is nonzero, the previous buffer is free. */ - - if (b->bh.bb.prevfree != 0) { - /* The previous buffer is free. Consolidate this buffer with it - by adding the length of this buffer to the previous free - buffer. Note that we subtract the size in the buffer being - released, since it's negative to indicate that the buffer is - allocated. */ - - register bufsize size = b->bh.bb.bsize; - - /* Make the previous buffer the one we're working on. */ - KMP_DEBUG_ASSERT(BH((char *) b - b->bh.bb.prevfree)->bb.bsize == b->bh.bb.prevfree); - b = BFH(((char *) b) - b->bh.bb.prevfree); - b->bh.bb.bsize -= size; - - /* unlink the buffer from the old freelist */ - __kmp_bget_remove_from_freelist( b ); - } - else { - /* The previous buffer isn't allocated. Mark this buffer - size as positive (i.e. free) and fall through to place - the buffer on the free list as an isolated free block. 
*/ - - b->bh.bb.bsize = -b->bh.bb.bsize; - } - - /* insert buffer back onto a new freelist */ - __kmp_bget_insert_into_freelist( thr, b ); - - - /* Now we look at the next buffer in memory, located by advancing from - the start of this buffer by its size, to see if that buffer is - free. If it is, we combine this buffer with the next one in - memory, dechaining the second buffer from the free list. */ - - bn = BFH(((char *) b) + b->bh.bb.bsize); - if (bn->bh.bb.bsize > 0) { - - /* The buffer is free. Remove it from the free list and add - its size to that of our buffer. */ - - KMP_DEBUG_ASSERT(BH((char *) bn + bn->bh.bb.bsize)->bb.prevfree == bn->bh.bb.bsize); - - __kmp_bget_remove_from_freelist( bn ); - - b->bh.bb.bsize += bn->bh.bb.bsize; - - /* unlink the buffer from the old freelist, and reinsert it into the new freelist */ - - __kmp_bget_remove_from_freelist( b ); - __kmp_bget_insert_into_freelist( thr, b ); - - /* Finally, advance to the buffer that follows the newly - consolidated free block. We must set its backpointer to the - head of the consolidated free block. We know the next block - must be an allocated block because the process of recombination - guarantees that two free blocks will never be contiguous in - memory. */ - - bn = BFH(((char *) b) + b->bh.bb.bsize); - } + /* If the back link is nonzero, the previous buffer is free. */ + + if (b->bh.bb.prevfree != 0) { + /* The previous buffer is free. Consolidate this buffer with it by adding + the length of this buffer to the previous free buffer. Note that we + subtract the size in the buffer being released, since it's negative to + indicate that the buffer is allocated. */ + register bufsize size = b->bh.bb.bsize; + + /* Make the previous buffer the one we're working on. */ + KMP_DEBUG_ASSERT(BH((char *)b - b->bh.bb.prevfree)->bb.bsize == + b->bh.bb.prevfree); + b = BFH(((char *)b) - b->bh.bb.prevfree); + b->bh.bb.bsize -= size; + + /* unlink the buffer from the old freelist */ + __kmp_bget_remove_from_freelist(b); + } else { + /* The previous buffer isn't allocated. Mark this buffer size as positive + (i.e. free) and fall through to place the buffer on the free list as an + isolated free block. */ + b->bh.bb.bsize = -b->bh.bb.bsize; + } + + /* insert buffer back onto a new freelist */ + __kmp_bget_insert_into_freelist(thr, b); + + /* Now we look at the next buffer in memory, located by advancing from + the start of this buffer by its size, to see if that buffer is + free. If it is, we combine this buffer with the next one in + memory, dechaining the second buffer from the free list. */ + bn = BFH(((char *)b) + b->bh.bb.bsize); + if (bn->bh.bb.bsize > 0) { + + /* The buffer is free. Remove it from the free list and add + its size to that of our buffer. */ + KMP_DEBUG_ASSERT(BH((char *)bn + bn->bh.bb.bsize)->bb.prevfree == + bn->bh.bb.bsize); + + __kmp_bget_remove_from_freelist(bn); + + b->bh.bb.bsize += bn->bh.bb.bsize; + + /* unlink the buffer from the old freelist, and reinsert it into the new + * freelist */ + __kmp_bget_remove_from_freelist(b); + __kmp_bget_insert_into_freelist(thr, b); + + /* Finally, advance to the buffer that follows the newly + consolidated free block. We must set its backpointer to the + head of the consolidated free block. We know the next block + must be an allocated block because the process of recombination + guarantees that two free blocks will never be contiguous in + memory. 
*/ + bn = BFH(((char *)b) + b->bh.bb.bsize); + } #ifdef FreeWipe - (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, - (size_t) (b->bh.bb.bsize - sizeof(bfhead_t))); + (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, + (size_t)(b->bh.bb.bsize - sizeof(bfhead_t))); #endif - KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); - - /* The next buffer is allocated. Set the backpointer in it to point - to this buffer; the previous free buffer in memory. */ + KMP_DEBUG_ASSERT(bn->bh.bb.bsize < 0); - bn->bh.bb.prevfree = b->bh.bb.bsize; + /* The next buffer is allocated. Set the backpointer in it to point + to this buffer; the previous free buffer in memory. */ - /* If a block-release function is defined, and this free buffer - constitutes the entire block, release it. Note that pool_len - is defined in such a way that the test will fail unless all - pool blocks are the same size. */ + bn->bh.bb.prevfree = b->bh.bb.bsize; - if (thr->relfcn != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) - { + /* If a block-release function is defined, and this free buffer + constitutes the entire block, release it. Note that pool_len + is defined in such a way that the test will fail unless all + pool blocks are the same size. */ + if (thr->relfcn != 0 && + b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { #if BufStats - if (thr->numpblk != 1) { /* Do not release the last buffer until finalization time */ + if (thr->numpblk != + 1) { /* Do not release the last buffer until finalization time */ #endif - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); + KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); + KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == + b->bh.bb.bsize); - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist( b ); + /* Unlink the buffer from the free list */ + __kmp_bget_remove_from_freelist(b); - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); + KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); - (*thr->relfcn)(b); + (*thr->relfcn)(b); #if BufStats - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - /* avoid leaving stale last_pool pointer around if it is being dealloced */ - if (thr->last_pool == b) thr->last_pool = 0; - } - else { - thr->last_pool = b; - } -#endif /* BufStats */ + // avoid leaving stale last_pool pointer around if it is being dealloced + if (thr->last_pool == b) + thr->last_pool = 0; + } else { + thr->last_pool = b; } +#endif /* BufStats */ + } } /* BECTL -- Establish automatic pool expansion control */ - -static void -bectl( kmp_info_t *th, bget_compact_t compact, bget_acquire_t acquire, bget_release_t release, bufsize pool_incr) -{ - thr_data_t *thr = get_thr_data( th ); - - thr->compfcn = compact; - thr->acqfcn = acquire; - thr->relfcn = release; - thr->exp_incr = pool_incr; +static void bectl(kmp_info_t *th, bget_compact_t compact, + bget_acquire_t acquire, bget_release_t release, + bufsize pool_incr) { + thr_data_t *thr = get_thr_data(th); + + thr->compfcn = compact; + thr->acqfcn = acquire; + thr->relfcn = 
release; + thr->exp_incr = pool_incr; } /* BPOOL -- Add a region of memory to the buffer pool. */ +static void bpool(kmp_info_t *th, void *buf, bufsize len) { + /* int bin = 0; */ + thr_data_t *thr = get_thr_data(th); + bfhead_t *b = BFH(buf); + bhead_t *bn; -static void -bpool( kmp_info_t *th, void *buf, bufsize len) -{ -/* int bin = 0; */ - thr_data_t *thr = get_thr_data( th ); - bfhead_t *b = BFH(buf); - bhead_t *bn; - - __kmp_bget_dequeue( th ); /* Release any queued buffers */ + __kmp_bget_dequeue(th); /* Release any queued buffers */ #ifdef SizeQuant - len &= ~(SizeQuant - 1); + len &= ~(SizeQuant - 1); #endif - if (thr->pool_len == 0) { - thr->pool_len = len; - } else if (len != thr->pool_len) { - thr->pool_len = -1; - } + if (thr->pool_len == 0) { + thr->pool_len = len; + } else if (len != thr->pool_len) { + thr->pool_len = -1; + } #if BufStats - thr->numpget++; /* Number of block acquisitions */ - thr->numpblk++; /* Number of blocks total */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); + thr->numpget++; /* Number of block acquisitions */ + thr->numpblk++; /* Number of blocks total */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); #endif /* BufStats */ - /* Since the block is initially occupied by a single free buffer, - it had better not be (much) larger than the largest buffer - whose size we can store in bhead.bb.bsize. */ - - KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize) ESent + 1)); - - /* Clear the backpointer at the start of the block to indicate that - there is no free block prior to this one. That blocks - recombination when the first block in memory is released. */ - - b->bh.bb.prevfree = 0; - - /* Create a dummy allocated buffer at the end of the pool. This dummy - buffer is seen when a buffer at the end of the pool is released and - blocks recombination of the last buffer with the dummy buffer at - the end. The length in the dummy buffer is set to the largest - negative number to denote the end of the pool for diagnostic - routines (this specific value is not counted on by the actual - allocation and release functions). */ - - len -= sizeof(bhead_t); - b->bh.bb.bsize = (bufsize) len; - /* Set the owner of this buffer */ - TCW_PTR( b->bh.bb.bthr, (kmp_info_t*)((kmp_uintptr_t)th | 1) ); // mark the buffer as allocated address - - /* Chain the new block to the free list. */ - __kmp_bget_insert_into_freelist( thr, b ); + /* Since the block is initially occupied by a single free buffer, + it had better not be (much) larger than the largest buffer + whose size we can store in bhead.bb.bsize. */ + KMP_DEBUG_ASSERT(len - sizeof(bhead_t) <= -((bufsize)ESent + 1)); + + /* Clear the backpointer at the start of the block to indicate that + there is no free block prior to this one. That blocks + recombination when the first block in memory is released. */ + b->bh.bb.prevfree = 0; + + /* Create a dummy allocated buffer at the end of the pool. This dummy + buffer is seen when a buffer at the end of the pool is released and + blocks recombination of the last buffer with the dummy buffer at + the end. The length in the dummy buffer is set to the largest + negative number to denote the end of the pool for diagnostic + routines (this specific value is not counted on by the actual + allocation and release functions). 
*/ + len -= sizeof(bhead_t); + b->bh.bb.bsize = (bufsize)len; + /* Set the owner of this buffer */ + TCW_PTR(b->bh.bb.bthr, + (kmp_info_t *)((kmp_uintptr_t)th | + 1)); // mark the buffer as allocated address + + /* Chain the new block to the free list. */ + __kmp_bget_insert_into_freelist(thr, b); #ifdef FreeWipe - (void) memset(((char *) b) + sizeof(bfhead_t), 0x55, - (size_t) (len - sizeof(bfhead_t))); + (void)memset(((char *)b) + sizeof(bfhead_t), 0x55, + (size_t)(len - sizeof(bfhead_t))); #endif - bn = BH(((char *) b) + len); - bn->bb.prevfree = (bufsize) len; - /* Definition of ESent assumes two's complement! */ - KMP_DEBUG_ASSERT( (~0) == -1 && (bn != 0) ); + bn = BH(((char *)b) + len); + bn->bb.prevfree = (bufsize)len; + /* Definition of ESent assumes two's complement! */ + KMP_DEBUG_ASSERT((~0) == -1 && (bn != 0)); - bn->bb.bsize = ESent; + bn->bb.bsize = ESent; } -/* ------------------------------------------------------------------------ */ - /* BFREED -- Dump the free lists for this thread. */ - -static void -bfreed( kmp_info_t *th ) -{ - int bin = 0, count = 0; - int gtid = __kmp_gtid_from_thread( th ); - thr_data_t *thr = get_thr_data( th ); +static void bfreed(kmp_info_t *th) { + int bin = 0, count = 0; + int gtid = __kmp_gtid_from_thread(th); + thr_data_t *thr = get_thr_data(th); #if BufStats - __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC " get=%" KMP_INT64_SPEC " rel=%" \ - KMP_INT64_SPEC " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC " prel=%" KMP_INT64_SPEC \ - " dget=%" KMP_INT64_SPEC " drel=%" KMP_INT64_SPEC "\n", - gtid, (kmp_uint64) thr->totalloc, - (kmp_int64) thr->numget, (kmp_int64) thr->numrel, - (kmp_int64) thr->numpblk, - (kmp_int64) thr->numpget, (kmp_int64) thr->numprel, - (kmp_int64) thr->numdget, (kmp_int64) thr->numdrel ); + __kmp_printf_no_lock("__kmp_printpool: T#%d total=%" KMP_UINT64_SPEC + " get=%" KMP_INT64_SPEC " rel=%" KMP_INT64_SPEC + " pblk=%" KMP_INT64_SPEC " pget=%" KMP_INT64_SPEC + " prel=%" KMP_INT64_SPEC " dget=%" KMP_INT64_SPEC + " drel=%" KMP_INT64_SPEC "\n", + gtid, (kmp_uint64)thr->totalloc, (kmp_int64)thr->numget, + (kmp_int64)thr->numrel, (kmp_int64)thr->numpblk, + (kmp_int64)thr->numpget, (kmp_int64)thr->numprel, + (kmp_int64)thr->numdget, (kmp_int64)thr->numdrel); #endif - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b; + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b; - for (b = thr->freelist[ bin ].ql.flink; b != &thr->freelist[ bin ]; b = b->ql.flink) { - bufsize bs = b->bh.bb.bsize; + for (b = thr->freelist[bin].ql.flink; b != &thr->freelist[bin]; + b = b->ql.flink) { + bufsize bs = b->bh.bb.bsize; - KMP_DEBUG_ASSERT( b->ql.blink->ql.flink == b ); - KMP_DEBUG_ASSERT( b->ql.flink->ql.blink == b ); - KMP_DEBUG_ASSERT( bs > 0 ); + KMP_DEBUG_ASSERT(b->ql.blink->ql.flink == b); + KMP_DEBUG_ASSERT(b->ql.flink->ql.blink == b); + KMP_DEBUG_ASSERT(bs > 0); - count += 1; + count += 1; - __kmp_printf_no_lock("__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, (long) bs ); + __kmp_printf_no_lock( + "__kmp_printpool: T#%d Free block: 0x%p size %6ld bytes.\n", gtid, b, + (long)bs); #ifdef FreeWipe - { - char *lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || (memcmp(lerr, lerr + 1, (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - __kmp_printf_no_lock( "__kmp_printpool: T#%d (Contents of above free block have been overstored.)\n", gtid ); - } - } -#endif + { + char *lerr = ((char *)b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) 
&& + ((*lerr != 0x55) || + (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != + 0))) { + __kmp_printf_no_lock("__kmp_printpool: T#%d (Contents of above " + "free block have been overstored.)\n", + gtid); } + } +#endif } + } - if (count == 0) - __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid ); + if (count == 0) + __kmp_printf_no_lock("__kmp_printpool: T#%d No free blocks\n", gtid); } -/* ------------------------------------------------------------------------ */ - #ifdef KMP_DEBUG #if BufStats /* BSTATS -- Return buffer allocation free space statistics. */ - -static void -bstats( kmp_info_t *th, bufsize *curalloc, bufsize *totfree, bufsize *maxfree, long *nget, long *nrel) -{ - int bin = 0; - thr_data_t *thr = get_thr_data( th ); - - *nget = thr->numget; - *nrel = thr->numrel; - *curalloc = (bufsize) thr->totalloc; - *totfree = 0; - *maxfree = -1; - - for (bin = 0; bin < MAX_BGET_BINS; ++bin) { - bfhead_t *b = thr->freelist[ bin ].ql.flink; - - while (b != &thr->freelist[ bin ]) { - KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0); - *totfree += b->bh.bb.bsize; - if (b->bh.bb.bsize > *maxfree) { - *maxfree = b->bh.bb.bsize; - } - b = b->ql.flink; /* Link to next buffer */ - } +static void bstats(kmp_info_t *th, bufsize *curalloc, bufsize *totfree, + bufsize *maxfree, long *nget, long *nrel) { + int bin = 0; + thr_data_t *thr = get_thr_data(th); + + *nget = thr->numget; + *nrel = thr->numrel; + *curalloc = (bufsize)thr->totalloc; + *totfree = 0; + *maxfree = -1; + + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b = thr->freelist[bin].ql.flink; + + while (b != &thr->freelist[bin]) { + KMP_DEBUG_ASSERT(b->bh.bb.bsize > 0); + *totfree += b->bh.bb.bsize; + if (b->bh.bb.bsize > *maxfree) { + *maxfree = b->bh.bb.bsize; + } + b = b->ql.flink; /* Link to next buffer */ } + } } /* BSTATSE -- Return extended statistics */ - -static void -bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *nprel, long *ndget, long *ndrel) -{ - thr_data_t *thr = get_thr_data( th ); - - *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr; - *npool = thr->numpblk; - *npget = thr->numpget; - *nprel = thr->numprel; - *ndget = thr->numdget; - *ndrel = thr->numdrel; +static void bstatse(kmp_info_t *th, bufsize *pool_incr, long *npool, + long *npget, long *nprel, long *ndget, long *ndrel) { + thr_data_t *thr = get_thr_data(th); + + *pool_incr = (thr->pool_len < 0) ? -thr->exp_incr : thr->exp_incr; + *npool = thr->numpblk; + *npget = thr->numpget; + *nprel = thr->numprel; + *ndget = thr->numdget; + *ndrel = thr->numdrel; } #endif /* BufStats */ @@ -1150,59 +1067,56 @@ bstatse( kmp_info_t *th, bufsize *pool_incr, long *npool, long *npget, long *np /* BUFDUMP -- Dump the data in a buffer. This is called with the user data pointer, and backs up to the buffer header. It will dump either a free block or an allocated one. 
*/ - -static void -bufdump( kmp_info_t *th, void *buf ) -{ - bfhead_t *b; - unsigned char *bdump; - bufsize bdlen; - - b = BFH(((char *) buf) - sizeof(bhead_t)); - KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); - if (b->bh.bb.bsize < 0) { - bdump = (unsigned char *) buf; - bdlen = (-b->bh.bb.bsize) - (bufsize) sizeof(bhead_t); - } else { - bdump = (unsigned char *) (((char *) b) + sizeof(bfhead_t)); - bdlen = b->bh.bb.bsize - (bufsize) sizeof(bfhead_t); +static void bufdump(kmp_info_t *th, void *buf) { + bfhead_t *b; + unsigned char *bdump; + bufsize bdlen; + + b = BFH(((char *)buf) - sizeof(bhead_t)); + KMP_DEBUG_ASSERT(b->bh.bb.bsize != 0); + if (b->bh.bb.bsize < 0) { + bdump = (unsigned char *)buf; + bdlen = (-b->bh.bb.bsize) - (bufsize)sizeof(bhead_t); + } else { + bdump = (unsigned char *)(((char *)b) + sizeof(bfhead_t)); + bdlen = b->bh.bb.bsize - (bufsize)sizeof(bfhead_t); + } + + while (bdlen > 0) { + int i, dupes = 0; + bufsize l = bdlen; + char bhex[50], bascii[20]; + + if (l > 16) { + l = 16; } - while (bdlen > 0) { - int i, dupes = 0; - bufsize l = bdlen; - char bhex[50], bascii[20]; - - if (l > 16) { - l = 16; - } - - for (i = 0; i < l; i++) { - (void) KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]); - if (bdump[i] > 0x20 && bdump[i] < 0x7F) - bascii[ i ] = bdump[ i ]; - else - bascii[ i ] = ' '; - } - bascii[i] = 0; - (void) __kmp_printf_no_lock("%-48s %s\n", bhex, bascii); - bdump += l; - bdlen -= l; - while ((bdlen > 16) && (memcmp((char *) (bdump - 16), - (char *) bdump, 16) == 0)) { - dupes++; - bdump += 16; - bdlen -= 16; - } - if (dupes > 1) { - (void) __kmp_printf_no_lock( - " (%d lines [%d bytes] identical to above line skipped)\n", - dupes, dupes * 16); - } else if (dupes == 1) { - bdump -= 16; - bdlen += 16; - } + for (i = 0; i < l; i++) { + (void)KMP_SNPRINTF(bhex + i * 3, sizeof(bhex) - i * 3, "%02X ", bdump[i]); + if (bdump[i] > 0x20 && bdump[i] < 0x7F) + bascii[i] = bdump[i]; + else + bascii[i] = ' '; + } + bascii[i] = 0; + (void)__kmp_printf_no_lock("%-48s %s\n", bhex, bascii); + bdump += l; + bdlen -= l; + while ((bdlen > 16) && + (memcmp((char *)(bdump - 16), (char *)bdump, 16) == 0)) { + dupes++; + bdump += 16; + bdlen -= 16; } + if (dupes > 1) { + (void)__kmp_printf_no_lock( + " (%d lines [%d bytes] identical to above line skipped)\n", dupes, + dupes * 16); + } else if (dupes == 1) { + bdump -= 16; + bdlen += 16; + } + } } /* BPOOLD -- Dump a buffer pool. The buffer headers are always listed. @@ -1210,611 +1124,519 @@ bufdump( kmp_info_t *th, void *buf ) are dumped. If DUMPFREE is nonzero, free blocks are dumped as well. If FreeWipe checking is enabled, free blocks which have been clobbered will always be dumped. 
*/ - -static void -bpoold( kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) -{ - bfhead_t *b = BFH( (char*)buf - sizeof(bhead_t)); - - while (b->bh.bb.bsize != ESent) { - bufsize bs = b->bh.bb.bsize; - - if (bs < 0) { - bs = -bs; - (void) __kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", (long) bs); - if (dumpalloc) { - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } - } else { - const char *lerr = ""; - - KMP_DEBUG_ASSERT(bs > 0); - if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { - lerr = " (Bad free list links)"; - } - (void) __kmp_printf_no_lock("Free block: size %6ld bytes.%s\n", - (long) bs, lerr); +static void bpoold(kmp_info_t *th, void *buf, int dumpalloc, int dumpfree) { + bfhead_t *b = BFH((char *)buf - sizeof(bhead_t)); + + while (b->bh.bb.bsize != ESent) { + bufsize bs = b->bh.bb.bsize; + + if (bs < 0) { + bs = -bs; + (void)__kmp_printf_no_lock("Allocated buffer: size %6ld bytes.\n", + (long)bs); + if (dumpalloc) { + bufdump(th, (void *)(((char *)b) + sizeof(bhead_t))); + } + } else { + const char *lerr = ""; + + KMP_DEBUG_ASSERT(bs > 0); + if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { + lerr = " (Bad free list links)"; + } + (void)__kmp_printf_no_lock("Free block: size %6ld bytes.%s\n", + (long)bs, lerr); #ifdef FreeWipe - lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || - (memcmp(lerr, lerr + 1, - (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - (void) __kmp_printf_no_lock( - "(Contents of above free block have been overstored.)\n"); - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } else + lerr = ((char *)b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) && + ((*lerr != 0x55) || + (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != + 0))) { + (void)__kmp_printf_no_lock( + "(Contents of above free block have been overstored.)\n"); + bufdump(th, (void *)(((char *)b) + sizeof(bhead_t))); + } else #endif - if (dumpfree) { - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - } - } - b = BFH(((char *) b) + bs); + if (dumpfree) { + bufdump(th, (void *)(((char *)b) + sizeof(bhead_t))); + } } + b = BFH(((char *)b) + bs); + } } /* BPOOLV -- Validate a buffer pool. */ +static int bpoolv(kmp_info_t *th, void *buf) { + bfhead_t *b = BFH(buf); -static int -bpoolv( kmp_info_t *th, void *buf ) -{ - bfhead_t *b = BFH(buf); + while (b->bh.bb.bsize != ESent) { + bufsize bs = b->bh.bb.bsize; - while (b->bh.bb.bsize != ESent) { - bufsize bs = b->bh.bb.bsize; - - if (bs < 0) { - bs = -bs; - } else { + if (bs < 0) { + bs = -bs; + } else { #ifdef FreeWipe - char *lerr = ""; + char *lerr = ""; #endif - KMP_DEBUG_ASSERT(bs > 0); - if (bs <= 0) { - return 0; - } - if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { - (void) __kmp_printf_no_lock("Free block: size %6ld bytes. (Bad free list links)\n", - (long) bs); - KMP_DEBUG_ASSERT(0); - return 0; - } + KMP_DEBUG_ASSERT(bs > 0); + if (bs <= 0) { + return 0; + } + if ((b->ql.blink->ql.flink != b) || (b->ql.flink->ql.blink != b)) { + (void)__kmp_printf_no_lock( + "Free block: size %6ld bytes. 
(Bad free list links)\n", (long)bs); + KMP_DEBUG_ASSERT(0); + return 0; + } #ifdef FreeWipe - lerr = ((char *) b) + sizeof(bfhead_t); - if ((bs > sizeof(bfhead_t)) && ((*lerr != 0x55) || - (memcmp(lerr, lerr + 1, - (size_t) (bs - (sizeof(bfhead_t) + 1))) != 0))) { - (void) __kmp_printf_no_lock( - "(Contents of above free block have been overstored.)\n"); - bufdump( th, (void *) (((char *) b) + sizeof(bhead_t))); - KMP_DEBUG_ASSERT(0); - return 0; - } + lerr = ((char *)b) + sizeof(bfhead_t); + if ((bs > sizeof(bfhead_t)) && + ((*lerr != 0x55) || + (memcmp(lerr, lerr + 1, (size_t)(bs - (sizeof(bfhead_t) + 1))) != + 0))) { + (void)__kmp_printf_no_lock( + "(Contents of above free block have been overstored.)\n"); + bufdump(th, (void *)(((char *)b) + sizeof(bhead_t))); + KMP_DEBUG_ASSERT(0); + return 0; + } #endif /* FreeWipe */ - } - b = BFH(((char *) b) + bs); } - return 1; + b = BFH(((char *)b) + bs); + } + return 1; } #endif /* KMP_DEBUG */ -/* ------------------------------------------------------------------------ */ +void __kmp_initialize_bget(kmp_info_t *th) { + KMP_DEBUG_ASSERT(SizeQuant >= sizeof(void *) && (th != 0)); -void -__kmp_initialize_bget( kmp_info_t *th ) -{ - KMP_DEBUG_ASSERT( SizeQuant >= sizeof( void * ) && (th != 0) ); + set_thr_data(th); - set_thr_data( th ); - - bectl( th, (bget_compact_t) 0, (bget_acquire_t) malloc, (bget_release_t) free, - (bufsize) __kmp_malloc_pool_incr ); + bectl(th, (bget_compact_t)0, (bget_acquire_t)malloc, (bget_release_t)free, + (bufsize)__kmp_malloc_pool_incr); } -void -__kmp_finalize_bget( kmp_info_t *th ) -{ - thr_data_t *thr; - bfhead_t *b; +void __kmp_finalize_bget(kmp_info_t *th) { + thr_data_t *thr; + bfhead_t *b; - KMP_DEBUG_ASSERT( th != 0 ); + KMP_DEBUG_ASSERT(th != 0); #if BufStats - thr = (thr_data_t *) th->th.th_local.bget_data; - KMP_DEBUG_ASSERT( thr != NULL ); - b = thr->last_pool; - - /* If a block-release function is defined, and this free buffer - constitutes the entire block, release it. Note that pool_len - is defined in such a way that the test will fail unless all - pool blocks are the same size. */ - - /* Deallocate the last pool if one exists because we no longer do it in brel() */ - if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && - b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) - { - KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.bsize == ESent); - KMP_DEBUG_ASSERT(BH((char *) b + b->bh.bb.bsize)->bb.prevfree == b->bh.bb.bsize); - - /* Unlink the buffer from the free list */ - __kmp_bget_remove_from_freelist( b ); - - KE_TRACE( 10, ("%%%%%% FREE( %p )\n", (void *) b ) ); - - (*thr->relfcn)(b); - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); - } + thr = (thr_data_t *)th->th.th_local.bget_data; + KMP_DEBUG_ASSERT(thr != NULL); + b = thr->last_pool; + + /* If a block-release function is defined, and this free buffer constitutes + the entire block, release it. Note that pool_len is defined in such a way + that the test will fail unless all pool blocks are the same size. 
*/ + + // Deallocate the last pool if one exists because we no longer do it in brel() + if (thr->relfcn != 0 && b != 0 && thr->numpblk != 0 && + b->bh.bb.bsize == (bufsize)(thr->pool_len - sizeof(bhead_t))) { + KMP_DEBUG_ASSERT(b->bh.bb.prevfree == 0); + KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.bsize == ESent); + KMP_DEBUG_ASSERT(BH((char *)b + b->bh.bb.bsize)->bb.prevfree == + b->bh.bb.bsize); + + /* Unlink the buffer from the free list */ + __kmp_bget_remove_from_freelist(b); + + KE_TRACE(10, ("%%%%%% FREE( %p )\n", (void *)b)); + + (*thr->relfcn)(b); + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ + KMP_DEBUG_ASSERT(thr->numpblk == thr->numpget - thr->numprel); + } #endif /* BufStats */ - /* Deallocate bget_data */ - if ( th->th.th_local.bget_data != NULL ) { - __kmp_free( th->th.th_local.bget_data ); - th->th.th_local.bget_data = NULL; - }; // if + /* Deallocate bget_data */ + if (th->th.th_local.bget_data != NULL) { + __kmp_free(th->th.th_local.bget_data); + th->th.th_local.bget_data = NULL; + }; // if } -void -kmpc_set_poolsize( size_t size ) -{ - bectl( __kmp_get_thread(), (bget_compact_t) 0, (bget_acquire_t) malloc, - (bget_release_t) free, (bufsize) size ); +void kmpc_set_poolsize(size_t size) { + bectl(__kmp_get_thread(), (bget_compact_t)0, (bget_acquire_t)malloc, + (bget_release_t)free, (bufsize)size); } -size_t -kmpc_get_poolsize( void ) -{ - thr_data_t *p; +size_t kmpc_get_poolsize(void) { + thr_data_t *p; - p = get_thr_data( __kmp_get_thread() ); + p = get_thr_data(__kmp_get_thread()); - return p->exp_incr; + return p->exp_incr; } -void -kmpc_set_poolmode( int mode ) -{ - thr_data_t *p; +void kmpc_set_poolmode(int mode) { + thr_data_t *p; - if (mode == bget_mode_fifo || mode == bget_mode_lifo || mode == bget_mode_best) { - p = get_thr_data( __kmp_get_thread() ); - p->mode = (bget_mode_t) mode; - } + if (mode == bget_mode_fifo || mode == bget_mode_lifo || + mode == bget_mode_best) { + p = get_thr_data(__kmp_get_thread()); + p->mode = (bget_mode_t)mode; + } } -int -kmpc_get_poolmode( void ) -{ - thr_data_t *p; +int kmpc_get_poolmode(void) { + thr_data_t *p; - p = get_thr_data( __kmp_get_thread() ); + p = get_thr_data(__kmp_get_thread()); - return p->mode; + return p->mode; } -void -kmpc_get_poolstat( size_t *maxmem, size_t *allmem ) -{ - kmp_info_t *th = __kmp_get_thread(); - bufsize a, b; +void kmpc_get_poolstat(size_t *maxmem, size_t *allmem) { + kmp_info_t *th = __kmp_get_thread(); + bufsize a, b; - __kmp_bget_dequeue( th ); /* Release any queued buffers */ + __kmp_bget_dequeue(th); /* Release any queued buffers */ - bcheck( th, &a, &b ); + bcheck(th, &a, &b); - *maxmem = a; - *allmem = b; + *maxmem = a; + *allmem = b; } -void -kmpc_poolprint( void ) -{ - kmp_info_t *th = __kmp_get_thread(); +void kmpc_poolprint(void) { + kmp_info_t *th = __kmp_get_thread(); - __kmp_bget_dequeue( th ); /* Release any queued buffers */ + __kmp_bget_dequeue(th); /* Release any queued buffers */ - bfreed( th ); + bfreed(th); } #endif // #if KMP_USE_BGET -/* ------------------------------------------------------------------------ */ - -void * -kmpc_malloc( size_t size ) -{ - void * ptr; - ptr = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) ); - if( ptr != NULL ) { - // save allocated pointer just before one returned to user - *(void**)ptr = ptr; - ptr = (void**)ptr + 1; - } - return ptr; +void *kmpc_malloc(size_t size) { + void *ptr; + ptr = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); + if (ptr != NULL) { 
+ // save allocated pointer just before one returned to user + *(void **)ptr = ptr; + ptr = (void **)ptr + 1; + } + return ptr; } -#define IS_POWER_OF_TWO(n) (((n)&((n)-1))==0) - -void * -kmpc_aligned_malloc( size_t size, size_t alignment ) -{ - void * ptr; - void * ptr_allocated; - KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too big - if( !IS_POWER_OF_TWO(alignment) ) { - // AC: do we need to issue a warning here? - errno = EINVAL; - return NULL; - } - size = size + sizeof( void* ) + alignment; - ptr_allocated = bget( __kmp_entry_thread(), (bufsize)size ); - if( ptr_allocated != NULL ) { - // save allocated pointer just before one returned to user - ptr = (void*)(((kmp_uintptr_t)ptr_allocated + sizeof( void* ) + alignment) & ~(alignment - 1)); - *((void**)ptr - 1) = ptr_allocated; - } else { - ptr = NULL; - } - return ptr; -} +#define IS_POWER_OF_TWO(n) (((n) & ((n)-1)) == 0) -void * -kmpc_calloc( size_t nelem, size_t elsize ) -{ - void * ptr; - ptr = bgetz( __kmp_entry_thread(), (bufsize) (nelem * elsize + sizeof(ptr)) ); - if( ptr != NULL ) { - // save allocated pointer just before one returned to user - *(void**)ptr = ptr; - ptr = (void**)ptr + 1; - } - return ptr; +void *kmpc_aligned_malloc(size_t size, size_t alignment) { + void *ptr; + void *ptr_allocated; + KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too big + if (!IS_POWER_OF_TWO(alignment)) { + // AC: do we need to issue a warning here? + errno = EINVAL; + return NULL; + } + size = size + sizeof(void *) + alignment; + ptr_allocated = bget(__kmp_entry_thread(), (bufsize)size); + if (ptr_allocated != NULL) { + // save allocated pointer just before one returned to user + ptr = (void *)(((kmp_uintptr_t)ptr_allocated + sizeof(void *) + alignment) & + ~(alignment - 1)); + *((void **)ptr - 1) = ptr_allocated; + } else { + ptr = NULL; + } + return ptr; } -void * -kmpc_realloc( void * ptr, size_t size ) -{ - void * result = NULL; - if ( ptr == NULL ) { - // If pointer is NULL, realloc behaves like malloc. - result = bget( __kmp_entry_thread(), (bufsize)(size + sizeof(ptr)) ); - // save allocated pointer just before one returned to user - if( result != NULL ) { - *(void**)result = result; - result = (void**)result + 1; - } - } else if ( size == 0 ) { - // If size is 0, realloc behaves like free. - // The thread must be registered by the call to kmpc_malloc() or kmpc_calloc() before. - // So it should be safe to call __kmp_get_thread(), not __kmp_entry_thread(). - KMP_ASSERT(*((void**)ptr - 1)); - brel( __kmp_get_thread(), *((void**)ptr - 1) ); - } else { - result = bgetr( __kmp_entry_thread(), *((void**)ptr - 1), (bufsize)(size + sizeof(ptr)) ); - if( result != NULL ) { - *(void**)result = result; - result = (void**)result + 1; - } - }; // if - return result; +void *kmpc_calloc(size_t nelem, size_t elsize) { + void *ptr; + ptr = bgetz(__kmp_entry_thread(), (bufsize)(nelem * elsize + sizeof(ptr))); + if (ptr != NULL) { + // save allocated pointer just before one returned to user + *(void **)ptr = ptr; + ptr = (void **)ptr + 1; + } + return ptr; } -/* NOTE: the library must have already been initialized by a previous allocate */ - -void -kmpc_free( void * ptr ) -{ - if ( ! 
__kmp_init_serial ) { - return; - }; // if - if ( ptr != NULL ) { - kmp_info_t *th = __kmp_get_thread(); - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - // extract allocated pointer and free it - KMP_ASSERT(*((void**)ptr - 1)); - brel( th, *((void**)ptr - 1) ); - }; +void *kmpc_realloc(void *ptr, size_t size) { + void *result = NULL; + if (ptr == NULL) { + // If pointer is NULL, realloc behaves like malloc. + result = bget(__kmp_entry_thread(), (bufsize)(size + sizeof(ptr))); + // save allocated pointer just before one returned to user + if (result != NULL) { + *(void **)result = result; + result = (void **)result + 1; + } + } else if (size == 0) { + // If size is 0, realloc behaves like free. + // The thread must be registered by the call to kmpc_malloc() or + // kmpc_calloc() before. + // So it should be safe to call __kmp_get_thread(), not + // __kmp_entry_thread(). + KMP_ASSERT(*((void **)ptr - 1)); + brel(__kmp_get_thread(), *((void **)ptr - 1)); + } else { + result = bgetr(__kmp_entry_thread(), *((void **)ptr - 1), + (bufsize)(size + sizeof(ptr))); + if (result != NULL) { + *(void **)result = result; + result = (void **)result + 1; + } + }; // if + return result; } +// NOTE: the library must have already been initialized by a previous allocate +void kmpc_free(void *ptr) { + if (!__kmp_init_serial) { + return; + }; // if + if (ptr != NULL) { + kmp_info_t *th = __kmp_get_thread(); + __kmp_bget_dequeue(th); /* Release any queued buffers */ + // extract allocated pointer and free it + KMP_ASSERT(*((void **)ptr - 1)); + brel(th, *((void **)ptr - 1)); + }; +} -/* ------------------------------------------------------------------------ */ - -void * -___kmp_thread_malloc( kmp_info_t *th, size_t size KMP_SRC_LOC_DECL ) -{ - void * ptr; - KE_TRACE( 30, ( - "-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", - th, - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = bget( th, (bufsize) size ); - KE_TRACE( 30, ( "<- __kmp_thread_malloc() returns %p\n", ptr ) ); - return ptr; +void *___kmp_thread_malloc(kmp_info_t *th, size_t size KMP_SRC_LOC_DECL) { + void *ptr; + KE_TRACE(30, ("-> __kmp_thread_malloc( %p, %d ) called from %s:%d\n", th, + (int)size KMP_SRC_LOC_PARM)); + ptr = bget(th, (bufsize)size); + KE_TRACE(30, ("<- __kmp_thread_malloc() returns %p\n", ptr)); + return ptr; } -void * -___kmp_thread_calloc( kmp_info_t *th, size_t nelem, size_t elsize KMP_SRC_LOC_DECL ) -{ - void * ptr; - KE_TRACE( 30, ( - "-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", - th, - (int) nelem, - (int) elsize - KMP_SRC_LOC_PARM - ) ); - ptr = bgetz( th, (bufsize) (nelem * elsize) ); - KE_TRACE( 30, ( "<- __kmp_thread_calloc() returns %p\n", ptr ) ); - return ptr; +void *___kmp_thread_calloc(kmp_info_t *th, size_t nelem, + size_t elsize KMP_SRC_LOC_DECL) { + void *ptr; + KE_TRACE(30, ("-> __kmp_thread_calloc( %p, %d, %d ) called from %s:%d\n", th, + (int)nelem, (int)elsize KMP_SRC_LOC_PARM)); + ptr = bgetz(th, (bufsize)(nelem * elsize)); + KE_TRACE(30, ("<- __kmp_thread_calloc() returns %p\n", ptr)); + return ptr; } -void * -___kmp_thread_realloc( kmp_info_t *th, void *ptr, size_t size KMP_SRC_LOC_DECL ) -{ - KE_TRACE( 30, ( - "-> __kmp_thread_realloc( %p, %p, %d ) called from %s:%d\n", - th, - ptr, - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = bgetr( th, ptr, (bufsize) size ); - KE_TRACE( 30, ( "<- __kmp_thread_realloc() returns %p\n", ptr ) ); - return ptr; +void *___kmp_thread_realloc(kmp_info_t *th, void *ptr, + size_t size KMP_SRC_LOC_DECL) { + KE_TRACE(30, ("-> __kmp_thread_realloc( 
%p, %p, %d ) called from %s:%d\n", th, + ptr, (int)size KMP_SRC_LOC_PARM)); + ptr = bgetr(th, ptr, (bufsize)size); + KE_TRACE(30, ("<- __kmp_thread_realloc() returns %p\n", ptr)); + return ptr; } -void -___kmp_thread_free( kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL ) -{ - KE_TRACE( 30, ( - "-> __kmp_thread_free( %p, %p ) called from %s:%d\n", - th, - ptr - KMP_SRC_LOC_PARM - ) ); - if ( ptr != NULL ) { - __kmp_bget_dequeue( th ); /* Release any queued buffers */ - brel( th, ptr ); - } - KE_TRACE( 30, ( "<- __kmp_thread_free()\n" ) ); +void ___kmp_thread_free(kmp_info_t *th, void *ptr KMP_SRC_LOC_DECL) { + KE_TRACE(30, ("-> __kmp_thread_free( %p, %p ) called from %s:%d\n", th, + ptr KMP_SRC_LOC_PARM)); + if (ptr != NULL) { + __kmp_bget_dequeue(th); /* Release any queued buffers */ + brel(th, ptr); + } + KE_TRACE(30, ("<- __kmp_thread_free()\n")); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -/* - If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes memory leaks, but it - may be useful for debugging memory corruptions, used freed pointers, etc. -*/ +/* If LEAK_MEMORY is defined, __kmp_free() will *not* free memory. It causes + memory leaks, but it may be useful for debugging memory corruptions, used + freed pointers, etc. */ /* #define LEAK_MEMORY */ - -struct kmp_mem_descr { // Memory block descriptor. - void * ptr_allocated; // Pointer returned by malloc(), subject for free(). - size_t size_allocated; // Size of allocated memory block. - void * ptr_aligned; // Pointer to aligned memory, to be used by client code. - size_t size_aligned; // Size of aligned memory block. +struct kmp_mem_descr { // Memory block descriptor. + void *ptr_allocated; // Pointer returned by malloc(), subject for free(). + size_t size_allocated; // Size of allocated memory block. + void *ptr_aligned; // Pointer to aligned memory, to be used by client code. + size_t size_aligned; // Size of aligned memory block. }; typedef struct kmp_mem_descr kmp_mem_descr_t; -/* - Allocate memory on requested boundary, fill allocated memory with 0x00. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -static -void * -___kmp_allocate_align( size_t size, size_t alignment KMP_SRC_LOC_DECL ) -{ - /* - __kmp_allocate() allocates (by call to malloc()) bigger memory block than requested to - return properly aligned pointer. Original pointer returned by malloc() and size of allocated - block is saved in descriptor just before the aligned pointer. This information used by - __kmp_free() -- it has to pass to free() original pointer, not aligned one. - - +---------+------------+-----------------------------------+---------+ - | padding | descriptor | aligned block | padding | - +---------+------------+-----------------------------------+---------+ - ^ ^ - | | - | +- Aligned pointer returned to caller - +- Pointer returned by malloc() - - Aligned block is filled with zeros, paddings are filled with 0xEF. - */ - - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address to return to caller. - kmp_uintptr_t addr_descr; // Address of memory block descriptor. 
- - KE_TRACE( 25, ( - "-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", - (int) size, - (int) alignment - KMP_SRC_LOC_PARM - ) ); - - KMP_DEBUG_ASSERT( alignment < 32 * 1024 ); // Alignment should not be too - KMP_DEBUG_ASSERT( sizeof( void * ) <= sizeof( kmp_uintptr_t ) ); - // Make sure kmp_uintptr_t is enough to store addresses. - - descr.size_aligned = size; - descr.size_allocated = descr.size_aligned + sizeof( kmp_mem_descr_t ) + alignment; +/* Allocate memory on requested boundary, fill allocated memory with 0x00. + NULL is NEVER returned, __kmp_abort() is called in case of memory allocation + error. Must use __kmp_free when freeing memory allocated by this routine! */ +static void *___kmp_allocate_align(size_t size, + size_t alignment KMP_SRC_LOC_DECL) { + /* __kmp_allocate() allocates (by call to malloc()) bigger memory block than + requested to return properly aligned pointer. Original pointer returned + by malloc() and size of allocated block is saved in descriptor just + before the aligned pointer. This information used by __kmp_free() -- it + has to pass to free() original pointer, not aligned one. + + +---------+------------+-----------------------------------+---------+ + | padding | descriptor | aligned block | padding | + +---------+------------+-----------------------------------+---------+ + ^ ^ + | | + | +- Aligned pointer returned to caller + +- Pointer returned by malloc() + + Aligned block is filled with zeros, paddings are filled with 0xEF. */ + + kmp_mem_descr_t descr; + kmp_uintptr_t addr_allocated; // Address returned by malloc(). + kmp_uintptr_t addr_aligned; // Aligned address to return to caller. + kmp_uintptr_t addr_descr; // Address of memory block descriptor. + + KE_TRACE(25, ("-> ___kmp_allocate_align( %d, %d ) called from %s:%d\n", + (int)size, (int)alignment KMP_SRC_LOC_PARM)); + + KMP_DEBUG_ASSERT(alignment < 32 * 1024); // Alignment should not be too + KMP_DEBUG_ASSERT(sizeof(void *) <= sizeof(kmp_uintptr_t)); + // Make sure kmp_uintptr_t is enough to store addresses. 
+ + descr.size_aligned = size; + descr.size_allocated = + descr.size_aligned + sizeof(kmp_mem_descr_t) + alignment; #if KMP_DEBUG - descr.ptr_allocated = _malloc_src_loc( descr.size_allocated, _file_, _line_ ); + descr.ptr_allocated = _malloc_src_loc(descr.size_allocated, _file_, _line_); #else - descr.ptr_allocated = malloc_src_loc( descr.size_allocated KMP_SRC_LOC_PARM ); + descr.ptr_allocated = malloc_src_loc(descr.size_allocated KMP_SRC_LOC_PARM); #endif - KE_TRACE( 10, ( - " malloc( %d ) returned %p\n", - (int) descr.size_allocated, - descr.ptr_allocated - ) ); - if ( descr.ptr_allocated == NULL ) { - KMP_FATAL( OutOfHeapMemory ); - }; - - addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; - addr_aligned = - ( addr_allocated + sizeof( kmp_mem_descr_t ) + alignment ) - & ~ ( alignment - 1 ); - addr_descr = addr_aligned - sizeof( kmp_mem_descr_t ); - - descr.ptr_aligned = (void *) addr_aligned; - - KE_TRACE( 26, ( - " ___kmp_allocate_align: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, - (int) descr.size_allocated, - descr.ptr_aligned, - (int) descr.size_aligned - ) ); - - KMP_DEBUG_ASSERT( addr_allocated <= addr_descr ); - KMP_DEBUG_ASSERT( addr_descr + sizeof( kmp_mem_descr_t ) == addr_aligned ); - KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); - KMP_DEBUG_ASSERT( addr_aligned % alignment == 0 ); + KE_TRACE(10, (" malloc( %d ) returned %p\n", (int)descr.size_allocated, + descr.ptr_allocated)); + if (descr.ptr_allocated == NULL) { + KMP_FATAL(OutOfHeapMemory); + }; + + addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; + addr_aligned = + (addr_allocated + sizeof(kmp_mem_descr_t) + alignment) & ~(alignment - 1); + addr_descr = addr_aligned - sizeof(kmp_mem_descr_t); + + descr.ptr_aligned = (void *)addr_aligned; + + KE_TRACE(26, (" ___kmp_allocate_align: " + "ptr_allocated=%p, size_allocated=%d, " + "ptr_aligned=%p, size_aligned=%d\n", + descr.ptr_allocated, (int)descr.size_allocated, + descr.ptr_aligned, (int)descr.size_aligned)); + + KMP_DEBUG_ASSERT(addr_allocated <= addr_descr); + KMP_DEBUG_ASSERT(addr_descr + sizeof(kmp_mem_descr_t) == addr_aligned); + KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= + addr_allocated + descr.size_allocated); + KMP_DEBUG_ASSERT(addr_aligned % alignment == 0); #ifdef KMP_DEBUG - memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); - // Fill allocated memory block with 0xEF. + memset(descr.ptr_allocated, 0xEF, descr.size_allocated); +// Fill allocated memory block with 0xEF. #endif - memset( descr.ptr_aligned, 0x00, descr.size_aligned ); - // Fill the aligned memory block (which is intended for using by caller) with 0x00. Do not - // put this filling under KMP_DEBUG condition! Many callers expect zeroed memory. (Padding - // bytes remain filled with 0xEF in debugging library.) - * ( (kmp_mem_descr_t *) addr_descr ) = descr; - - KMP_MB(); - - KE_TRACE( 25, ( "<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned ) ); - return descr.ptr_aligned; + memset(descr.ptr_aligned, 0x00, descr.size_aligned); + // Fill the aligned memory block (which is intended for using by caller) with + // 0x00. Do not + // put this filling under KMP_DEBUG condition! Many callers expect zeroed + // memory. (Padding + // bytes remain filled with 0xEF in debugging library.) 
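  // Editor's worked illustration of the layout computed above (hypothetical
  // numbers, assuming a 64-bit build where sizeof(kmp_mem_descr_t) == 32 and
  // alignment == 64; not part of the original patch): if malloc() returned
  // 0x1000, then
  //   addr_aligned = (0x1000 + 32 + 64) & ~63 = 0x1040
  //   addr_descr   = 0x1040 - 32             = 0x1020
  // so the descriptor is written at 0x1020 (the very next statement) and the
  // caller receives 0x1040. The assertions above then hold:
  // 0x1000 <= 0x1020, 0x1020 + 32 == 0x1040, and 0x1040 % 64 == 0.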
+ *((kmp_mem_descr_t *)addr_descr) = descr; + + KMP_MB(); + + KE_TRACE(25, ("<- ___kmp_allocate_align() returns %p\n", descr.ptr_aligned)); + return descr.ptr_aligned; } // func ___kmp_allocate_align - -/* - Allocate memory on cache line boundary, fill allocated memory with 0x00. - Do not call this func directly! Use __kmp_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -void * -___kmp_allocate( size_t size KMP_SRC_LOC_DECL ) -{ - void * ptr; - KE_TRACE( 25, ( "-> __kmp_allocate( %d ) called from %s:%d\n", (int) size KMP_SRC_LOC_PARM ) ); - ptr = ___kmp_allocate_align( size, __kmp_align_alloc KMP_SRC_LOC_PARM ); - KE_TRACE( 25, ( "<- __kmp_allocate() returns %p\n", ptr ) ); - return ptr; +/* Allocate memory on cache line boundary, fill allocated memory with 0x00. + Do not call this func directly! Use __kmp_allocate macro instead. + NULL is NEVER returned, __kmp_abort() is called in case of memory allocation + error. Must use __kmp_free when freeing memory allocated by this routine! */ +void *___kmp_allocate(size_t size KMP_SRC_LOC_DECL) { + void *ptr; + KE_TRACE(25, ("-> __kmp_allocate( %d ) called from %s:%d\n", + (int)size KMP_SRC_LOC_PARM)); + ptr = ___kmp_allocate_align(size, __kmp_align_alloc KMP_SRC_LOC_PARM); + KE_TRACE(25, ("<- __kmp_allocate() returns %p\n", ptr)); + return ptr; } // func ___kmp_allocate -#if (BUILD_MEMORY==FIRST_TOUCH) -void * -__kmp_ft_page_allocate(size_t size) -{ +#if (BUILD_MEMORY == FIRST_TOUCH) +void *__kmp_ft_page_allocate(size_t size) { void *adr, *aadr; const int page_size = KMP_GET_PAGE_SIZE(); - adr = (void *) __kmp_thread_malloc( __kmp_get_thread(), + adr = (void *)__kmp_thread_malloc(__kmp_get_thread(), size + page_size + KMP_PTR_SKIP); - if ( adr == 0 ) - KMP_FATAL( OutOfHeapMemory ); + if (adr == 0) + KMP_FATAL(OutOfHeapMemory); /* check to see if adr is on a page boundary. */ - if ( ( (kmp_uintptr_t) adr & (page_size - 1)) == 0) + if (((kmp_uintptr_t)adr & (page_size - 1)) == 0) /* nothing to do if adr is already on a page boundary. */ aadr = adr; else /* else set aadr to the first page boundary in the allocated memory. */ - aadr = (void *) ( ( (kmp_uintptr_t) adr + page_size) & ~(page_size - 1) ); + aadr = (void *)(((kmp_uintptr_t)adr + page_size) & ~(page_size - 1)); /* the first touch by the owner thread. */ - *((void**)aadr) = adr; + *((void **)aadr) = adr; /* skip the memory space used for storing adr above. */ - return (void*)((char*)aadr + KMP_PTR_SKIP); + return (void *)((char *)aadr + KMP_PTR_SKIP); } #endif -/* - Allocate memory on page boundary, fill allocated memory with 0x00. - Does not call this func directly! Use __kmp_page_allocate macro instead. - NULL is NEVER returned, __kmp_abort() is called in case of memory allocation error. - Must use __kmp_free when freeing memory allocated by this routine! - */ -void * -___kmp_page_allocate( size_t size KMP_SRC_LOC_DECL ) -{ - int page_size = 8 * 1024; - void * ptr; - - KE_TRACE( 25, ( - "-> __kmp_page_allocate( %d ) called from %s:%d\n", - (int) size - KMP_SRC_LOC_PARM - ) ); - ptr = ___kmp_allocate_align( size, page_size KMP_SRC_LOC_PARM ); - KE_TRACE( 25, ( "<- __kmp_page_allocate( %d ) returns %p\n", (int) size, ptr ) ); - return ptr; +/* Allocate memory on page boundary, fill allocated memory with 0x00. + Does not call this func directly! Use __kmp_page_allocate macro instead. 
+ NULL is NEVER returned, __kmp_abort() is called in case of memory allocation + error. Must use __kmp_free when freeing memory allocated by this routine! */ +void *___kmp_page_allocate(size_t size KMP_SRC_LOC_DECL) { + int page_size = 8 * 1024; + void *ptr; + + KE_TRACE(25, ("-> __kmp_page_allocate( %d ) called from %s:%d\n", + (int)size KMP_SRC_LOC_PARM)); + ptr = ___kmp_allocate_align(size, page_size KMP_SRC_LOC_PARM); + KE_TRACE(25, ("<- __kmp_page_allocate( %d ) returns %p\n", (int)size, ptr)); + return ptr; } // ___kmp_page_allocate -/* - Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). - In debug mode, fill the memory block with 0xEF before call to free(). -*/ -void -___kmp_free( void * ptr KMP_SRC_LOC_DECL ) -{ - kmp_mem_descr_t descr; - kmp_uintptr_t addr_allocated; // Address returned by malloc(). - kmp_uintptr_t addr_aligned; // Aligned address passed by caller. - - KE_TRACE( 25, ( "-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM ) ); - KMP_ASSERT( ptr != NULL ); - - descr = * ( kmp_mem_descr_t *) ( (kmp_uintptr_t) ptr - sizeof( kmp_mem_descr_t ) ); - - KE_TRACE( 26, ( " __kmp_free: " - "ptr_allocated=%p, size_allocated=%d, " - "ptr_aligned=%p, size_aligned=%d\n", - descr.ptr_allocated, (int) descr.size_allocated, - descr.ptr_aligned, (int) descr.size_aligned )); - - addr_allocated = (kmp_uintptr_t) descr.ptr_allocated; - addr_aligned = (kmp_uintptr_t) descr.ptr_aligned; - - KMP_DEBUG_ASSERT( addr_aligned % CACHE_LINE == 0 ); - KMP_DEBUG_ASSERT( descr.ptr_aligned == ptr ); - KMP_DEBUG_ASSERT( addr_allocated + sizeof( kmp_mem_descr_t ) <= addr_aligned ); - KMP_DEBUG_ASSERT( descr.size_aligned < descr.size_allocated ); - KMP_DEBUG_ASSERT( addr_aligned + descr.size_aligned <= addr_allocated + descr.size_allocated ); - - #ifdef KMP_DEBUG - memset( descr.ptr_allocated, 0xEF, descr.size_allocated ); - // Fill memory block with 0xEF, it helps catch using freed memory. - #endif - - #ifndef LEAK_MEMORY - KE_TRACE( 10, ( " free( %p )\n", descr.ptr_allocated ) ); - # ifdef KMP_DEBUG - _free_src_loc( descr.ptr_allocated, _file_, _line_ ); - # else - free_src_loc( descr.ptr_allocated KMP_SRC_LOC_PARM ); - # endif - #endif - KMP_MB(); - KE_TRACE( 25, ( "<- __kmp_free() returns\n" ) ); -} // func ___kmp_free +/* Free memory allocated by __kmp_allocate() and __kmp_page_allocate(). + In debug mode, fill the memory block with 0xEF before call to free(). */ +void ___kmp_free(void *ptr KMP_SRC_LOC_DECL) { + kmp_mem_descr_t descr; + kmp_uintptr_t addr_allocated; // Address returned by malloc(). + kmp_uintptr_t addr_aligned; // Aligned address passed by caller. 
-/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ + KE_TRACE(25, + ("-> __kmp_free( %p ) called from %s:%d\n", ptr KMP_SRC_LOC_PARM)); + KMP_ASSERT(ptr != NULL); + + descr = *(kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t)); + + KE_TRACE(26, (" __kmp_free: " + "ptr_allocated=%p, size_allocated=%d, " + "ptr_aligned=%p, size_aligned=%d\n", + descr.ptr_allocated, (int)descr.size_allocated, + descr.ptr_aligned, (int)descr.size_aligned)); + + addr_allocated = (kmp_uintptr_t)descr.ptr_allocated; + addr_aligned = (kmp_uintptr_t)descr.ptr_aligned; + + KMP_DEBUG_ASSERT(addr_aligned % CACHE_LINE == 0); + KMP_DEBUG_ASSERT(descr.ptr_aligned == ptr); + KMP_DEBUG_ASSERT(addr_allocated + sizeof(kmp_mem_descr_t) <= addr_aligned); + KMP_DEBUG_ASSERT(descr.size_aligned < descr.size_allocated); + KMP_DEBUG_ASSERT(addr_aligned + descr.size_aligned <= + addr_allocated + descr.size_allocated); + +#ifdef KMP_DEBUG + memset(descr.ptr_allocated, 0xEF, descr.size_allocated); +// Fill memory block with 0xEF, it helps catch using freed memory. +#endif + +#ifndef LEAK_MEMORY + KE_TRACE(10, (" free( %p )\n", descr.ptr_allocated)); +#ifdef KMP_DEBUG + _free_src_loc(descr.ptr_allocated, _file_, _line_); +#else + free_src_loc(descr.ptr_allocated KMP_SRC_LOC_PARM); +#endif +#endif + KMP_MB(); + KE_TRACE(25, ("<- __kmp_free() returns\n")); +} // func ___kmp_free #if USE_FAST_MEMORY == 3 // Allocate fast memory by first scanning the thread's free lists @@ -1825,254 +1647,257 @@ ___kmp_free( void * ptr KMP_SRC_LOC_DECL ) #define KMP_FREE_LIST_LIMIT 16 // Always use 128 bytes for determining buckets for caching memory blocks -#define DCACHE_LINE 128 - -void * -___kmp_fast_allocate( kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL ) -{ - void * ptr; - int num_lines; - int idx; - int index; - void * alloc_ptr; - size_t alloc_size; - kmp_mem_descr_t * descr; - - KE_TRACE( 25, ( "-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), (int) size KMP_SRC_LOC_PARM ) ); - - num_lines = ( size + DCACHE_LINE - 1 ) / DCACHE_LINE; - idx = num_lines - 1; - KMP_DEBUG_ASSERT( idx >= 0 ); - if ( idx < 2 ) { - index = 0; // idx is [ 0, 1 ], use first free list - num_lines = 2; // 1, 2 cache lines or less than cache line - } else if ( ( idx >>= 2 ) == 0 ) { - index = 1; // idx is [ 2, 3 ], use second free list - num_lines = 4; // 3, 4 cache lines - } else if ( ( idx >>= 2 ) == 0 ) { - index = 2; // idx is [ 4, 15 ], use third free list - num_lines = 16; // 5, 6, ..., 16 cache lines - } else if ( ( idx >>= 2 ) == 0 ) { - index = 3; // idx is [ 16, 63 ], use fourth free list - num_lines = 64; // 17, 18, ..., 64 cache lines - } else { - goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists - } - - ptr = this_thr->th.th_free_lists[index].th_free_list_self; - if ( ptr != NULL ) { - // pop the head of no-sync free list - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( this_thr == - ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); - goto end; - }; - ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); - if ( ptr != NULL ) { - // no-sync free list is empty, use sync free list (filled in by other threads only) - // pop the head of the sync free list, push NULL instead - while ( ! 
KMP_COMPARE_AND_STORE_PTR( - &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL ) ) - { - KMP_CPU_PAUSE(); - ptr = TCR_SYNC_PTR( this_thr->th.th_free_lists[index].th_free_list_sync ); - } - // push the rest of chain into no-sync free list (can be NULL if there was the only block) - this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); - KMP_DEBUG_ASSERT( this_thr == - ((kmp_mem_descr_t *)( (kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t) ))->ptr_aligned ); - goto end; +#define DCACHE_LINE 128 + +void *___kmp_fast_allocate(kmp_info_t *this_thr, size_t size KMP_SRC_LOC_DECL) { + void *ptr; + int num_lines; + int idx; + int index; + void *alloc_ptr; + size_t alloc_size; + kmp_mem_descr_t *descr; + + KE_TRACE(25, ("-> __kmp_fast_allocate( T#%d, %d ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), (int)size KMP_SRC_LOC_PARM)); + + num_lines = (size + DCACHE_LINE - 1) / DCACHE_LINE; + idx = num_lines - 1; + KMP_DEBUG_ASSERT(idx >= 0); + if (idx < 2) { + index = 0; // idx is [ 0, 1 ], use first free list + num_lines = 2; // 1, 2 cache lines or less than cache line + } else if ((idx >>= 2) == 0) { + index = 1; // idx is [ 2, 3 ], use second free list + num_lines = 4; // 3, 4 cache lines + } else if ((idx >>= 2) == 0) { + index = 2; // idx is [ 4, 15 ], use third free list + num_lines = 16; // 5, 6, ..., 16 cache lines + } else if ((idx >>= 2) == 0) { + index = 3; // idx is [ 16, 63 ], use fourth free list + num_lines = 64; // 17, 18, ..., 64 cache lines + } else { + goto alloc_call; // 65 or more cache lines ( > 8KB ), don't use free lists + } + + ptr = this_thr->th.th_free_lists[index].th_free_list_self; + if (ptr != NULL) { + // pop the head of no-sync free list + this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( + this_thr == + ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) + ->ptr_aligned); + goto end; + }; + ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); + if (ptr != NULL) { + // no-sync free list is empty, use sync free list (filled in by other + // threads only) + // pop the head of the sync free list, push NULL instead + while (!KMP_COMPARE_AND_STORE_PTR( + &this_thr->th.th_free_lists[index].th_free_list_sync, ptr, NULL)) { + KMP_CPU_PAUSE(); + ptr = TCR_SYNC_PTR(this_thr->th.th_free_lists[index].th_free_list_sync); } - - alloc_call: - // haven't found block in the free lists, thus allocate it - size = num_lines * DCACHE_LINE; - - alloc_size = size + sizeof( kmp_mem_descr_t ) + DCACHE_LINE; - KE_TRACE( 25, ( "__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with alloc_size %d\n", - __kmp_gtid_from_thread( this_thr ), alloc_size ) ); - alloc_ptr = bget( this_thr, (bufsize) alloc_size ); - - // align ptr to DCACHE_LINE - ptr = (void *)(( ((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + DCACHE_LINE ) & ~( DCACHE_LINE - 1 )); - descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); - - descr->ptr_allocated = alloc_ptr; // remember allocated pointer - // we don't need size_allocated - descr->ptr_aligned = (void *)this_thr; // remember allocating thread - // (it is already saved in bget buffer, - // but we may want to use another allocator in future) - descr->size_aligned = size; - - end: - KE_TRACE( 25, ( "<- __kmp_fast_allocate( T#%d ) returns %p\n", - __kmp_gtid_from_thread( this_thr ), ptr ) ); - return ptr; + // push the rest of chain into no-sync free list (can be NULL if there was + // the only block) + 
this_thr->th.th_free_lists[index].th_free_list_self = *((void **)ptr); + KMP_DEBUG_ASSERT( + this_thr == + ((kmp_mem_descr_t *)((kmp_uintptr_t)ptr - sizeof(kmp_mem_descr_t))) + ->ptr_aligned); + goto end; + } + +alloc_call: + // haven't found block in the free lists, thus allocate it + size = num_lines * DCACHE_LINE; + + alloc_size = size + sizeof(kmp_mem_descr_t) + DCACHE_LINE; + KE_TRACE(25, ("__kmp_fast_allocate: T#%d Calling __kmp_thread_malloc with " + "alloc_size %d\n", + __kmp_gtid_from_thread(this_thr), alloc_size)); + alloc_ptr = bget(this_thr, (bufsize)alloc_size); + + // align ptr to DCACHE_LINE + ptr = (void *)((((kmp_uintptr_t)alloc_ptr) + sizeof(kmp_mem_descr_t) + + DCACHE_LINE) & + ~(DCACHE_LINE - 1)); + descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); + + descr->ptr_allocated = alloc_ptr; // remember allocated pointer + // we don't need size_allocated + descr->ptr_aligned = (void *)this_thr; // remember allocating thread + // (it is already saved in bget buffer, + // but we may want to use another allocator in future) + descr->size_aligned = size; + +end: + KE_TRACE(25, ("<- __kmp_fast_allocate( T#%d ) returns %p\n", + __kmp_gtid_from_thread(this_thr), ptr)); + return ptr; } // func __kmp_fast_allocate // Free fast memory and place it on the thread's free list if it is of // the correct size. -void -___kmp_fast_free( kmp_info_t *this_thr, void * ptr KMP_SRC_LOC_DECL ) -{ - kmp_mem_descr_t * descr; - kmp_info_t * alloc_thr; - size_t size; - size_t idx; - int index; - - KE_TRACE( 25, ( "-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", - __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM ) ); - KMP_ASSERT( ptr != NULL ); - - descr = (kmp_mem_descr_t *)( ((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t) ); - - KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", - (int) descr->size_aligned ) ); - - size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... cache lines - - idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block - if ( idx == size ) { - index = 0; // 2 cache lines - } else if ( ( idx <<= 1 ) == size ) { - index = 1; // 4 cache lines - } else if ( ( idx <<= 2 ) == size ) { - index = 2; // 16 cache lines - } else if ( ( idx <<= 2 ) == size ) { - index = 3; // 64 cache lines +void ___kmp_fast_free(kmp_info_t *this_thr, void *ptr KMP_SRC_LOC_DECL) { + kmp_mem_descr_t *descr; + kmp_info_t *alloc_thr; + size_t size; + size_t idx; + int index; + + KE_TRACE(25, ("-> __kmp_fast_free( T#%d, %p ) called from %s:%d\n", + __kmp_gtid_from_thread(this_thr), ptr KMP_SRC_LOC_PARM)); + KMP_ASSERT(ptr != NULL); + + descr = (kmp_mem_descr_t *)(((kmp_uintptr_t)ptr) - sizeof(kmp_mem_descr_t)); + + KE_TRACE(26, (" __kmp_fast_free: size_aligned=%d\n", + (int)descr->size_aligned)); + + size = descr->size_aligned; // 2, 4, 16, 64, 65, 66, ... 
cache lines + + idx = DCACHE_LINE * 2; // 2 cache lines is minimal size of block + if (idx == size) { + index = 0; // 2 cache lines + } else if ((idx <<= 1) == size) { + index = 1; // 4 cache lines + } else if ((idx <<= 2) == size) { + index = 2; // 16 cache lines + } else if ((idx <<= 2) == size) { + index = 3; // 64 cache lines + } else { + KMP_DEBUG_ASSERT(size > DCACHE_LINE * 64); + goto free_call; // 65 or more cache lines ( > 8KB ) + } + + alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block + if (alloc_thr == this_thr) { + // push block to self no-sync free list, linking previous head (LIFO) + *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; + this_thr->th.th_free_lists[index].th_free_list_self = ptr; + } else { + void *head = this_thr->th.th_free_lists[index].th_free_list_other; + if (head == NULL) { + // Create new free list + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; // mark the tail of the list + descr->size_allocated = (size_t)1; // head of the list keeps its length } else { - KMP_DEBUG_ASSERT( size > DCACHE_LINE * 64 ); - goto free_call; // 65 or more cache lines ( > 8KB ) - } + // need to check existed "other" list's owner thread and size of queue + kmp_mem_descr_t *dsc = + (kmp_mem_descr_t *)((char *)head - sizeof(kmp_mem_descr_t)); + // allocating thread, same for all queue nodes + kmp_info_t *q_th = (kmp_info_t *)(dsc->ptr_aligned); + size_t q_sz = + dsc->size_allocated + 1; // new size in case we add current task + if (q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT) { + // we can add current task to "other" list, no sync needed + *((void **)ptr) = head; + descr->size_allocated = q_sz; + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + } else { + // either queue blocks owner is changing or size limit exceeded + // return old queue to allocating thread (q_th) synchroneously, + // and start new list for alloc_thr's tasks + void *old_ptr; + void *tail = head; + void *next = *((void **)head); + while (next != NULL) { + KMP_DEBUG_ASSERT( + // queue size should decrease by 1 each step through the list + ((kmp_mem_descr_t *)((char *)next - sizeof(kmp_mem_descr_t))) + ->size_allocated + + 1 == + ((kmp_mem_descr_t *)((char *)tail - sizeof(kmp_mem_descr_t))) + ->size_allocated); + tail = next; // remember tail node + next = *((void **)next); + } + KMP_DEBUG_ASSERT(q_th != NULL); + // push block to owner's sync free list + old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); + /* the next pointer must be set before setting free_list to ptr to avoid + exposing a broken list to other threads, even for an instant. 
*/ + *((void **)tail) = old_ptr; - alloc_thr = (kmp_info_t *)descr->ptr_aligned; // get thread owning the block - if ( alloc_thr == this_thr ) { - // push block to self no-sync free list, linking previous head (LIFO) - *((void **)ptr) = this_thr->th.th_free_lists[index].th_free_list_self; - this_thr->th.th_free_lists[index].th_free_list_self = ptr; - } else { - void * head = this_thr->th.th_free_lists[index].th_free_list_other; - if ( head == NULL ) { - // Create new free list - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; // mark the tail of the list - descr->size_allocated = (size_t)1; // head of the list keeps its length - } else { - // need to check existed "other" list's owner thread and size of queue - kmp_mem_descr_t * dsc = (kmp_mem_descr_t *)( (char*)head - sizeof(kmp_mem_descr_t) ); - kmp_info_t * q_th = (kmp_info_t *)(dsc->ptr_aligned); // allocating thread, same for all queue nodes - size_t q_sz = dsc->size_allocated + 1; // new size in case we add current task - if ( q_th == alloc_thr && q_sz <= KMP_FREE_LIST_LIMIT ) { - // we can add current task to "other" list, no sync needed - *((void **)ptr) = head; - descr->size_allocated = q_sz; - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - } else { - // either queue blocks owner is changing or size limit exceeded - // return old queue to allocating thread (q_th) synchroneously, - // and start new list for alloc_thr's tasks - void * old_ptr; - void * tail = head; - void * next = *((void **)head); - while ( next != NULL ) { - KMP_DEBUG_ASSERT( - // queue size should decrease by 1 each step through the list - ((kmp_mem_descr_t*)((char*)next - sizeof(kmp_mem_descr_t)))->size_allocated + 1 == - ((kmp_mem_descr_t*)((char*)tail - sizeof(kmp_mem_descr_t)))->size_allocated ); - tail = next; // remember tail node - next = *((void **)next); - } - KMP_DEBUG_ASSERT( q_th != NULL ); - // push block to owner's sync free list - old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); - /* the next pointer must be set before setting free_list to ptr to avoid - exposing a broken list to other threads, even for an instant. */ - *((void **)tail) = old_ptr; - - while ( ! 
KMP_COMPARE_AND_STORE_PTR( - &q_th->th.th_free_lists[index].th_free_list_sync, - old_ptr, - head ) ) - { - KMP_CPU_PAUSE(); - old_ptr = TCR_PTR( q_th->th.th_free_lists[index].th_free_list_sync ); - *((void **)tail) = old_ptr; - } - - // start new list of not-selt tasks - this_thr->th.th_free_lists[index].th_free_list_other = ptr; - *((void **)ptr) = NULL; - descr->size_allocated = (size_t)1; // head of queue keeps its length - } + while (!KMP_COMPARE_AND_STORE_PTR( + &q_th->th.th_free_lists[index].th_free_list_sync, old_ptr, head)) { + KMP_CPU_PAUSE(); + old_ptr = TCR_PTR(q_th->th.th_free_lists[index].th_free_list_sync); + *((void **)tail) = old_ptr; } + + // start new list of not-selt tasks + this_thr->th.th_free_lists[index].th_free_list_other = ptr; + *((void **)ptr) = NULL; + descr->size_allocated = (size_t)1; // head of queue keeps its length + } } - goto end; + } + goto end; - free_call: - KE_TRACE(25, ( "__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", - __kmp_gtid_from_thread( this_thr), size ) ); - __kmp_bget_dequeue( this_thr ); /* Release any queued buffers */ - brel( this_thr, descr->ptr_allocated ); +free_call: + KE_TRACE(25, ("__kmp_fast_free: T#%d Calling __kmp_thread_free for size %d\n", + __kmp_gtid_from_thread(this_thr), size)); + __kmp_bget_dequeue(this_thr); /* Release any queued buffers */ + brel(this_thr, descr->ptr_allocated); - end: - KE_TRACE( 25, ( "<- __kmp_fast_free() returns\n" ) ); +end: + KE_TRACE(25, ("<- __kmp_fast_free() returns\n")); } // func __kmp_fast_free - // Initialize the thread free lists related to fast memory // Only do this when a thread is initially created. -void -__kmp_initialize_fast_memory( kmp_info_t *this_thr ) -{ - KE_TRACE(10, ( "__kmp_initialize_fast_memory: Called from th %p\n", this_thr ) ); +void __kmp_initialize_fast_memory(kmp_info_t *this_thr) { + KE_TRACE(10, ("__kmp_initialize_fast_memory: Called from th %p\n", this_thr)); - memset ( this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof( kmp_free_list_t ) ); + memset(this_thr->th.th_free_lists, 0, NUM_LISTS * sizeof(kmp_free_list_t)); } // Free the memory in the thread free lists related to fast memory // Only do this when a thread is being reaped (destroyed). -void -__kmp_free_fast_memory( kmp_info_t *th ) -{ - // Suppose we use BGET underlying allocator, walk through its structures... - int bin; - thr_data_t * thr = get_thr_data( th ); - void ** lst = NULL; - - KE_TRACE(5, ( "__kmp_free_fast_memory: Called T#%d\n", - __kmp_gtid_from_thread( th ) ) ); - - __kmp_bget_dequeue( th ); // Release any queued buffers - - // Dig through free lists and extract all allocated blocks - for ( bin = 0; bin < MAX_BGET_BINS; ++bin ) { - bfhead_t * b = thr->freelist[ bin ].ql.flink; - while ( b != &thr->freelist[ bin ] ) { - if ( (kmp_uintptr_t)b->bh.bb.bthr & 1 ) { // if the buffer is an allocated address? 
- *((void**)b) = lst; // link the list (override bthr, but keep flink yet) - lst = (void**)b; // push b into lst - } - b = b->ql.flink; // get next buffer - } - } - while ( lst != NULL ) { - void * next = *lst; - KE_TRACE(10, ( "__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", - lst, next, th, __kmp_gtid_from_thread( th ) ) ); - (*thr->relfcn)(lst); - #if BufStats - // count blocks to prevent problems in __kmp_finalize_bget() - thr->numprel++; /* Nr of expansion block releases */ - thr->numpblk--; /* Total number of blocks */ - #endif - lst = (void**)next; +void __kmp_free_fast_memory(kmp_info_t *th) { + // Suppose we use BGET underlying allocator, walk through its structures... + int bin; + thr_data_t *thr = get_thr_data(th); + void **lst = NULL; + + KE_TRACE( + 5, ("__kmp_free_fast_memory: Called T#%d\n", __kmp_gtid_from_thread(th))); + + __kmp_bget_dequeue(th); // Release any queued buffers + + // Dig through free lists and extract all allocated blocks + for (bin = 0; bin < MAX_BGET_BINS; ++bin) { + bfhead_t *b = thr->freelist[bin].ql.flink; + while (b != &thr->freelist[bin]) { + if ((kmp_uintptr_t)b->bh.bb.bthr & 1) { // the buffer is allocated address + *((void **)b) = + lst; // link the list (override bthr, but keep flink yet) + lst = (void **)b; // push b into lst + } + b = b->ql.flink; // get next buffer } + } + while (lst != NULL) { + void *next = *lst; + KE_TRACE(10, ("__kmp_free_fast_memory: freeing %p, next=%p th %p (%d)\n", + lst, next, th, __kmp_gtid_from_thread(th))); + (*thr->relfcn)(lst); +#if BufStats + // count blocks to prevent problems in __kmp_finalize_bget() + thr->numprel++; /* Nr of expansion block releases */ + thr->numpblk--; /* Total number of blocks */ +#endif + lst = (void **)next; + } - KE_TRACE(5, ( "__kmp_free_fast_memory: Freed T#%d\n", - __kmp_gtid_from_thread( th ) ) ); + KE_TRACE( + 5, ("__kmp_free_fast_memory: Freed T#%d\n", __kmp_gtid_from_thread(th))); } #endif // USE_FAST_MEMORY diff --git a/openmp/runtime/src/kmp_atomic.cpp b/openmp/runtime/src/kmp_atomic.cpp index 3831165..af0ce21 100644 --- a/openmp/runtime/src/kmp_atomic.cpp +++ b/openmp/runtime/src/kmp_atomic.cpp @@ -14,17 +14,19 @@ #include "kmp_atomic.h" -#include "kmp.h" // TRUE, asm routines prototypes +#include "kmp.h" // TRUE, asm routines prototypes typedef unsigned char uchar; typedef unsigned short ushort; /*! @defgroup ATOMIC_OPS Atomic Operations -These functions are used for implementing the many different varieties of atomic operations. +These functions are used for implementing the many different varieties of atomic +operations. -The compiler is at liberty to inline atomic operations that are naturally supported -by the target architecture. For instance on IA-32 architecture an atomic like this can be inlined +The compiler is at liberty to inline atomic operations that are naturally +supported by the target architecture. For instance on IA-32 architecture an +atomic like this can be inlined @code static int s = 0; #pragma omp atomic @@ -32,11 +34,12 @@ static int s = 0; @endcode using the single instruction: `lock; incl s` -However the runtime does provide entrypoints for these operations to support compilers that choose -not to inline them. (For instance, `__kmpc_atomic_fixed4_add` could be used to perform the -increment above.) +However the runtime does provide entrypoints for these operations to support +compilers that choose not to inline them. (For instance, +`__kmpc_atomic_fixed4_add` could be used to perform the increment above.) 
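As a rough sketch only (this is not generated compiler output; `loc` and `gtid`
stand for the source-location descriptor and global thread id that the compiler
arranges to pass), the increment of `s` above could be lowered to a call of that
entrypoint instead of the inlined `lock; incl`:
@code
static int s = 0;
// possible lowering of the atomic increment via the runtime entrypoint
__kmpc_atomic_fixed4_add(loc, gtid, (kmp_int32 *)&s, 1);
@endcode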
-The names of the functions are encoded by using the data type name and the operation name, as in these tables. +The names of the functions are encoded by using the data type name and the +operation name, as in these tables. Data Type | Data type encoding -----------|--------------- @@ -75,14 +78,17 @@ minimum | min .neqv. | neqv
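Combining the two encodings above gives the exported name. For example, following
the general update form shown below, a subtraction on a signed 4-byte integer and
a multiplication on an 8-byte real are declared as:
@code
void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs, kmp_int32 rhs);
void __kmpc_atomic_float8_mul(ident_t *id_ref, int gtid, kmp_real64 *lhs, kmp_real64 rhs);
@endcode
Both of these appear in the full lists later in this file.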
-For non-commutative operations, `_rev` can also be added for the reversed operation. -For the functions that capture the result, the suffix `_cpt` is added. +For non-commutative operations, `_rev` can also be added for the reversed +operation. For the functions that capture the result, the suffix `_cpt` is +added. Update Functions ================ -The general form of an atomic function that just performs an update (without a `capture`) +The general form of an atomic function that just performs an update (without a +`capture`) @code -void __kmpc_atomic__( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); +void __kmpc_atomic__( ident_t *id_ref, int gtid, TYPE * +lhs, TYPE rhs ); @endcode @param ident_t a pointer to source location @param gtid the global thread id @@ -91,32 +97,36 @@ void __kmpc_atomic__( ident_t *id_ref, int gtid, TYPE * lhs `capture` functions =================== -The capture functions perform an atomic update and return a result, which is either the value -before the capture, or that after. They take an additional argument to determine which result is returned. +The capture functions perform an atomic update and return a result, which is +either the value before the capture, or that after. They take an additional +argument to determine which result is returned. Their general form is therefore @code -TYPE __kmpc_atomic___cpt( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ); +TYPE __kmpc_atomic___cpt( ident_t *id_ref, int gtid, TYPE * +lhs, TYPE rhs, int flag ); @endcode @param ident_t a pointer to source location @param gtid the global thread id @param lhs a pointer to the left operand @param rhs the right operand -@param flag one if the result is to be captured *after* the operation, zero if captured *before*. +@param flag one if the result is to be captured *after* the operation, zero if +captured *before*. -The one set of exceptions to this is the `complex` type where the value is not returned, -rather an extra argument pointer is passed. +The one set of exceptions to this is the `complex` type where the value +is not returned, rather an extra argument pointer is passed. They look like @code -void __kmpc_atomic_cmplx4__cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); +void __kmpc_atomic_cmplx4__cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * +lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); @endcode Read and Write Operations ========================= -The OpenMP* standard now supports atomic operations that simply ensure that the -value is read or written atomically, with no modification -performed. In many cases on IA-32 architecture these operations can be inlined since -the architecture guarantees that no tearing occurs on aligned objects +The OpenMP* standard now supports atomic operations that simply +ensure that the value is read or written atomically, with no modification +performed. In many cases on IA-32 architecture these operations can be inlined +since the architecture guarantees that no tearing occurs on aligned objects accessed with a single memory operation of up to 64 bits in size. 
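As an informal illustration of why such inlining is possible (not a statement
about what any particular compiler emits, and the helper name here is purely
illustrative): an aligned 4-byte atomic read is just an ordinary load on these
architectures, whereas objects wider than 64 bits cannot be accessed in one
memory operation and typically go through the runtime entrypoints below.
@code
// conceptual inline form of an atomic read of an aligned 32-bit location
kmp_int32 read_int32_atomically(volatile kmp_int32 *loc) {
  return *loc; // a single aligned load cannot tear on IA-32
}
@endcode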
The general form of the read operations is @@ -126,7 +136,8 @@ TYPE __kmpc_atomic__rd ( ident_t *id_ref, int gtid, TYPE * loc ); For the write operations the form is @code -void __kmpc_atomic__wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ); +void __kmpc_atomic__wr ( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs +); @endcode Full list of functions @@ -135,7 +146,8 @@ This leads to the generation of 376 atomic functions, as follows. Functons for integers --------------------- -There are versions here for integers of size 1,2,4 and 8 bytes both signed and unsigned (where that matters). +There are versions here for integers of size 1,2,4 and 8 bytes both signed and +unsigned (where that matters). @code __kmpc_atomic_fixed1_add __kmpc_atomic_fixed1_add_cpt @@ -377,8 +389,8 @@ There are versions here for integers of size 1,2,4 and 8 bytes both signed and u Functions for floating point ---------------------------- -There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes. -(Ten byte floats are used by X87, but are now rare). +There are versions here for floating point numbers of size 4, 8, 10 and 16 +bytes. (Ten byte floats are used by X87, but are now rare). @code __kmpc_atomic_float4_add __kmpc_atomic_float4_add_cpt @@ -472,9 +484,10 @@ There are versions here for floating point numbers of size 4, 8, 10 and 16 bytes Functions for Complex types --------------------------- -Functions for complex types whose component floating point variables are of size 4,8,10 or 16 bytes. -The names here are based on the size of the component float, *not* the size of the complex type. So -`__kmpc_atomc_cmplx8_add` is an operation on a `complex` or `complex(kind=8)`, *not* `complex`. +Functions for complex types whose component floating point variables are of size +4,8,10 or 16 bytes. The names here are based on the size of the component float, +*not* the size of the complex type. So `__kmpc_atomc_cmplx8_add` is an operation +on a `complex` or `complex(kind=8)`, *not* `complex`. 
@code __kmpc_atomic_cmplx4_add @@ -553,104 +566,155 @@ The names here are based on the size of the component float, *not* the size of t */ #ifndef KMP_GOMP_COMPAT -int __kmp_atomic_mode = 1; // Intel perf +int __kmp_atomic_mode = 1; // Intel perf #else -int __kmp_atomic_mode = 2; // GOMP compatibility +int __kmp_atomic_mode = 2; // GOMP compatibility #endif /* KMP_GOMP_COMPAT */ KMP_ALIGN(128) -kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ -kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ -kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ -kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ -kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ -kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ -kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ -kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ -kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ -kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ - - -/* - 2007-03-02: - Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a - bug on *_32 and *_32e. This is just a temporary workaround for the problem. - It seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG - routines in assembler language. 
-*/ +// Control access to all user coded atomics in Gnu compat mode +kmp_atomic_lock_t __kmp_atomic_lock; +// Control access to all user coded atomics for 1-byte fixed data types +kmp_atomic_lock_t __kmp_atomic_lock_1i; +// Control access to all user coded atomics for 2-byte fixed data types +kmp_atomic_lock_t __kmp_atomic_lock_2i; +// Control access to all user coded atomics for 4-byte fixed data types +kmp_atomic_lock_t __kmp_atomic_lock_4i; +// Control access to all user coded atomics for kmp_real32 data type +kmp_atomic_lock_t __kmp_atomic_lock_4r; +// Control access to all user coded atomics for 8-byte fixed data types +kmp_atomic_lock_t __kmp_atomic_lock_8i; +// Control access to all user coded atomics for kmp_real64 data type +kmp_atomic_lock_t __kmp_atomic_lock_8r; +// Control access to all user coded atomics for complex byte data type +kmp_atomic_lock_t __kmp_atomic_lock_8c; +// Control access to all user coded atomics for long double data type +kmp_atomic_lock_t __kmp_atomic_lock_10r; +// Control access to all user coded atomics for _Quad data type +kmp_atomic_lock_t __kmp_atomic_lock_16r; +// Control access to all user coded atomics for double complex data type +kmp_atomic_lock_t __kmp_atomic_lock_16c; +// Control access to all user coded atomics for long double complex type +kmp_atomic_lock_t __kmp_atomic_lock_20c; +// Control access to all user coded atomics for _Quad complex data type +kmp_atomic_lock_t __kmp_atomic_lock_32c; + +/* 2007-03-02: + Without "volatile" specifier in OP_CMPXCHG and MIN_MAX_CMPXCHG we have a bug + on *_32 and *_32e. This is just a temporary workaround for the problem. It + seems the right solution is writing OP_CMPXCHG and MIN_MAX_CMPXCHG routines + in assembler language. */ #define KMP_ATOMIC_VOLATILE volatile -#if ( KMP_ARCH_X86 ) && KMP_HAVE_QUAD - - static inline void operator +=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( Quad_a4_t & lhs, Quad_a4_t & rhs ) { lhs.q /= rhs.q; }; - static inline bool operator < ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q < rhs.q; } - static inline bool operator > ( Quad_a4_t & lhs, Quad_a4_t & rhs ) { return lhs.q > rhs.q; } - - static inline void operator +=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( Quad_a16_t & lhs, Quad_a16_t & rhs ) { lhs.q /= rhs.q; }; - static inline bool operator < ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q < rhs.q; } - static inline bool operator > ( Quad_a16_t & lhs, Quad_a16_t & rhs ) { return lhs.q > rhs.q; } - - static inline void operator +=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( kmp_cmplx128_a4_t & lhs, kmp_cmplx128_a4_t & rhs ) { lhs.q /= rhs.q; }; - - static inline void operator +=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q += rhs.q; }; - static inline void operator -=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & 
rhs ) { lhs.q -= rhs.q; }; - static inline void operator *=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q *= rhs.q; }; - static inline void operator /=( kmp_cmplx128_a16_t & lhs, kmp_cmplx128_a16_t & rhs ) { lhs.q /= rhs.q; }; +#if (KMP_ARCH_X86) && KMP_HAVE_QUAD + +static inline void operator+=(Quad_a4_t &lhs, Quad_a4_t &rhs) { + lhs.q += rhs.q; +}; +static inline void operator-=(Quad_a4_t &lhs, Quad_a4_t &rhs) { + lhs.q -= rhs.q; +}; +static inline void operator*=(Quad_a4_t &lhs, Quad_a4_t &rhs) { + lhs.q *= rhs.q; +}; +static inline void operator/=(Quad_a4_t &lhs, Quad_a4_t &rhs) { + lhs.q /= rhs.q; +}; +static inline bool operator<(Quad_a4_t &lhs, Quad_a4_t &rhs) { + return lhs.q < rhs.q; +} +static inline bool operator>(Quad_a4_t &lhs, Quad_a4_t &rhs) { + return lhs.q > rhs.q; +} + +static inline void operator+=(Quad_a16_t &lhs, Quad_a16_t &rhs) { + lhs.q += rhs.q; +}; +static inline void operator-=(Quad_a16_t &lhs, Quad_a16_t &rhs) { + lhs.q -= rhs.q; +}; +static inline void operator*=(Quad_a16_t &lhs, Quad_a16_t &rhs) { + lhs.q *= rhs.q; +}; +static inline void operator/=(Quad_a16_t &lhs, Quad_a16_t &rhs) { + lhs.q /= rhs.q; +}; +static inline bool operator<(Quad_a16_t &lhs, Quad_a16_t &rhs) { + return lhs.q < rhs.q; +} +static inline bool operator>(Quad_a16_t &lhs, Quad_a16_t &rhs) { + return lhs.q > rhs.q; +} + +static inline void operator+=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { + lhs.q += rhs.q; +}; +static inline void operator-=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { + lhs.q -= rhs.q; +}; +static inline void operator*=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { + lhs.q *= rhs.q; +}; +static inline void operator/=(kmp_cmplx128_a4_t &lhs, kmp_cmplx128_a4_t &rhs) { + lhs.q /= rhs.q; +}; + +static inline void operator+=(kmp_cmplx128_a16_t &lhs, + kmp_cmplx128_a16_t &rhs) { + lhs.q += rhs.q; +}; +static inline void operator-=(kmp_cmplx128_a16_t &lhs, + kmp_cmplx128_a16_t &rhs) { + lhs.q -= rhs.q; +}; +static inline void operator*=(kmp_cmplx128_a16_t &lhs, + kmp_cmplx128_a16_t &rhs) { + lhs.q *= rhs.q; +}; +static inline void operator/=(kmp_cmplx128_a16_t &lhs, + kmp_cmplx128_a16_t &rhs) { + lhs.q /= rhs.q; +}; #endif -/* ------------------------------------------------------------------------ */ -/* ATOMIC implementation routines */ -/* one routine for each operation and operand type */ -/* ------------------------------------------------------------------------ */ - +// ATOMIC implementation routines ----------------------------------------- +// One routine for each operation and operand type. // All routines declarations looks like // void __kmpc_atomic_RTYPE_OP( ident_t*, int, TYPE *lhs, TYPE rhs ); -// ------------------------------------------------------------------------ -#define KMP_CHECK_GTID \ - if ( gtid == KMP_GTID_UNKNOWN ) { \ - gtid = __kmp_entry_gtid(); \ - } // check and get gtid when needed +#define KMP_CHECK_GTID \ + if (gtid == KMP_GTID_UNKNOWN) { \ + gtid = __kmp_entry_gtid(); \ + } // check and get gtid when needed // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type -#define ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ + RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ + TYPE *lhs, TYPE rhs) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); // ------------------------------------------------------------------------ // Lock variables used for critical sections for various size operands -#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat -#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char -#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short -#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int -#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float -#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int -#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double -#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex -#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double -#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad -#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex -#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex -#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex +#define ATOMIC_LOCK0 __kmp_atomic_lock // all types, for Gnu compat +#define ATOMIC_LOCK1i __kmp_atomic_lock_1i // char +#define ATOMIC_LOCK2i __kmp_atomic_lock_2i // short +#define ATOMIC_LOCK4i __kmp_atomic_lock_4i // long int +#define ATOMIC_LOCK4r __kmp_atomic_lock_4r // float +#define ATOMIC_LOCK8i __kmp_atomic_lock_8i // long long int +#define ATOMIC_LOCK8r __kmp_atomic_lock_8r // double +#define ATOMIC_LOCK8c __kmp_atomic_lock_8c // float complex +#define ATOMIC_LOCK10r __kmp_atomic_lock_10r // long double +#define ATOMIC_LOCK16r __kmp_atomic_lock_16r // _Quad +#define ATOMIC_LOCK16c __kmp_atomic_lock_16c // double complex +#define ATOMIC_LOCK20c __kmp_atomic_lock_20c // long double complex +#define ATOMIC_LOCK32c __kmp_atomic_lock_32c // _Quad complex // ------------------------------------------------------------------------ // Operation on *lhs, rhs bound by critical section @@ -658,12 +722,12 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*lhs) OP (rhs); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define OP_CRITICAL(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + (*lhs) OP(rhs); \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------ // For GNU compatibility, we may need to use a critical section, @@ -686,23 +750,22 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // If FLAG is 0, then we are relying on dead code elimination by the build // compiler to get rid of the useless block of code, and save a needless // branch at runtime. 
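// Illustration of the FLAG mechanism (see the OP_GOMP_CRITICAL definition just
// below): with a literal 0 the guard instantiates as
//     if ((0) && (__kmp_atomic_mode == 2)) { ... }
// which the build compiler removes as dead code, while a non-zero FLAG such as
// KMP_ARCH_X86 leaves only a runtime test of __kmp_atomic_mode on the builds
// where the GOMP-compatible critical-section path can actually be selected.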
-// #ifdef KMP_GOMP_COMPAT -# define OP_GOMP_CRITICAL(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL( OP, 0 ); \ - return; \ - } -# else -# define OP_GOMP_CRITICAL(OP,FLAG) +#define OP_GOMP_CRITICAL(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP, 0); \ + return; \ + } +#else +#define OP_GOMP_CRITICAL(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ #if KMP_MIC -# define KMP_DO_PAUSE _mm_delay_32( 1 ) +#define KMP_DO_PAUSE _mm_delay_32(1) #else -# define KMP_DO_PAUSE KMP_CPU_PAUSE() +#define KMP_DO_PAUSE KMP_CPU_PAUSE() #endif /* KMP_MIC */ // ------------------------------------------------------------------------ @@ -710,51 +773,48 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // TYPE - operands' type // BITS - size in bits, used to distinguish low level calls // OP - operator -#define OP_CMPXCHG(TYPE,BITS,OP) \ - { \ - TYPE old_value, new_value; \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - old_value = *(TYPE volatile *)lhs; \ - new_value = old_value OP rhs; \ - } \ - } +#define OP_CMPXCHG(TYPE, BITS, OP) \ + { \ + TYPE old_value, new_value; \ + old_value = *(TYPE volatile *)lhs; \ + new_value = old_value OP rhs; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_DO_PAUSE; \ + \ + old_value = *(TYPE volatile *)lhs; \ + new_value = old_value OP rhs; \ + } \ + } #if USE_CMPXCHG_FIX // 2007-06-25: -// workaround for C78287 (complex(kind=4) data type) -// lin_32, lin_32e, win_32 and win_32e are affected (I verified the asm) -// Compiler ignores the volatile qualifier of the temp_val in the OP_CMPXCHG macro. -// This is a problem of the compiler. -// Related tracker is C76005, targeted to 11.0. -// I verified the asm of the workaround. -#define OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ - { \ - struct _sss { \ - TYPE cmp; \ - kmp_int##BITS *vvv; \ - }; \ - struct _sss old_value, new_value; \ - old_value.vvv = ( kmp_int##BITS * )&old_value.cmp; \ - new_value.vvv = ( kmp_int##BITS * )&new_value.cmp; \ - *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ - *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - *old_value.vvv = * ( volatile kmp_int##BITS * ) lhs; \ - new_value.cmp = old_value.cmp OP rhs; \ - } \ - } +// workaround for C78287 (complex(kind=4) data type). lin_32, lin_32e, win_32 +// and win_32e are affected (I verified the asm). Compiler ignores the volatile +// qualifier of the temp_val in the OP_CMPXCHG macro. This is a problem of the +// compiler. Related tracker is C76005, targeted to 11.0. I verified the asm of +// the workaround. 
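// For reference, OP_CMPXCHG(kmp_real32, 32, +) above expands (roughly) to the
// usual compare-and-swap retry loop shown here; OP_CMPXCHG_WORKAROUND below
// performs the same loop but re-reads *lhs through a separately held kmp_int32
// pointer so that the volatile re-read cannot be dropped by the affected
// compilers:
//   {
//     kmp_real32 old_value, new_value;
//     old_value = *(kmp_real32 volatile *)lhs;
//     new_value = old_value + rhs;
//     while (!KMP_COMPARE_AND_STORE_ACQ32(
//         (kmp_int32 *)lhs, *VOLATILE_CAST(kmp_int32 *) & old_value,
//         *VOLATILE_CAST(kmp_int32 *) & new_value)) {
//       KMP_DO_PAUSE;
//       old_value = *(kmp_real32 volatile *)lhs;
//       new_value = old_value + rhs;
//     }
//   }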
+#define OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ + { \ + struct _sss { \ + TYPE cmp; \ + kmp_int##BITS *vvv; \ + }; \ + struct _sss old_value, new_value; \ + old_value.vvv = (kmp_int##BITS *)&old_value.cmp; \ + new_value.vvv = (kmp_int##BITS *)&new_value.cmp; \ + *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ + new_value.cmp = old_value.cmp OP rhs; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) old_value.vvv, \ + *VOLATILE_CAST(kmp_int##BITS *) new_value.vvv)) { \ + KMP_DO_PAUSE; \ + \ + *old_value.vvv = *(volatile kmp_int##BITS *)lhs; \ + new_value.cmp = old_value.cmp OP rhs; \ + } \ + } // end of the first part of the workaround for C78287 #endif // USE_CMPXCHG_FIX @@ -762,84 +822,98 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems ==================================== -#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ -} +#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + OP_CMPXCHG(TYPE, BITS, OP) \ + } #if USE_CMPXCHG_FIX // ------------------------------------------------------------------------- // workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ + MASK, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ + } // end of the second part of the workaround for C78287 #endif #else // ------------------------------------------------------------------------- // Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_FIXED_ADD(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_FIXED_ADD(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ + } \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_CMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ + } \ + } #if USE_CMPXCHG_FIX // ------------------------------------------------------------------------- // workaround for C78287 (complex(kind=4) data type) -#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_CMPXCHG_WORKAROUND(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, \ + MASK, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ + } \ + } // end of the second part of the workaround for C78287 #endif // USE_CMPXCHG_FIX #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ // Routines for ATOMIC 4-byte operands addition and subtraction -ATOMIC_FIXED_ADD( fixed4, add, kmp_int32, 32, +, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add -ATOMIC_FIXED_ADD( fixed4, sub, kmp_int32, 32, -, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub +ATOMIC_FIXED_ADD(fixed4, add, kmp_int32, 32, +, 4i, 3, + 0) // __kmpc_atomic_fixed4_add +ATOMIC_FIXED_ADD(fixed4, sub, kmp_int32, 32, -, 4i, 3, + 0) // __kmpc_atomic_fixed4_sub -ATOMIC_CMPXCHG( float4, add, kmp_real32, 32, +, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add -ATOMIC_CMPXCHG( float4, sub, kmp_real32, 32, -, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub +ATOMIC_CMPXCHG(float4, add, kmp_real32, 32, +, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_add +ATOMIC_CMPXCHG(float4, sub, kmp_real32, 32, -, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub // Routines for ATOMIC 8-byte operands addition and subtraction -ATOMIC_FIXED_ADD( fixed8, add, kmp_int64, 64, +, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add -ATOMIC_FIXED_ADD( fixed8, sub, kmp_int64, 64, -, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub +ATOMIC_FIXED_ADD(fixed8, add, kmp_int64, 64, +, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_add +ATOMIC_FIXED_ADD(fixed8, sub, kmp_int64, 64, -, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub -ATOMIC_CMPXCHG( float8, add, kmp_real64, 64, +, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add -ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub +ATOMIC_CMPXCHG(float8, add, kmp_real64, 64, +, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_add +ATOMIC_CMPXCHG(float8, sub, kmp_real64, 64, -, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub // ------------------------------------------------------------------------ // Entries definition for integer operands @@ -856,316 +930,420 @@ ATOMIC_CMPXCHG( float8, sub, kmp_real64, 64, -, 8r, 7, KMP_ARCH_X86 ) // __km // Routines for ATOMIC integer operands, other operators // ------------------------------------------------------------------------ // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG( fixed1, add, kmp_int8, 8, +, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add -ATOMIC_CMPXCHG( fixed1, andb, kmp_int8, 8, &, 1i, 0, 0 ) // __kmpc_atomic_fixed1_andb -ATOMIC_CMPXCHG( fixed1, div, kmp_int8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div -ATOMIC_CMPXCHG( fixed1u, div, kmp_uint8, 8, /, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div -ATOMIC_CMPXCHG( fixed1, mul, kmp_int8, 8, *, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul -ATOMIC_CMPXCHG( fixed1, orb, kmp_int8, 8, |, 1i, 0, 0 ) // __kmpc_atomic_fixed1_orb -ATOMIC_CMPXCHG( fixed1, shl, kmp_int8, 8, <<, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl -ATOMIC_CMPXCHG( fixed1, shr, kmp_int8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr 
-ATOMIC_CMPXCHG( fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr -ATOMIC_CMPXCHG( fixed1, sub, kmp_int8, 8, -, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub -ATOMIC_CMPXCHG( fixed1, xor, kmp_int8, 8, ^, 1i, 0, 0 ) // __kmpc_atomic_fixed1_xor -ATOMIC_CMPXCHG( fixed2, add, kmp_int16, 16, +, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add -ATOMIC_CMPXCHG( fixed2, andb, kmp_int16, 16, &, 2i, 1, 0 ) // __kmpc_atomic_fixed2_andb -ATOMIC_CMPXCHG( fixed2, div, kmp_int16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div -ATOMIC_CMPXCHG( fixed2u, div, kmp_uint16, 16, /, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div -ATOMIC_CMPXCHG( fixed2, mul, kmp_int16, 16, *, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul -ATOMIC_CMPXCHG( fixed2, orb, kmp_int16, 16, |, 2i, 1, 0 ) // __kmpc_atomic_fixed2_orb -ATOMIC_CMPXCHG( fixed2, shl, kmp_int16, 16, <<, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl -ATOMIC_CMPXCHG( fixed2, shr, kmp_int16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr -ATOMIC_CMPXCHG( fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr -ATOMIC_CMPXCHG( fixed2, sub, kmp_int16, 16, -, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub -ATOMIC_CMPXCHG( fixed2, xor, kmp_int16, 16, ^, 2i, 1, 0 ) // __kmpc_atomic_fixed2_xor -ATOMIC_CMPXCHG( fixed4, andb, kmp_int32, 32, &, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andb -ATOMIC_CMPXCHG( fixed4, div, kmp_int32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div -ATOMIC_CMPXCHG( fixed4u, div, kmp_uint32, 32, /, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div -ATOMIC_CMPXCHG( fixed4, mul, kmp_int32, 32, *, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul -ATOMIC_CMPXCHG( fixed4, orb, kmp_int32, 32, |, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orb -ATOMIC_CMPXCHG( fixed4, shl, kmp_int32, 32, <<, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl -ATOMIC_CMPXCHG( fixed4, shr, kmp_int32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr -ATOMIC_CMPXCHG( fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr -ATOMIC_CMPXCHG( fixed4, xor, kmp_int32, 32, ^, 4i, 3, 0 ) // __kmpc_atomic_fixed4_xor -ATOMIC_CMPXCHG( fixed8, andb, kmp_int64, 64, &, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb -ATOMIC_CMPXCHG( fixed8, div, kmp_int64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div -ATOMIC_CMPXCHG( fixed8u, div, kmp_uint64, 64, /, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div -ATOMIC_CMPXCHG( fixed8, mul, kmp_int64, 64, *, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul -ATOMIC_CMPXCHG( fixed8, orb, kmp_int64, 64, |, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb -ATOMIC_CMPXCHG( fixed8, shl, kmp_int64, 64, <<, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl -ATOMIC_CMPXCHG( fixed8, shr, kmp_int64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr -ATOMIC_CMPXCHG( fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr -ATOMIC_CMPXCHG( fixed8, xor, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor -ATOMIC_CMPXCHG( float4, div, kmp_real32, 32, /, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div -ATOMIC_CMPXCHG( float4, mul, kmp_real32, 32, *, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul -ATOMIC_CMPXCHG( float8, div, kmp_real64, 64, /, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div -ATOMIC_CMPXCHG( float8, mul, kmp_real64, 64, *, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul +ATOMIC_CMPXCHG(fixed1, add, kmp_int8, 8, +, 1i, 0, + KMP_ARCH_X86) 
// __kmpc_atomic_fixed1_add +ATOMIC_CMPXCHG(fixed1, andb, kmp_int8, 8, &, 1i, 0, + 0) // __kmpc_atomic_fixed1_andb +ATOMIC_CMPXCHG(fixed1, div, kmp_int8, 8, /, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div +ATOMIC_CMPXCHG(fixed1u, div, kmp_uint8, 8, /, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div +ATOMIC_CMPXCHG(fixed1, mul, kmp_int8, 8, *, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul +ATOMIC_CMPXCHG(fixed1, orb, kmp_int8, 8, |, 1i, 0, + 0) // __kmpc_atomic_fixed1_orb +ATOMIC_CMPXCHG(fixed1, shl, kmp_int8, 8, <<, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl +ATOMIC_CMPXCHG(fixed1, shr, kmp_int8, 8, >>, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr +ATOMIC_CMPXCHG(fixed1u, shr, kmp_uint8, 8, >>, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr +ATOMIC_CMPXCHG(fixed1, sub, kmp_int8, 8, -, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub +ATOMIC_CMPXCHG(fixed1, xor, kmp_int8, 8, ^, 1i, 0, + 0) // __kmpc_atomic_fixed1_xor +ATOMIC_CMPXCHG(fixed2, add, kmp_int16, 16, +, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_add +ATOMIC_CMPXCHG(fixed2, andb, kmp_int16, 16, &, 2i, 1, + 0) // __kmpc_atomic_fixed2_andb +ATOMIC_CMPXCHG(fixed2, div, kmp_int16, 16, /, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div +ATOMIC_CMPXCHG(fixed2u, div, kmp_uint16, 16, /, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div +ATOMIC_CMPXCHG(fixed2, mul, kmp_int16, 16, *, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul +ATOMIC_CMPXCHG(fixed2, orb, kmp_int16, 16, |, 2i, 1, + 0) // __kmpc_atomic_fixed2_orb +ATOMIC_CMPXCHG(fixed2, shl, kmp_int16, 16, <<, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl +ATOMIC_CMPXCHG(fixed2, shr, kmp_int16, 16, >>, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr +ATOMIC_CMPXCHG(fixed2u, shr, kmp_uint16, 16, >>, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr +ATOMIC_CMPXCHG(fixed2, sub, kmp_int16, 16, -, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub +ATOMIC_CMPXCHG(fixed2, xor, kmp_int16, 16, ^, 2i, 1, + 0) // __kmpc_atomic_fixed2_xor +ATOMIC_CMPXCHG(fixed4, andb, kmp_int32, 32, &, 4i, 3, + 0) // __kmpc_atomic_fixed4_andb +ATOMIC_CMPXCHG(fixed4, div, kmp_int32, 32, /, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_div +ATOMIC_CMPXCHG(fixed4u, div, kmp_uint32, 32, /, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div +ATOMIC_CMPXCHG(fixed4, mul, kmp_int32, 32, *, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul +ATOMIC_CMPXCHG(fixed4, orb, kmp_int32, 32, |, 4i, 3, + 0) // __kmpc_atomic_fixed4_orb +ATOMIC_CMPXCHG(fixed4, shl, kmp_int32, 32, <<, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl +ATOMIC_CMPXCHG(fixed4, shr, kmp_int32, 32, >>, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr +ATOMIC_CMPXCHG(fixed4u, shr, kmp_uint32, 32, >>, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr +ATOMIC_CMPXCHG(fixed4, xor, kmp_int32, 32, ^, 4i, 3, + 0) // __kmpc_atomic_fixed4_xor +ATOMIC_CMPXCHG(fixed8, andb, kmp_int64, 64, &, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb +ATOMIC_CMPXCHG(fixed8, div, kmp_int64, 64, /, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div +ATOMIC_CMPXCHG(fixed8u, div, kmp_uint64, 64, /, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div +ATOMIC_CMPXCHG(fixed8, mul, kmp_int64, 64, *, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul +ATOMIC_CMPXCHG(fixed8, orb, kmp_int64, 64, |, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb +ATOMIC_CMPXCHG(fixed8, shl, kmp_int64, 64, <<, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl +ATOMIC_CMPXCHG(fixed8, shr, kmp_int64, 64, >>, 8i, 7, + KMP_ARCH_X86) // 
__kmpc_atomic_fixed8_shr +ATOMIC_CMPXCHG(fixed8u, shr, kmp_uint64, 64, >>, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr +ATOMIC_CMPXCHG(fixed8, xor, kmp_int64, 64, ^, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor +ATOMIC_CMPXCHG(float4, div, kmp_real32, 32, /, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div +ATOMIC_CMPXCHG(float4, mul, kmp_real32, 32, *, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_mul +ATOMIC_CMPXCHG(float8, div, kmp_real64, 64, /, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_div +ATOMIC_CMPXCHG(float8, mul, kmp_real64, 64, *, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_mul // TYPE_ID,OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG - /* ------------------------------------------------------------------------ */ /* Routines for C/C++ Reduction operators && and || */ -/* ------------------------------------------------------------------------ */ // ------------------------------------------------------------------------ // Need separate macros for &&, || because there is no combined assignment // TODO: eliminate ATOMIC_CRIT_{L,EQV} macros as not used -#define ATOMIC_CRIT_L(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ - OP_CRITICAL( = *lhs OP, LCK_ID ) \ -} +#define ATOMIC_CRIT_L(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ + OP_CRITICAL(= *lhs OP, LCK_ID) \ + } #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems =================================== -#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL( = *lhs OP, GOMP_FLAG ) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ + OP_CMPXCHG(TYPE, BITS, OP) \ + } #else // ------------------------------------------------------------------------ // Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPX_L(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(= *lhs OP,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(= *lhs OP,LCK_ID) /* unaligned - use critical */ \ - } \ -} +#define ATOMIC_CMPX_L(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(= *lhs OP, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(= *lhs OP, LCK_ID) /* unaligned - use critical */ \ + } \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -ATOMIC_CMPX_L( fixed1, andl, char, 8, &&, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl -ATOMIC_CMPX_L( fixed1, orl, char, 8, ||, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl -ATOMIC_CMPX_L( fixed2, andl, short, 16, &&, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl -ATOMIC_CMPX_L( fixed2, orl, short, 16, ||, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl -ATOMIC_CMPX_L( fixed4, andl, kmp_int32, 32, &&, 4i, 3, 0 ) // __kmpc_atomic_fixed4_andl -ATOMIC_CMPX_L( fixed4, orl, kmp_int32, 32, ||, 4i, 3, 0 ) // __kmpc_atomic_fixed4_orl -ATOMIC_CMPX_L( fixed8, andl, kmp_int64, 64, &&, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl -ATOMIC_CMPX_L( fixed8, orl, kmp_int64, 64, ||, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl - +ATOMIC_CMPX_L(fixed1, andl, char, 8, &&, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl +ATOMIC_CMPX_L(fixed1, orl, char, 8, ||, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl +ATOMIC_CMPX_L(fixed2, andl, short, 16, &&, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl +ATOMIC_CMPX_L(fixed2, orl, short, 16, ||, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl +ATOMIC_CMPX_L(fixed4, andl, kmp_int32, 32, &&, 4i, 3, + 0) // __kmpc_atomic_fixed4_andl +ATOMIC_CMPX_L(fixed4, orl, kmp_int32, 32, ||, 4i, 3, + 0) // __kmpc_atomic_fixed4_orl +ATOMIC_CMPX_L(fixed8, andl, kmp_int64, 64, &&, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl +ATOMIC_CMPX_L(fixed8, orl, kmp_int64, 64, ||, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl /* ------------------------------------------------------------------------- */ /* Routines for Fortran operators that matched no one in C: */ /* MAX, MIN, .EQV., .NEQV. */ /* Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl} */ /* Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor} */ -/* ------------------------------------------------------------------------- */ // ------------------------------------------------------------------------- // MIN and MAX need separate macros // OP - operator to check if we need any actions? -#define MIN_MAX_CRITSECT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if ( *lhs OP rhs ) { /* still need actions? */ \ - *lhs = rhs; \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define MIN_MAX_CRITSECT(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (*lhs OP rhs) { /* still need actions? 
*/ \ + *lhs = rhs; \ + } \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------- #ifdef KMP_GOMP_COMPAT -#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) \ - if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT( OP, 0 ); \ - return; \ - } +#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT(OP, 0); \ + return; \ + } #else -#define GOMP_MIN_MAX_CRITSECT(OP,FLAG) +#define GOMP_MIN_MAX_CRITSECT(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------- -#define MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - while ( old_value OP rhs && /* still need actions? */ \ - ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ - { \ - KMP_CPU_PAUSE(); \ - temp_val = *lhs; \ - old_value = temp_val; \ - } \ - } +#define MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + while (old_value OP rhs && /* still need actions? */ \ + !KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ + KMP_CPU_PAUSE(); \ + temp_val = *lhs; \ + old_value = temp_val; \ + } \ + } // ------------------------------------------------------------------------- // 1-byte, 2-byte operands - use critical section -#define MIN_MAX_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { /* need actions? */ \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - MIN_MAX_CRITSECT(OP,LCK_ID) \ - } \ -} +#define MIN_MAX_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + if (*lhs OP rhs) { /* need actions? */ \ + GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ + MIN_MAX_CRITSECT(OP, LCK_ID) \ + } \ + } #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------- // X86 or X86_64: no alignment problems ==================================== -#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - MIN_MAX_CMPXCHG(TYPE,BITS,OP) \ - } \ -} +#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + if (*lhs OP rhs) { \ + GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ + MIN_MAX_CMPXCHG(TYPE, BITS, OP) \ + } \ + } #else // ------------------------------------------------------------------------- // Code for other architectures that don't handle unaligned accesses. -#define MIN_MAX_COMPXCHG(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT(OP,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - MIN_MAX_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT(OP,LCK_ID) /* unaligned address */ \ - } \ - } \ -} +#define MIN_MAX_COMPXCHG(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + if (*lhs OP rhs) { \ + GOMP_MIN_MAX_CRITSECT(OP, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + MIN_MAX_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT(OP, LCK_ID) /* unaligned address */ \ + } \ + } \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -MIN_MAX_COMPXCHG( fixed1, max, char, 8, <, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max -MIN_MAX_COMPXCHG( fixed1, min, char, 8, >, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min -MIN_MAX_COMPXCHG( fixed2, max, short, 16, <, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max -MIN_MAX_COMPXCHG( fixed2, min, short, 16, >, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min -MIN_MAX_COMPXCHG( fixed4, max, kmp_int32, 32, <, 4i, 3, 0 ) // __kmpc_atomic_fixed4_max -MIN_MAX_COMPXCHG( fixed4, min, kmp_int32, 32, >, 4i, 3, 0 ) // __kmpc_atomic_fixed4_min -MIN_MAX_COMPXCHG( fixed8, max, kmp_int64, 64, <, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max -MIN_MAX_COMPXCHG( fixed8, min, kmp_int64, 64, >, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min -MIN_MAX_COMPXCHG( float4, max, kmp_real32, 32, <, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max -MIN_MAX_COMPXCHG( float4, min, kmp_real32, 32, >, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min -MIN_MAX_COMPXCHG( float8, max, kmp_real64, 64, <, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max -MIN_MAX_COMPXCHG( float8, min, kmp_real64, 64, >, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min +MIN_MAX_COMPXCHG(fixed1, max, char, 8, <, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_max +MIN_MAX_COMPXCHG(fixed1, min, char, 8, >, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_min +MIN_MAX_COMPXCHG(fixed2, max, short, 16, <, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_max +MIN_MAX_COMPXCHG(fixed2, min, short, 16, >, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_min +MIN_MAX_COMPXCHG(fixed4, max, kmp_int32, 32, <, 4i, 3, + 0) // __kmpc_atomic_fixed4_max +MIN_MAX_COMPXCHG(fixed4, min, kmp_int32, 32, >, 4i, 3, + 0) // __kmpc_atomic_fixed4_min +MIN_MAX_COMPXCHG(fixed8, max, kmp_int64, 64, <, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_max +MIN_MAX_COMPXCHG(fixed8, min, kmp_int64, 64, >, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_min +MIN_MAX_COMPXCHG(float4, max, kmp_real32, 32, <, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_max +MIN_MAX_COMPXCHG(float4, min, kmp_real32, 32, >, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_min +MIN_MAX_COMPXCHG(float8, max, kmp_real64, 64, <, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_max +MIN_MAX_COMPXCHG(float8, min, kmp_real64, 64, >, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_min #if KMP_HAVE_QUAD -MIN_MAX_CRITICAL( float16, max, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max -MIN_MAX_CRITICAL( float16, min, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min -#if ( KMP_ARCH_X86 ) - MIN_MAX_CRITICAL( float16, max_a16, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16 - MIN_MAX_CRITICAL( float16, min_a16, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_min_a16 +MIN_MAX_CRITICAL(float16, max, QUAD_LEGACY, <, 16r, + 1) // __kmpc_atomic_float16_max +MIN_MAX_CRITICAL(float16, min, QUAD_LEGACY, >, 16r, + 1) // __kmpc_atomic_float16_min 
+#if (KMP_ARCH_X86) +MIN_MAX_CRITICAL(float16, max_a16, Quad_a16_t, <, 16r, + 1) // __kmpc_atomic_float16_max_a16 +MIN_MAX_CRITICAL(float16, min_a16, Quad_a16_t, >, 16r, + 1) // __kmpc_atomic_float16_min_a16 #endif #endif // ------------------------------------------------------------------------ // Need separate macros for .EQV. because of the need of complement (~) // OP ignored for critical sections, ^=~ used instead -#define ATOMIC_CRIT_EQV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(^=~,LCK_ID) /* send assignment and complement */ \ -} +#define ATOMIC_CRIT_EQV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(^= ~, LCK_ID) /* send assignment and complement */ \ + } // ------------------------------------------------------------------------ #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------ // X86 or X86_64: no alignment problems =================================== -#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) /* send assignment */ \ + OP_CMPXCHG(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------ #else // ------------------------------------------------------------------------ // Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPX_EQV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(^=~,GOMP_FLAG) \ - if ( ! 
( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(^=~,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_CMPX_EQV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, MASK, \ + GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(^= ~, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(^= ~, LCK_ID) /* unaligned address - use critical */ \ + } \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -ATOMIC_CMPXCHG( fixed1, neqv, kmp_int8, 8, ^, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv -ATOMIC_CMPXCHG( fixed2, neqv, kmp_int16, 16, ^, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv -ATOMIC_CMPXCHG( fixed4, neqv, kmp_int32, 32, ^, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv -ATOMIC_CMPXCHG( fixed8, neqv, kmp_int64, 64, ^, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv -ATOMIC_CMPX_EQV( fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv -ATOMIC_CMPX_EQV( fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv -ATOMIC_CMPX_EQV( fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv -ATOMIC_CMPX_EQV( fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv - - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +ATOMIC_CMPXCHG(fixed1, neqv, kmp_int8, 8, ^, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv +ATOMIC_CMPXCHG(fixed2, neqv, kmp_int16, 16, ^, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv +ATOMIC_CMPXCHG(fixed4, neqv, kmp_int32, 32, ^, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv +ATOMIC_CMPXCHG(fixed8, neqv, kmp_int64, 64, ^, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv +ATOMIC_CMPX_EQV(fixed1, eqv, kmp_int8, 8, ^~, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv +ATOMIC_CMPX_EQV(fixed2, eqv, kmp_int16, 16, ^~, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv +ATOMIC_CMPX_EQV(fixed4, eqv, kmp_int32, 32, ^~, 4i, 3, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv +ATOMIC_CMPX_EQV(fixed8, eqv, kmp_int64, 64, ^~, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \ + } /* ------------------------------------------------------------------------- */ // routines for long double type -ATOMIC_CRITICAL( float10, add, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add -ATOMIC_CRITICAL( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub -ATOMIC_CRITICAL( float10, mul, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul -ATOMIC_CRITICAL( float10, 
div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div +ATOMIC_CRITICAL(float10, add, long double, +, 10r, + 1) // __kmpc_atomic_float10_add +ATOMIC_CRITICAL(float10, sub, long double, -, 10r, + 1) // __kmpc_atomic_float10_sub +ATOMIC_CRITICAL(float10, mul, long double, *, 10r, + 1) // __kmpc_atomic_float10_mul +ATOMIC_CRITICAL(float10, div, long double, /, 10r, + 1) // __kmpc_atomic_float10_div #if KMP_HAVE_QUAD // routines for _Quad type -ATOMIC_CRITICAL( float16, add, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add -ATOMIC_CRITICAL( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub -ATOMIC_CRITICAL( float16, mul, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul -ATOMIC_CRITICAL( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL( float16, add_a16, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16 - ATOMIC_CRITICAL( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16 - ATOMIC_CRITICAL( float16, mul_a16, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16 - ATOMIC_CRITICAL( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16 +ATOMIC_CRITICAL(float16, add, QUAD_LEGACY, +, 16r, + 1) // __kmpc_atomic_float16_add +ATOMIC_CRITICAL(float16, sub, QUAD_LEGACY, -, 16r, + 1) // __kmpc_atomic_float16_sub +ATOMIC_CRITICAL(float16, mul, QUAD_LEGACY, *, 16r, + 1) // __kmpc_atomic_float16_mul +ATOMIC_CRITICAL(float16, div, QUAD_LEGACY, /, 16r, + 1) // __kmpc_atomic_float16_div +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL(float16, add_a16, Quad_a16_t, +, 16r, + 1) // __kmpc_atomic_float16_add_a16 +ATOMIC_CRITICAL(float16, sub_a16, Quad_a16_t, -, 16r, + 1) // __kmpc_atomic_float16_sub_a16 +ATOMIC_CRITICAL(float16, mul_a16, Quad_a16_t, *, 16r, + 1) // __kmpc_atomic_float16_mul_a16 +ATOMIC_CRITICAL(float16, div_a16, Quad_a16_t, /, 16r, + 1) // __kmpc_atomic_float16_div_a16 #endif #endif // routines for complex types #if USE_CMPXCHG_FIX // workaround for C78287 (complex(kind=4) data type) -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_add -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_sub -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_mul -ATOMIC_CMPXCHG_WORKAROUND( cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, 1 ) // __kmpc_atomic_cmplx4_div +ATOMIC_CMPXCHG_WORKAROUND(cmplx4, add, kmp_cmplx32, 64, +, 8c, 7, + 1) // __kmpc_atomic_cmplx4_add +ATOMIC_CMPXCHG_WORKAROUND(cmplx4, sub, kmp_cmplx32, 64, -, 8c, 7, + 1) // __kmpc_atomic_cmplx4_sub +ATOMIC_CMPXCHG_WORKAROUND(cmplx4, mul, kmp_cmplx32, 64, *, 8c, 7, + 1) // __kmpc_atomic_cmplx4_mul +ATOMIC_CMPXCHG_WORKAROUND(cmplx4, div, kmp_cmplx32, 64, /, 8c, 7, + 1) // __kmpc_atomic_cmplx4_div // end of the workaround for C78287 #else -ATOMIC_CRITICAL( cmplx4, add, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add -ATOMIC_CRITICAL( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub -ATOMIC_CRITICAL( cmplx4, mul, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul -ATOMIC_CRITICAL( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div +ATOMIC_CRITICAL(cmplx4, add, kmp_cmplx32, +, 8c, 1) // __kmpc_atomic_cmplx4_add +ATOMIC_CRITICAL(cmplx4, sub, kmp_cmplx32, -, 8c, 1) // __kmpc_atomic_cmplx4_sub +ATOMIC_CRITICAL(cmplx4, mul, kmp_cmplx32, *, 8c, 1) // __kmpc_atomic_cmplx4_mul +ATOMIC_CRITICAL(cmplx4, div, kmp_cmplx32, /, 8c, 1) // __kmpc_atomic_cmplx4_div #endif // 
USE_CMPXCHG_FIX -ATOMIC_CRITICAL( cmplx8, add, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add -ATOMIC_CRITICAL( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub -ATOMIC_CRITICAL( cmplx8, mul, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul -ATOMIC_CRITICAL( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div -ATOMIC_CRITICAL( cmplx10, add, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add -ATOMIC_CRITICAL( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub -ATOMIC_CRITICAL( cmplx10, mul, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul -ATOMIC_CRITICAL( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div +ATOMIC_CRITICAL(cmplx8, add, kmp_cmplx64, +, 16c, 1) // __kmpc_atomic_cmplx8_add +ATOMIC_CRITICAL(cmplx8, sub, kmp_cmplx64, -, 16c, 1) // __kmpc_atomic_cmplx8_sub +ATOMIC_CRITICAL(cmplx8, mul, kmp_cmplx64, *, 16c, 1) // __kmpc_atomic_cmplx8_mul +ATOMIC_CRITICAL(cmplx8, div, kmp_cmplx64, /, 16c, 1) // __kmpc_atomic_cmplx8_div +ATOMIC_CRITICAL(cmplx10, add, kmp_cmplx80, +, 20c, + 1) // __kmpc_atomic_cmplx10_add +ATOMIC_CRITICAL(cmplx10, sub, kmp_cmplx80, -, 20c, + 1) // __kmpc_atomic_cmplx10_sub +ATOMIC_CRITICAL(cmplx10, mul, kmp_cmplx80, *, 20c, + 1) // __kmpc_atomic_cmplx10_mul +ATOMIC_CRITICAL(cmplx10, div, kmp_cmplx80, /, 20c, + 1) // __kmpc_atomic_cmplx10_div #if KMP_HAVE_QUAD -ATOMIC_CRITICAL( cmplx16, add, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add -ATOMIC_CRITICAL( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub -ATOMIC_CRITICAL( cmplx16, mul, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul -ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL( cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16 - ATOMIC_CRITICAL( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16 - ATOMIC_CRITICAL( cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16 - ATOMIC_CRITICAL( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16 +ATOMIC_CRITICAL(cmplx16, add, CPLX128_LEG, +, 32c, + 1) // __kmpc_atomic_cmplx16_add +ATOMIC_CRITICAL(cmplx16, sub, CPLX128_LEG, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub +ATOMIC_CRITICAL(cmplx16, mul, CPLX128_LEG, *, 32c, + 1) // __kmpc_atomic_cmplx16_mul +ATOMIC_CRITICAL(cmplx16, div, CPLX128_LEG, /, 32c, + 1) // __kmpc_atomic_cmplx16_div +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL(cmplx16, add_a16, kmp_cmplx128_a16_t, +, 32c, + 1) // __kmpc_atomic_cmplx16_add_a16 +ATOMIC_CRITICAL(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_a16 +ATOMIC_CRITICAL(cmplx16, mul_a16, kmp_cmplx128_a16_t, *, 32c, + 1) // __kmpc_atomic_cmplx16_mul_a16 +ATOMIC_CRITICAL(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_a16 #endif #endif @@ -1181,34 +1359,34 @@ ATOMIC_CRITICAL( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __km // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_REV(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*lhs) = (rhs) OP (*lhs); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define OP_CRITICAL_REV(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + (*lhs) = (rhs)OP(*lhs); \ + \ + 
__kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_REV(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_REV( OP, 0 ); \ - return; \ - } +#define OP_GOMP_CRITICAL_REV(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_REV(OP, 0); \ + return; \ + } #else -#define OP_GOMP_CRITICAL_REV(OP,FLAG) +#define OP_GOMP_CRITICAL_REV(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ - // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) // TYPE - operands' type -#define ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid )); +#define ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ + RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev(ident_t *id_ref, int gtid, \ + TYPE *lhs, TYPE rhs) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_rev: T#%d\n", gtid)); // ------------------------------------------------------------------------ // Operation on *lhs, rhs using "compare_and_store" routine @@ -1217,31 +1395,30 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_rev( ident_t *id_ref, int gtid, TYP // OP - operator // Note: temp_val introduced in order to force the compiler to read // *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_REV(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_DO_PAUSE; \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - } \ - } +#define OP_CMPXCHG_REV(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_DO_PAUSE; \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + } \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_REV(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ + OP_CMPXCHG_REV(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------ // Entries definition for integer operands @@ -1257,88 +1434,131 @@ ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ // Routines for ATOMIC integer operands, other operators // ------------------------------------------------------------------------ // TYPE_ID,OP_ID, TYPE, BITS, OP, LCK_ID, GOMP_FLAG -ATOMIC_CMPXCHG_REV( fixed1, div, kmp_int8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev -ATOMIC_CMPXCHG_REV( fixed1u, div, kmp_uint8, 8, /, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev -ATOMIC_CMPXCHG_REV( fixed1, shl, kmp_int8, 8, <<, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_rev -ATOMIC_CMPXCHG_REV( fixed1, shr, kmp_int8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_rev -ATOMIC_CMPXCHG_REV( fixed1u, shr, kmp_uint8, 8, >>, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_rev -ATOMIC_CMPXCHG_REV( fixed1, sub, kmp_int8, 8, -, 1i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev - -ATOMIC_CMPXCHG_REV( fixed2, div, kmp_int16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev -ATOMIC_CMPXCHG_REV( fixed2u, div, kmp_uint16, 16, /, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev -ATOMIC_CMPXCHG_REV( fixed2, shl, kmp_int16, 16, <<, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_rev -ATOMIC_CMPXCHG_REV( fixed2, shr, kmp_int16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_rev -ATOMIC_CMPXCHG_REV( fixed2u, shr, kmp_uint16, 16, >>, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_rev -ATOMIC_CMPXCHG_REV( fixed2, sub, kmp_int16, 16, -, 2i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev - -ATOMIC_CMPXCHG_REV( fixed4, div, kmp_int32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_rev -ATOMIC_CMPXCHG_REV( fixed4u, div, kmp_uint32, 32, /, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_rev -ATOMIC_CMPXCHG_REV( fixed4, shl, kmp_int32, 32, <<, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_rev -ATOMIC_CMPXCHG_REV( fixed4, shr, kmp_int32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_rev -ATOMIC_CMPXCHG_REV( fixed4u, shr, kmp_uint32, 32, >>, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_rev -ATOMIC_CMPXCHG_REV( fixed4, sub, kmp_int32, 32, -, 4i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_rev - -ATOMIC_CMPXCHG_REV( fixed8, div, kmp_int64, 64, /, 8i, KMP_ARCH_X86 
) // __kmpc_atomic_fixed8_div_rev -ATOMIC_CMPXCHG_REV( fixed8u, div, kmp_uint64, 64, /, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev -ATOMIC_CMPXCHG_REV( fixed8, shl, kmp_int64, 64, <<, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_rev -ATOMIC_CMPXCHG_REV( fixed8, shr, kmp_int64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_rev -ATOMIC_CMPXCHG_REV( fixed8u, shr, kmp_uint64, 64, >>, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_rev -ATOMIC_CMPXCHG_REV( fixed8, sub, kmp_int64, 64, -, 8i, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev - -ATOMIC_CMPXCHG_REV( float4, div, kmp_real32, 32, /, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_rev -ATOMIC_CMPXCHG_REV( float4, sub, kmp_real32, 32, -, 4r, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev - -ATOMIC_CMPXCHG_REV( float8, div, kmp_real64, 64, /, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev -ATOMIC_CMPXCHG_REV( float8, sub, kmp_real64, 64, -, 8r, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev +ATOMIC_CMPXCHG_REV(fixed1, div, kmp_int8, 8, /, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev +ATOMIC_CMPXCHG_REV(fixed1u, div, kmp_uint8, 8, /, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev +ATOMIC_CMPXCHG_REV(fixed1, shl, kmp_int8, 8, <<, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_rev +ATOMIC_CMPXCHG_REV(fixed1, shr, kmp_int8, 8, >>, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_rev +ATOMIC_CMPXCHG_REV(fixed1u, shr, kmp_uint8, 8, >>, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_rev +ATOMIC_CMPXCHG_REV(fixed1, sub, kmp_int8, 8, -, 1i, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev + +ATOMIC_CMPXCHG_REV(fixed2, div, kmp_int16, 16, /, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev +ATOMIC_CMPXCHG_REV(fixed2u, div, kmp_uint16, 16, /, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev +ATOMIC_CMPXCHG_REV(fixed2, shl, kmp_int16, 16, <<, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_rev +ATOMIC_CMPXCHG_REV(fixed2, shr, kmp_int16, 16, >>, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_rev +ATOMIC_CMPXCHG_REV(fixed2u, shr, kmp_uint16, 16, >>, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_rev +ATOMIC_CMPXCHG_REV(fixed2, sub, kmp_int16, 16, -, 2i, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev + +ATOMIC_CMPXCHG_REV(fixed4, div, kmp_int32, 32, /, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_rev +ATOMIC_CMPXCHG_REV(fixed4u, div, kmp_uint32, 32, /, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_rev +ATOMIC_CMPXCHG_REV(fixed4, shl, kmp_int32, 32, <<, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_rev +ATOMIC_CMPXCHG_REV(fixed4, shr, kmp_int32, 32, >>, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_rev +ATOMIC_CMPXCHG_REV(fixed4u, shr, kmp_uint32, 32, >>, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_rev +ATOMIC_CMPXCHG_REV(fixed4, sub, kmp_int32, 32, -, 4i, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_rev + +ATOMIC_CMPXCHG_REV(fixed8, div, kmp_int64, 64, /, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev +ATOMIC_CMPXCHG_REV(fixed8u, div, kmp_uint64, 64, /, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev +ATOMIC_CMPXCHG_REV(fixed8, shl, kmp_int64, 64, <<, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_rev +ATOMIC_CMPXCHG_REV(fixed8, shr, kmp_int64, 64, >>, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_rev +ATOMIC_CMPXCHG_REV(fixed8u, shr, kmp_uint64, 64, >>, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_rev +ATOMIC_CMPXCHG_REV(fixed8, sub, kmp_int64, 64, -, 8i, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev + +ATOMIC_CMPXCHG_REV(float4, div, kmp_real32, 32, /, 4r, + KMP_ARCH_X86) 
// __kmpc_atomic_float4_div_rev +ATOMIC_CMPXCHG_REV(float4, sub, kmp_real32, 32, -, 4r, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev + +ATOMIC_CMPXCHG_REV(float8, div, kmp_real64, 64, /, 8r, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev +ATOMIC_CMPXCHG_REV(float8, sub, kmp_real64, 64, -, 8r, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev // TYPE_ID,OP_ID, TYPE, BITS,OP,LCK_ID, GOMP_FLAG // ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_REV(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CRITICAL_REV(OP,LCK_ID) \ -} +#define ATOMIC_CRITICAL_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_REV(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ + OP_CRITICAL_REV(OP, LCK_ID) \ + } /* ------------------------------------------------------------------------- */ // routines for long double type -ATOMIC_CRITICAL_REV( float10, sub, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_rev -ATOMIC_CRITICAL_REV( float10, div, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_rev +ATOMIC_CRITICAL_REV(float10, sub, long double, -, 10r, + 1) // __kmpc_atomic_float10_sub_rev +ATOMIC_CRITICAL_REV(float10, div, long double, /, 10r, + 1) // __kmpc_atomic_float10_div_rev #if KMP_HAVE_QUAD // routines for _Quad type -ATOMIC_CRITICAL_REV( float16, sub, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_rev -ATOMIC_CRITICAL_REV( float16, div, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_REV( float16, sub_a16, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_rev - ATOMIC_CRITICAL_REV( float16, div_a16, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_rev +ATOMIC_CRITICAL_REV(float16, sub, QUAD_LEGACY, -, 16r, + 1) // __kmpc_atomic_float16_sub_rev +ATOMIC_CRITICAL_REV(float16, div, QUAD_LEGACY, /, 16r, + 1) // __kmpc_atomic_float16_div_rev +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_REV(float16, sub_a16, Quad_a16_t, -, 16r, + 1) // __kmpc_atomic_float16_sub_a16_rev +ATOMIC_CRITICAL_REV(float16, div_a16, Quad_a16_t, /, 16r, + 1) // __kmpc_atomic_float16_div_a16_rev #endif #endif // routines for complex types -ATOMIC_CRITICAL_REV( cmplx4, sub, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_rev -ATOMIC_CRITICAL_REV( cmplx4, div, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_rev -ATOMIC_CRITICAL_REV( cmplx8, sub, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_rev -ATOMIC_CRITICAL_REV( cmplx8, div, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_rev -ATOMIC_CRITICAL_REV( cmplx10, sub, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_rev -ATOMIC_CRITICAL_REV( cmplx10, div, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_rev +ATOMIC_CRITICAL_REV(cmplx4, sub, kmp_cmplx32, -, 8c, + 1) // __kmpc_atomic_cmplx4_sub_rev +ATOMIC_CRITICAL_REV(cmplx4, div, kmp_cmplx32, /, 8c, + 1) // __kmpc_atomic_cmplx4_div_rev +ATOMIC_CRITICAL_REV(cmplx8, sub, kmp_cmplx64, -, 16c, + 1) // __kmpc_atomic_cmplx8_sub_rev +ATOMIC_CRITICAL_REV(cmplx8, div, kmp_cmplx64, /, 16c, + 1) // __kmpc_atomic_cmplx8_div_rev +ATOMIC_CRITICAL_REV(cmplx10, sub, kmp_cmplx80, -, 
20c, + 1) // __kmpc_atomic_cmplx10_sub_rev +ATOMIC_CRITICAL_REV(cmplx10, div, kmp_cmplx80, /, 20c, + 1) // __kmpc_atomic_cmplx10_div_rev #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_REV( cmplx16, sub, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_rev -ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_REV( cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_rev - ATOMIC_CRITICAL_REV( cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_rev +ATOMIC_CRITICAL_REV(cmplx16, sub, CPLX128_LEG, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_rev +ATOMIC_CRITICAL_REV(cmplx16, div, CPLX128_LEG, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_rev +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_REV(cmplx16, sub_a16, kmp_cmplx128_a16_t, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_a16_rev +ATOMIC_CRITICAL_REV(cmplx16, div_a16, kmp_cmplx128_a16_t, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_a16_rev #endif #endif - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 // End of OpenMP 4.0: x = expr binop x for non-commutative operations. -#endif //OMP_40_ENABLED - +#endif // OMP_40_ENABLED /* ------------------------------------------------------------------------ */ /* Routines for mixed types of LHS and RHS, when RHS is "larger" */ @@ -1351,156 +1571,242 @@ ATOMIC_CRITICAL_REV( cmplx16, div, CPLX128_LEG, /, 32c, 1 ) // /* Performance penalty expected because of SW emulation use */ /* ------------------------------------------------------------------------ */ -#define ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + void __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ + ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, \ + ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ + gtid)); // ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP##=,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, LCK_ID, \ + GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP## =, LCK_ID) /* send assignment */ \ + } // ------------------------------------------------------------------------- #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------- // X86 or X86_64: no alignment problems ==================================== -#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ + LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + 
OP_CMPXCHG(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------- #else // ------------------------------------------------------------------------ // Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPXCHG_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_CMPXCHG_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ + LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ + } \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ // ------------------------------------------------------------------------- #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_REV(TYPE,BITS,OP) \ -} -#define ATOMIC_CRITICAL_REV_FP(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL_REV(OP,GOMP_FLAG) \ - OP_CRITICAL_REV(OP,LCK_ID) \ -} +#define ATOMIC_CMPXCHG_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ + RTYPE, LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ + OP_CMPXCHG_REV(TYPE, BITS, OP) \ + } +#define ATOMIC_CRITICAL_REV_FP(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ + LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL_REV(OP, GOMP_FLAG) \ + OP_CRITICAL_REV(OP, LCK_ID) \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ // RHS=float8 -ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_float8 -ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_float8 -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_float8 -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_float8 -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // 
__kmpc_atomic_float4_sub_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_float8 -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_float8 - -// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) +ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, float8, kmp_real64, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_float8 +ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, float8, kmp_real64, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_float8 +ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, float8, kmp_real64, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_float8 +ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, float8, kmp_real64, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_float8 +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, float8, kmp_real64, 4i, 3, + 0) // __kmpc_atomic_fixed4_mul_float8 +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, float8, kmp_real64, 4i, 3, + 0) // __kmpc_atomic_fixed4_div_float8 +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, float8, kmp_real64, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_float8 +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, float8, kmp_real64, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_float8 +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, float8, kmp_real64, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_add_float8 +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, float8, kmp_real64, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_float8 +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, float8, kmp_real64, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_mul_float8 +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, float8, kmp_real64, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_float8 + +// RHS=float16 (deprecated, to be removed when we are sure the compiler does not +// use them) #if KMP_HAVE_QUAD -ATOMIC_CMPXCHG_MIX( fixed1, char, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_fp -ATOMIC_CMPXCHG_MIX( fixed1u, uchar, add, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_fp -ATOMIC_CMPXCHG_MIX( fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_fp -ATOMIC_CMPXCHG_MIX( fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_fp -ATOMIC_CMPXCHG_MIX( fixed1, char, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_fp -ATOMIC_CMPXCHG_MIX( fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed2, short, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_fp -ATOMIC_CMPXCHG_MIX( fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_fp -ATOMIC_CMPXCHG_MIX( fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_fp -ATOMIC_CMPXCHG_MIX( fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) 
// __kmpc_atomic_fixed2u_mul_fp -ATOMIC_CMPXCHG_MIX( fixed2, short, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_fp -ATOMIC_CMPXCHG_MIX( fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_fp -ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_fp -ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_fp -ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_fp -ATOMIC_CMPXCHG_MIX( fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_fp -ATOMIC_CMPXCHG_MIX( fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_fp - -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_fp -ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_fp -ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_fp -ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_fp -ATOMIC_CMPXCHG_MIX( fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_fp -ATOMIC_CMPXCHG_MIX( fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_fp - -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_fp -ATOMIC_CMPXCHG_MIX( float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_fp - -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_fp -ATOMIC_CMPXCHG_MIX( float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_fp - -ATOMIC_CRITICAL_FP( float10, long double, add, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_fp -ATOMIC_CRITICAL_FP( float10, long double, sub, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_fp -ATOMIC_CRITICAL_FP( float10, long double, mul, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_fp -ATOMIC_CRITICAL_FP( float10, long double, div, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_fp +ATOMIC_CMPXCHG_MIX(fixed1, char, add, 8, +, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_fp +ATOMIC_CMPXCHG_MIX(fixed1u, uchar, 
add, 8, +, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_fp +ATOMIC_CMPXCHG_MIX(fixed1, char, sub, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_fp +ATOMIC_CMPXCHG_MIX(fixed1u, uchar, sub, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_fp +ATOMIC_CMPXCHG_MIX(fixed1, char, mul, 8, *, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_fp +ATOMIC_CMPXCHG_MIX(fixed1u, uchar, mul, 8, *, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_fp +ATOMIC_CMPXCHG_MIX(fixed1, char, div, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_fp +ATOMIC_CMPXCHG_MIX(fixed1u, uchar, div, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_fp + +ATOMIC_CMPXCHG_MIX(fixed2, short, add, 16, +, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_fp +ATOMIC_CMPXCHG_MIX(fixed2u, ushort, add, 16, +, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_fp +ATOMIC_CMPXCHG_MIX(fixed2, short, sub, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_fp +ATOMIC_CMPXCHG_MIX(fixed2u, ushort, sub, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_fp +ATOMIC_CMPXCHG_MIX(fixed2, short, mul, 16, *, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_fp +ATOMIC_CMPXCHG_MIX(fixed2u, ushort, mul, 16, *, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_fp +ATOMIC_CMPXCHG_MIX(fixed2, short, div, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_fp +ATOMIC_CMPXCHG_MIX(fixed2u, ushort, div, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_fp + +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, add, 32, +, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_add_fp +ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, add, 32, +, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_add_fp +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, sub, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_sub_fp +ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, sub, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_sub_fp +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, mul, 32, *, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_mul_fp +ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, mul, 32, *, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_mul_fp +ATOMIC_CMPXCHG_MIX(fixed4, kmp_int32, div, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_div_fp +ATOMIC_CMPXCHG_MIX(fixed4u, kmp_uint32, div, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_div_fp + +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, add, 64, +, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_fp +ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, add, 64, +, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_fp +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, sub, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_fp +ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, sub, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_fp +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, mul, 64, *, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_fp +ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, mul, 64, *, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_fp +ATOMIC_CMPXCHG_MIX(fixed8, kmp_int64, div, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_fp +ATOMIC_CMPXCHG_MIX(fixed8u, kmp_uint64, div, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_fp + +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, add, 32, +, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // 
__kmpc_atomic_float4_add_fp +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, sub, 32, -, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_fp +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, mul, 32, *, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_mul_fp +ATOMIC_CMPXCHG_MIX(float4, kmp_real32, div, 32, /, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_fp + +ATOMIC_CMPXCHG_MIX(float8, kmp_real64, add, 64, +, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_add_fp +ATOMIC_CMPXCHG_MIX(float8, kmp_real64, sub, 64, -, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_fp +ATOMIC_CMPXCHG_MIX(float8, kmp_real64, mul, 64, *, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_mul_fp +ATOMIC_CMPXCHG_MIX(float8, kmp_real64, div, 64, /, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_fp + +ATOMIC_CRITICAL_FP(float10, long double, add, +, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_add_fp +ATOMIC_CRITICAL_FP(float10, long double, sub, -, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_sub_fp +ATOMIC_CRITICAL_FP(float10, long double, mul, *, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_mul_fp +ATOMIC_CRITICAL_FP(float10, long double, div, /, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_div_fp #if KMP_ARCH_X86 || KMP_ARCH_X86_64 // Reverse operations -ATOMIC_CMPXCHG_REV_MIX( fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX( fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_rev_fp -ATOMIC_CMPXCHG_REV_MIX( fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // 
__kmpc_atomic_float4_div_rev_fp - -ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_rev_fp -ATOMIC_CMPXCHG_REV_MIX( float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_rev_fp - -ATOMIC_CRITICAL_REV_FP( float10, long double, sub_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_rev_fp -ATOMIC_CRITICAL_REV_FP( float10, long double, div_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed1, char, sub_rev, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, sub_rev, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed1, char, div_rev, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed1u, uchar, div_rev, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_rev_fp + +ATOMIC_CMPXCHG_REV_MIX(fixed2, short, sub_rev, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, sub_rev, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed2, short, div_rev, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed2u, ushort, div_rev, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_rev_fp + +ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, sub_rev, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, sub_rev, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed4, kmp_int32, div_rev, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_div_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed4u, kmp_uint32, div_rev, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_div_rev_fp + +ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, sub_rev, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, sub_rev, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed8, kmp_int64, div_rev, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_rev_fp +ATOMIC_CMPXCHG_REV_MIX(fixed8u, kmp_uint64, div_rev, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_rev_fp + +ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, sub_rev, 32, -, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(float4, kmp_real32, div_rev, 32, /, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_rev_fp + +ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, sub_rev, 64, -, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_rev_fp +ATOMIC_CMPXCHG_REV_MIX(float8, kmp_real64, div_rev, 64, /, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_rev_fp + +ATOMIC_CRITICAL_REV_FP(float10, long double, sub_rev, -, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_sub_rev_fp +ATOMIC_CRITICAL_REV_FP(float10, long double, div_rev, /, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_div_rev_fp #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #endif @@ -1510,57 +1816,63 @@ ATOMIC_CRITICAL_REV_FP( float10, long double, div_rev, /, fp, _Quad, 10r, 1 // X86 or X86_64: no alignment problems ==================================== #if USE_CMPXCHG_FIX // workaround for C78287 
(complex(kind=4) data type) -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_WORKAROUND(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ + LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + OP_CMPXCHG_WORKAROUND(TYPE, BITS, OP) \ + } // end of the second part of the workaround for C78287 #else -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - OP_CMPXCHG(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ + LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + OP_CMPXCHG(TYPE, BITS, OP) \ + } #endif // USE_CMPXCHG_FIX #else // ------------------------------------------------------------------------ // Code for other architectures that don't handle unaligned accesses. -#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_MIX(TYPE_ID,TYPE,OP_ID,RTYPE_ID,RTYPE) \ - OP_GOMP_CRITICAL(OP##=,GOMP_FLAG) \ - if ( ! ( (kmp_uintptr_t) lhs & 0x##MASK) ) { \ - OP_CMPXCHG(TYPE,BITS,OP) /* aligned address */ \ - } else { \ - KMP_CHECK_GTID; \ - OP_CRITICAL(OP##=,LCK_ID) /* unaligned address - use critical */ \ - } \ -} +#define ATOMIC_CMPXCHG_CMPLX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, RTYPE, \ + LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_MIX(TYPE_ID, TYPE, OP_ID, RTYPE_ID, RTYPE) \ + OP_GOMP_CRITICAL(OP## =, GOMP_FLAG) \ + if (!((kmp_uintptr_t)lhs & 0x##MASK)) { \ + OP_CMPXCHG(TYPE, BITS, OP) /* aligned address */ \ + } else { \ + KMP_CHECK_GTID; \ + OP_CRITICAL(OP## =, LCK_ID) /* unaligned address - use critical */ \ + } \ + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_add_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_sub_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_mul_cmplx8 -ATOMIC_CMPXCHG_CMPLX( cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, 7, KMP_ARCH_X86 ) // __kmpc_atomic_cmplx4_div_cmplx8 +ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, add, 64, +, cmplx8, kmp_cmplx64, 8c, + 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_add_cmplx8 +ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, sub, 64, -, cmplx8, kmp_cmplx64, 8c, + 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_sub_cmplx8 +ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, mul, 64, *, cmplx8, kmp_cmplx64, 8c, + 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_mul_cmplx8 +ATOMIC_CMPXCHG_CMPLX(cmplx4, kmp_cmplx32, div, 64, /, cmplx8, kmp_cmplx64, 8c, + 7, KMP_ARCH_X86) // __kmpc_atomic_cmplx4_div_cmplx8 // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -////////////////////////////////////////////////////////////////////////////////////////////////////// // ------------------------------------------------------------------------ // Atomic READ routines -// 
------------------------------------------------------------------------ // ------------------------------------------------------------------------ // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) // TYPE - operands' type -#define ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE, RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * loc ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ + RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ + TYPE *loc) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); // ------------------------------------------------------------------------ // Operation on *lhs, rhs using "compare_and_store_ret" routine @@ -1571,23 +1883,23 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lo // *lhs only once (w/o it the compiler reads *lhs twice) // TODO: check if it is still necessary // Return old value regardless of the result of "compare & swap# operation - -#define OP_CMPXCHG_READ(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - union f_i_union { \ - TYPE f_val; \ - kmp_int##BITS i_val; \ - }; \ - union f_i_union old_value; \ - temp_val = *loc; \ - old_value.f_val = temp_val; \ - old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( (kmp_int##BITS *) loc, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value.i_val ); \ - new_value = old_value.f_val; \ - return new_value; \ - } +#define OP_CMPXCHG_READ(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + union f_i_union { \ + TYPE f_val; \ + kmp_int##BITS i_val; \ + }; \ + union f_i_union old_value; \ + temp_val = *loc; \ + old_value.f_val = temp_val; \ + old_value.i_val = KMP_COMPARE_AND_STORE_RET##BITS( \ + (kmp_int##BITS *)loc, \ + *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val, \ + *VOLATILE_CAST(kmp_int##BITS *) & old_value.i_val); \ + new_value = old_value.f_val; \ + return new_value; \ + } // ------------------------------------------------------------------------- // Operation on *lhs, rhs bound by critical section @@ -1595,140 +1907,152 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lo // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_READ(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - new_value = (*loc); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define OP_CRITICAL_READ(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + new_value = (*loc); \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------- #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_READ(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_READ( OP, 0 ); \ - return new_value; \ - } +#define OP_GOMP_CRITICAL_READ(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + 
OP_CRITICAL_READ(OP, 0); \ + return new_value; \ + } #else -#define OP_GOMP_CRITICAL_READ(OP,FLAG) +#define OP_GOMP_CRITICAL_READ(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------- -#define ATOMIC_FIXED_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ - new_value = KMP_TEST_THEN_ADD##BITS( loc, OP 0 ); \ - return new_value; \ -} +#define ATOMIC_FIXED_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ + new_value = KMP_TEST_THEN_ADD##BITS(loc, OP 0); \ + return new_value; \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_READ(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) \ - OP_CMPXCHG_READ(TYPE,BITS,OP) \ -} -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +#define ATOMIC_CMPXCHG_READ(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) \ + OP_CMPXCHG_READ(TYPE, BITS, OP) \ + } +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_READ(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_READ(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_READ(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_READ(OP,LCK_ID) /* send assignment */ \ - return new_value; \ -} +#define ATOMIC_CRITICAL_READ(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_READ(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_READ(OP## =, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_READ(OP, LCK_ID) /* send assignment */ \ + return new_value; \ + } // ------------------------------------------------------------------------ -// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return value doesn't work. +// Fix for cmplx4 read (CQ220361) on Windows* OS. Regular routine with return +// value doesn't work. // Let's return the read value through the additional parameter. 
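// Illustrative aside, not part of the patch: the Windows workaround defined
// below hands the 8-byte kmp_cmplx32 result back through an extra out
// parameter instead of the return value. A hedged standalone sketch of that
// shape, with std::mutex and std::complex<float> standing in for the
// runtime's per-type atomic lock and kmp_cmplx32:
#include <complex>
#include <mutex>

static std::mutex cmplx4_lock_sketch; // stand-in for the 8c atomic lock

void cmplx4_read_sketch(std::complex<float> *out, std::complex<float> *loc) {
  std::lock_guard<std::mutex> guard(cmplx4_lock_sketch); // critical section
  *out = *loc; // copy the value out while the lock is held, as
               // OP_CRITICAL_READ_WRK does with (*out) = (*loc)
}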
+#if (KMP_OS_WINDOWS) -#if ( KMP_OS_WINDOWS ) - -#define OP_CRITICAL_READ_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - (*out) = (*loc); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define OP_CRITICAL_READ_WRK(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + (*out) = (*loc); \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_READ_WRK( OP, 0 ); \ - } +#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_READ_WRK(OP, 0); \ + } #else -#define OP_GOMP_CRITICAL_READ_WRK(OP,FLAG) +#define OP_GOMP_CRITICAL_READ_WRK(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ -#define ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID( TYPE * out, ident_t *id_ref, int gtid, TYPE * loc ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ + void __kmpc_atomic_##TYPE_ID##_##OP_ID(TYPE *out, ident_t *id_ref, int gtid, \ + TYPE *loc) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); // ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_READ_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_READ_WRK(OP##=,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_READ_WRK(OP,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL_READ_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_READ_WRK(TYPE_ID, OP_ID, TYPE) \ + OP_GOMP_CRITICAL_READ_WRK(OP## =, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_READ_WRK(OP, LCK_ID) /* send assignment */ \ + } #endif // KMP_OS_WINDOWS // ------------------------------------------------------------------------ // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_FIXED_READ( fixed4, rd, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_rd -ATOMIC_FIXED_READ( fixed8, rd, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_rd -ATOMIC_CMPXCHG_READ( float4, rd, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_rd -ATOMIC_CMPXCHG_READ( float8, rd, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_rd +ATOMIC_FIXED_READ(fixed4, rd, kmp_int32, 32, +, 0) // __kmpc_atomic_fixed4_rd +ATOMIC_FIXED_READ(fixed8, rd, kmp_int64, 64, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_rd +ATOMIC_CMPXCHG_READ(float4, rd, kmp_real32, 32, +, + KMP_ARCH_X86) // __kmpc_atomic_float4_rd +ATOMIC_CMPXCHG_READ(float8, rd, kmp_real64, 64, +, + KMP_ARCH_X86) // __kmpc_atomic_float8_rd // !!! 
TODO: Remove lock operations for "char" since it can't be non-atomic -ATOMIC_CMPXCHG_READ( fixed1, rd, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_rd -ATOMIC_CMPXCHG_READ( fixed2, rd, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_rd +ATOMIC_CMPXCHG_READ(fixed1, rd, kmp_int8, 8, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_rd +ATOMIC_CMPXCHG_READ(fixed2, rd, kmp_int16, 16, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_rd -ATOMIC_CRITICAL_READ( float10, rd, long double, +, 10r, 1 ) // __kmpc_atomic_float10_rd +ATOMIC_CRITICAL_READ(float10, rd, long double, +, 10r, + 1) // __kmpc_atomic_float10_rd #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ( float16, rd, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_rd +ATOMIC_CRITICAL_READ(float16, rd, QUAD_LEGACY, +, 16r, + 1) // __kmpc_atomic_float16_rd #endif // KMP_HAVE_QUAD // Fix for CQ220361 on Windows* OS -#if ( KMP_OS_WINDOWS ) - ATOMIC_CRITICAL_READ_WRK( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd +#if (KMP_OS_WINDOWS) +ATOMIC_CRITICAL_READ_WRK(cmplx4, rd, kmp_cmplx32, +, 8c, + 1) // __kmpc_atomic_cmplx4_rd #else - ATOMIC_CRITICAL_READ( cmplx4, rd, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_rd +ATOMIC_CRITICAL_READ(cmplx4, rd, kmp_cmplx32, +, 8c, + 1) // __kmpc_atomic_cmplx4_rd #endif -ATOMIC_CRITICAL_READ( cmplx8, rd, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_rd -ATOMIC_CRITICAL_READ( cmplx10, rd, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_rd +ATOMIC_CRITICAL_READ(cmplx8, rd, kmp_cmplx64, +, 16c, + 1) // __kmpc_atomic_cmplx8_rd +ATOMIC_CRITICAL_READ(cmplx10, rd, kmp_cmplx80, +, 20c, + 1) // __kmpc_atomic_cmplx10_rd #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_READ( cmplx16, rd, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_rd -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_READ( float16, a16_rd, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_a16_rd - ATOMIC_CRITICAL_READ( cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_rd +ATOMIC_CRITICAL_READ(cmplx16, rd, CPLX128_LEG, +, 32c, + 1) // __kmpc_atomic_cmplx16_rd +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_READ(float16, a16_rd, Quad_a16_t, +, 16r, + 1) // __kmpc_atomic_float16_a16_rd +ATOMIC_CRITICAL_READ(cmplx16, a16_rd, kmp_cmplx128_a16_t, +, 32c, + 1) // __kmpc_atomic_cmplx16_a16_rd #endif #endif - // ------------------------------------------------------------------------ // Atomic WRITE routines -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - KMP_XCHG_FIXED##BITS( lhs, rhs ); \ -} +#define ATOMIC_XCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ + KMP_XCHG_FIXED##BITS(lhs, rhs); \ + } // ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - KMP_XCHG_REAL##BITS( lhs, rhs ); \ -} - +#define ATOMIC_XCHG_FLOAT_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ + KMP_XCHG_REAL##BITS(lhs, rhs); \ + } // ------------------------------------------------------------------------ // Operation on *lhs, rhs using "compare_and_store" routine @@ -1737,89 +2061,103 @@ ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ // OP - operator // Note: temp_val introduced in 
order to force the compiler to read // *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_WR(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while ( ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - } +#define OP_CMPXCHG_WR(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + } \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_WR(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) \ - OP_CMPXCHG_WR(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_WR(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP, GOMP_FLAG) \ + OP_CMPXCHG_WR(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_WR(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN(TYPE_ID,OP_ID,TYPE,void) \ - OP_GOMP_CRITICAL(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL(OP,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL_WR(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN(TYPE_ID, OP_ID, TYPE, void) \ + OP_GOMP_CRITICAL(OP, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL(OP, LCK_ID) /* send assignment */ \ + } // ------------------------------------------------------------------------- -ATOMIC_XCHG_WR( fixed1, wr, kmp_int8, 8, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_wr -ATOMIC_XCHG_WR( fixed2, wr, kmp_int16, 16, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_wr -ATOMIC_XCHG_WR( fixed4, wr, kmp_int32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr +ATOMIC_XCHG_WR(fixed1, wr, kmp_int8, 8, =, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_wr +ATOMIC_XCHG_WR(fixed2, wr, kmp_int16, 16, =, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_wr +ATOMIC_XCHG_WR(fixed4, wr, kmp_int32, 32, =, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_wr +#if (KMP_ARCH_X86) +ATOMIC_CMPXCHG_WR(fixed8, wr, kmp_int64, 64, =, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr #else - ATOMIC_XCHG_WR( fixed8, wr, kmp_int64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_wr +ATOMIC_XCHG_WR(fixed8, wr, kmp_int64, 64, =, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_wr #endif -ATOMIC_XCHG_FLOAT_WR( float4, wr, kmp_real32, 32, =, KMP_ARCH_X86 ) // __kmpc_atomic_float4_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr 
+ATOMIC_XCHG_FLOAT_WR(float4, wr, kmp_real32, 32, =, + KMP_ARCH_X86) // __kmpc_atomic_float4_wr +#if (KMP_ARCH_X86) +ATOMIC_CMPXCHG_WR(float8, wr, kmp_real64, 64, =, + KMP_ARCH_X86) // __kmpc_atomic_float8_wr #else - ATOMIC_XCHG_FLOAT_WR( float8, wr, kmp_real64, 64, =, KMP_ARCH_X86 ) // __kmpc_atomic_float8_wr +ATOMIC_XCHG_FLOAT_WR(float8, wr, kmp_real64, 64, =, + KMP_ARCH_X86) // __kmpc_atomic_float8_wr #endif -ATOMIC_CRITICAL_WR( float10, wr, long double, =, 10r, 1 ) // __kmpc_atomic_float10_wr +ATOMIC_CRITICAL_WR(float10, wr, long double, =, 10r, + 1) // __kmpc_atomic_float10_wr #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_WR( float16, wr, QUAD_LEGACY, =, 16r, 1 ) // __kmpc_atomic_float16_wr +ATOMIC_CRITICAL_WR(float16, wr, QUAD_LEGACY, =, 16r, + 1) // __kmpc_atomic_float16_wr #endif -ATOMIC_CRITICAL_WR( cmplx4, wr, kmp_cmplx32, =, 8c, 1 ) // __kmpc_atomic_cmplx4_wr -ATOMIC_CRITICAL_WR( cmplx8, wr, kmp_cmplx64, =, 16c, 1 ) // __kmpc_atomic_cmplx8_wr -ATOMIC_CRITICAL_WR( cmplx10, wr, kmp_cmplx80, =, 20c, 1 ) // __kmpc_atomic_cmplx10_wr +ATOMIC_CRITICAL_WR(cmplx4, wr, kmp_cmplx32, =, 8c, 1) // __kmpc_atomic_cmplx4_wr +ATOMIC_CRITICAL_WR(cmplx8, wr, kmp_cmplx64, =, 16c, + 1) // __kmpc_atomic_cmplx8_wr +ATOMIC_CRITICAL_WR(cmplx10, wr, kmp_cmplx80, =, 20c, + 1) // __kmpc_atomic_cmplx10_wr #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_WR( cmplx16, wr, CPLX128_LEG, =, 32c, 1 ) // __kmpc_atomic_cmplx16_wr -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_WR( float16, a16_wr, Quad_a16_t, =, 16r, 1 ) // __kmpc_atomic_float16_a16_wr - ATOMIC_CRITICAL_WR( cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_wr +ATOMIC_CRITICAL_WR(cmplx16, wr, CPLX128_LEG, =, 32c, + 1) // __kmpc_atomic_cmplx16_wr +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_WR(float16, a16_wr, Quad_a16_t, =, 16r, + 1) // __kmpc_atomic_float16_a16_wr +ATOMIC_CRITICAL_WR(cmplx16, a16_wr, kmp_cmplx128_a16_t, =, 32c, + 1) // __kmpc_atomic_cmplx16_a16_wr #endif #endif - // ------------------------------------------------------------------------ // Atomic CAPTURE routines -// ------------------------------------------------------------------------ // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type -#define ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,RET_TYPE) \ -RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, int flag ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, RET_TYPE) \ + RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, \ + TYPE *lhs, TYPE rhs, int flag) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); // ------------------------------------------------------------------------- // Operation on *lhs, rhs bound by critical section @@ -1827,29 +2165,29 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) OP rhs; \ - new_value = (*lhs); \ - } else { \ - new_value = (*lhs); \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; +#define OP_CRITICAL_CPT(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (flag) { \ + (*lhs) OP rhs; \ + new_value = (*lhs); \ + } else { \ + new_value = (*lhs); \ + (*lhs) OP rhs; \ + } \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return new_value; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT( OP##=, 0 ); \ - } +#define OP_GOMP_CRITICAL_CPT(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT(OP## =, 0); \ + } #else -#define OP_GOMP_CRITICAL_CPT(OP,FLAG) +#define OP_GOMP_CRITICAL_CPT(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ @@ -1859,60 +2197,67 @@ RET_TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lh // OP - operator // Note: temp_val introduced in order to force the compiler to read // *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_CPT(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = old_value OP rhs; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = old_value OP rhs; \ - } \ - if( flag ) { \ - return new_value; \ - } else \ - return old_value; \ - } +#define OP_CMPXCHG_CPT(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = old_value OP rhs; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = old_value OP rhs; \ + } \ + if (flag) { \ + return new_value; \ + } else \ + return old_value; \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ + OP_CMPXCHG_CPT(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------- -#define ATOMIC_FIXED_ADD_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE old_value, new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ - /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ - old_value = KMP_TEST_THEN_ADD##BITS( lhs, OP rhs ); \ - if( flag ) { \ - return old_value OP rhs; \ - } else \ - return old_value; \ -} +#define ATOMIC_FIXED_ADD_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE old_value, new_value; \ + OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ + /* OP used as a sign for subtraction: (lhs-rhs) --> (lhs+-rhs) */ \ + old_value = KMP_TEST_THEN_ADD##BITS(lhs, OP rhs); \ + if (flag) { \ + return old_value OP rhs; \ + } else \ + return old_value; \ + } // ------------------------------------------------------------------------- -ATOMIC_FIXED_ADD_CPT( fixed4, add_cpt, kmp_int32, 32, +, 0 ) // __kmpc_atomic_fixed4_add_cpt -ATOMIC_FIXED_ADD_CPT( fixed4, sub_cpt, kmp_int32, 32, -, 0 ) // __kmpc_atomic_fixed4_sub_cpt -ATOMIC_FIXED_ADD_CPT( fixed8, add_cpt, kmp_int64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt -ATOMIC_FIXED_ADD_CPT( fixed8, sub_cpt, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt - -ATOMIC_CMPXCHG_CPT( float4, add_cpt, kmp_real32, 32, +, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt -ATOMIC_CMPXCHG_CPT( float4, sub_cpt, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt -ATOMIC_CMPXCHG_CPT( float8, add_cpt, kmp_real64, 64, +, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt -ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt +ATOMIC_FIXED_ADD_CPT(fixed4, add_cpt, kmp_int32, 32, +, + 0) // __kmpc_atomic_fixed4_add_cpt +ATOMIC_FIXED_ADD_CPT(fixed4, sub_cpt, kmp_int32, 32, -, + 0) // __kmpc_atomic_fixed4_sub_cpt +ATOMIC_FIXED_ADD_CPT(fixed8, add_cpt, kmp_int64, 64, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt +ATOMIC_FIXED_ADD_CPT(fixed8, sub_cpt, kmp_int64, 64, -, + KMP_ARCH_X86) // 
__kmpc_atomic_fixed8_sub_cpt + +ATOMIC_CMPXCHG_CPT(float4, add_cpt, kmp_real32, 32, +, + KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt +ATOMIC_CMPXCHG_CPT(float4, sub_cpt, kmp_real32, 32, -, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt +ATOMIC_CMPXCHG_CPT(float8, add_cpt, kmp_real64, 64, +, + KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt +ATOMIC_CMPXCHG_CPT(float8, sub_cpt, kmp_real64, 64, -, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt // ------------------------------------------------------------------------ // Entries definition for integer operands @@ -1926,141 +2271,229 @@ ATOMIC_CMPXCHG_CPT( float8, sub_cpt, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmp // Routines for ATOMIC integer operands, other operators // ------------------------------------------------------------------------ // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -ATOMIC_CMPXCHG_CPT( fixed1, add_cpt, kmp_int8, 8, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt -ATOMIC_CMPXCHG_CPT( fixed1, andb_cpt, kmp_int8, 8, &, 0 ) // __kmpc_atomic_fixed1_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed1, div_cpt, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt -ATOMIC_CMPXCHG_CPT( fixed1u, div_cpt, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed1, mul_cpt, kmp_int8, 8, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed1, orb_cpt, kmp_int8, 8, |, 0 ) // __kmpc_atomic_fixed1_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed1, shl_cpt, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed1, shr_cpt, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed1u, shr_cpt, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed1, sub_cpt, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt -ATOMIC_CMPXCHG_CPT( fixed1, xor_cpt, kmp_int8, 8, ^, 0 ) // __kmpc_atomic_fixed1_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed2, add_cpt, kmp_int16, 16, +, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt -ATOMIC_CMPXCHG_CPT( fixed2, andb_cpt, kmp_int16, 16, &, 0 ) // __kmpc_atomic_fixed2_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed2, div_cpt, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt -ATOMIC_CMPXCHG_CPT( fixed2u, div_cpt, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed2, mul_cpt, kmp_int16, 16, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed2, orb_cpt, kmp_int16, 16, |, 0 ) // __kmpc_atomic_fixed2_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed2, shl_cpt, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed2, shr_cpt, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed2u, shr_cpt, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed2, sub_cpt, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt -ATOMIC_CMPXCHG_CPT( fixed2, xor_cpt, kmp_int16, 16, ^, 0 ) // __kmpc_atomic_fixed2_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed4, andb_cpt, kmp_int32, 32, &, 0 ) // __kmpc_atomic_fixed4_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed4, div_cpt, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt -ATOMIC_CMPXCHG_CPT( fixed4u, div_cpt, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed4, mul_cpt, kmp_int32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed4, orb_cpt, kmp_int32, 32, |, 0 ) // __kmpc_atomic_fixed4_orb_cpt 
-ATOMIC_CMPXCHG_CPT( fixed4, shl_cpt, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed4, shr_cpt, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed4u, shr_cpt, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed4, xor_cpt, kmp_int32, 32, ^, 0 ) // __kmpc_atomic_fixed4_xor_cpt -ATOMIC_CMPXCHG_CPT( fixed8, andb_cpt, kmp_int64, 64, &, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andb_cpt -ATOMIC_CMPXCHG_CPT( fixed8, div_cpt, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt -ATOMIC_CMPXCHG_CPT( fixed8u, div_cpt, kmp_uint64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt -ATOMIC_CMPXCHG_CPT( fixed8, mul_cpt, kmp_int64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt -ATOMIC_CMPXCHG_CPT( fixed8, orb_cpt, kmp_int64, 64, |, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orb_cpt -ATOMIC_CMPXCHG_CPT( fixed8, shl_cpt, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt -ATOMIC_CMPXCHG_CPT( fixed8, shr_cpt, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed8u, shr_cpt, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt -ATOMIC_CMPXCHG_CPT( fixed8, xor_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_xor_cpt -ATOMIC_CMPXCHG_CPT( float4, div_cpt, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt -ATOMIC_CMPXCHG_CPT( float4, mul_cpt, kmp_real32, 32, *, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt -ATOMIC_CMPXCHG_CPT( float8, div_cpt, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt -ATOMIC_CMPXCHG_CPT( float8, mul_cpt, kmp_real64, 64, *, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt +ATOMIC_CMPXCHG_CPT(fixed1, add_cpt, kmp_int8, 8, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt +ATOMIC_CMPXCHG_CPT(fixed1, andb_cpt, kmp_int8, 8, &, + 0) // __kmpc_atomic_fixed1_andb_cpt +ATOMIC_CMPXCHG_CPT(fixed1, div_cpt, kmp_int8, 8, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt +ATOMIC_CMPXCHG_CPT(fixed1u, div_cpt, kmp_uint8, 8, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt +ATOMIC_CMPXCHG_CPT(fixed1, mul_cpt, kmp_int8, 8, *, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt +ATOMIC_CMPXCHG_CPT(fixed1, orb_cpt, kmp_int8, 8, |, + 0) // __kmpc_atomic_fixed1_orb_cpt +ATOMIC_CMPXCHG_CPT(fixed1, shl_cpt, kmp_int8, 8, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt +ATOMIC_CMPXCHG_CPT(fixed1, shr_cpt, kmp_int8, 8, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed1u, shr_cpt, kmp_uint8, 8, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed1, sub_cpt, kmp_int8, 8, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt +ATOMIC_CMPXCHG_CPT(fixed1, xor_cpt, kmp_int8, 8, ^, + 0) // __kmpc_atomic_fixed1_xor_cpt +ATOMIC_CMPXCHG_CPT(fixed2, add_cpt, kmp_int16, 16, +, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt +ATOMIC_CMPXCHG_CPT(fixed2, andb_cpt, kmp_int16, 16, &, + 0) // __kmpc_atomic_fixed2_andb_cpt +ATOMIC_CMPXCHG_CPT(fixed2, div_cpt, kmp_int16, 16, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt +ATOMIC_CMPXCHG_CPT(fixed2u, div_cpt, kmp_uint16, 16, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt +ATOMIC_CMPXCHG_CPT(fixed2, mul_cpt, kmp_int16, 16, *, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_mul_cpt +ATOMIC_CMPXCHG_CPT(fixed2, orb_cpt, kmp_int16, 16, |, + 0) // __kmpc_atomic_fixed2_orb_cpt +ATOMIC_CMPXCHG_CPT(fixed2, shl_cpt, kmp_int16, 16, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt 
+ATOMIC_CMPXCHG_CPT(fixed2, shr_cpt, kmp_int16, 16, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed2u, shr_cpt, kmp_uint16, 16, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed2, sub_cpt, kmp_int16, 16, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt +ATOMIC_CMPXCHG_CPT(fixed2, xor_cpt, kmp_int16, 16, ^, + 0) // __kmpc_atomic_fixed2_xor_cpt +ATOMIC_CMPXCHG_CPT(fixed4, andb_cpt, kmp_int32, 32, &, + 0) // __kmpc_atomic_fixed4_andb_cpt +ATOMIC_CMPXCHG_CPT(fixed4, div_cpt, kmp_int32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt +ATOMIC_CMPXCHG_CPT(fixed4u, div_cpt, kmp_uint32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt +ATOMIC_CMPXCHG_CPT(fixed4, mul_cpt, kmp_int32, 32, *, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_mul_cpt +ATOMIC_CMPXCHG_CPT(fixed4, orb_cpt, kmp_int32, 32, |, + 0) // __kmpc_atomic_fixed4_orb_cpt +ATOMIC_CMPXCHG_CPT(fixed4, shl_cpt, kmp_int32, 32, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt +ATOMIC_CMPXCHG_CPT(fixed4, shr_cpt, kmp_int32, 32, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed4u, shr_cpt, kmp_uint32, 32, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed4, xor_cpt, kmp_int32, 32, ^, + 0) // __kmpc_atomic_fixed4_xor_cpt +ATOMIC_CMPXCHG_CPT(fixed8, andb_cpt, kmp_int64, 64, &, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_andb_cpt +ATOMIC_CMPXCHG_CPT(fixed8, div_cpt, kmp_int64, 64, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt +ATOMIC_CMPXCHG_CPT(fixed8u, div_cpt, kmp_uint64, 64, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt +ATOMIC_CMPXCHG_CPT(fixed8, mul_cpt, kmp_int64, 64, *, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt +ATOMIC_CMPXCHG_CPT(fixed8, orb_cpt, kmp_int64, 64, |, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_orb_cpt +ATOMIC_CMPXCHG_CPT(fixed8, shl_cpt, kmp_int64, 64, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt +ATOMIC_CMPXCHG_CPT(fixed8, shr_cpt, kmp_int64, 64, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed8u, shr_cpt, kmp_uint64, 64, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt +ATOMIC_CMPXCHG_CPT(fixed8, xor_cpt, kmp_int64, 64, ^, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_xor_cpt +ATOMIC_CMPXCHG_CPT(float4, div_cpt, kmp_real32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt +ATOMIC_CMPXCHG_CPT(float4, mul_cpt, kmp_real32, 32, *, + KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt +ATOMIC_CMPXCHG_CPT(float8, div_cpt, kmp_real64, 64, /, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt +ATOMIC_CMPXCHG_CPT(float8, mul_cpt, kmp_real64, 64, *, + KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG -////////////////////////////////// - // CAPTURE routines for mixed types RHS=float16 #if KMP_HAVE_QUAD // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type -#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ -TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( ident_t *id_ref, int gtid, TYPE * lhs, RTYPE rhs, int flag ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ + TYPE __kmpc_atomic_##TYPE_ID##_##OP_ID##_##RTYPE_ID( \ + ident_t *id_ref, int gtid, TYPE *lhs, RTYPE rhs, int flag) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, \ + ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID "_" #RTYPE_ID ": T#%d\n", \ + gtid)); // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_CPT_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ + RTYPE, LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) \ + OP_CMPXCHG_CPT(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL_CPT_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ + LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ + } + +ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_fp + +ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // 
__kmpc_atomic_fixed2_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_fp + +ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4_div_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, + 0) // __kmpc_atomic_fixed4u_div_cpt_fp + +ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_fp + +ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_fp + +ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_add_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_mul_cpt_fp +ATOMIC_CMPXCHG_CPT_MIX(float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_fp + +ATOMIC_CRITICAL_CPT_MIX(float10, long double, add_cpt, +, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_add_cpt_fp 
+ATOMIC_CRITICAL_CPT_MIX(float10, long double, sub_cpt, -, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_sub_cpt_fp +ATOMIC_CRITICAL_CPT_MIX(float10, long double, mul_cpt, *, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_mul_cpt_fp +ATOMIC_CRITICAL_CPT_MIX(float10, long double, div_cpt, /, fp, _Quad, 10r, + 1) // __kmpc_atomic_float10_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, add_cpt, 8, +, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, sub_cpt, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, mul_cpt, 8, *, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1, char, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed1u, uchar, div_cpt, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, add_cpt, 16, +, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, sub_cpt, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, mul_cpt, 16, *, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2, short, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed2u, ushort, div_cpt, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, add_cpt, 32, +, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, sub_cpt, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, mul_cpt, 32, *, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4, kmp_int32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed4u, kmp_uint32, div_cpt, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, add_cpt, 64, +, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_add_cpt_fp 
-ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, sub_cpt, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, mul_cpt, 64, *, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8, kmp_int64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( fixed8u, kmp_uint64, div_cpt, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, add_cpt, 32, +, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, sub_cpt, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, mul_cpt, 32, *, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float4, kmp_real32, div_cpt, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_fp - -ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, add_cpt, 64, +, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_add_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, sub_cpt, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, mul_cpt, 64, *, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_mul_cpt_fp -ATOMIC_CMPXCHG_CPT_MIX( float8, kmp_real64, div_cpt, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_fp - -ATOMIC_CRITICAL_CPT_MIX( float10, long double, add_cpt, +, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_add_cpt_fp -ATOMIC_CRITICAL_CPT_MIX( float10, long double, sub_cpt, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_fp -ATOMIC_CRITICAL_CPT_MIX( float10, long double, mul_cpt, *, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt_fp -ATOMIC_CRITICAL_CPT_MIX( float10, long double, div_cpt, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_fp - -#endif //KMP_HAVE_QUAD - -/////////////////////////////////// +#endif // KMP_HAVE_QUAD // ------------------------------------------------------------------------ // Routines for C/C++ Reduction operators && and || -// ------------------------------------------------------------------------ // ------------------------------------------------------------------------- // Operation on *lhs, rhs bound by critical section @@ -2068,285 +2501,347 @@ ATOMIC_CRITICAL_CPT_MIX( float10, long double, div_cpt, /, fp, _Quad, 10r, 1 ) // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_L_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - new_value OP rhs; \ - } else \ - new_value = (*lhs); \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); +#define OP_CRITICAL_L_CPT(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (flag) { \ + new_value OP rhs; \ + } else \ + new_value = (*lhs); \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define 
OP_GOMP_CRITICAL_L_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_L_CPT( OP, 0 ); \ - return new_value; \ - } +#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_L_CPT(OP, 0); \ + return new_value; \ + } #else -#define OP_GOMP_CRITICAL_L_CPT(OP,FLAG) +#define OP_GOMP_CRITICAL_L_CPT(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ // Need separate macros for &&, || because there is no combined assignment -#define ATOMIC_CMPX_L_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_L_CPT( = *lhs OP, GOMP_FLAG ) \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} - -ATOMIC_CMPX_L_CPT( fixed1, andl_cpt, char, 8, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_andl_cpt -ATOMIC_CMPX_L_CPT( fixed1, orl_cpt, char, 8, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_orl_cpt -ATOMIC_CMPX_L_CPT( fixed2, andl_cpt, short, 16, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_andl_cpt -ATOMIC_CMPX_L_CPT( fixed2, orl_cpt, short, 16, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_orl_cpt -ATOMIC_CMPX_L_CPT( fixed4, andl_cpt, kmp_int32, 32, &&, 0 ) // __kmpc_atomic_fixed4_andl_cpt -ATOMIC_CMPX_L_CPT( fixed4, orl_cpt, kmp_int32, 32, ||, 0 ) // __kmpc_atomic_fixed4_orl_cpt -ATOMIC_CMPX_L_CPT( fixed8, andl_cpt, kmp_int64, 64, &&, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_andl_cpt -ATOMIC_CMPX_L_CPT( fixed8, orl_cpt, kmp_int64, 64, ||, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_orl_cpt - +#define ATOMIC_CMPX_L_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_L_CPT(= *lhs OP, GOMP_FLAG) \ + OP_CMPXCHG_CPT(TYPE, BITS, OP) \ + } + +ATOMIC_CMPX_L_CPT(fixed1, andl_cpt, char, 8, &&, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_andl_cpt +ATOMIC_CMPX_L_CPT(fixed1, orl_cpt, char, 8, ||, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_orl_cpt +ATOMIC_CMPX_L_CPT(fixed2, andl_cpt, short, 16, &&, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_andl_cpt +ATOMIC_CMPX_L_CPT(fixed2, orl_cpt, short, 16, ||, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_orl_cpt +ATOMIC_CMPX_L_CPT(fixed4, andl_cpt, kmp_int32, 32, &&, + 0) // __kmpc_atomic_fixed4_andl_cpt +ATOMIC_CMPX_L_CPT(fixed4, orl_cpt, kmp_int32, 32, ||, + 0) // __kmpc_atomic_fixed4_orl_cpt +ATOMIC_CMPX_L_CPT(fixed8, andl_cpt, kmp_int64, 64, &&, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_andl_cpt +ATOMIC_CMPX_L_CPT(fixed8, orl_cpt, kmp_int64, 64, ||, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_orl_cpt // ------------------------------------------------------------------------- // Routines for Fortran operators that matched no one in C: // MAX, MIN, .EQV., .NEQV. // Operators .AND., .OR. are covered by __kmpc_atomic_*_{andl,orl}_cpt // Intrinsics IAND, IOR, IEOR are covered by __kmpc_atomic_*_{andb,orb,xor}_cpt -// ------------------------------------------------------------------------- // ------------------------------------------------------------------------- // MIN and MAX need separate macros // OP - operator to check if we need any actions? -#define MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if ( *lhs OP rhs ) { /* still need actions? 
*/ \ - old_value = *lhs; \ - *lhs = rhs; \ - if ( flag ) \ - new_value = rhs; \ - else \ - new_value = old_value; \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; \ +#define MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (*lhs OP rhs) { /* still need actions? */ \ + old_value = *lhs; \ + *lhs = rhs; \ + if (flag) \ + new_value = rhs; \ + else \ + new_value = old_value; \ + } \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return new_value; // ------------------------------------------------------------------------- #ifdef KMP_GOMP_COMPAT -#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) \ - if (( FLAG ) && ( __kmp_atomic_mode == 2 )) { \ - KMP_CHECK_GTID; \ - MIN_MAX_CRITSECT_CPT( OP, 0 ); \ - } +#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + MIN_MAX_CRITSECT_CPT(OP, 0); \ + } #else -#define GOMP_MIN_MAX_CRITSECT_CPT(OP,FLAG) +#define GOMP_MIN_MAX_CRITSECT_CPT(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------- -#define MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - /*TYPE old_value; */ \ - temp_val = *lhs; \ - old_value = temp_val; \ - while ( old_value OP rhs && /* still need actions? */ \ - ! KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &rhs ) ) \ - { \ - KMP_CPU_PAUSE(); \ - temp_val = *lhs; \ - old_value = temp_val; \ - } \ - if( flag ) \ - return rhs; \ - else \ - return old_value; \ - } +#define MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + /*TYPE old_value; */ \ + temp_val = *lhs; \ + old_value = temp_val; \ + while (old_value OP rhs && /* still need actions? */ \ + !KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, \ + *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & rhs)) { \ + KMP_CPU_PAUSE(); \ + temp_val = *lhs; \ + old_value = temp_val; \ + } \ + if (flag) \ + return rhs; \ + else \ + return old_value; \ + } // ------------------------------------------------------------------------- // 1-byte, 2-byte operands - use critical section -#define MIN_MAX_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value, old_value; \ - if ( *lhs OP rhs ) { /* need actions? 
*/ \ - GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ - MIN_MAX_CRITSECT_CPT(OP,LCK_ID) \ - } \ - return *lhs; \ -} - -#define MIN_MAX_COMPXCHG_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value, old_value; \ - if ( *lhs OP rhs ) { \ - GOMP_MIN_MAX_CRITSECT_CPT(OP,GOMP_FLAG) \ - MIN_MAX_CMPXCHG_CPT(TYPE,BITS,OP) \ - } \ - return *lhs; \ -} - - -MIN_MAX_COMPXCHG_CPT( fixed1, max_cpt, char, 8, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed1, min_cpt, char, 8, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed2, max_cpt, short, 16, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed2, min_cpt, short, 16, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed4, max_cpt, kmp_int32, 32, <, 0 ) // __kmpc_atomic_fixed4_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed4, min_cpt, kmp_int32, 32, >, 0 ) // __kmpc_atomic_fixed4_min_cpt -MIN_MAX_COMPXCHG_CPT( fixed8, max_cpt, kmp_int64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_max_cpt -MIN_MAX_COMPXCHG_CPT( fixed8, min_cpt, kmp_int64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_min_cpt -MIN_MAX_COMPXCHG_CPT( float4, max_cpt, kmp_real32, 32, <, KMP_ARCH_X86 ) // __kmpc_atomic_float4_max_cpt -MIN_MAX_COMPXCHG_CPT( float4, min_cpt, kmp_real32, 32, >, KMP_ARCH_X86 ) // __kmpc_atomic_float4_min_cpt -MIN_MAX_COMPXCHG_CPT( float8, max_cpt, kmp_real64, 64, <, KMP_ARCH_X86 ) // __kmpc_atomic_float8_max_cpt -MIN_MAX_COMPXCHG_CPT( float8, min_cpt, kmp_real64, 64, >, KMP_ARCH_X86 ) // __kmpc_atomic_float8_min_cpt +#define MIN_MAX_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value, old_value; \ + if (*lhs OP rhs) { /* need actions? 
*/ \ + GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ + MIN_MAX_CRITSECT_CPT(OP, LCK_ID) \ + } \ + return *lhs; \ + } + +#define MIN_MAX_COMPXCHG_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value, old_value; \ + if (*lhs OP rhs) { \ + GOMP_MIN_MAX_CRITSECT_CPT(OP, GOMP_FLAG) \ + MIN_MAX_CMPXCHG_CPT(TYPE, BITS, OP) \ + } \ + return *lhs; \ + } + +MIN_MAX_COMPXCHG_CPT(fixed1, max_cpt, char, 8, <, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_max_cpt +MIN_MAX_COMPXCHG_CPT(fixed1, min_cpt, char, 8, >, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_min_cpt +MIN_MAX_COMPXCHG_CPT(fixed2, max_cpt, short, 16, <, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_max_cpt +MIN_MAX_COMPXCHG_CPT(fixed2, min_cpt, short, 16, >, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_min_cpt +MIN_MAX_COMPXCHG_CPT(fixed4, max_cpt, kmp_int32, 32, <, + 0) // __kmpc_atomic_fixed4_max_cpt +MIN_MAX_COMPXCHG_CPT(fixed4, min_cpt, kmp_int32, 32, >, + 0) // __kmpc_atomic_fixed4_min_cpt +MIN_MAX_COMPXCHG_CPT(fixed8, max_cpt, kmp_int64, 64, <, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_max_cpt +MIN_MAX_COMPXCHG_CPT(fixed8, min_cpt, kmp_int64, 64, >, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_min_cpt +MIN_MAX_COMPXCHG_CPT(float4, max_cpt, kmp_real32, 32, <, + KMP_ARCH_X86) // __kmpc_atomic_float4_max_cpt +MIN_MAX_COMPXCHG_CPT(float4, min_cpt, kmp_real32, 32, >, + KMP_ARCH_X86) // __kmpc_atomic_float4_min_cpt +MIN_MAX_COMPXCHG_CPT(float8, max_cpt, kmp_real64, 64, <, + KMP_ARCH_X86) // __kmpc_atomic_float8_max_cpt +MIN_MAX_COMPXCHG_CPT(float8, min_cpt, kmp_real64, 64, >, + KMP_ARCH_X86) // __kmpc_atomic_float8_min_cpt #if KMP_HAVE_QUAD -MIN_MAX_CRITICAL_CPT( float16, max_cpt, QUAD_LEGACY, <, 16r, 1 ) // __kmpc_atomic_float16_max_cpt -MIN_MAX_CRITICAL_CPT( float16, min_cpt, QUAD_LEGACY, >, 16r, 1 ) // __kmpc_atomic_float16_min_cpt -#if ( KMP_ARCH_X86 ) - MIN_MAX_CRITICAL_CPT( float16, max_a16_cpt, Quad_a16_t, <, 16r, 1 ) // __kmpc_atomic_float16_max_a16_cpt - MIN_MAX_CRITICAL_CPT( float16, min_a16_cpt, Quad_a16_t, >, 16r, 1 ) // __kmpc_atomic_float16_mix_a16_cpt +MIN_MAX_CRITICAL_CPT(float16, max_cpt, QUAD_LEGACY, <, 16r, + 1) // __kmpc_atomic_float16_max_cpt +MIN_MAX_CRITICAL_CPT(float16, min_cpt, QUAD_LEGACY, >, 16r, + 1) // __kmpc_atomic_float16_min_cpt +#if (KMP_ARCH_X86) +MIN_MAX_CRITICAL_CPT(float16, max_a16_cpt, Quad_a16_t, <, 16r, + 1) // __kmpc_atomic_float16_max_a16_cpt +MIN_MAX_CRITICAL_CPT(float16, min_a16_cpt, Quad_a16_t, >, 16r, + 1) // __kmpc_atomic_float16_mix_a16_cpt #endif #endif // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT( OP, 0 ); \ - } +#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT(OP, 0); \ + } #else -#define OP_GOMP_CRITICAL_EQV_CPT(OP,FLAG) +#define OP_GOMP_CRITICAL_EQV_CPT(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ -#define ATOMIC_CMPX_EQV_CPT(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_EQV_CPT(^=~,GOMP_FLAG) /* send assignment */ \ - OP_CMPXCHG_CPT(TYPE,BITS,OP) \ -} - -// ------------------------------------------------------------------------ - -ATOMIC_CMPXCHG_CPT( fixed1, neqv_cpt, kmp_int8, 8, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_neqv_cpt -ATOMIC_CMPXCHG_CPT( 
fixed2, neqv_cpt, kmp_int16, 16, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_neqv_cpt -ATOMIC_CMPXCHG_CPT( fixed4, neqv_cpt, kmp_int32, 32, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_neqv_cpt -ATOMIC_CMPXCHG_CPT( fixed8, neqv_cpt, kmp_int64, 64, ^, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_neqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed1, eqv_cpt, kmp_int8, 8, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed2, eqv_cpt, kmp_int16, 16, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed4, eqv_cpt, kmp_int32, 32, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_eqv_cpt -ATOMIC_CMPX_EQV_CPT( fixed8, eqv_cpt, kmp_int64, 64, ^~, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_eqv_cpt - -// ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +#define ATOMIC_CMPX_EQV_CPT(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_EQV_CPT(^= ~, GOMP_FLAG) /* send assignment */ \ + OP_CMPXCHG_CPT(TYPE, BITS, OP) \ + } + +// ------------------------------------------------------------------------ + +ATOMIC_CMPXCHG_CPT(fixed1, neqv_cpt, kmp_int8, 8, ^, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_neqv_cpt +ATOMIC_CMPXCHG_CPT(fixed2, neqv_cpt, kmp_int16, 16, ^, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_neqv_cpt +ATOMIC_CMPXCHG_CPT(fixed4, neqv_cpt, kmp_int32, 32, ^, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_neqv_cpt +ATOMIC_CMPXCHG_CPT(fixed8, neqv_cpt, kmp_int64, 64, ^, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_neqv_cpt +ATOMIC_CMPX_EQV_CPT(fixed1, eqv_cpt, kmp_int8, 8, ^~, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_eqv_cpt +ATOMIC_CMPX_EQV_CPT(fixed2, eqv_cpt, kmp_int16, 16, ^~, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_eqv_cpt +ATOMIC_CMPX_EQV_CPT(fixed4, eqv_cpt, kmp_int32, 32, ^~, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_eqv_cpt +ATOMIC_CMPX_EQV_CPT(fixed8, eqv_cpt, kmp_int64, 64, ^~, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_eqv_cpt + +// ------------------------------------------------------------------------ +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_CPT(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT(OP##=,LCK_ID) /* send assignment */ \ -} +#define ATOMIC_CRITICAL_CPT(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT(OP, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_CPT(OP## =, LCK_ID) /* send assignment */ \ + } // ------------------------------------------------------------------------ - // Workaround for cmplx4. Regular routines with return value don't work // on Win_32e. Let's return captured values through the additional parameter. 
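// A minimal sketch (illustration only, not part of this patch or the runtime)
// of the shape the cmplx4 capture workaround below takes: the captured value
// travels through the extra 'out' parameter instead of the return value, and
// 'flag' selects whether the captured value is taken after (flag != 0) or
// before (flag == 0) the update, mirroring OP_CRITICAL_CPT_WRK.
// lock_acquire()/lock_release() are placeholders for the real
// __kmp_acquire_atomic_lock/__kmp_release_atomic_lock calls.
//
// static void example_cmplx4_add_cpt_wrk(kmp_cmplx32 *lhs, kmp_cmplx32 rhs,
//                                         kmp_cmplx32 *out, int flag) {
//   lock_acquire(); // placeholder for __kmp_acquire_atomic_lock(&lck, gtid)
//   if (flag) {
//     *lhs += rhs; // update first, then capture the new value
//     *out = *lhs;
//   } else {
//     *out = *lhs; // capture the old value, then update
//     *lhs += rhs;
//   }
//   lock_release(); // placeholder for __kmp_release_atomic_lock(&lck, gtid)
// }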
-#define OP_CRITICAL_CPT_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) OP rhs; \ - (*out) = (*lhs); \ - } else { \ - (*out) = (*lhs); \ - (*lhs) OP rhs; \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; +#define OP_CRITICAL_CPT_WRK(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (flag) { \ + (*lhs) OP rhs; \ + (*out) = (*lhs); \ + } else { \ + (*out) = (*lhs); \ + (*lhs) OP rhs; \ + } \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_WRK( OP##=, 0 ); \ - } +#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_WRK(OP## =, 0); \ + } #else -#define OP_GOMP_CRITICAL_CPT_WRK(OP,FLAG) +#define OP_GOMP_CRITICAL_CPT_WRK(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ -#define ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_##OP_ID( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out, int flag ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid )); +#define ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ + void __kmpc_atomic_##TYPE_ID##_##OP_ID(ident_t *id_ref, int gtid, TYPE *lhs, \ + TYPE rhs, TYPE *out, int flag) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_" #OP_ID ": T#%d\n", gtid)); // ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_CPT_WRK(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_WRK(OP##=,LCK_ID) \ -} +#define ATOMIC_CRITICAL_CPT_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ + OP_GOMP_CRITICAL_CPT_WRK(OP, GOMP_FLAG) \ + OP_CRITICAL_CPT_WRK(OP## =, LCK_ID) \ + } // The end of workaround for cmplx4 /* ------------------------------------------------------------------------- */ // routines for long double type -ATOMIC_CRITICAL_CPT( float10, add_cpt, long double, +, 10r, 1 ) // __kmpc_atomic_float10_add_cpt -ATOMIC_CRITICAL_CPT( float10, sub_cpt, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt -ATOMIC_CRITICAL_CPT( float10, mul_cpt, long double, *, 10r, 1 ) // __kmpc_atomic_float10_mul_cpt -ATOMIC_CRITICAL_CPT( float10, div_cpt, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt +ATOMIC_CRITICAL_CPT(float10, add_cpt, long double, +, 10r, + 1) // __kmpc_atomic_float10_add_cpt +ATOMIC_CRITICAL_CPT(float10, sub_cpt, long double, -, 10r, + 1) // __kmpc_atomic_float10_sub_cpt +ATOMIC_CRITICAL_CPT(float10, mul_cpt, long double, *, 10r, + 1) // __kmpc_atomic_float10_mul_cpt +ATOMIC_CRITICAL_CPT(float10, div_cpt, long double, /, 10r, + 1) // __kmpc_atomic_float10_div_cpt #if KMP_HAVE_QUAD // routines for _Quad type -ATOMIC_CRITICAL_CPT( float16, add_cpt, QUAD_LEGACY, +, 16r, 1 ) // __kmpc_atomic_float16_add_cpt -ATOMIC_CRITICAL_CPT( float16, sub_cpt, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt -ATOMIC_CRITICAL_CPT( float16, mul_cpt, QUAD_LEGACY, *, 16r, 1 ) // __kmpc_atomic_float16_mul_cpt -ATOMIC_CRITICAL_CPT( float16, 
div_cpt, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT( float16, add_a16_cpt, Quad_a16_t, +, 16r, 1 ) // __kmpc_atomic_float16_add_a16_cpt - ATOMIC_CRITICAL_CPT( float16, sub_a16_cpt, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt - ATOMIC_CRITICAL_CPT( float16, mul_a16_cpt, Quad_a16_t, *, 16r, 1 ) // __kmpc_atomic_float16_mul_a16_cpt - ATOMIC_CRITICAL_CPT( float16, div_a16_cpt, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt +ATOMIC_CRITICAL_CPT(float16, add_cpt, QUAD_LEGACY, +, 16r, + 1) // __kmpc_atomic_float16_add_cpt +ATOMIC_CRITICAL_CPT(float16, sub_cpt, QUAD_LEGACY, -, 16r, + 1) // __kmpc_atomic_float16_sub_cpt +ATOMIC_CRITICAL_CPT(float16, mul_cpt, QUAD_LEGACY, *, 16r, + 1) // __kmpc_atomic_float16_mul_cpt +ATOMIC_CRITICAL_CPT(float16, div_cpt, QUAD_LEGACY, /, 16r, + 1) // __kmpc_atomic_float16_div_cpt +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_CPT(float16, add_a16_cpt, Quad_a16_t, +, 16r, + 1) // __kmpc_atomic_float16_add_a16_cpt +ATOMIC_CRITICAL_CPT(float16, sub_a16_cpt, Quad_a16_t, -, 16r, + 1) // __kmpc_atomic_float16_sub_a16_cpt +ATOMIC_CRITICAL_CPT(float16, mul_a16_cpt, Quad_a16_t, *, 16r, + 1) // __kmpc_atomic_float16_mul_a16_cpt +ATOMIC_CRITICAL_CPT(float16, div_a16_cpt, Quad_a16_t, /, 16r, + 1) // __kmpc_atomic_float16_div_a16_cpt #endif #endif // routines for complex types // cmplx4 routines to return void -ATOMIC_CRITICAL_CPT_WRK( cmplx4, add_cpt, kmp_cmplx32, +, 8c, 1 ) // __kmpc_atomic_cmplx4_add_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, sub_cpt, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, mul_cpt, kmp_cmplx32, *, 8c, 1 ) // __kmpc_atomic_cmplx4_mul_cpt -ATOMIC_CRITICAL_CPT_WRK( cmplx4, div_cpt, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt - -ATOMIC_CRITICAL_CPT( cmplx8, add_cpt, kmp_cmplx64, +, 16c, 1 ) // __kmpc_atomic_cmplx8_add_cpt -ATOMIC_CRITICAL_CPT( cmplx8, sub_cpt, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx8, mul_cpt, kmp_cmplx64, *, 16c, 1 ) // __kmpc_atomic_cmplx8_mul_cpt -ATOMIC_CRITICAL_CPT( cmplx8, div_cpt, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt -ATOMIC_CRITICAL_CPT( cmplx10, add_cpt, kmp_cmplx80, +, 20c, 1 ) // __kmpc_atomic_cmplx10_add_cpt -ATOMIC_CRITICAL_CPT( cmplx10, sub_cpt, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx10, mul_cpt, kmp_cmplx80, *, 20c, 1 ) // __kmpc_atomic_cmplx10_mul_cpt -ATOMIC_CRITICAL_CPT( cmplx10, div_cpt, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt +ATOMIC_CRITICAL_CPT_WRK(cmplx4, add_cpt, kmp_cmplx32, +, 8c, + 1) // __kmpc_atomic_cmplx4_add_cpt +ATOMIC_CRITICAL_CPT_WRK(cmplx4, sub_cpt, kmp_cmplx32, -, 8c, + 1) // __kmpc_atomic_cmplx4_sub_cpt +ATOMIC_CRITICAL_CPT_WRK(cmplx4, mul_cpt, kmp_cmplx32, *, 8c, + 1) // __kmpc_atomic_cmplx4_mul_cpt +ATOMIC_CRITICAL_CPT_WRK(cmplx4, div_cpt, kmp_cmplx32, /, 8c, + 1) // __kmpc_atomic_cmplx4_div_cpt + +ATOMIC_CRITICAL_CPT(cmplx8, add_cpt, kmp_cmplx64, +, 16c, + 1) // __kmpc_atomic_cmplx8_add_cpt +ATOMIC_CRITICAL_CPT(cmplx8, sub_cpt, kmp_cmplx64, -, 16c, + 1) // __kmpc_atomic_cmplx8_sub_cpt +ATOMIC_CRITICAL_CPT(cmplx8, mul_cpt, kmp_cmplx64, *, 16c, + 1) // __kmpc_atomic_cmplx8_mul_cpt +ATOMIC_CRITICAL_CPT(cmplx8, div_cpt, kmp_cmplx64, /, 16c, + 1) // __kmpc_atomic_cmplx8_div_cpt +ATOMIC_CRITICAL_CPT(cmplx10, add_cpt, kmp_cmplx80, +, 20c, + 1) // __kmpc_atomic_cmplx10_add_cpt +ATOMIC_CRITICAL_CPT(cmplx10, sub_cpt, kmp_cmplx80, -, 20c, + 1) // 
__kmpc_atomic_cmplx10_sub_cpt +ATOMIC_CRITICAL_CPT(cmplx10, mul_cpt, kmp_cmplx80, *, 20c, + 1) // __kmpc_atomic_cmplx10_mul_cpt +ATOMIC_CRITICAL_CPT(cmplx10, div_cpt, kmp_cmplx80, /, 20c, + 1) // __kmpc_atomic_cmplx10_div_cpt #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_CPT( cmplx16, add_cpt, CPLX128_LEG, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_cpt -ATOMIC_CRITICAL_CPT( cmplx16, sub_cpt, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt -ATOMIC_CRITICAL_CPT( cmplx16, mul_cpt, CPLX128_LEG, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_cpt -ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT( cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, 1 ) // __kmpc_atomic_cmplx16_add_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, 1 ) // __kmpc_atomic_cmplx16_mul_a16_cpt - ATOMIC_CRITICAL_CPT( cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt +ATOMIC_CRITICAL_CPT(cmplx16, add_cpt, CPLX128_LEG, +, 32c, + 1) // __kmpc_atomic_cmplx16_add_cpt +ATOMIC_CRITICAL_CPT(cmplx16, sub_cpt, CPLX128_LEG, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_cpt +ATOMIC_CRITICAL_CPT(cmplx16, mul_cpt, CPLX128_LEG, *, 32c, + 1) // __kmpc_atomic_cmplx16_mul_cpt +ATOMIC_CRITICAL_CPT(cmplx16, div_cpt, CPLX128_LEG, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_cpt +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_CPT(cmplx16, add_a16_cpt, kmp_cmplx128_a16_t, +, 32c, + 1) // __kmpc_atomic_cmplx16_add_a16_cpt +ATOMIC_CRITICAL_CPT(cmplx16, sub_a16_cpt, kmp_cmplx128_a16_t, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_a16_cpt +ATOMIC_CRITICAL_CPT(cmplx16, mul_a16_cpt, kmp_cmplx128_a16_t, *, 32c, + 1) // __kmpc_atomic_cmplx16_mul_a16_cpt +ATOMIC_CRITICAL_CPT(cmplx16, div_a16_cpt, kmp_cmplx128_a16_t, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_a16_cpt #endif #endif #if OMP_40_ENABLED -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. +// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr +// binop x; v = x; } for non-commutative operations. 
// Supported only on IA-32 architecture and Intel(R) 64 // ------------------------------------------------------------------------- @@ -2355,29 +2850,29 @@ ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // // LCK_ID - lock identifier // Note: don't check gtid as it should always be valid // 1, 2-byte - expect valid parameter, other - check before this macro -#define OP_CRITICAL_CPT_REV(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - /*temp_val = (*lhs);*/\ - (*lhs) = (rhs) OP (*lhs); \ - new_value = (*lhs); \ - } else { \ - new_value = (*lhs);\ - (*lhs) = (rhs) OP (*lhs); \ - } \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return new_value; +#define OP_CRITICAL_CPT_REV(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (flag) { \ + /*temp_val = (*lhs);*/ \ + (*lhs) = (rhs)OP(*lhs); \ + new_value = (*lhs); \ + } else { \ + new_value = (*lhs); \ + (*lhs) = (rhs)OP(*lhs); \ + } \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return new_value; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_REV( OP, 0 ); \ - } +#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_REV(OP, 0); \ + } #else -#define OP_GOMP_CRITICAL_CPT_REV(OP,FLAG) +#define OP_GOMP_CRITICAL_CPT_REV(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ @@ -2387,154 +2882,194 @@ ATOMIC_CRITICAL_CPT( cmplx16, div_cpt, CPLX128_LEG, /, 32c, 1 ) // // OP - operator // Note: temp_val introduced in order to force the compiler to read // *lhs only once (w/o it the compiler reads *lhs twice) -#define OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs OP old_value; \ - } \ - if( flag ) { \ - return new_value; \ - } else \ - return old_value; \ - } +#define OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs OP old_value; \ + } \ + if (flag) { \ + return new_value; \ + } else \ + return old_value; \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID,OP_ID,TYPE,BITS,OP,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ -} - - -ATOMIC_CMPXCHG_CPT_REV( fixed1, div_cpt_rev, kmp_int8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1u, div_cpt_rev, kmp_uint8, 8, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, shl_cpt_rev, kmp_int8, 8, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, shr_cpt_rev, kmp_int8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed1, sub_cpt_rev, kmp_int8, 8, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, div_cpt_rev, kmp_int16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2u, div_cpt_rev, kmp_uint16, 16, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, shl_cpt_rev, kmp_int16, 16, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, shr_cpt_rev, kmp_int16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed2, sub_cpt_rev, kmp_int16, 16, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, div_cpt_rev, kmp_int32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4u, div_cpt_rev, kmp_uint32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, shl_cpt_rev, kmp_int32, 32, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, shr_cpt_rev, kmp_int32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed4, sub_cpt_rev, kmp_int32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, div_cpt_rev, kmp_int64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8u, div_cpt_rev, kmp_uint64, 64, /, KMP_ARCH_X86 ) // 
__kmpc_atomic_fixed8u_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, shl_cpt_rev, kmp_int64, 64, <<, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shl_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, shr_cpt_rev, kmp_int64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_shr_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( fixed8, sub_cpt_rev, kmp_int64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float4, div_cpt_rev, kmp_real32, 32, /, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float4, sub_cpt_rev, kmp_real32, 32, -, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float8, div_cpt_rev, kmp_real64, 64, /, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev -ATOMIC_CMPXCHG_CPT_REV( float8, sub_cpt_rev, kmp_real64, 64, -, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev +#define ATOMIC_CMPXCHG_CPT_REV(TYPE_ID, OP_ID, TYPE, BITS, OP, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ + OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ + } + +ATOMIC_CMPXCHG_CPT_REV(fixed1, div_cpt_rev, kmp_int8, 8, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed1u, div_cpt_rev, kmp_uint8, 8, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed1, shl_cpt_rev, kmp_int8, 8, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed1, shr_cpt_rev, kmp_int8, 8, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed1u, shr_cpt_rev, kmp_uint8, 8, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed1, sub_cpt_rev, kmp_int8, 8, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2, div_cpt_rev, kmp_int16, 16, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2u, div_cpt_rev, kmp_uint16, 16, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2, shl_cpt_rev, kmp_int16, 16, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2, shr_cpt_rev, kmp_int16, 16, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2u, shr_cpt_rev, kmp_uint16, 16, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed2, sub_cpt_rev, kmp_int16, 16, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4, div_cpt_rev, kmp_int32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4u, div_cpt_rev, kmp_uint32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4, shl_cpt_rev, kmp_int32, 32, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4, shr_cpt_rev, kmp_int32, 32, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4u, shr_cpt_rev, kmp_uint32, 32, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed4u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed4, sub_cpt_rev, kmp_int32, 32, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed4_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8, div_cpt_rev, kmp_int64, 64, /, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8u, div_cpt_rev, kmp_uint64, 64, /, + KMP_ARCH_X86) // 
__kmpc_atomic_fixed8u_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8, shl_cpt_rev, kmp_int64, 64, <<, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shl_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8, shr_cpt_rev, kmp_int64, 64, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8u, shr_cpt_rev, kmp_uint64, 64, >>, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_shr_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(fixed8, sub_cpt_rev, kmp_int64, 64, -, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(float4, div_cpt_rev, kmp_real32, 32, /, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(float4, sub_cpt_rev, kmp_real32, 32, -, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(float8, div_cpt_rev, kmp_real64, 64, /, + KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev +ATOMIC_CMPXCHG_CPT_REV(float8, sub_cpt_rev, kmp_real64, 64, -, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev // TYPE_ID,OP_ID, TYPE, OP, GOMP_FLAG - // ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) // TYPE_ID, OP_ID, TYPE - detailed above // OP - operator // LCK_ID - lock identifier, used to possibly distinguish lock variable -#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT(TYPE_ID,OP_ID,TYPE,TYPE) \ - TYPE new_value; \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/\ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_REV(OP,LCK_ID) \ -} - +#define ATOMIC_CRITICAL_CPT_REV(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT(TYPE_ID, OP_ID, TYPE, TYPE) \ + TYPE new_value; \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + /*printf("__kmp_atomic_mode = %d\n", __kmp_atomic_mode);*/ \ + OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ + OP_CRITICAL_CPT_REV(OP, LCK_ID) \ + } /* ------------------------------------------------------------------------- */ // routines for long double type -ATOMIC_CRITICAL_CPT_REV( float10, sub_cpt_rev, long double, -, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( float10, div_cpt_rev, long double, /, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev +ATOMIC_CRITICAL_CPT_REV(float10, sub_cpt_rev, long double, -, 10r, + 1) // __kmpc_atomic_float10_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV(float10, div_cpt_rev, long double, /, 10r, + 1) // __kmpc_atomic_float10_div_cpt_rev #if KMP_HAVE_QUAD // routines for _Quad type -ATOMIC_CRITICAL_CPT_REV( float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, 1 ) // __kmpc_atomic_float16_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( float16, div_cpt_rev, QUAD_LEGACY, /, 16r, 1 ) // __kmpc_atomic_float16_div_cpt_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT_REV( float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, 1 ) // __kmpc_atomic_float16_sub_a16_cpt_rev - ATOMIC_CRITICAL_CPT_REV( float16, div_a16_cpt_rev, Quad_a16_t, /, 16r, 1 ) // __kmpc_atomic_float16_div_a16_cpt_rev +ATOMIC_CRITICAL_CPT_REV(float16, sub_cpt_rev, QUAD_LEGACY, -, 16r, + 1) // __kmpc_atomic_float16_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV(float16, div_cpt_rev, QUAD_LEGACY, /, 16r, + 1) // __kmpc_atomic_float16_div_cpt_rev +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_CPT_REV(float16, sub_a16_cpt_rev, Quad_a16_t, -, 16r, + 1) // __kmpc_atomic_float16_sub_a16_cpt_rev +ATOMIC_CRITICAL_CPT_REV(float16, div_a16_cpt_rev, Quad_a16_t, 
/, 16r, + 1) // __kmpc_atomic_float16_div_a16_cpt_rev #endif #endif // routines for complex types // ------------------------------------------------------------------------ - // Workaround for cmplx4. Regular routines with return value don't work // on Win_32e. Let's return captured values through the additional parameter. -#define OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - if( flag ) { \ - (*lhs) = (rhs) OP (*lhs); \ - (*out) = (*lhs); \ - } else { \ - (*out) = (*lhs); \ - (*lhs) = (rhs) OP (*lhs); \ - } \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; +#define OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + if (flag) { \ + (*lhs) = (rhs)OP(*lhs); \ + (*out) = (*lhs); \ + } else { \ + (*out) = (*lhs); \ + (*lhs) = (rhs)OP(*lhs); \ + } \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - OP_CRITICAL_CPT_REV_WRK( OP, 0 ); \ - } +#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + OP_CRITICAL_CPT_REV_WRK(OP, 0); \ + } #else -#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP,FLAG) +#define OP_GOMP_CRITICAL_CPT_REV_WRK(OP, FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID,OP_ID,TYPE,OP,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_WRK(TYPE_ID,OP_ID,TYPE) \ - OP_GOMP_CRITICAL_CPT_REV_WRK(OP,GOMP_FLAG) \ - OP_CRITICAL_CPT_REV_WRK(OP,LCK_ID) \ -} +#define ATOMIC_CRITICAL_CPT_REV_WRK(TYPE_ID, OP_ID, TYPE, OP, LCK_ID, \ + GOMP_FLAG) \ + ATOMIC_BEGIN_WRK(TYPE_ID, OP_ID, TYPE) \ + OP_GOMP_CRITICAL_CPT_REV_WRK(OP, GOMP_FLAG) \ + OP_CRITICAL_CPT_REV_WRK(OP, LCK_ID) \ + } // The end of workaround for cmplx4 - // !!! 
TODO: check if we need to return void for cmplx4 routines // cmplx4 routines to return void -ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, 1 ) // __kmpc_atomic_cmplx4_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV_WRK( cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, 1 ) // __kmpc_atomic_cmplx4_div_cpt_rev - -ATOMIC_CRITICAL_CPT_REV( cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, 1 ) // __kmpc_atomic_cmplx8_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, 1 ) // __kmpc_atomic_cmplx8_div_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, 1 ) // __kmpc_atomic_cmplx10_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, 1 ) // __kmpc_atomic_cmplx10_div_cpt_rev +ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, sub_cpt_rev, kmp_cmplx32, -, 8c, + 1) // __kmpc_atomic_cmplx4_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV_WRK(cmplx4, div_cpt_rev, kmp_cmplx32, /, 8c, + 1) // __kmpc_atomic_cmplx4_div_cpt_rev + +ATOMIC_CRITICAL_CPT_REV(cmplx8, sub_cpt_rev, kmp_cmplx64, -, 16c, + 1) // __kmpc_atomic_cmplx8_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx8, div_cpt_rev, kmp_cmplx64, /, 16c, + 1) // __kmpc_atomic_cmplx8_div_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx10, sub_cpt_rev, kmp_cmplx80, -, 20c, + 1) // __kmpc_atomic_cmplx10_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx10, div_cpt_rev, kmp_cmplx80, /, 20c, + 1) // __kmpc_atomic_cmplx10_div_cpt_rev #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_cpt_rev -ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_cpt_rev -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_CPT_REV( cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, 1 ) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev - ATOMIC_CRITICAL_CPT_REV( cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, 1 ) // __kmpc_atomic_cmplx16_div_a16_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_cpt_rev, CPLX128_LEG, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_cpt_rev +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_CPT_REV(cmplx16, sub_a16_cpt_rev, kmp_cmplx128_a16_t, -, 32c, + 1) // __kmpc_atomic_cmplx16_sub_a16_cpt_rev +ATOMIC_CRITICAL_CPT_REV(cmplx16, div_a16_cpt_rev, kmp_cmplx128_a16_t, /, 32c, + 1) // __kmpc_atomic_cmplx16_div_a16_cpt_rev #endif #endif @@ -2542,577 +3077,556 @@ ATOMIC_CRITICAL_CPT_REV( cmplx16, div_cpt_rev, CPLX128_LEG, /, 32c, 1 ) #if KMP_HAVE_QUAD // Beginning of a definition (provides name, parameters, gebug trace) -// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned fixed) +// TYPE_ID - operands type and size (fixed*, fixed*u for signed, unsigned +// fixed) // OP_ID - operation identifier (add, sub, mul, ...) 
// TYPE - operands' type // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,BITS,OP,RTYPE_ID,RTYPE,LCK_ID,MASK,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) \ - OP_CMPXCHG_CPT_REV(TYPE,BITS,OP) \ -} +#define ATOMIC_CMPXCHG_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, BITS, OP, RTYPE_ID, \ + RTYPE, LCK_ID, MASK, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) \ + OP_CMPXCHG_CPT_REV(TYPE, BITS, OP) \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID,TYPE,OP_ID,OP,RTYPE_ID,RTYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_CPT_MIX(TYPE_ID,OP_ID,TYPE,RTYPE_ID,RTYPE) \ - TYPE new_value; \ - OP_GOMP_CRITICAL_CPT_REV(OP,GOMP_FLAG) /* send assignment */ \ - OP_CRITICAL_CPT_REV(OP,LCK_ID) /* send assignment */ \ -} - -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, 4i, 3, 0 ) // __kmpc_atomic_fixed4u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_div_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, 8i, 7, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8u_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, 4r, 3, KMP_ARCH_X86 ) // __kmpc_atomic_float4_div_cpt_rev_fp - -ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, 
sub_cpt_rev, 64, -, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_sub_cpt_rev_fp -ATOMIC_CMPXCHG_CPT_REV_MIX( float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, 8r, 7, KMP_ARCH_X86 ) // __kmpc_atomic_float8_div_cpt_rev_fp - -ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, sub_cpt_rev, -, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_sub_cpt_rev_fp -ATOMIC_CRITICAL_CPT_REV_MIX( float10, long double, div_cpt_rev, /, fp, _Quad, 10r, 1 ) // __kmpc_atomic_float10_div_cpt_rev_fp - -#endif //KMP_HAVE_QUAD +#define ATOMIC_CRITICAL_CPT_REV_MIX(TYPE_ID, TYPE, OP_ID, OP, RTYPE_ID, RTYPE, \ + LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_CPT_MIX(TYPE_ID, OP_ID, TYPE, RTYPE_ID, RTYPE) \ + TYPE new_value; \ + OP_GOMP_CRITICAL_CPT_REV(OP, GOMP_FLAG) /* send assignment */ \ + OP_CRITICAL_CPT_REV(OP, LCK_ID) /* send assignment */ \ + } + +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, sub_cpt_rev, 8, -, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1, char, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1_div_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed1u, uchar, div_cpt_rev, 8, /, fp, _Quad, 1i, 0, + KMP_ARCH_X86) // __kmpc_atomic_fixed1u_div_cpt_rev_fp + +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, sub_cpt_rev, 16, -, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, sub_cpt_rev, 16, -, fp, _Quad, 2i, + 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2, short, div_cpt_rev, 16, /, fp, _Quad, 2i, 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2_div_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed2u, ushort, div_cpt_rev, 16, /, fp, _Quad, 2i, + 1, + KMP_ARCH_X86) // __kmpc_atomic_fixed2u_div_cpt_rev_fp + +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, sub_cpt_rev, 32, -, fp, _Quad, 4i, + 3, 0) // __kmpc_atomic_fixed4_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, sub_cpt_rev, 32, -, fp, _Quad, + 4i, 3, 0) // __kmpc_atomic_fixed4u_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4, kmp_int32, div_cpt_rev, 32, /, fp, _Quad, 4i, + 3, 0) // __kmpc_atomic_fixed4_div_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed4u, kmp_uint32, div_cpt_rev, 32, /, fp, _Quad, + 4i, 3, 0) // __kmpc_atomic_fixed4u_div_cpt_rev_fp + +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, sub_cpt_rev, 64, -, fp, _Quad, 8i, + 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, sub_cpt_rev, 64, -, fp, _Quad, + 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8, kmp_int64, div_cpt_rev, 64, /, fp, _Quad, 8i, + 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_div_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(fixed8u, kmp_uint64, div_cpt_rev, 64, /, fp, _Quad, + 8i, 7, + KMP_ARCH_X86) // __kmpc_atomic_fixed8u_div_cpt_rev_fp + +ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, sub_cpt_rev, 32, -, fp, _Quad, + 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(float4, kmp_real32, div_cpt_rev, 32, /, fp, _Quad, + 4r, 3, + KMP_ARCH_X86) // __kmpc_atomic_float4_div_cpt_rev_fp + +ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, sub_cpt_rev, 64, -, fp, _Quad, + 8r, 7, + KMP_ARCH_X86) // __kmpc_atomic_float8_sub_cpt_rev_fp +ATOMIC_CMPXCHG_CPT_REV_MIX(float8, kmp_real64, div_cpt_rev, 64, /, fp, _Quad, + 8r, 7, 
+ KMP_ARCH_X86) // __kmpc_atomic_float8_div_cpt_rev_fp + +ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, sub_cpt_rev, -, fp, _Quad, + 10r, 1) // __kmpc_atomic_float10_sub_cpt_rev_fp +ATOMIC_CRITICAL_CPT_REV_MIX(float10, long double, div_cpt_rev, /, fp, _Quad, + 10r, 1) // __kmpc_atomic_float10_div_cpt_rev_fp +#endif // KMP_HAVE_QUAD // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} -#define ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ -TYPE __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); +#define ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ + TYPE __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ + TYPE rhs) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); -#define CRITICAL_SWP(LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - old_value = (*lhs); \ - (*lhs) = rhs; \ - \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return old_value; +#define CRITICAL_SWP(LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + old_value = (*lhs); \ + (*lhs) = rhs; \ + \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return old_value; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP(FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - CRITICAL_SWP( 0 ); \ - } +#define GOMP_CRITICAL_SWP(FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + CRITICAL_SWP(0); \ + } #else #define GOMP_CRITICAL_SWP(FLAG) #endif /* KMP_GOMP_COMPAT */ - -#define ATOMIC_XCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_FIXED##BITS( lhs, rhs ); \ - return old_value; \ -} -// ------------------------------------------------------------------------ -#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - old_value = KMP_XCHG_REAL##BITS( lhs, rhs ); \ - return old_value; \ -} - -// ------------------------------------------------------------------------ -#define CMPXCHG_SWP(TYPE,BITS) \ - { \ - TYPE KMP_ATOMIC_VOLATILE temp_val; \ - TYPE old_value, new_value; \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - while ( ! 
KMP_COMPARE_AND_STORE_ACQ##BITS( (kmp_int##BITS *) lhs, \ - *VOLATILE_CAST(kmp_int##BITS *) &old_value, \ - *VOLATILE_CAST(kmp_int##BITS *) &new_value ) ) \ - { \ - KMP_CPU_PAUSE(); \ - \ - temp_val = *lhs; \ - old_value = temp_val; \ - new_value = rhs; \ - } \ - return old_value; \ - } +#define ATOMIC_XCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ + ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + old_value = KMP_XCHG_FIXED##BITS(lhs, rhs); \ + return old_value; \ + } +// ------------------------------------------------------------------------ +#define ATOMIC_XCHG_FLOAT_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ + ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + old_value = KMP_XCHG_REAL##BITS(lhs, rhs); \ + return old_value; \ + } + +// ------------------------------------------------------------------------ +#define CMPXCHG_SWP(TYPE, BITS) \ + { \ + TYPE KMP_ATOMIC_VOLATILE temp_val; \ + TYPE old_value, new_value; \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + while (!KMP_COMPARE_AND_STORE_ACQ##BITS( \ + (kmp_int##BITS *)lhs, *VOLATILE_CAST(kmp_int##BITS *) & old_value, \ + *VOLATILE_CAST(kmp_int##BITS *) & new_value)) { \ + KMP_CPU_PAUSE(); \ + \ + temp_val = *lhs; \ + old_value = temp_val; \ + new_value = rhs; \ + } \ + return old_value; \ + } // ------------------------------------------------------------------------- -#define ATOMIC_CMPXCHG_SWP(TYPE_ID,TYPE,BITS,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - GOMP_CRITICAL_SWP(GOMP_FLAG) \ - CMPXCHG_SWP(TYPE,BITS) \ -} - -ATOMIC_XCHG_SWP( fixed1, kmp_int8, 8, KMP_ARCH_X86 ) // __kmpc_atomic_fixed1_swp -ATOMIC_XCHG_SWP( fixed2, kmp_int16, 16, KMP_ARCH_X86 ) // __kmpc_atomic_fixed2_swp -ATOMIC_XCHG_SWP( fixed4, kmp_int32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_fixed4_swp - -ATOMIC_XCHG_FLOAT_SWP( float4, kmp_real32, 32, KMP_ARCH_X86 ) // __kmpc_atomic_float4_swp - -#if ( KMP_ARCH_X86 ) - ATOMIC_CMPXCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp - ATOMIC_CMPXCHG_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp +#define ATOMIC_CMPXCHG_SWP(TYPE_ID, TYPE, BITS, GOMP_FLAG) \ + ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + CMPXCHG_SWP(TYPE, BITS) \ + } + +ATOMIC_XCHG_SWP(fixed1, kmp_int8, 8, KMP_ARCH_X86) // __kmpc_atomic_fixed1_swp +ATOMIC_XCHG_SWP(fixed2, kmp_int16, 16, KMP_ARCH_X86) // __kmpc_atomic_fixed2_swp +ATOMIC_XCHG_SWP(fixed4, kmp_int32, 32, KMP_ARCH_X86) // __kmpc_atomic_fixed4_swp + +ATOMIC_XCHG_FLOAT_SWP(float4, kmp_real32, 32, + KMP_ARCH_X86) // __kmpc_atomic_float4_swp + +#if (KMP_ARCH_X86) +ATOMIC_CMPXCHG_SWP(fixed8, kmp_int64, 64, + KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp +ATOMIC_CMPXCHG_SWP(float8, kmp_real64, 64, + KMP_ARCH_X86) // __kmpc_atomic_float8_swp #else - ATOMIC_XCHG_SWP( fixed8, kmp_int64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_fixed8_swp - ATOMIC_XCHG_FLOAT_SWP( float8, kmp_real64, 64, KMP_ARCH_X86 ) // __kmpc_atomic_float8_swp +ATOMIC_XCHG_SWP(fixed8, kmp_int64, 64, KMP_ARCH_X86) // __kmpc_atomic_fixed8_swp +ATOMIC_XCHG_FLOAT_SWP(float8, kmp_real64, 64, + KMP_ARCH_X86) // __kmpc_atomic_float8_swp #endif // ------------------------------------------------------------------------ -// Routines for Extended types: long double, _Quad, complex flavours (use critical section) -#define ATOMIC_CRITICAL_SWP(TYPE_ID,TYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP(TYPE_ID,TYPE) \ - TYPE old_value; \ - 
GOMP_CRITICAL_SWP(GOMP_FLAG) \ - CRITICAL_SWP(LCK_ID) \ -} +// Routines for Extended types: long double, _Quad, complex flavours (use +// critical section) +#define ATOMIC_CRITICAL_SWP(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_SWP(TYPE_ID, TYPE) \ + TYPE old_value; \ + GOMP_CRITICAL_SWP(GOMP_FLAG) \ + CRITICAL_SWP(LCK_ID) \ + } // ------------------------------------------------------------------------ - // !!! TODO: check if we need to return void for cmplx4 routines // Workaround for cmplx4. Regular routines with return value don't work // on Win_32e. Let's return captured values through the additional parameter. -#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ -void __kmpc_atomic_##TYPE_ID##_swp( ident_t *id_ref, int gtid, TYPE * lhs, TYPE rhs, TYPE * out ) \ -{ \ - KMP_DEBUG_ASSERT( __kmp_init_serial ); \ - KA_TRACE(100,("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid )); - - -#define CRITICAL_SWP_WRK(LCK_ID) \ - __kmp_acquire_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - \ - tmp = (*lhs); \ - (*lhs) = (rhs); \ - (*out) = tmp; \ - __kmp_release_atomic_lock( & ATOMIC_LOCK##LCK_ID, gtid ); \ - return; +#define ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ + void __kmpc_atomic_##TYPE_ID##_swp(ident_t *id_ref, int gtid, TYPE *lhs, \ + TYPE rhs, TYPE *out) { \ + KMP_DEBUG_ASSERT(__kmp_init_serial); \ + KA_TRACE(100, ("__kmpc_atomic_" #TYPE_ID "_swp: T#%d\n", gtid)); +#define CRITICAL_SWP_WRK(LCK_ID) \ + __kmp_acquire_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + \ + tmp = (*lhs); \ + (*lhs) = (rhs); \ + (*out) = tmp; \ + __kmp_release_atomic_lock(&ATOMIC_LOCK##LCK_ID, gtid); \ + return; // ------------------------------------------------------------------------ #ifdef KMP_GOMP_COMPAT -#define GOMP_CRITICAL_SWP_WRK(FLAG) \ - if ( (FLAG) && (__kmp_atomic_mode == 2) ) { \ - KMP_CHECK_GTID; \ - CRITICAL_SWP_WRK( 0 ); \ - } +#define GOMP_CRITICAL_SWP_WRK(FLAG) \ + if ((FLAG) && (__kmp_atomic_mode == 2)) { \ + KMP_CHECK_GTID; \ + CRITICAL_SWP_WRK(0); \ + } #else #define GOMP_CRITICAL_SWP_WRK(FLAG) #endif /* KMP_GOMP_COMPAT */ // ------------------------------------------------------------------------ -#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE,LCK_ID,GOMP_FLAG) \ -ATOMIC_BEGIN_SWP_WRK(TYPE_ID,TYPE) \ - TYPE tmp; \ - GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ - CRITICAL_SWP_WRK(LCK_ID) \ -} +#define ATOMIC_CRITICAL_SWP_WRK(TYPE_ID, TYPE, LCK_ID, GOMP_FLAG) \ + ATOMIC_BEGIN_SWP_WRK(TYPE_ID, TYPE) \ + TYPE tmp; \ + GOMP_CRITICAL_SWP_WRK(GOMP_FLAG) \ + CRITICAL_SWP_WRK(LCK_ID) \ + } // The end of workaround for cmplx4 - -ATOMIC_CRITICAL_SWP( float10, long double, 10r, 1 ) // __kmpc_atomic_float10_swp +ATOMIC_CRITICAL_SWP(float10, long double, 10r, 1) // __kmpc_atomic_float10_swp #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP( float16, QUAD_LEGACY, 16r, 1 ) // __kmpc_atomic_float16_swp +ATOMIC_CRITICAL_SWP(float16, QUAD_LEGACY, 16r, 1) // __kmpc_atomic_float16_swp #endif // cmplx4 routine to return void -ATOMIC_CRITICAL_SWP_WRK( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp - -//ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // __kmpc_atomic_cmplx4_swp +ATOMIC_CRITICAL_SWP_WRK(cmplx4, kmp_cmplx32, 8c, 1) // __kmpc_atomic_cmplx4_swp +// ATOMIC_CRITICAL_SWP( cmplx4, kmp_cmplx32, 8c, 1 ) // +// __kmpc_atomic_cmplx4_swp -ATOMIC_CRITICAL_SWP( cmplx8, kmp_cmplx64, 16c, 1 ) // __kmpc_atomic_cmplx8_swp -ATOMIC_CRITICAL_SWP( cmplx10, kmp_cmplx80, 20c, 1 ) // __kmpc_atomic_cmplx10_swp +ATOMIC_CRITICAL_SWP(cmplx8, kmp_cmplx64, 16c, 1) // __kmpc_atomic_cmplx8_swp +ATOMIC_CRITICAL_SWP(cmplx10, kmp_cmplx80, 20c, 1) 
// __kmpc_atomic_cmplx10_swp #if KMP_HAVE_QUAD -ATOMIC_CRITICAL_SWP( cmplx16, CPLX128_LEG, 32c, 1 ) // __kmpc_atomic_cmplx16_swp -#if ( KMP_ARCH_X86 ) - ATOMIC_CRITICAL_SWP( float16_a16, Quad_a16_t, 16r, 1 ) // __kmpc_atomic_float16_a16_swp - ATOMIC_CRITICAL_SWP( cmplx16_a16, kmp_cmplx128_a16_t, 32c, 1 ) // __kmpc_atomic_cmplx16_a16_swp +ATOMIC_CRITICAL_SWP(cmplx16, CPLX128_LEG, 32c, 1) // __kmpc_atomic_cmplx16_swp +#if (KMP_ARCH_X86) +ATOMIC_CRITICAL_SWP(float16_a16, Quad_a16_t, 16r, + 1) // __kmpc_atomic_float16_a16_swp +ATOMIC_CRITICAL_SWP(cmplx16_a16, kmp_cmplx128_a16_t, 32c, + 1) // __kmpc_atomic_cmplx16_a16_swp #endif #endif - // End of OpenMP 4.0 Capture -#endif //OMP_40_ENABLED - -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // OMP_40_ENABLED +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 #undef OP_CRITICAL /* ------------------------------------------------------------------------ */ /* Generic atomic routines */ -/* ------------------------------------------------------------------------ */ -void -__kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); - if ( + if ( #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ + FALSE /* must use lock */ #else - TRUE + TRUE #endif - ) - { - kmp_int8 old_value, new_value; - - old_value = *(kmp_int8 *) lhs; - (*f)( &new_value, &old_value, rhs ); + ) { + kmp_int8 old_value, new_value; - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ8 ( (kmp_int8 *) lhs, - *(kmp_int8 *) &old_value, *(kmp_int8 *) &new_value ) ) - { - KMP_CPU_PAUSE(); + old_value = *(kmp_int8 *)lhs; + (*f)(&new_value, &old_value, rhs); - old_value = *(kmp_int8 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } + /* TODO: Should this be acquire or release? */ + while (!KMP_COMPARE_AND_STORE_ACQ8((kmp_int8 *)lhs, *(kmp_int8 *)&old_value, + *(kmp_int8 *)&new_value)) { + KMP_CPU_PAUSE(); - return; + old_value = *(kmp_int8 *)lhs; + (*f)(&new_value, &old_value, rhs); } - else { - // - // All 1-byte data is of integer data type. - // + + return; + } else { +// All 1-byte data is of integer data type. 
#ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_1i, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_1i, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_1i, gtid ); - } + __kmp_release_atomic_lock(&__kmp_atomic_lock_1i, gtid); + } } -void -__kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - if ( +void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + if ( #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ + FALSE /* must use lock */ #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ + TRUE /* no alignment problems */ #else - ! ( (kmp_uintptr_t) lhs & 0x1) /* make sure address is 2-byte aligned */ + !((kmp_uintptr_t)lhs & 0x1) /* make sure address is 2-byte aligned */ #endif - ) - { - kmp_int16 old_value, new_value; + ) { + kmp_int16 old_value, new_value; - old_value = *(kmp_int16 *) lhs; - (*f)( &new_value, &old_value, rhs ); + old_value = *(kmp_int16 *)lhs; + (*f)(&new_value, &old_value, rhs); - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ16 ( (kmp_int16 *) lhs, - *(kmp_int16 *) &old_value, *(kmp_int16 *) &new_value ) ) - { - KMP_CPU_PAUSE(); + /* TODO: Should this be acquire or release? */ + while (!KMP_COMPARE_AND_STORE_ACQ16( + (kmp_int16 *)lhs, *(kmp_int16 *)&old_value, *(kmp_int16 *)&new_value)) { + KMP_CPU_PAUSE(); - old_value = *(kmp_int16 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; + old_value = *(kmp_int16 *)lhs; + (*f)(&new_value, &old_value, rhs); } - else { - // - // All 2-byte data is of integer data type. - // + + return; + } else { +// All 2-byte data is of integer data type. #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_2i, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_2i, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_2i, gtid ); - } + __kmp_release_atomic_lock(&__kmp_atomic_lock_2i, gtid); + } } -void -__kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); - if ( - // - // FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. 
- // Gomp compatibility is broken if this routine is called for floats. - // + if ( +// FIXME: On IA-32 architecture, gcc uses cmpxchg only for 4-byte ints. +// Gomp compatibility is broken if this routine is called for floats. #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ + TRUE /* no alignment problems */ #else - ! ( (kmp_uintptr_t) lhs & 0x3) /* make sure address is 4-byte aligned */ + !((kmp_uintptr_t)lhs & 0x3) /* make sure address is 4-byte aligned */ #endif - ) - { - kmp_int32 old_value, new_value; - - old_value = *(kmp_int32 *) lhs; - (*f)( &new_value, &old_value, rhs ); + ) { + kmp_int32 old_value, new_value; - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ32 ( (kmp_int32 *) lhs, - *(kmp_int32 *) &old_value, *(kmp_int32 *) &new_value ) ) - { - KMP_CPU_PAUSE(); + old_value = *(kmp_int32 *)lhs; + (*f)(&new_value, &old_value, rhs); - old_value = *(kmp_int32 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } + /* TODO: Should this be acquire or release? */ + while (!KMP_COMPARE_AND_STORE_ACQ32( + (kmp_int32 *)lhs, *(kmp_int32 *)&old_value, *(kmp_int32 *)&new_value)) { + KMP_CPU_PAUSE(); - return; + old_value = *(kmp_int32 *)lhs; + (*f)(&new_value, &old_value, rhs); } - else { - // - // Use __kmp_atomic_lock_4i for all 4-byte data, - // even if it isn't of integer data type. - // + + return; + } else { +// Use __kmp_atomic_lock_4i for all 4-byte data, +// even if it isn't of integer data type. #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_4i, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_4i, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_4i, gtid ); - } + __kmp_release_atomic_lock(&__kmp_atomic_lock_4i, gtid); + } } -void -__kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - if ( +void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + if ( #if KMP_ARCH_X86 && defined(KMP_GOMP_COMPAT) - FALSE /* must use lock */ + FALSE /* must use lock */ #elif KMP_ARCH_X86 || KMP_ARCH_X86_64 - TRUE /* no alignment problems */ + TRUE /* no alignment problems */ #else - ! ( (kmp_uintptr_t) lhs & 0x7) /* make sure address is 8-byte aligned */ + !((kmp_uintptr_t)lhs & 0x7) /* make sure address is 8-byte aligned */ #endif - ) - { - kmp_int64 old_value, new_value; - - old_value = *(kmp_int64 *) lhs; - (*f)( &new_value, &old_value, rhs ); - /* TODO: Should this be acquire or release? */ - while ( ! KMP_COMPARE_AND_STORE_ACQ64 ( (kmp_int64 *) lhs, - *(kmp_int64 *) &old_value, - *(kmp_int64 *) &new_value ) ) - { - KMP_CPU_PAUSE(); - - old_value = *(kmp_int64 *) lhs; - (*f)( &new_value, &old_value, rhs ); - } - - return; - } else { - // - // Use __kmp_atomic_lock_8i for all 8-byte data, - // even if it isn't of integer data type. 
- // + ) { + kmp_int64 old_value, new_value; + + old_value = *(kmp_int64 *)lhs; + (*f)(&new_value, &old_value, rhs); + /* TODO: Should this be acquire or release? */ + while (!KMP_COMPARE_AND_STORE_ACQ64( + (kmp_int64 *)lhs, *(kmp_int64 *)&old_value, *(kmp_int64 *)&new_value)) { + KMP_CPU_PAUSE(); + + old_value = *(kmp_int64 *)lhs; + (*f)(&new_value, &old_value, rhs); + } + + return; + } else { +// Use __kmp_atomic_lock_8i for all 8-byte data, +// even if it isn't of integer data type. #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_8i, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_8i, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_8i, gtid ); - } + __kmp_release_atomic_lock(&__kmp_atomic_lock_8i, gtid); + } } -void -__kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_10r, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_10r, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_10r, gtid ); + __kmp_release_atomic_lock(&__kmp_atomic_lock_10r, gtid); } -void -__kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_16c, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_16c, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_16c, gtid ); + __kmp_release_atomic_lock(&__kmp_atomic_lock_16c, gtid); } -void -__kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, 
void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_20c, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_20c, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_20c, gtid ); + __kmp_release_atomic_lock(&__kmp_atomic_lock_20c, gtid); } -void -__kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)) { + KMP_DEBUG_ASSERT(__kmp_init_serial); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_acquire_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_acquire_atomic_lock( & __kmp_atomic_lock_32c, gtid ); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock_32c, gtid); - (*f)( lhs, lhs, rhs ); + (*f)(lhs, lhs, rhs); #ifdef KMP_GOMP_COMPAT - if ( __kmp_atomic_mode == 2 ) { - __kmp_release_atomic_lock( & __kmp_atomic_lock, gtid ); - } - else + if (__kmp_atomic_mode == 2) { + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); + } else #endif /* KMP_GOMP_COMPAT */ - __kmp_release_atomic_lock( & __kmp_atomic_lock_32c, gtid ); + __kmp_release_atomic_lock(&__kmp_atomic_lock_32c, gtid); } -// AC: same two routines as GOMP_atomic_start/end, but will be called by our compiler -// duplicated in order to not use 3-party names in pure Intel code +// AC: same two routines as GOMP_atomic_start/end, but will be called by our +// compiler; duplicated in order to not use 3-party names in pure Intel code // TODO: consider adding GTID parameter after consultation with Ernesto/Xinmin. -void -__kmpc_atomic_start(void) -{ - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); +void __kmpc_atomic_start(void) { + int gtid = __kmp_entry_gtid(); + KA_TRACE(20, ("__kmpc_atomic_start: T#%d\n", gtid)); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); } - -void -__kmpc_atomic_end(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); +void __kmpc_atomic_end(void) { + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("__kmpc_atomic_end: T#%d\n", gtid)); + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ /*! 
@} */ diff --git a/openmp/runtime/src/kmp_atomic.h b/openmp/runtime/src/kmp_atomic.h index 3cc3a54..77ad3ae 100644 --- a/openmp/runtime/src/kmp_atomic.h +++ b/openmp/runtime/src/kmp_atomic.h @@ -16,8 +16,8 @@ #ifndef KMP_ATOMIC_H #define KMP_ATOMIC_H -#include "kmp_os.h" #include "kmp_lock.h" +#include "kmp_os.h" #if OMPT_SUPPORT #include "ompt-specific.h" @@ -32,188 +32,181 @@ // to use typedef'ed types on win. // Condition for WIN64 was modified in anticipation of 10.1 build compiler. -#if defined( __cplusplus ) && ( KMP_OS_WINDOWS ) - // create shortcuts for c99 complex types - - // Visual Studio cannot have function parameters that have the - // align __declspec attribute, so we must remove it. (Compiler Error C2719) - #if KMP_COMPILER_MSVC - # undef KMP_DO_ALIGN - # define KMP_DO_ALIGN(alignment) /* Nothing */ - #endif - - #if (_MSC_VER < 1600) && defined(_DEBUG) - // Workaround for the problem of _DebugHeapTag unresolved external. - // This problem prevented to use our static debug library for C tests - // compiled with /MDd option (the library itself built with /MTd), - #undef _DEBUG - #define _DEBUG_TEMPORARILY_UNSET_ - #endif - - #include - - template< typename type_lhs, typename type_rhs > - std::complex< type_lhs > __kmp_lhs_div_rhs( - const std::complex< type_lhs >& lhs, - const std::complex< type_rhs >& rhs ) { - type_lhs a = lhs.real(); - type_lhs b = lhs.imag(); - type_rhs c = rhs.real(); - type_rhs d = rhs.imag(); - type_rhs den = c*c + d*d; - type_rhs r = ( a*c + b*d ); - type_rhs i = ( b*c - a*d ); - std::complex< type_lhs > ret( r/den, i/den ); - return ret; - } - - // complex8 - struct __kmp_cmplx64_t : std::complex< double > { - - __kmp_cmplx64_t() : std::complex< double > () {} - - __kmp_cmplx64_t( const std::complex< double >& cd ) - : std::complex< double > ( cd ) {} - - void operator /= ( const __kmp_cmplx64_t& rhs ) { - std::complex< double > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx64_t operator / ( const __kmp_cmplx64_t& rhs ) { - std::complex< double > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef struct __kmp_cmplx64_t kmp_cmplx64; - - // complex4 - struct __kmp_cmplx32_t : std::complex< float > { - - __kmp_cmplx32_t() : std::complex< float > () {} - - __kmp_cmplx32_t( const std::complex& cf ) - : std::complex< float > ( cf ) {} - - __kmp_cmplx32_t operator + ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs + rhs ); - } - __kmp_cmplx32_t operator - ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs - rhs ); - } - __kmp_cmplx32_t operator * ( const __kmp_cmplx32_t& b ) { - std::complex< float > lhs = *this; - std::complex< float > rhs = b; - return ( lhs * rhs ); - } - - __kmp_cmplx32_t operator + ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) + b; - std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - __kmp_cmplx32_t operator - ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) - b; - std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - __kmp_cmplx32_t operator * ( const kmp_cmplx64& b ) { - kmp_cmplx64 t = kmp_cmplx64( *this ) * b; - std::complex< double > d( t ); - std::complex< float > f( d ); - __kmp_cmplx32_t r( f ); - return r; - } - - void operator /= ( const __kmp_cmplx32_t& rhs ) { - std::complex< float > lhs = *this; - 
*this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx32_t operator / ( const __kmp_cmplx32_t& rhs ) { - std::complex< float > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - void operator /= ( const kmp_cmplx64& rhs ) { - std::complex< float > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx32_t operator / ( const kmp_cmplx64& rhs ) { - std::complex< float > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - }; - typedef struct __kmp_cmplx32_t kmp_cmplx32; - - // complex10 - struct KMP_DO_ALIGN( 16 ) __kmp_cmplx80_t : std::complex< long double > { - - __kmp_cmplx80_t() : std::complex< long double > () {} - - __kmp_cmplx80_t( const std::complex< long double >& cld ) - : std::complex< long double > ( cld ) {} - - void operator /= ( const __kmp_cmplx80_t& rhs ) { - std::complex< long double > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx80_t operator / ( const __kmp_cmplx80_t& rhs ) { - std::complex< long double > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef KMP_DO_ALIGN( 16 ) struct __kmp_cmplx80_t kmp_cmplx80; - - // complex16 - #if KMP_HAVE_QUAD - struct __kmp_cmplx128_t : std::complex< _Quad > { - - __kmp_cmplx128_t() : std::complex< _Quad > () {} - - __kmp_cmplx128_t( const std::complex< _Quad >& cq ) - : std::complex< _Quad > ( cq ) {} - - void operator /= ( const __kmp_cmplx128_t& rhs ) { - std::complex< _Quad > lhs = *this; - *this = __kmp_lhs_div_rhs( lhs, rhs ); - } - - __kmp_cmplx128_t operator / ( const __kmp_cmplx128_t& rhs ) { - std::complex< _Quad > lhs = *this; - return __kmp_lhs_div_rhs( lhs, rhs ); - } - - }; - typedef struct __kmp_cmplx128_t kmp_cmplx128; - #endif /* KMP_HAVE_QUAD */ - - #ifdef _DEBUG_TEMPORARILY_UNSET_ - #undef _DEBUG_TEMPORARILY_UNSET_ - // Set it back now - #define _DEBUG 1 - #endif +#if defined(__cplusplus) && (KMP_OS_WINDOWS) +// create shortcuts for c99 complex types + +// Visual Studio cannot have function parameters that have the +// align __declspec attribute, so we must remove it. (Compiler Error C2719) +#if KMP_COMPILER_MSVC +#undef KMP_DO_ALIGN +#define KMP_DO_ALIGN(alignment) /* Nothing */ +#endif + +#if (_MSC_VER < 1600) && defined(_DEBUG) +// Workaround for the problem of _DebugHeapTag unresolved external. 
+// This problem prevented to use our static debug library for C tests +// compiled with /MDd option (the library itself built with /MTd), +#undef _DEBUG +#define _DEBUG_TEMPORARILY_UNSET_ +#endif + +#include + +template +std::complex __kmp_lhs_div_rhs(const std::complex &lhs, + const std::complex &rhs) { + type_lhs a = lhs.real(); + type_lhs b = lhs.imag(); + type_rhs c = rhs.real(); + type_rhs d = rhs.imag(); + type_rhs den = c * c + d * d; + type_rhs r = (a * c + b * d); + type_rhs i = (b * c - a * d); + std::complex ret(r / den, i / den); + return ret; +} + +// complex8 +struct __kmp_cmplx64_t : std::complex { + + __kmp_cmplx64_t() : std::complex() {} + + __kmp_cmplx64_t(const std::complex &cd) : std::complex(cd) {} + + void operator/=(const __kmp_cmplx64_t &rhs) { + std::complex lhs = *this; + *this = __kmp_lhs_div_rhs(lhs, rhs); + } + + __kmp_cmplx64_t operator/(const __kmp_cmplx64_t &rhs) { + std::complex lhs = *this; + return __kmp_lhs_div_rhs(lhs, rhs); + } +}; +typedef struct __kmp_cmplx64_t kmp_cmplx64; + +// complex4 +struct __kmp_cmplx32_t : std::complex { + + __kmp_cmplx32_t() : std::complex() {} + + __kmp_cmplx32_t(const std::complex &cf) : std::complex(cf) {} + + __kmp_cmplx32_t operator+(const __kmp_cmplx32_t &b) { + std::complex lhs = *this; + std::complex rhs = b; + return (lhs + rhs); + } + __kmp_cmplx32_t operator-(const __kmp_cmplx32_t &b) { + std::complex lhs = *this; + std::complex rhs = b; + return (lhs - rhs); + } + __kmp_cmplx32_t operator*(const __kmp_cmplx32_t &b) { + std::complex lhs = *this; + std::complex rhs = b; + return (lhs * rhs); + } + + __kmp_cmplx32_t operator+(const kmp_cmplx64 &b) { + kmp_cmplx64 t = kmp_cmplx64(*this) + b; + std::complex d(t); + std::complex f(d); + __kmp_cmplx32_t r(f); + return r; + } + __kmp_cmplx32_t operator-(const kmp_cmplx64 &b) { + kmp_cmplx64 t = kmp_cmplx64(*this) - b; + std::complex d(t); + std::complex f(d); + __kmp_cmplx32_t r(f); + return r; + } + __kmp_cmplx32_t operator*(const kmp_cmplx64 &b) { + kmp_cmplx64 t = kmp_cmplx64(*this) * b; + std::complex d(t); + std::complex f(d); + __kmp_cmplx32_t r(f); + return r; + } + + void operator/=(const __kmp_cmplx32_t &rhs) { + std::complex lhs = *this; + *this = __kmp_lhs_div_rhs(lhs, rhs); + } + + __kmp_cmplx32_t operator/(const __kmp_cmplx32_t &rhs) { + std::complex lhs = *this; + return __kmp_lhs_div_rhs(lhs, rhs); + } + + void operator/=(const kmp_cmplx64 &rhs) { + std::complex lhs = *this; + *this = __kmp_lhs_div_rhs(lhs, rhs); + } + + __kmp_cmplx32_t operator/(const kmp_cmplx64 &rhs) { + std::complex lhs = *this; + return __kmp_lhs_div_rhs(lhs, rhs); + } +}; +typedef struct __kmp_cmplx32_t kmp_cmplx32; + +// complex10 +struct KMP_DO_ALIGN(16) __kmp_cmplx80_t : std::complex { + + __kmp_cmplx80_t() : std::complex() {} + + __kmp_cmplx80_t(const std::complex &cld) + : std::complex(cld) {} + + void operator/=(const __kmp_cmplx80_t &rhs) { + std::complex lhs = *this; + *this = __kmp_lhs_div_rhs(lhs, rhs); + } + + __kmp_cmplx80_t operator/(const __kmp_cmplx80_t &rhs) { + std::complex lhs = *this; + return __kmp_lhs_div_rhs(lhs, rhs); + } +}; +typedef KMP_DO_ALIGN(16) struct __kmp_cmplx80_t kmp_cmplx80; + +// complex16 +#if KMP_HAVE_QUAD +struct __kmp_cmplx128_t : std::complex<_Quad> { + + __kmp_cmplx128_t() : std::complex<_Quad>() {} + + __kmp_cmplx128_t(const std::complex<_Quad> &cq) : std::complex<_Quad>(cq) {} + + void operator/=(const __kmp_cmplx128_t &rhs) { + std::complex<_Quad> lhs = *this; + *this = __kmp_lhs_div_rhs(lhs, rhs); + } + + __kmp_cmplx128_t operator/(const 
__kmp_cmplx128_t &rhs) { + std::complex<_Quad> lhs = *this; + return __kmp_lhs_div_rhs(lhs, rhs); + } +}; +typedef struct __kmp_cmplx128_t kmp_cmplx128; +#endif /* KMP_HAVE_QUAD */ + +#ifdef _DEBUG_TEMPORARILY_UNSET_ +#undef _DEBUG_TEMPORARILY_UNSET_ +// Set it back now +#define _DEBUG 1 +#endif #else - // create shortcuts for c99 complex types - typedef float _Complex kmp_cmplx32; - typedef double _Complex kmp_cmplx64; - typedef long double _Complex kmp_cmplx80; - #if KMP_HAVE_QUAD - typedef _Quad _Complex kmp_cmplx128; - #endif +// create shortcuts for c99 complex types +typedef float _Complex kmp_cmplx32; +typedef double _Complex kmp_cmplx64; +typedef long double _Complex kmp_cmplx80; +#if KMP_HAVE_QUAD +typedef _Quad _Complex kmp_cmplx128; +#endif #endif // Compiler 12.0 changed alignment of 16 and 32-byte arguments (like _Quad @@ -222,377 +215,477 @@ // introduce the new alignment in 12.0. See CQ88405. #if KMP_ARCH_X86 && KMP_HAVE_QUAD - // 4-byte aligned structures for backward compatibility. - - #pragma pack( push, 4 ) - - - struct KMP_DO_ALIGN( 4 ) Quad_a4_t { - _Quad q; - - Quad_a4_t( ) : q( ) {} - Quad_a4_t( const _Quad & cq ) : q ( cq ) {} - - Quad_a4_t operator + ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs + rhs ); - } - - Quad_a4_t operator - ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs - rhs ); - } - Quad_a4_t operator * ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs * rhs ); - } - - Quad_a4_t operator / ( const Quad_a4_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a4_t)( lhs / rhs ); - } - - }; - - struct KMP_DO_ALIGN( 4 ) kmp_cmplx128_a4_t { - kmp_cmplx128 q; - - kmp_cmplx128_a4_t() : q () {} - - kmp_cmplx128_a4_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} - - kmp_cmplx128_a4_t operator + ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs + rhs ); - } - kmp_cmplx128_a4_t operator - ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs - rhs ); - } - kmp_cmplx128_a4_t operator * ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs * rhs ); - } - - kmp_cmplx128_a4_t operator / ( const kmp_cmplx128_a4_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a4_t)( lhs / rhs ); - } - - }; - - #pragma pack( pop ) - - // New 16-byte aligned structures for 12.0 compiler. 
- struct KMP_DO_ALIGN( 16 ) Quad_a16_t { - _Quad q; - - Quad_a16_t( ) : q( ) {} - Quad_a16_t( const _Quad & cq ) : q ( cq ) {} - - Quad_a16_t operator + ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs + rhs ); - } - - Quad_a16_t operator - ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs - rhs ); - } - Quad_a16_t operator * ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs * rhs ); - } - - Quad_a16_t operator / ( const Quad_a16_t& b ) { - _Quad lhs = (*this).q; - _Quad rhs = b.q; - return (Quad_a16_t)( lhs / rhs ); - } - }; - - struct KMP_DO_ALIGN( 16 ) kmp_cmplx128_a16_t { - kmp_cmplx128 q; - - kmp_cmplx128_a16_t() : q () {} - - kmp_cmplx128_a16_t( const kmp_cmplx128 & c128 ) : q ( c128 ) {} - - kmp_cmplx128_a16_t operator + ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs + rhs ); - } - kmp_cmplx128_a16_t operator - ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs - rhs ); - } - kmp_cmplx128_a16_t operator * ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs * rhs ); - } - - kmp_cmplx128_a16_t operator / ( const kmp_cmplx128_a16_t& b ) { - kmp_cmplx128 lhs = (*this).q; - kmp_cmplx128 rhs = b.q; - return (kmp_cmplx128_a16_t)( lhs / rhs ); - } - }; +// 4-byte aligned structures for backward compatibility. + +#pragma pack(push, 4) + +struct KMP_DO_ALIGN(4) Quad_a4_t { + _Quad q; + + Quad_a4_t() : q() {} + Quad_a4_t(const _Quad &cq) : q(cq) {} + + Quad_a4_t operator+(const Quad_a4_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)(lhs + rhs); + } + + Quad_a4_t operator-(const Quad_a4_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)(lhs - rhs); + } + Quad_a4_t operator*(const Quad_a4_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)(lhs * rhs); + } + + Quad_a4_t operator/(const Quad_a4_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a4_t)(lhs / rhs); + } +}; + +struct KMP_DO_ALIGN(4) kmp_cmplx128_a4_t { + kmp_cmplx128 q; + + kmp_cmplx128_a4_t() : q() {} + + kmp_cmplx128_a4_t(const kmp_cmplx128 &c128) : q(c128) {} + + kmp_cmplx128_a4_t operator+(const kmp_cmplx128_a4_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)(lhs + rhs); + } + kmp_cmplx128_a4_t operator-(const kmp_cmplx128_a4_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)(lhs - rhs); + } + kmp_cmplx128_a4_t operator*(const kmp_cmplx128_a4_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)(lhs * rhs); + } + + kmp_cmplx128_a4_t operator/(const kmp_cmplx128_a4_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a4_t)(lhs / rhs); + } +}; + +#pragma pack(pop) + +// New 16-byte aligned structures for 12.0 compiler. 
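A brief aside, not part of the patch, on why both wrapper families exist: per the comments in this hunk, pre-12.0 IA-32 compilers passed _Quad and kmp_cmplx128 arguments with 4-byte alignment, so the unsuffixed entry points keep the packed Quad_a4_t/kmp_cmplx128_a4_t layout (selected through the QUAD_LEGACY/CPLX128_LEG macros below), while the *_a16 entry points introduced for the 12.0 ABI take the 16-byte-aligned wrappers declared next. A hypothetical compile-time check, assuming KMP_DO_ALIGN expands to a real alignment attribute as on the non-MSVC path:

// Sketch only; would not hold on MSVC, where KMP_DO_ALIGN is defined away.
static_assert(alignof(Quad_a16_t) >= 16,
              "a16 wrappers preserve the 16-byte argument alignment");
// Quad_a4_t, by contrast, is declared under '#pragma pack(push, 4)', so its
// _Quad member keeps the old 4-byte packing for backward compatibility with
// objects built by pre-12.0 compilers.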
+struct KMP_DO_ALIGN(16) Quad_a16_t { + _Quad q; + + Quad_a16_t() : q() {} + Quad_a16_t(const _Quad &cq) : q(cq) {} + + Quad_a16_t operator+(const Quad_a16_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)(lhs + rhs); + } + + Quad_a16_t operator-(const Quad_a16_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)(lhs - rhs); + } + Quad_a16_t operator*(const Quad_a16_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)(lhs * rhs); + } + + Quad_a16_t operator/(const Quad_a16_t &b) { + _Quad lhs = (*this).q; + _Quad rhs = b.q; + return (Quad_a16_t)(lhs / rhs); + } +}; + +struct KMP_DO_ALIGN(16) kmp_cmplx128_a16_t { + kmp_cmplx128 q; + + kmp_cmplx128_a16_t() : q() {} + + kmp_cmplx128_a16_t(const kmp_cmplx128 &c128) : q(c128) {} + + kmp_cmplx128_a16_t operator+(const kmp_cmplx128_a16_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)(lhs + rhs); + } + kmp_cmplx128_a16_t operator-(const kmp_cmplx128_a16_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)(lhs - rhs); + } + kmp_cmplx128_a16_t operator*(const kmp_cmplx128_a16_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)(lhs * rhs); + } + + kmp_cmplx128_a16_t operator/(const kmp_cmplx128_a16_t &b) { + kmp_cmplx128 lhs = (*this).q; + kmp_cmplx128 rhs = b.q; + return (kmp_cmplx128_a16_t)(lhs / rhs); + } +}; #endif -#if ( KMP_ARCH_X86 ) - #define QUAD_LEGACY Quad_a4_t - #define CPLX128_LEG kmp_cmplx128_a4_t +#if (KMP_ARCH_X86) +#define QUAD_LEGACY Quad_a4_t +#define CPLX128_LEG kmp_cmplx128_a4_t #else - #define QUAD_LEGACY _Quad - #define CPLX128_LEG kmp_cmplx128 +#define QUAD_LEGACY _Quad +#define CPLX128_LEG kmp_cmplx128 #endif #ifdef __cplusplus - extern "C" { +extern "C" { #endif extern int __kmp_atomic_mode; -// // Atomic locks can easily become contended, so we use queuing locks for them. 
-// - typedef kmp_queuing_lock_t kmp_atomic_lock_t; -static inline void -__kmp_acquire_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ +static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, + kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_wait_atomic)( - (ompt_wait_id_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_wait_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_wait_atomic)((ompt_wait_id_t)lck); + } #endif - __kmp_acquire_queuing_lock( lck, gtid ); + __kmp_acquire_queuing_lock(lck, gtid); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( - (ompt_wait_id_t) lck); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_acquired_atomic)( + (ompt_wait_id_t)lck); + } #endif } -static inline int -__kmp_test_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_test_queuing_lock( lck, gtid ); +static inline int __kmp_test_atomic_lock(kmp_atomic_lock_t *lck, + kmp_int32 gtid) { + return __kmp_test_queuing_lock(lck, gtid); } -static inline void -__kmp_release_atomic_lock( kmp_atomic_lock_t *lck, kmp_int32 gtid ) -{ - __kmp_release_queuing_lock( lck, gtid ); +static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck, + kmp_int32 gtid) { + __kmp_release_queuing_lock(lck, gtid); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { - ompt_callbacks.ompt_callback(ompt_event_release_atomic)( - (ompt_wait_id_t) lck); + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_atomic)) { + ompt_callbacks.ompt_callback(ompt_event_release_atomic)( + (ompt_wait_id_t)lck); } #endif } -static inline void -__kmp_init_atomic_lock( kmp_atomic_lock_t *lck ) -{ - __kmp_init_queuing_lock( lck ); +static inline void __kmp_init_atomic_lock(kmp_atomic_lock_t *lck) { + __kmp_init_queuing_lock(lck); } -static inline void -__kmp_destroy_atomic_lock( kmp_atomic_lock_t *lck ) -{ - __kmp_destroy_queuing_lock( lck ); +static inline void __kmp_destroy_atomic_lock(kmp_atomic_lock_t *lck) { + __kmp_destroy_queuing_lock(lck); } // Global Locks +extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded + atomics in Gnu compat mode */ +extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user + coded atomics for 1-byte fixed + data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user + coded atomics for 2-byte fixed + data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user + coded atomics for 4-byte fixed + data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user + coded atomics for kmp_real32 + data type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user + coded atomics for 8-byte fixed + data types */ +extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user + coded atomics for kmp_real64 + data type */ +extern kmp_atomic_lock_t + __kmp_atomic_lock_8c; /* Control access to all user coded atomics for + complex byte data type */ +extern kmp_atomic_lock_t + __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long + double data type */ +extern 
kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user + coded atomics for _Quad data + type */ +extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user + coded atomics for double + complex data type*/ +extern kmp_atomic_lock_t + __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long + double complex type*/ +extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user + coded atomics for _Quad + complex data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock; /* Control access to all user coded atomics in Gnu compat mode */ -extern kmp_atomic_lock_t __kmp_atomic_lock_1i; /* Control access to all user coded atomics for 1-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_2i; /* Control access to all user coded atomics for 2-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4i; /* Control access to all user coded atomics for 4-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_4r; /* Control access to all user coded atomics for kmp_real32 data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8i; /* Control access to all user coded atomics for 8-byte fixed data types */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8r; /* Control access to all user coded atomics for kmp_real64 data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_8c; /* Control access to all user coded atomics for complex byte data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_10r; /* Control access to all user coded atomics for long double data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16r; /* Control access to all user coded atomics for _Quad data type */ -extern kmp_atomic_lock_t __kmp_atomic_lock_16c; /* Control access to all user coded atomics for double complex data type*/ -extern kmp_atomic_lock_t __kmp_atomic_lock_20c; /* Control access to all user coded atomics for long double complex type*/ -extern kmp_atomic_lock_t __kmp_atomic_lock_32c; /* Control access to all user coded atomics for _Quad complex data type */ - -// // Below routines for atomic UPDATE are listed -// // 1-byte -void __kmpc_atomic_fixed1_add( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_andb( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_div( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_div( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_mul( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_orb( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shr( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_shr( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_sub( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_xor( ident_t *id_ref, int gtid, char * lhs, char rhs ); +void __kmpc_atomic_fixed1_add(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_andb(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_div(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1u_div(ident_t *id_ref, int gtid, unsigned char *lhs, + unsigned char rhs); +void __kmpc_atomic_fixed1_mul(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_orb(ident_t 
*id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_shl(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_shr(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1u_shr(ident_t *id_ref, int gtid, unsigned char *lhs, + unsigned char rhs); +void __kmpc_atomic_fixed1_sub(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_xor(ident_t *id_ref, int gtid, char *lhs, char rhs); // 2-byte -void __kmpc_atomic_fixed2_add( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_andb( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_div( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_div( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_mul( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_orb( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shr( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_shr( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_sub( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_xor( ident_t *id_ref, int gtid, short * lhs, short rhs ); +void __kmpc_atomic_fixed2_add(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_andb(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2_div(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2u_div(ident_t *id_ref, int gtid, unsigned short *lhs, + unsigned short rhs); +void __kmpc_atomic_fixed2_mul(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_orb(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_shl(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_shr(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2u_shr(ident_t *id_ref, int gtid, unsigned short *lhs, + unsigned short rhs); +void __kmpc_atomic_fixed2_sub(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_xor(ident_t *id_ref, int gtid, short *lhs, short rhs); // 4-byte add / sub fixed -void __kmpc_atomic_fixed4_add( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_sub( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_add(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_sub(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); // 4-byte add / sub float -void __kmpc_atomic_float4_add( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_sub( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float4_add(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); +void __kmpc_atomic_float4_sub(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); // 8-byte add / sub fixed -void __kmpc_atomic_fixed8_add( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_sub( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_add(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_sub(ident_t *id_ref, int gtid, 
kmp_int64 *lhs, + kmp_int64 rhs); // 8-byte add / sub float -void __kmpc_atomic_float8_add( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_sub( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float8_add(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float8_sub(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); // 4-byte fixed -void __kmpc_atomic_fixed4_andb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_div( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_div( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_mul( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_orb( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_shr( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_xor( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); +void __kmpc_atomic_fixed4_andb(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_div(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4u_div(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + kmp_uint32 rhs); +void __kmpc_atomic_fixed4_mul(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_orb(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_shl(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_shr(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4u_shr(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + kmp_uint32 rhs); +void __kmpc_atomic_fixed4_xor(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); // 8-byte fixed -void __kmpc_atomic_fixed8_andb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_div( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_div( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_mul( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_orb( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_shr( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_xor( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed8_andb(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_div(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8u_div(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + kmp_uint64 rhs); +void __kmpc_atomic_fixed8_mul(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_orb(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_shl(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void 
__kmpc_atomic_fixed8_shr(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8u_shr(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + kmp_uint64 rhs); +void __kmpc_atomic_fixed8_xor(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); // 4-byte float -void __kmpc_atomic_float4_div( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_mul( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); +void __kmpc_atomic_float4_div(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); +void __kmpc_atomic_float4_mul(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); // 8-byte float -void __kmpc_atomic_float8_div( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_mul( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_float8_div(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float8_mul(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); // 1-, 2-, 4-, 8-byte logical (&&, ||) -void __kmpc_atomic_fixed1_andl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_orl( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_andl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_orl( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_andl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_orl( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_andl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_orl( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed1_andl(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_orl(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed2_andl(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2_orl(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed4_andl(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_orl(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed8_andl(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_orl(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); // MIN / MAX -void __kmpc_atomic_fixed1_max( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_min( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_max( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_min( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_max( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_min( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_max( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_min( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_float4_max( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float4_min( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float8_max( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float8_min( 
ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); +void __kmpc_atomic_fixed1_max(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed1_min(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed2_max(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed2_min(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed4_max(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_min(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed8_max(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_min(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_float4_max(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); +void __kmpc_atomic_float4_min(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); +void __kmpc_atomic_float8_max(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float8_min(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_float16_max( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_min( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 architecture only - void __kmpc_atomic_float16_max_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_min_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); +void __kmpc_atomic_float16_max(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +void __kmpc_atomic_float16_min(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary; IA-32 +// architecture only +void __kmpc_atomic_float16_max_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); +void __kmpc_atomic_float16_min_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); #endif #endif // .NEQV. (same as xor) -void __kmpc_atomic_fixed1_neqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_neqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_neqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_neqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed1_neqv(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed2_neqv(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed4_neqv(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed8_neqv(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); // .EQV. 
(same as ~xor) -void __kmpc_atomic_fixed1_eqv( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_eqv( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_eqv( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_eqv( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); +void __kmpc_atomic_fixed1_eqv(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed2_eqv(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed4_eqv(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed8_eqv(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); // long double type -void __kmpc_atomic_float10_add( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_sub( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_mul( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_div( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_float10_add(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); +void __kmpc_atomic_float10_sub(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); +void __kmpc_atomic_float10_mul(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); +void __kmpc_atomic_float10_div(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); // _Quad type #if KMP_HAVE_QUAD -void __kmpc_atomic_float16_add( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_sub( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_mul( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_div( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_add_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_sub_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_mul_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_div_a16( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); +void __kmpc_atomic_float16_add(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +void __kmpc_atomic_float16_sub(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +void __kmpc_atomic_float16_mul(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +void __kmpc_atomic_float16_div(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary +void __kmpc_atomic_float16_add_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); +void __kmpc_atomic_float16_sub_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); +void __kmpc_atomic_float16_mul_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); +void __kmpc_atomic_float16_div_a16(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); #endif #endif // routines for complex types -void __kmpc_atomic_cmplx4_add( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_sub( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void 
__kmpc_atomic_cmplx4_mul( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_div( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_add( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_sub( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_mul( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_div( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_add( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_sub( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_mul( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_div( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx4_add(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx4_sub(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx4_mul(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx4_div(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx8_add(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx8_sub(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx8_mul(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx8_div(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx10_add(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); +void __kmpc_atomic_cmplx10_sub(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); +void __kmpc_atomic_cmplx10_mul(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); +void __kmpc_atomic_cmplx10_div(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_add( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_sub( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_mul( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_cmplx16_add_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_sub_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_mul_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_div_a16( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +void __kmpc_atomic_cmplx16_add(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +void __kmpc_atomic_cmplx16_sub(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +void __kmpc_atomic_cmplx16_mul(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +void __kmpc_atomic_cmplx16_div(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 
16-byte boundary +void __kmpc_atomic_cmplx16_add_a16(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); +void __kmpc_atomic_cmplx16_sub_a16(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); +void __kmpc_atomic_cmplx16_mul_a16(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); +void __kmpc_atomic_cmplx16_div_a16(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); #endif #endif @@ -602,381 +695,710 @@ void __kmpc_atomic_cmplx16_div( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CP // Supported only on IA-32 architecture and Intel(R) 64 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -void __kmpc_atomic_fixed1_sub_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_div_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_div_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed1_shl_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1_shr_rev( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed1u_shr_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs ); -void __kmpc_atomic_fixed2_sub_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_div_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_div_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed2_shl_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2_shr_rev( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed2u_shr_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs ); -void __kmpc_atomic_fixed4_sub_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_div_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_div_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed4_shl_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4_shr_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed4u_shr_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs ); -void __kmpc_atomic_fixed8_sub_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_div_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_div_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_fixed8_shl_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8_shr_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_fixed8u_shr_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs ); -void __kmpc_atomic_float4_sub_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); -void __kmpc_atomic_float4_div_rev( ident_t *id_ref, int gtid, float * lhs, float rhs ); -void __kmpc_atomic_float8_sub_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); -void __kmpc_atomic_float8_div_rev( ident_t *id_ref, int gtid, double * lhs, double rhs ); -void __kmpc_atomic_float10_sub_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); -void __kmpc_atomic_float10_div_rev( ident_t *id_ref, int gtid, long double * lhs, long 
double rhs ); +void __kmpc_atomic_fixed1_sub_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs); +void __kmpc_atomic_fixed1_div_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs); +void __kmpc_atomic_fixed1u_div_rev(ident_t *id_ref, int gtid, + unsigned char *lhs, unsigned char rhs); +void __kmpc_atomic_fixed1_shl_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs); +void __kmpc_atomic_fixed1_shr_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs); +void __kmpc_atomic_fixed1u_shr_rev(ident_t *id_ref, int gtid, + unsigned char *lhs, unsigned char rhs); +void __kmpc_atomic_fixed2_sub_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2_div_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2u_div_rev(ident_t *id_ref, int gtid, + unsigned short *lhs, unsigned short rhs); +void __kmpc_atomic_fixed2_shl_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2_shr_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs); +void __kmpc_atomic_fixed2u_shr_rev(ident_t *id_ref, int gtid, + unsigned short *lhs, unsigned short rhs); +void __kmpc_atomic_fixed4_sub_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_div_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4u_div_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + kmp_uint32 rhs); +void __kmpc_atomic_fixed4_shl_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4_shr_rev(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed4u_shr_rev(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + kmp_uint32 rhs); +void __kmpc_atomic_fixed8_sub_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_div_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8u_div_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + kmp_uint64 rhs); +void __kmpc_atomic_fixed8_shl_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8_shr_rev(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_fixed8u_shr_rev(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + kmp_uint64 rhs); +void __kmpc_atomic_float4_sub_rev(ident_t *id_ref, int gtid, float *lhs, + float rhs); +void __kmpc_atomic_float4_div_rev(ident_t *id_ref, int gtid, float *lhs, + float rhs); +void __kmpc_atomic_float8_sub_rev(ident_t *id_ref, int gtid, double *lhs, + double rhs); +void __kmpc_atomic_float8_div_rev(ident_t *id_ref, int gtid, double *lhs, + double rhs); +void __kmpc_atomic_float10_sub_rev(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); +void __kmpc_atomic_float10_div_rev(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_float16_sub_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); -void __kmpc_atomic_float16_div_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_sub_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); +void __kmpc_atomic_float16_div_rev(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); #endif -void __kmpc_atomic_cmplx4_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx4_div_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_sub_rev( ident_t *id_ref, int gtid, 
kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx8_div_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_sub_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); -void __kmpc_atomic_cmplx10_div_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx4_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx4_div_rev(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx8_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx8_div_rev(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx10_sub_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); +void __kmpc_atomic_cmplx10_div_rev(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_sub_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -void __kmpc_atomic_cmplx16_div_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_sub_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_float16_div_a16_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_cmplx16_sub_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); - void __kmpc_atomic_cmplx16_div_a16_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +void __kmpc_atomic_cmplx16_sub_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +void __kmpc_atomic_cmplx16_div_rev(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary +void __kmpc_atomic_float16_sub_a16_rev(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs); +void __kmpc_atomic_float16_div_a16_rev(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs); +void __kmpc_atomic_cmplx16_sub_a16_rev(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); +void __kmpc_atomic_cmplx16_div_a16_rev(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); #endif #endif // KMP_HAVE_QUAD -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 -#endif //OMP_40_ENABLED +#endif // OMP_40_ENABLED // routines for mixed types // RHS=float8 -void __kmpc_atomic_fixed1_mul_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed1_div_float8( ident_t *id_ref, int gtid, char * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed2_mul_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed2_div_float8( ident_t *id_ref, int gtid, short * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed4_mul_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed4_div_float8( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed8_mul_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_fixed8_div_float8( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_add_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void 
__kmpc_atomic_float4_sub_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_mul_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float4_div_float8( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real64 rhs ); - -// RHS=float16 (deprecated, to be removed when we are sure the compiler does not use them) +void __kmpc_atomic_fixed1_mul_float8(ident_t *id_ref, int gtid, char *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed1_div_float8(ident_t *id_ref, int gtid, char *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed2_mul_float8(ident_t *id_ref, int gtid, short *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed2_div_float8(ident_t *id_ref, int gtid, short *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed4_mul_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed4_div_float8(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed8_mul_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_fixed8_div_float8(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float4_add_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float4_sub_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float4_mul_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real64 rhs); +void __kmpc_atomic_float4_div_float8(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real64 rhs); + +// RHS=float16 (deprecated, to be removed when we are sure the compiler does not +// use them) #if KMP_HAVE_QUAD -void __kmpc_atomic_fixed1_add_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_add_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_sub_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_sub_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_mul_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_mul_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_div_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_div_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed2_add_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_add_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_sub_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_sub_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_mul_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_mul_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_div_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_div_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed4_add_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_add_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_sub_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_sub_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); 
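A short usage sketch, not part of the patch, for the mixed-type entry points in this hunk: they implement lhs op= rhs where the right-hand side has a different type than the updated variable (kmp_real64 for the *_float8 family above, _Quad for the *_fp family), so the widening arithmetic happens inside the runtime rather than at the call site. The wrapper name lowered_atomic_update is hypothetical, and loc/gtid stand for the source-location and thread-id arguments the compiler already materializes; the entry point and its signature are taken from the declarations above.

// Roughly what a compiler could emit for
//   float x; double d;
//   #pragma omp atomic
//   x += d;
static void lowered_atomic_update(ident_t *loc, int gtid, kmp_real32 *x,
                                  kmp_real64 d) {
  __kmpc_atomic_float4_add_float8(loc, gtid, x, d);
}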
-void __kmpc_atomic_fixed4_mul_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_mul_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_div_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_div_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); - -void __kmpc_atomic_fixed8_add_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_add_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_sub_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_sub_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_mul_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_mul_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_div_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_div_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); - -void __kmpc_atomic_float4_add_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_sub_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_mul_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_div_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs ); - -void __kmpc_atomic_float8_add_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_sub_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_mul_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); -void __kmpc_atomic_float8_div_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs ); - -void __kmpc_atomic_float10_add_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_sub_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_mul_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_div_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1_add_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_add_fp(ident_t *id_ref, int gtid, unsigned char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1_sub_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_sub_fp(ident_t *id_ref, int gtid, unsigned char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1_mul_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_mul_fp(ident_t *id_ref, int gtid, unsigned char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1_div_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_div_fp(ident_t *id_ref, int gtid, unsigned char *lhs, + _Quad rhs); + +void __kmpc_atomic_fixed2_add_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_add_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); +void __kmpc_atomic_fixed2_sub_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_sub_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); +void __kmpc_atomic_fixed2_mul_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_mul_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); +void 
__kmpc_atomic_fixed2_div_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_div_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); + +void __kmpc_atomic_fixed4_add_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_add_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4_sub_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_sub_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4_mul_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_mul_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4_div_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_div_fp(ident_t *id_ref, int gtid, kmp_uint32 *lhs, + _Quad rhs); + +void __kmpc_atomic_fixed8_add_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_add_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8_sub_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_sub_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8_mul_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_mul_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8_div_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_div_fp(ident_t *id_ref, int gtid, kmp_uint64 *lhs, + _Quad rhs); + +void __kmpc_atomic_float4_add_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, + _Quad rhs); +void __kmpc_atomic_float4_sub_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, + _Quad rhs); +void __kmpc_atomic_float4_mul_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, + _Quad rhs); +void __kmpc_atomic_float4_div_fp(ident_t *id_ref, int gtid, kmp_real32 *lhs, + _Quad rhs); + +void __kmpc_atomic_float8_add_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, + _Quad rhs); +void __kmpc_atomic_float8_sub_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, + _Quad rhs); +void __kmpc_atomic_float8_mul_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, + _Quad rhs); +void __kmpc_atomic_float8_div_fp(ident_t *id_ref, int gtid, kmp_real64 *lhs, + _Quad rhs); + +void __kmpc_atomic_float10_add_fp(ident_t *id_ref, int gtid, long double *lhs, + _Quad rhs); +void __kmpc_atomic_float10_sub_fp(ident_t *id_ref, int gtid, long double *lhs, + _Quad rhs); +void __kmpc_atomic_float10_mul_fp(ident_t *id_ref, int gtid, long double *lhs, + _Quad rhs); +void __kmpc_atomic_float10_div_fp(ident_t *id_ref, int gtid, long double *lhs, + _Quad rhs); // Reverse operations -void __kmpc_atomic_fixed1_sub_rev_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_sub_rev_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1_div_rev_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed1u_div_rev_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_sub_rev_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_sub_rev_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2_div_rev_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs ); -void __kmpc_atomic_fixed2u_div_rev_fp( ident_t *id_ref, int gtid, unsigned short * lhs, 
_Quad rhs ); -void __kmpc_atomic_fixed4_sub_rev_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_sub_rev_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4_div_rev_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed4u_div_rev_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_sub_rev_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_sub_rev_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8_div_rev_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs ); -void __kmpc_atomic_fixed8u_div_rev_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs ); -void __kmpc_atomic_float4_sub_rev_fp( ident_t *id_ref, int gtid, float * lhs, _Quad rhs ); -void __kmpc_atomic_float4_div_rev_fp( ident_t *id_ref, int gtid, float * lhs, _Quad rhs ); -void __kmpc_atomic_float8_sub_rev_fp( ident_t *id_ref, int gtid, double * lhs, _Quad rhs ); -void __kmpc_atomic_float8_div_rev_fp( ident_t *id_ref, int gtid, double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_sub_rev_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); -void __kmpc_atomic_float10_div_rev_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs ); +void __kmpc_atomic_fixed1_sub_rev_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_sub_rev_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs); +void __kmpc_atomic_fixed1_div_rev_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs); +void __kmpc_atomic_fixed1u_div_rev_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs); +void __kmpc_atomic_fixed2_sub_rev_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_sub_rev_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); +void __kmpc_atomic_fixed2_div_rev_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs); +void __kmpc_atomic_fixed2u_div_rev_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs); +void __kmpc_atomic_fixed4_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_sub_rev_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs); +void __kmpc_atomic_fixed4_div_rev_fp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed4u_div_rev_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs); +void __kmpc_atomic_fixed8_sub_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_sub_rev_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs); +void __kmpc_atomic_fixed8_div_rev_fp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + _Quad rhs); +void __kmpc_atomic_fixed8u_div_rev_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs); +void __kmpc_atomic_float4_sub_rev_fp(ident_t *id_ref, int gtid, float *lhs, + _Quad rhs); +void __kmpc_atomic_float4_div_rev_fp(ident_t *id_ref, int gtid, float *lhs, + _Quad rhs); +void __kmpc_atomic_float8_sub_rev_fp(ident_t *id_ref, int gtid, double *lhs, + _Quad rhs); +void __kmpc_atomic_float8_div_rev_fp(ident_t *id_ref, int gtid, double *lhs, + _Quad rhs); +void __kmpc_atomic_float10_sub_rev_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs); +void __kmpc_atomic_float10_div_rev_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs); #endif // KMP_HAVE_QUAD // RHS=cmplx8 -void __kmpc_atomic_cmplx4_add_cmplx8( ident_t *id_ref, int gtid, 
kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_sub_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_mul_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx4_div_cmplx8( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx64 rhs ); +void __kmpc_atomic_cmplx4_add_cmplx8(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx4_sub_cmplx8(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx4_mul_cmplx8(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx4_div_cmplx8(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx64 rhs); // generic atomic routines -void __kmpc_atomic_1( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_2( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_4( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_8( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_10( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_16( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_20( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); -void __kmpc_atomic_32( ident_t *id_ref, int gtid, void* lhs, void* rhs, void (*f)( void *, void *, void * ) ); +void __kmpc_atomic_1(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_2(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_4(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_8(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_10(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_16(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_20(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); +void __kmpc_atomic_32(ident_t *id_ref, int gtid, void *lhs, void *rhs, + void (*f)(void *, void *, void *)); // READ, WRITE, CAPTURE are supported only on IA-32 architecture and Intel(R) 64 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// // Below routines for atomic READ are listed -// - -char __kmpc_atomic_fixed1_rd( ident_t *id_ref, int gtid, char * loc ); -short __kmpc_atomic_fixed2_rd( ident_t *id_ref, int gtid, short * loc ); -kmp_int32 __kmpc_atomic_fixed4_rd( ident_t *id_ref, int gtid, kmp_int32 * loc ); -kmp_int64 __kmpc_atomic_fixed8_rd( ident_t *id_ref, int gtid, kmp_int64 * loc ); -kmp_real32 __kmpc_atomic_float4_rd( ident_t *id_ref, int gtid, kmp_real32 * loc ); -kmp_real64 __kmpc_atomic_float8_rd( ident_t *id_ref, int gtid, kmp_real64 * loc ); -long double __kmpc_atomic_float10_rd( ident_t *id_ref, int gtid, long double * loc ); +char __kmpc_atomic_fixed1_rd(ident_t *id_ref, int gtid, char *loc); +short __kmpc_atomic_fixed2_rd(ident_t *id_ref, int gtid, short *loc); +kmp_int32 __kmpc_atomic_fixed4_rd(ident_t *id_ref, int gtid, kmp_int32 *loc); +kmp_int64 
__kmpc_atomic_fixed8_rd(ident_t *id_ref, int gtid, kmp_int64 *loc); +kmp_real32 __kmpc_atomic_float4_rd(ident_t *id_ref, int gtid, kmp_real32 *loc); +kmp_real64 __kmpc_atomic_float8_rd(ident_t *id_ref, int gtid, kmp_real64 *loc); +long double __kmpc_atomic_float10_rd(ident_t *id_ref, int gtid, + long double *loc); #if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_rd( ident_t *id_ref, int gtid, QUAD_LEGACY * loc ); +QUAD_LEGACY __kmpc_atomic_float16_rd(ident_t *id_ref, int gtid, + QUAD_LEGACY *loc); #endif -// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value will be -// returned through an additional parameter -#if ( KMP_OS_WINDOWS ) - void __kmpc_atomic_cmplx4_rd( kmp_cmplx32 * out, ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); +// Fix for CQ220361: cmplx4 READ will return void on Windows* OS; read value +// will be returned through an additional parameter +#if (KMP_OS_WINDOWS) +void __kmpc_atomic_cmplx4_rd(kmp_cmplx32 *out, ident_t *id_ref, int gtid, + kmp_cmplx32 *loc); #else - kmp_cmplx32 __kmpc_atomic_cmplx4_rd( ident_t *id_ref, int gtid, kmp_cmplx32 * loc ); +kmp_cmplx32 __kmpc_atomic_cmplx4_rd(ident_t *id_ref, int gtid, + kmp_cmplx32 *loc); #endif -kmp_cmplx64 __kmpc_atomic_cmplx8_rd( ident_t *id_ref, int gtid, kmp_cmplx64 * loc ); -kmp_cmplx80 __kmpc_atomic_cmplx10_rd( ident_t *id_ref, int gtid, kmp_cmplx80 * loc ); +kmp_cmplx64 __kmpc_atomic_cmplx8_rd(ident_t *id_ref, int gtid, + kmp_cmplx64 *loc); +kmp_cmplx80 __kmpc_atomic_cmplx10_rd(ident_t *id_ref, int gtid, + kmp_cmplx80 *loc); #if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_rd( ident_t *id_ref, int gtid, CPLX128_LEG * loc ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - Quad_a16_t __kmpc_atomic_float16_a16_rd( ident_t * id_ref, int gtid, Quad_a16_t * loc ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * loc ); +CPLX128_LEG __kmpc_atomic_cmplx16_rd(ident_t *id_ref, int gtid, + CPLX128_LEG *loc); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary +Quad_a16_t __kmpc_atomic_float16_a16_rd(ident_t *id_ref, int gtid, + Quad_a16_t *loc); +kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_rd(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *loc); #endif #endif - -// // Below routines for atomic WRITE are listed -// - -void __kmpc_atomic_fixed1_wr( ident_t *id_ref, int gtid, char * lhs, char rhs ); -void __kmpc_atomic_fixed2_wr( ident_t *id_ref, int gtid, short * lhs, short rhs ); -void __kmpc_atomic_fixed4_wr( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -void __kmpc_atomic_fixed8_wr( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -void __kmpc_atomic_float4_wr( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs ); -void __kmpc_atomic_float8_wr( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs ); -void __kmpc_atomic_float10_wr( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +void __kmpc_atomic_fixed1_wr(ident_t *id_ref, int gtid, char *lhs, char rhs); +void __kmpc_atomic_fixed2_wr(ident_t *id_ref, int gtid, short *lhs, short rhs); +void __kmpc_atomic_fixed4_wr(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +void __kmpc_atomic_fixed8_wr(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +void __kmpc_atomic_float4_wr(ident_t *id_ref, int gtid, kmp_real32 *lhs, + kmp_real32 rhs); +void __kmpc_atomic_float8_wr(ident_t *id_ref, int gtid, kmp_real64 *lhs, + kmp_real64 rhs); +void 
__kmpc_atomic_float10_wr(ident_t *id_ref, int gtid, long double *lhs, + long double rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_float16_wr( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +void __kmpc_atomic_float16_wr(ident_t *id_ref, int gtid, QUAD_LEGACY *lhs, + QUAD_LEGACY rhs); #endif -void __kmpc_atomic_cmplx4_wr( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); -void __kmpc_atomic_cmplx8_wr( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -void __kmpc_atomic_cmplx10_wr( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx4_wr(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs); +void __kmpc_atomic_cmplx8_wr(ident_t *id_ref, int gtid, kmp_cmplx64 *lhs, + kmp_cmplx64 rhs); +void __kmpc_atomic_cmplx10_wr(ident_t *id_ref, int gtid, kmp_cmplx80 *lhs, + kmp_cmplx80 rhs); #if KMP_HAVE_QUAD -void __kmpc_atomic_cmplx16_wr( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - void __kmpc_atomic_float16_a16_wr( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - void __kmpc_atomic_cmplx16_a16_wr( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +void __kmpc_atomic_cmplx16_wr(ident_t *id_ref, int gtid, CPLX128_LEG *lhs, + CPLX128_LEG rhs); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary +void __kmpc_atomic_float16_a16_wr(ident_t *id_ref, int gtid, Quad_a16_t *lhs, + Quad_a16_t rhs); +void __kmpc_atomic_cmplx16_a16_wr(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); #endif #endif -// // Below routines for atomic CAPTURE are listed -// // 1-byte -char __kmpc_atomic_fixed1_add_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_andb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_div_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_div_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_mul_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_orb_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_shl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_shr_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -unsigned char __kmpc_atomic_fixed1u_shr_cpt( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag); -char __kmpc_atomic_fixed1_sub_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_xor_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); +char __kmpc_atomic_fixed1_add_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_andb_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_div_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_div_cpt(ident_t *id_ref, int gtid, + unsigned char *lhs, + unsigned char rhs, int flag); +char __kmpc_atomic_fixed1_mul_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_orb_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_shl_cpt(ident_t *id_ref, int 
gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_shr_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_shr_cpt(ident_t *id_ref, int gtid, + unsigned char *lhs, + unsigned char rhs, int flag); +char __kmpc_atomic_fixed1_sub_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_xor_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); // 2-byte -short __kmpc_atomic_fixed2_add_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_andb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_div_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_div_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_mul_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_orb_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_shl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_shr_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -unsigned short __kmpc_atomic_fixed2u_shr_cpt( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag); -short __kmpc_atomic_fixed2_sub_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_xor_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); +short __kmpc_atomic_fixed2_add_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_andb_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_div_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_div_cpt(ident_t *id_ref, int gtid, + unsigned short *lhs, + unsigned short rhs, int flag); +short __kmpc_atomic_fixed2_mul_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_orb_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_shl_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_shr_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_shr_cpt(ident_t *id_ref, int gtid, + unsigned short *lhs, + unsigned short rhs, int flag); +short __kmpc_atomic_fixed2_sub_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_xor_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); // 4-byte add / sub fixed -kmp_int32 __kmpc_atomic_fixed4_add_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_add_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); // 4-byte add / sub float -kmp_real32 __kmpc_atomic_float4_add_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_sub_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_add_cpt(ident_t *id_ref, int gtid, + 
kmp_real32 *lhs, kmp_real32 rhs, + int flag); +kmp_real32 __kmpc_atomic_float4_sub_cpt(ident_t *id_ref, int gtid, + kmp_real32 *lhs, kmp_real32 rhs, + int flag); // 8-byte add / sub fixed -kmp_int64 __kmpc_atomic_fixed8_add_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_add_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); // 8-byte add / sub float -kmp_real64 __kmpc_atomic_float8_add_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_sub_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +kmp_real64 __kmpc_atomic_float8_add_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); +kmp_real64 __kmpc_atomic_float8_sub_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); // 4-byte fixed -kmp_int32 __kmpc_atomic_fixed4_andb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_div_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_mul_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_orb_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_xor_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_andb_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_div_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, kmp_uint32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_mul_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_orb_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_shl_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_shr_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, kmp_uint32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_xor_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); // 8-byte fixed -kmp_int64 __kmpc_atomic_fixed8_andb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_div_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_mul_cpt( 
ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_orb_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_xor_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_andb_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_div_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, kmp_uint64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_mul_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_orb_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_shl_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_shr_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, kmp_uint64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_xor_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); // 4-byte float -kmp_real32 __kmpc_atomic_float4_div_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_mul_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_div_cpt(ident_t *id_ref, int gtid, + kmp_real32 *lhs, kmp_real32 rhs, + int flag); +kmp_real32 __kmpc_atomic_float4_mul_cpt(ident_t *id_ref, int gtid, + kmp_real32 *lhs, kmp_real32 rhs, + int flag); // 8-byte float -kmp_real64 __kmpc_atomic_float8_div_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_mul_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +kmp_real64 __kmpc_atomic_float8_div_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); +kmp_real64 __kmpc_atomic_float8_mul_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); // 1-, 2-, 4-, 8-byte logical (&&, ||) -char __kmpc_atomic_fixed1_andl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_orl_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_andl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_orl_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_andl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_orl_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_andl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_orl_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +char __kmpc_atomic_fixed1_andl_cpt(ident_t *id_ref, int gtid, char *lhs, + char 
rhs, int flag); +char __kmpc_atomic_fixed1_orl_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +short __kmpc_atomic_fixed2_andl_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_orl_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_andl_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_orl_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_andl_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_orl_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); // MIN / MAX -char __kmpc_atomic_fixed1_max_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -char __kmpc_atomic_fixed1_min_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_max_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -short __kmpc_atomic_fixed2_min_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_max_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_min_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_max_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_min_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_max_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real32 __kmpc_atomic_float4_min_cpt( ident_t *id_ref, int gtid, kmp_real32 * lhs, kmp_real32 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_max_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); -kmp_real64 __kmpc_atomic_float8_min_cpt( ident_t *id_ref, int gtid, kmp_real64 * lhs, kmp_real64 rhs, int flag); +char __kmpc_atomic_fixed1_max_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_min_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +short __kmpc_atomic_fixed2_max_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_min_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_max_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_min_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_max_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_min_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); +kmp_real32 __kmpc_atomic_float4_max_cpt(ident_t *id_ref, int gtid, + kmp_real32 *lhs, kmp_real32 rhs, + int flag); +kmp_real32 __kmpc_atomic_float4_min_cpt(ident_t *id_ref, int gtid, + kmp_real32 *lhs, kmp_real32 rhs, + int flag); +kmp_real64 __kmpc_atomic_float8_max_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); +kmp_real64 __kmpc_atomic_float8_min_cpt(ident_t *id_ref, int gtid, + kmp_real64 *lhs, kmp_real64 rhs, + int flag); #if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_max_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_min_cpt( ident_t 
*id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +QUAD_LEGACY __kmpc_atomic_float16_max_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); +QUAD_LEGACY __kmpc_atomic_float16_min_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); #endif // .NEQV. (same as xor) -char __kmpc_atomic_fixed1_neqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_neqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_neqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_neqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +char __kmpc_atomic_fixed1_neqv_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +short __kmpc_atomic_fixed2_neqv_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_neqv_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_neqv_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); // .EQV. (same as ~xor) -char __kmpc_atomic_fixed1_eqv_cpt( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag); -short __kmpc_atomic_fixed2_eqv_cpt( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag); -kmp_int32 __kmpc_atomic_fixed4_eqv_cpt( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag); -kmp_int64 __kmpc_atomic_fixed8_eqv_cpt( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag); +char __kmpc_atomic_fixed1_eqv_cpt(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +short __kmpc_atomic_fixed2_eqv_cpt(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_eqv_cpt(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_eqv_cpt(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, int flag); // long double type -long double __kmpc_atomic_float10_add_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_sub_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_mul_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); -long double __kmpc_atomic_float10_div_cpt( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag); +long double __kmpc_atomic_float10_add_cpt(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); +long double __kmpc_atomic_float10_sub_cpt(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); +long double __kmpc_atomic_float10_mul_cpt(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); +long double __kmpc_atomic_float10_div_cpt(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); #if KMP_HAVE_QUAD // _Quad type -QUAD_LEGACY __kmpc_atomic_float16_add_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_mul_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag); +QUAD_LEGACY 
__kmpc_atomic_float16_add_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); +QUAD_LEGACY __kmpc_atomic_float16_sub_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); +QUAD_LEGACY __kmpc_atomic_float16_mul_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); +QUAD_LEGACY __kmpc_atomic_float16_div_cpt(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); #endif // routines for complex types -// Workaround for cmplx4 routines - return void; captured value is returned via the argument -void __kmpc_atomic_cmplx4_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); -void __kmpc_atomic_cmplx4_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag); - -kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag); +// Workaround for cmplx4 routines - return void; captured value is returned via +// the argument +void __kmpc_atomic_cmplx4_add_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); +void __kmpc_atomic_cmplx4_sub_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); +void __kmpc_atomic_cmplx4_mul_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); +void __kmpc_atomic_cmplx4_div_cpt(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs, kmp_cmplx32 *out, int flag); + +kmp_cmplx64 __kmpc_atomic_cmplx8_add_cpt(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_mul_cpt(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_add_cpt(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_mul_cpt(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt(ident_t 
*id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); #if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag); -#if ( KMP_ARCH_X86 ) - // Routines with 16-byte arguments aligned to 16-byte boundary - Quad_a16_t __kmpc_atomic_float16_add_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_div_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_max_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - Quad_a16_t __kmpc_atomic_float16_min_a16_cpt( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_add_cpt(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs, + int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs, + int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_mul_cpt(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs, + int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs, + int flag); +#if (KMP_ARCH_X86) +// Routines with 16-byte arguments aligned to 16-byte boundary +Quad_a16_t __kmpc_atomic_float16_add_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +Quad_a16_t __kmpc_atomic_float16_mul_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +Quad_a16_t __kmpc_atomic_float16_div_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +Quad_a16_t __kmpc_atomic_float16_max_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +Quad_a16_t __kmpc_atomic_float16_min_a16_cpt(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs, + int flag); +kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_add_a16_cpt(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, + int flag); +kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, + int flag); 
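// ---- Illustrative usage sketch (editorial aside, not part of the patch) ----
// Calling convention for the capture (_cpt) routines declared above, shown
// with the plain 4-byte integer form. The trailing `flag` argument appears to
// select which value is returned: the post-update value when non-zero, the
// pre-update value otherwise (inferred from the two capture forms spelled out
// for the *_cpt_rev routines further down; treat this reading as an
// assumption, not a contract). `loc`, `gtid` and `example_atomic_add_capture`
// are assumed / hypothetical.
static inline kmp_int32 example_atomic_add_capture(ident_t *loc, int gtid,
                                                   kmp_int32 *x) {
  // Atomically performs *x += 3 and captures the updated value (flag = 1).
  return __kmpc_atomic_fixed4_add_cpt(loc, gtid, x, 3, /* flag */ 1);
}
// ----------------------------------------------------------------------------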
+kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_mul_a16_cpt(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, + int flag); +kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, + int flag); #endif #endif @@ -985,175 +1407,369 @@ void __kmpc_atomic_end(void); #if OMP_40_ENABLED -// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr binop x; v = x; } for non-commutative operations. - -char __kmpc_atomic_fixed1_sub_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -char __kmpc_atomic_fixed1_div_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); -char __kmpc_atomic_fixed1_shl_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs , int flag); -char __kmpc_atomic_fixed1_shr_cpt_rev( ident_t *id_ref, int gtid, char * lhs, char rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned char * lhs, unsigned char rhs, int flag ); -short __kmpc_atomic_fixed2_sub_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -short __kmpc_atomic_fixed2_div_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_div_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); -short __kmpc_atomic_fixed2_shl_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -short __kmpc_atomic_fixed2_shr_cpt_rev( ident_t *id_ref, int gtid, short * lhs, short rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev( ident_t *id_ref, int gtid, unsigned short * lhs, unsigned short rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint32 * lhs, kmp_uint32 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev( ident_t *id_ref, int gtid, kmp_uint64 * lhs, kmp_uint64 rhs, int flag ); -float __kmpc_atomic_float4_sub_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); -float __kmpc_atomic_float4_div_cpt_rev( ident_t *id_ref, int gtid, float * lhs, float rhs, int flag ); -double __kmpc_atomic_float8_sub_cpt_rev( ident_t *id_ref, int gtid, double * lhs, 
double rhs, int flag ); -double __kmpc_atomic_float8_div_cpt_rev( ident_t *id_ref, int gtid, double * lhs, double rhs, int flag ); -long double __kmpc_atomic_float10_sub_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); -long double __kmpc_atomic_float10_div_cpt_rev( ident_t *id_ref, int gtid, long double * lhs, long double rhs, int flag ); +// OpenMP 4.0: v = x = expr binop x; { v = x; x = expr binop x; } { x = expr +// binop x; v = x; } for non-commutative operations. + +char __kmpc_atomic_fixed1_sub_cpt_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_div_cpt_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_div_cpt_rev(ident_t *id_ref, int gtid, + unsigned char *lhs, + unsigned char rhs, int flag); +char __kmpc_atomic_fixed1_shl_cpt_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +char __kmpc_atomic_fixed1_shr_cpt_rev(ident_t *id_ref, int gtid, char *lhs, + char rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_shr_cpt_rev(ident_t *id_ref, int gtid, + unsigned char *lhs, + unsigned char rhs, int flag); +short __kmpc_atomic_fixed2_sub_cpt_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_div_cpt_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_div_cpt_rev(ident_t *id_ref, int gtid, + unsigned short *lhs, + unsigned short rhs, int flag); +short __kmpc_atomic_fixed2_shl_cpt_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +short __kmpc_atomic_fixed2_shr_cpt_rev(ident_t *id_ref, int gtid, short *lhs, + short rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_shr_cpt_rev(ident_t *id_ref, int gtid, + unsigned short *lhs, + unsigned short rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, kmp_uint32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_shl_cpt_rev(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_shr_cpt_rev(ident_t *id_ref, int gtid, + kmp_int32 *lhs, kmp_int32 rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_shr_cpt_rev(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, kmp_uint32 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, kmp_uint64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_shl_cpt_rev(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_shr_cpt_rev(ident_t *id_ref, int gtid, + kmp_int64 *lhs, kmp_int64 rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_shr_cpt_rev(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, kmp_uint64 rhs, + int flag); +float __kmpc_atomic_float4_sub_cpt_rev(ident_t *id_ref, int gtid, float *lhs, + float rhs, int flag); +float __kmpc_atomic_float4_div_cpt_rev(ident_t *id_ref, int gtid, float *lhs, + float rhs, int flag); +double __kmpc_atomic_float8_sub_cpt_rev(ident_t *id_ref, int gtid, double 
*lhs, + double rhs, int flag); +double __kmpc_atomic_float8_div_cpt_rev(ident_t *id_ref, int gtid, double *lhs, + double rhs, int flag); +long double __kmpc_atomic_float10_sub_cpt_rev(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); +long double __kmpc_atomic_float10_div_cpt_rev(ident_t *id_ref, int gtid, + long double *lhs, long double rhs, + int flag); #if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); -QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs, int flag ); +QUAD_LEGACY __kmpc_atomic_float16_sub_cpt_rev(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); +QUAD_LEGACY __kmpc_atomic_float16_div_cpt_rev(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs, + int flag); #endif -// Workaround for cmplx4 routines - return void; captured value is returned via the argument -void __kmpc_atomic_cmplx4_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); -void __kmpc_atomic_cmplx4_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out, int flag ); -kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); -kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs, int flag ); -kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); -kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs, int flag ); +// Workaround for cmplx4 routines - return void; captured value is returned via +// the argument +void __kmpc_atomic_cmplx4_sub_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx32 rhs, + kmp_cmplx32 *out, int flag); +void __kmpc_atomic_cmplx4_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx32 *lhs, kmp_cmplx32 rhs, + kmp_cmplx32 *out, int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_sub_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx64 __kmpc_atomic_cmplx8_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_sub_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); +kmp_cmplx80 __kmpc_atomic_cmplx10_div_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs, + int flag); #if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); -CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs, int flag ); -#if ( KMP_ARCH_X86 ) - Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); - Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev( ident_t * id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs, int flag ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_sub_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_div_a16_cpt_rev( ident_t * id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs, int flag ); +CPLX128_LEG __kmpc_atomic_cmplx16_sub_cpt_rev(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, 
CPLX128_LEG rhs, + int flag); +CPLX128_LEG __kmpc_atomic_cmplx16_div_cpt_rev(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs, + int flag); +#if (KMP_ARCH_X86) +Quad_a16_t __kmpc_atomic_float16_sub_a16_cpt_rev(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, + Quad_a16_t rhs, int flag); +Quad_a16_t __kmpc_atomic_float16_div_a16_cpt_rev(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, + Quad_a16_t rhs, int flag); +kmp_cmplx128_a16_t +__kmpc_atomic_cmplx16_sub_a16_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, int flag); +kmp_cmplx128_a16_t +__kmpc_atomic_cmplx16_div_a16_cpt_rev(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs, int flag); #endif #endif // OpenMP 4.0 Capture-write (swap): {v = x; x = expr;} -char __kmpc_atomic_fixed1_swp( ident_t *id_ref, int gtid, char * lhs, char rhs ); -short __kmpc_atomic_fixed2_swp( ident_t *id_ref, int gtid, short * lhs, short rhs ); -kmp_int32 __kmpc_atomic_fixed4_swp( ident_t *id_ref, int gtid, kmp_int32 * lhs, kmp_int32 rhs ); -kmp_int64 __kmpc_atomic_fixed8_swp( ident_t *id_ref, int gtid, kmp_int64 * lhs, kmp_int64 rhs ); -float __kmpc_atomic_float4_swp( ident_t *id_ref, int gtid, float * lhs, float rhs ); -double __kmpc_atomic_float8_swp( ident_t *id_ref, int gtid, double * lhs, double rhs ); -long double __kmpc_atomic_float10_swp( ident_t *id_ref, int gtid, long double * lhs, long double rhs ); +char __kmpc_atomic_fixed1_swp(ident_t *id_ref, int gtid, char *lhs, char rhs); +short __kmpc_atomic_fixed2_swp(ident_t *id_ref, int gtid, short *lhs, + short rhs); +kmp_int32 __kmpc_atomic_fixed4_swp(ident_t *id_ref, int gtid, kmp_int32 *lhs, + kmp_int32 rhs); +kmp_int64 __kmpc_atomic_fixed8_swp(ident_t *id_ref, int gtid, kmp_int64 *lhs, + kmp_int64 rhs); +float __kmpc_atomic_float4_swp(ident_t *id_ref, int gtid, float *lhs, + float rhs); +double __kmpc_atomic_float8_swp(ident_t *id_ref, int gtid, double *lhs, + double rhs); +long double __kmpc_atomic_float10_swp(ident_t *id_ref, int gtid, + long double *lhs, long double rhs); #if KMP_HAVE_QUAD -QUAD_LEGACY __kmpc_atomic_float16_swp( ident_t *id_ref, int gtid, QUAD_LEGACY * lhs, QUAD_LEGACY rhs ); +QUAD_LEGACY __kmpc_atomic_float16_swp(ident_t *id_ref, int gtid, + QUAD_LEGACY *lhs, QUAD_LEGACY rhs); #endif // !!! 
TODO: check if we need a workaround here -void __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs, kmp_cmplx32 * out ); -//kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); - -kmp_cmplx64 __kmpc_atomic_cmplx8_swp( ident_t *id_ref, int gtid, kmp_cmplx64 * lhs, kmp_cmplx64 rhs ); -kmp_cmplx80 __kmpc_atomic_cmplx10_swp( ident_t *id_ref, int gtid, kmp_cmplx80 * lhs, kmp_cmplx80 rhs ); +void __kmpc_atomic_cmplx4_swp(ident_t *id_ref, int gtid, kmp_cmplx32 *lhs, + kmp_cmplx32 rhs, kmp_cmplx32 *out); +// kmp_cmplx32 __kmpc_atomic_cmplx4_swp( ident_t *id_ref, int gtid, +// kmp_cmplx32 * lhs, kmp_cmplx32 rhs ); + +kmp_cmplx64 __kmpc_atomic_cmplx8_swp(ident_t *id_ref, int gtid, + kmp_cmplx64 *lhs, kmp_cmplx64 rhs); +kmp_cmplx80 __kmpc_atomic_cmplx10_swp(ident_t *id_ref, int gtid, + kmp_cmplx80 *lhs, kmp_cmplx80 rhs); #if KMP_HAVE_QUAD -CPLX128_LEG __kmpc_atomic_cmplx16_swp( ident_t *id_ref, int gtid, CPLX128_LEG * lhs, CPLX128_LEG rhs ); -#if ( KMP_ARCH_X86 ) - Quad_a16_t __kmpc_atomic_float16_a16_swp( ident_t *id_ref, int gtid, Quad_a16_t * lhs, Quad_a16_t rhs ); - kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp( ident_t *id_ref, int gtid, kmp_cmplx128_a16_t * lhs, kmp_cmplx128_a16_t rhs ); +CPLX128_LEG __kmpc_atomic_cmplx16_swp(ident_t *id_ref, int gtid, + CPLX128_LEG *lhs, CPLX128_LEG rhs); +#if (KMP_ARCH_X86) +Quad_a16_t __kmpc_atomic_float16_a16_swp(ident_t *id_ref, int gtid, + Quad_a16_t *lhs, Quad_a16_t rhs); +kmp_cmplx128_a16_t __kmpc_atomic_cmplx16_a16_swp(ident_t *id_ref, int gtid, + kmp_cmplx128_a16_t *lhs, + kmp_cmplx128_a16_t rhs); #endif #endif // Capture routines for mixed types (RHS=float16) #if KMP_HAVE_QUAD -char __kmpc_atomic_fixed1_add_cpt_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -char __kmpc_atomic_fixed1_sub_cpt_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -char __kmpc_atomic_fixed1_mul_cpt_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -char __kmpc_atomic_fixed1_div_cpt_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_add_cpt_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_sub_cpt_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_mul_cpt_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_div_cpt_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs, int flag ); - -short __kmpc_atomic_fixed2_add_cpt_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -short __kmpc_atomic_fixed2_sub_cpt_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -short __kmpc_atomic_fixed2_mul_cpt_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -short __kmpc_atomic_fixed2_div_cpt_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_add_cpt_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_sub_cpt_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_mul_cpt_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_div_cpt_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); - -kmp_int32 
__kmpc_atomic_fixed4_add_cpt_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_add_cpt_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); - -kmp_int64 __kmpc_atomic_fixed8_add_cpt_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_add_cpt_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); - -float __kmpc_atomic_float4_add_cpt_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs, int flag ); -float __kmpc_atomic_float4_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs, int flag ); -float __kmpc_atomic_float4_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs, int flag ); -float __kmpc_atomic_float4_div_cpt_fp( ident_t *id_ref, int gtid, kmp_real32 * lhs, _Quad rhs, int flag ); - -double __kmpc_atomic_float8_add_cpt_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs, int flag ); -double __kmpc_atomic_float8_sub_cpt_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs, int flag ); -double __kmpc_atomic_float8_mul_cpt_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs, int flag ); -double __kmpc_atomic_float8_div_cpt_fp( ident_t *id_ref, int gtid, kmp_real64 * lhs, _Quad rhs, int flag ); - -long double __kmpc_atomic_float10_add_cpt_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); -long double __kmpc_atomic_float10_sub_cpt_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); -long double __kmpc_atomic_float10_mul_cpt_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); -long double __kmpc_atomic_float10_div_cpt_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); - -char __kmpc_atomic_fixed1_sub_cpt_rev_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_sub_cpt_rev_fp( ident_t *id_ref, int gtid, unsigned char * lhs, _Quad rhs, int flag ); -char __kmpc_atomic_fixed1_div_cpt_rev_fp( ident_t *id_ref, int gtid, char * lhs, _Quad rhs, int flag ); -unsigned char __kmpc_atomic_fixed1u_div_cpt_rev_fp( ident_t *id_ref, int gtid, unsigned 
char * lhs, _Quad rhs, int flag ); -short __kmpc_atomic_fixed2_sub_cpt_rev_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_sub_cpt_rev_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); -short __kmpc_atomic_fixed2_div_cpt_rev_fp( ident_t *id_ref, int gtid, short * lhs, _Quad rhs, int flag ); -unsigned short __kmpc_atomic_fixed2u_div_cpt_rev_fp( ident_t *id_ref, int gtid, unsigned short * lhs, _Quad rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); -kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_int32 * lhs, _Quad rhs, int flag ); -kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_uint32 * lhs, _Quad rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); -kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_int64 * lhs, _Quad rhs, int flag ); -kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev_fp( ident_t *id_ref, int gtid, kmp_uint64 * lhs, _Quad rhs, int flag ); -float __kmpc_atomic_float4_sub_cpt_rev_fp( ident_t *id_ref, int gtid, float * lhs, _Quad rhs, int flag ); -float __kmpc_atomic_float4_div_cpt_rev_fp( ident_t *id_ref, int gtid, float * lhs, _Quad rhs, int flag ); -double __kmpc_atomic_float8_sub_cpt_rev_fp( ident_t *id_ref, int gtid, double * lhs, _Quad rhs, int flag ); -double __kmpc_atomic_float8_div_cpt_rev_fp( ident_t *id_ref, int gtid, double * lhs, _Quad rhs, int flag ); -long double __kmpc_atomic_float10_sub_cpt_rev_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); -long double __kmpc_atomic_float10_div_cpt_rev_fp( ident_t *id_ref, int gtid, long double * lhs, _Quad rhs, int flag ); +char __kmpc_atomic_fixed1_add_cpt_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs, int flag); +char __kmpc_atomic_fixed1_sub_cpt_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs, int flag); +char __kmpc_atomic_fixed1_mul_cpt_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs, int flag); +char __kmpc_atomic_fixed1_div_cpt_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_add_cpt_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs, + int flag); +unsigned char __kmpc_atomic_fixed1u_sub_cpt_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs, + int flag); +unsigned char __kmpc_atomic_fixed1u_mul_cpt_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs, + int flag); +unsigned char __kmpc_atomic_fixed1u_div_cpt_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, _Quad rhs, + int flag); + +short __kmpc_atomic_fixed2_add_cpt_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +short __kmpc_atomic_fixed2_sub_cpt_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +short __kmpc_atomic_fixed2_mul_cpt_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +short __kmpc_atomic_fixed2_div_cpt_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_add_cpt_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs, + int flag); +unsigned short 
__kmpc_atomic_fixed2u_sub_cpt_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs, + int flag); +unsigned short __kmpc_atomic_fixed2u_mul_cpt_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs, + int flag); +unsigned short __kmpc_atomic_fixed2u_div_cpt_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, _Quad rhs, + int flag); + +kmp_int32 __kmpc_atomic_fixed4_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, int flag); +kmp_uint32 __kmpc_atomic_fixed4u_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); + +kmp_int64 __kmpc_atomic_fixed8_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, int flag); +kmp_int64 __kmpc_atomic_fixed8_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, int flag); +kmp_uint64 __kmpc_atomic_fixed8u_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); + +float __kmpc_atomic_float4_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_real32 *lhs, _Quad rhs, int flag); +float __kmpc_atomic_float4_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_real32 *lhs, _Quad rhs, int flag); +float __kmpc_atomic_float4_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_real32 *lhs, _Quad rhs, int flag); +float __kmpc_atomic_float4_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_real32 *lhs, _Quad rhs, int flag); + +double __kmpc_atomic_float8_add_cpt_fp(ident_t *id_ref, int gtid, + kmp_real64 *lhs, _Quad rhs, int flag); +double __kmpc_atomic_float8_sub_cpt_fp(ident_t *id_ref, int gtid, + kmp_real64 *lhs, _Quad rhs, int flag); +double __kmpc_atomic_float8_mul_cpt_fp(ident_t *id_ref, int gtid, + kmp_real64 *lhs, _Quad rhs, int flag); +double __kmpc_atomic_float8_div_cpt_fp(ident_t *id_ref, int gtid, + kmp_real64 *lhs, _Quad rhs, int flag); + +long double __kmpc_atomic_float10_add_cpt_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); +long double __kmpc_atomic_float10_sub_cpt_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); +long double __kmpc_atomic_float10_mul_cpt_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); +long double __kmpc_atomic_float10_div_cpt_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); + +char __kmpc_atomic_fixed1_sub_cpt_rev_fp(ident_t *id_ref, 
int gtid, char *lhs, + _Quad rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, + _Quad rhs, int flag); +char __kmpc_atomic_fixed1_div_cpt_rev_fp(ident_t *id_ref, int gtid, char *lhs, + _Quad rhs, int flag); +unsigned char __kmpc_atomic_fixed1u_div_cpt_rev_fp(ident_t *id_ref, int gtid, + unsigned char *lhs, + _Quad rhs, int flag); +short __kmpc_atomic_fixed2_sub_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, + _Quad rhs, int flag); +short __kmpc_atomic_fixed2_div_cpt_rev_fp(ident_t *id_ref, int gtid, short *lhs, + _Quad rhs, int flag); +unsigned short __kmpc_atomic_fixed2u_div_cpt_rev_fp(ident_t *id_ref, int gtid, + unsigned short *lhs, + _Quad rhs, int flag); +kmp_int32 __kmpc_atomic_fixed4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); +kmp_int32 __kmpc_atomic_fixed4_div_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_int32 *lhs, _Quad rhs, + int flag); +kmp_uint32 __kmpc_atomic_fixed4u_div_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_uint32 *lhs, _Quad rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); +kmp_int64 __kmpc_atomic_fixed8_div_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_int64 *lhs, _Quad rhs, + int flag); +kmp_uint64 __kmpc_atomic_fixed8u_div_cpt_rev_fp(ident_t *id_ref, int gtid, + kmp_uint64 *lhs, _Quad rhs, + int flag); +float __kmpc_atomic_float4_sub_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs, + _Quad rhs, int flag); +float __kmpc_atomic_float4_div_cpt_rev_fp(ident_t *id_ref, int gtid, float *lhs, + _Quad rhs, int flag); +double __kmpc_atomic_float8_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + double *lhs, _Quad rhs, int flag); +double __kmpc_atomic_float8_div_cpt_rev_fp(ident_t *id_ref, int gtid, + double *lhs, _Quad rhs, int flag); +long double __kmpc_atomic_float10_sub_cpt_rev_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); +long double __kmpc_atomic_float10_div_cpt_rev_fp(ident_t *id_ref, int gtid, + long double *lhs, _Quad rhs, + int flag); #endif // KMP_HAVE_QUAD // End of OpenMP 4.0 capture -#endif //OMP_40_ENABLED +#endif // OMP_40_ENABLED -#endif //KMP_ARCH_X86 || KMP_ARCH_X86_64 +#endif // KMP_ARCH_X86 || KMP_ARCH_X86_64 /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif #endif /* KMP_ATOMIC_H */ diff --git a/openmp/runtime/src/kmp_barrier.cpp b/openmp/runtime/src/kmp_barrier.cpp index a9a46f1..f25915f 100644 --- a/openmp/runtime/src/kmp_barrier.cpp +++ b/openmp/runtime/src/kmp_barrier.cpp @@ -15,9 +15,9 @@ #include "kmp.h" #include "kmp_wait_release.h" -#include "kmp_stats.h" #include "kmp_itt.h" #include "kmp_os.h" +#include "kmp_stats.h" #if KMP_MIC @@ -29,15 +29,15 @@ #if KMP_MIC && USE_NGO_STORES // ICV copying -#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) +#define ngo_load(src) __m512d Vt = _mm512_load_pd((void *)(src)) #define ngo_store_icvs(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define 
ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) -#define ngo_sync() __asm__ volatile ("lock; addl $0,0(%%rsp)" ::: "memory") +#define ngo_store_go(dst, src) _mm512_storenrngo_pd((void *)(dst), Vt) +#define ngo_sync() __asm__ volatile("lock; addl $0,0(%%rsp)" ::: "memory") #else -#define ngo_load(src) ((void)0) +#define ngo_load(src) ((void)0) #define ngo_store_icvs(dst, src) copy_icvs((dst), (src)) -#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) -#define ngo_sync() ((void)0) +#define ngo_store_go(dst, src) KMP_MEMCPY((dst), (src), CACHE_LINE) +#define ngo_sync() ((void)0) #endif /* KMP_MIC && USE_NGO_STORES */ void __kmp_print_structure(void); // Forward declaration @@ -45,1785 +45,1966 @@ void __kmp_print_structure(void); // Forward declaration // ---------------------------- Barrier Algorithms ---------------------------- // Linear Barrier -static void -__kmp_linear_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); - register kmp_team_t *team = this_thr->th.th_team; - register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; - register kmp_info_t **other_threads = team->t.t_threads; - - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); +static void __kmp_linear_barrier_gather( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_gather); + register kmp_team_t *team = this_thr->th.th_team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_info_t **other_threads = team->t.t_threads; + + KA_TRACE( + 20, + ("__kmp_linear_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } + // Barrier imbalance - save arrive time to the thread + if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = + __itt_get_timestamp(); + } #endif - // We now perform a linear reduction to signal that all of the threads have arrived. - if (!KMP_MASTER_TID(tid)) { - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d)" - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(0, team), team->t.t_id, 0, &thr_bar->b_arrived, - thr_bar->b_arrived, thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to master thread - /* After performing this write, a worker thread may not assume that the team is valid - any more - it could be deallocated by the master thread at any time. 
*/ - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); - flag.release(); - } else { - register kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; - register int nproc = this_thr->th.th_team_nproc; - register int i; - // Don't have to worry about sleep bit here or atomic since team setting - register kmp_uint64 new_state = team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; - - // Collect all the worker team member threads. - for (i=1; i %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(0, team), + team->t.t_id, 0, &thr_bar->b_arrived, thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + // Mark arrival to master thread + /* After performing this write, a worker thread may not assume that the team + is valid any more - it could be deallocated by the master thread at any + time. */ + ANNOTATE_BARRIER_BEGIN(this_thr); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[0]); + flag.release(); + } else { + register kmp_balign_team_t *team_bar = &team->t.t_bar[bt]; + register int nproc = this_thr->th.th_team_nproc; + register int i; + // Don't have to worry about sleep bit here or atomic since team setting + register kmp_uint64 new_state = + team_bar->b_arrived + KMP_BARRIER_STATE_BUMP; + + // Collect all the worker team member threads. + for (i = 1; i < nproc; ++i) { #if KMP_CACHE_MANAGE - // Prefetch next thread's arrived count - if (i+1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[i+1]->th.th_bar[bt].bb.b_arrived); + // Prefetch next thread's arrived count + if (i + 1 < nproc) + KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_arrived); #endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(i, team), team->t.t_id, i, - &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); - - // Wait for worker thread to arrive - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(other_threads[i]); + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), + team->t.t_id, i, + &other_threads[i]->th.th_bar[bt].bb.b_arrived, new_state)); + + // Wait for worker thread to arrive + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_arrived, + new_state); + flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(other_threads[i]); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and the other thread time to the thread. - if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - other_threads[i]->th.th_bar_min_time); - } + // Barrier imbalance - write min of the thread time and the other thread + // time to the thread. 
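The reflowed hunk above is the heart of the linear gather: the master walks the team, waits on each worker's b_arrived counter, and folds that worker's reduce_data into its own when a reduce callback is supplied. The following is a minimal illustrative sketch of that shape in portable C++, not code from this patch; Worker, arrived, partial and reduce_into are stand-in names, and a plain atomic spin stands in for kmp_flag_64::wait().

  #include <atomic>
  #include <cstdio>
  #include <thread>
  #include <vector>

  struct Worker {
    std::atomic<unsigned long long> arrived{0}; // b_arrived stand-in
    long long partial = 0;                      // reduce_data stand-in
  };

  // Master side of a linear gather: wait for each worker, then reduce.
  void linear_gather(std::vector<Worker> &team, unsigned long long new_state,
                     void (*reduce_into)(long long &, long long)) {
    for (size_t i = 1; i < team.size(); ++i) {
      while (team[i].arrived.load(std::memory_order_acquire) < new_state)
        std::this_thread::yield(); // simplified wait on the arrived flag
      reduce_into(team[0].partial, team[i].partial);
    }
  }

  int main() {
    std::vector<Worker> team(4);
    const unsigned long long new_state = 1; // one barrier "generation"
    std::vector<std::thread> threads;
    for (size_t i = 1; i < team.size(); ++i)
      threads.emplace_back([&team, i, new_state] {
        team[i].partial = (long long)i * 10; // pretend per-thread work
        team[i].arrived.store(new_state, std::memory_order_release);
      });
    linear_gather(team, new_state,
                  [](long long &acc, long long v) { acc += v; });
    for (auto &t : threads)
      t.join();
    std::printf("reduced value: %lld\n", team[0].partial); // prints 60
    return 0;
  }

The real gather additionally prefetches the next worker's cache line and records ITT timestamps, as the surrounding hunk shows; none of that changes the basic wait-then-reduce loop modeled here.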
+ if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN( + this_thr->th.th_bar_min_time, other_threads[i]->th.th_bar_min_time); + } #endif - if (reduce) { - KA_TRACE(100, ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", gtid, - team->t.t_id, tid, __kmp_gtid_from_tid(i, team), team->t.t_id, i)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, - other_threads[i]->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - // Don't have to worry about sleep bit here or atomic since team setting - team_bar->b_arrived = new_state; - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, new_state)); + if (reduce) { + KA_TRACE(100, + ("__kmp_linear_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(i, team), + team->t.t_id, i)); + ANNOTATE_REDUCE_AFTER(reduce); + (*reduce)(this_thr->th.th_local.reduce_data, + other_threads[i]->th.th_local.reduce_data); + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); + } } - KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + // Don't have to worry about sleep bit here or atomic since team setting + team_bar->b_arrived = new_state; + KA_TRACE(20, ("__kmp_linear_barrier_gather: T#%d(%d:%d) set team %d " + "arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, &team_bar->b_arrived, + new_state)); + } + KA_TRACE( + 20, + ("__kmp_linear_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } -static void -__kmp_linear_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); - register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - register kmp_team_t *team; +static void __kmp_linear_barrier_release( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_linear_release); + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_team_t *team; - if (KMP_MASTER_TID(tid)) { - register unsigned int i; - register kmp_uint32 nproc = this_thr->th.th_team_nproc; - register kmp_info_t **other_threads; + if (KMP_MASTER_TID(tid)) { + register unsigned int i; + register kmp_uint32 nproc = this_thr->th.th_team_nproc; + register kmp_info_t **other_threads; - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - other_threads = team->t.t_threads; + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + other_threads = team->t.t_threads; - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) master enter for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); - if (nproc > 1) { + if (nproc > 1) { #if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (propagate_icvs) { - ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); - for (i=1; it.t_ident, team->t.t_threads[i], team, i, FALSE); - ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, - 
&team->t.t_implicit_task_taskdata[0].td_icvs); - } - ngo_sync(); - } - } + { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); + if (propagate_icvs) { + ngo_load(&team->t.t_implicit_task_taskdata[0].td_icvs); + for (i = 1; i < nproc; ++i) { + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[i], + team, i, FALSE); + ngo_store_icvs(&team->t.t_implicit_task_taskdata[i].td_icvs, + &team->t.t_implicit_task_taskdata[0].td_icvs); + } + ngo_sync(); + } + } #endif // KMP_BARRIER_ICV_PUSH - // Now, release all of the worker threads - for (i=1; ith.th_bar[bt].bb.b_go); + // Prefetch next thread's go flag + if (i + 1 < nproc) + KMP_CACHE_PREFETCH(&other_threads[i + 1]->th.th_bar[bt].bb.b_go); #endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - other_threads[i]->th.th_info.ds.ds_gtid, team->t.t_id, i, - &other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go, - other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); - ANNOTATE_BARRIER_BEGIN(other_threads[i]); - kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, other_threads[i]); - flag.release(); - } - } - } else { // Wait for the MASTER thread to release us - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(this_thr); + KA_TRACE( + 20, + ("__kmp_linear_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d) " + "go(%p): %u => %u\n", + gtid, team->t.t_id, tid, other_threads[i]->th.th_info.ds.ds_gtid, + team->t.t_id, i, &other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]->th.th_bar[bt].bb.b_go + KMP_BARRIER_STATE_BUMP)); + ANNOTATE_BARRIER_BEGIN(other_threads[i]); + kmp_flag_64 flag(&other_threads[i]->th.th_bar[bt].bb.b_go, + other_threads[i]); + flag.release(); + } + } + } else { // Wait for the MASTER thread to release us + KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d wait go(%p) == %u\n", + gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(this_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... - __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In a fork barrier; cannot get the object reliably (or ITTNOTIFY is + // disabled) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); + // Cancel wait on previous parallel region... 
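A few lines up, the KMP_BARRIER_ICV_PUSH block copies the master's implicit-task ICVs into every worker's td_icvs before any go flag is released. The sketch below models only that ordering idea with standard atomics; it is illustrative, not the runtime's implementation, and Icvs, Slot and go are invented stand-ins. The point it demonstrates: plain copies followed by a release store on each go flag are enough for the woken worker's acquire load to see the copied values.

  #include <atomic>
  #include <cstddef>
  #include <cstdio>
  #include <thread>

  struct Icvs { int nproc; int dynamic; }; // stand-in for the ICV struct

  struct Slot {
    Icvs icvs{};                 // td_icvs stand-in
    std::atomic<unsigned> go{0}; // b_go stand-in
  };

  // Master: publish ICVs to every worker slot, then release the workers.
  void push_icvs_and_release(Slot *slots, std::size_t nproc, const Icvs &m) {
    for (std::size_t i = 1; i < nproc; ++i)
      slots[i].icvs = m; // plain copies, like the ngo_store_icvs loop
    for (std::size_t i = 1; i < nproc; ++i)
      slots[i].go.store(1, std::memory_order_release); // makes copies visible
  }

  // Worker i: wait for the go flag, then read the ICVs safely.
  Icvs wait_and_read(Slot *slots, std::size_t i) {
    while (slots[i].go.load(std::memory_order_acquire) == 0) { /* spin */ }
    return slots[i].icvs; // ordered after the copy by acquire/release
  }

  int main() {
    Slot slots[4];
    Icvs master{4, 1};
    std::thread w([&] {
      Icvs got = wait_and_read(slots, 3);
      std::printf("worker sees nproc=%d\n", got.nproc);
    });
    push_icvs_and_release(slots, 4, master);
    w.join();
    return 0;
  }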
+ __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ // Early exit for reaping threads releasing forkjoin barrier - if ( bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done) ) - return; - // The worker thread may now assume that the team is valid. + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; +// The worker thread may now assume that the team is valid. #ifdef KMP_DEBUG - tid = __kmp_tid_from_gtid(gtid); - team = __kmp_threads[gtid]->th.th_team; + tid = __kmp_tid_from_gtid(gtid); + team = __kmp_threads[gtid]->th.th_team; #endif - KMP_DEBUG_ASSERT(team != NULL); - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - KA_TRACE(20, ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(team != NULL); + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, + ("__kmp_linear_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } + KA_TRACE( + 20, + ("__kmp_linear_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } // Tree barrier static void -__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather); - register kmp_team_t *team = this_thr->th.th_team; - register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - register kmp_info_t **other_threads = team->t.t_threads; - register kmp_uint32 nproc = this_thr->th.th_team_nproc; - register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; - register kmp_uint32 branch_factor = 1 << branch_bits; - register kmp_uint32 child; - register kmp_uint32 child_tid; - register kmp_uint64 new_state; - - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); +__kmp_tree_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, + int tid, void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_gather); + register kmp_team_t *team = this_thr->th.th_team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_info_t **other_threads = team->t.t_threads; + register kmp_uint32 nproc = this_thr->th.th_team_nproc; + register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; + register kmp_uint32 branch_factor = 1 << branch_bits; + register kmp_uint32 child; + register kmp_uint32 child_tid; + register kmp_uint64 new_state; + + KA_TRACE( + 20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); #if USE_ITT_BUILD && USE_ITT_NOTIFY - 
// Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } + // Barrier imbalance - save arrive time to the thread + if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = + __itt_get_timestamp(); + } #endif - // Perform tree gather to wait until all threads have arrived; reduce any required data as we go - child_tid = (tid << branch_bits) + 1; - if (child_tid < nproc) { - // Parent threads wait for all their children to arrive - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - child = 1; - do { - register kmp_info_t *child_thr = other_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + // Perform tree gather to wait until all threads have arrived; reduce any + // required data as we go + child_tid = (tid << branch_bits) + 1; + if (child_tid < nproc) { + // Parent threads wait for all their children to arrive + new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + child = 1; + do { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; #if KMP_CACHE_MANAGE - // Prefetch next thread's arrived count - if (child+1 <= branch_factor && child_tid+1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_arrived); + // Prefetch next thread's arrived count + if (child + 1 <= branch_factor && child_tid + 1 < nproc) + KMP_CACHE_PREFETCH( + &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_arrived); #endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, - &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(child_thr); + KA_TRACE(20, + ("__kmp_tree_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); + // Wait for child to arrive + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(child_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to the thread. - if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } + // Barrier imbalance - write min of the thread time and a child time to + // the thread. 
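The tree gather above is driven entirely by shift arithmetic: the first child of tid is (tid << branch_bits) + 1, the children run consecutively up to branch_factor of them, and the parent is (tid - 1) >> branch_bits. The small stand-alone program below (illustrative only) prints those relations for an 8-thread team so the loop bounds in the hunk are easy to check by eye.

  #include <cstdio>

  // Mirror of the tree barrier's index arithmetic, for illustration only.
  static int tree_parent(int tid, unsigned branch_bits) {
    return (tid == 0) ? -1 : (tid - 1) >> branch_bits;
  }

  static void tree_children(int tid, unsigned branch_bits, int nproc) {
    const int branch_factor = 1 << branch_bits;
    int child_tid = (tid << branch_bits) + 1; // first child
    std::printf("tid %d (parent %d) ->", tid, tree_parent(tid, branch_bits));
    for (int child = 1; child <= branch_factor && child_tid < nproc;
         ++child, ++child_tid)
      std::printf(" %d", child_tid);
    std::printf("\n");
  }

  int main() {
    const unsigned branch_bits = 1; // branch factor 2, i.e. a binary tree
    const int nproc = 8;
    for (int tid = 0; tid < nproc; ++tid)
      tree_children(tid, branch_bits, nproc);
    return 0;
  }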
+ if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, + child_thr->th.th_bar_min_time); + } #endif - if (reduce) { - KA_TRACE(100, ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - child++; - child_tid++; - } - while (child <= branch_factor && child_tid < nproc); - } - - if (!KMP_MASTER_TID(tid)) { // Worker threads - register kmp_int32 parent_tid = (tid - 1) >> branch_bits; - - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - - // Mark arrival to parent thread - /* After performing this write, a worker thread may not assume that the team is valid - any more - it could be deallocated by the master thread at any time. */ - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); - flag.release(); - } else { - // Need to update the team arrived pointer if we are the master thread - if (nproc > 1) // New value was already computed above - team->t.t_bar[bt].b_arrived = new_state; - else - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); - } - KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + if (reduce) { + KA_TRACE(100, + ("__kmp_tree_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + ANNOTATE_REDUCE_AFTER(reduce); + (*reduce)(this_thr->th.th_local.reduce_data, + child_thr->th.th_local.reduce_data); + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); + } + child++; + child_tid++; + } while (child <= branch_factor && child_tid < nproc); + } + + if (!KMP_MASTER_TID(tid)) { // Worker threads + register kmp_int32 parent_tid = (tid - 1) >> branch_bits; + + KA_TRACE(20, + ("__kmp_tree_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " + "arrived(%p): %llu => %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), + team->t.t_id, parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + + // Mark arrival to parent thread + /* After performing this write, a worker thread may not assume that the team + is valid any more - it could be deallocated by the master thread at any + time. 
*/ + ANNOTATE_BARRIER_BEGIN(this_thr); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[parent_tid]); + flag.release(); + } else { + // Need to update the team arrived pointer if we are the master thread + if (nproc > 1) // New value was already computed above + team->t.t_bar[bt].b_arrived = new_state; + else + team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; + KA_TRACE(20, ("__kmp_tree_barrier_gather: T#%d(%d:%d) set team %d " + "arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, + &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + KA_TRACE(20, + ("__kmp_tree_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } -static void -__kmp_tree_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); - register kmp_team_t *team; - register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - register kmp_uint32 nproc; - register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; - register kmp_uint32 branch_factor = 1 << branch_bits; - register kmp_uint32 child; - register kmp_uint32 child_tid; - - // Perform a tree release for all of the threads that have been gathered - if (!KMP_MASTER_TID(tid)) { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(this_thr); +static void __kmp_tree_barrier_release( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_tree_release); + register kmp_team_t *team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_uint32 nproc; + register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; + register kmp_uint32 branch_factor = 1 << branch_bits; + register kmp_uint32 child; + register kmp_uint32 child_tid; + + // Perform a tree release for all of the threads that have been gathered + if (!KMP_MASTER_TID( + tid)) { // Handle fork barrier workers who aren't part of a team yet + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d wait go(%p) == %u\n", gtid, + &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + // Wait for parent thread to release us + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(this_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably (or ITTNOTIFY is disabled) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... 
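Both gather paths stress, in their comments, that once a worker writes its b_arrived flag it must not touch the team again, because the master may deallocate the team as soon as it observes the flag. The toy program below models that "publish last" discipline with standard atomics; Team, shared_value and done are illustrative names, not runtime types.

  #include <atomic>
  #include <cstdio>
  #include <thread>

  struct Team {
    int shared_value = 42;
    std::atomic<bool> done{false}; // b_arrived stand-in
  };

  int main() {
    Team *team = new Team;
    std::thread worker([team] {
      int local = team->shared_value; // read everything needed first...
      team->done.store(true, std::memory_order_release); // ...then publish
      // After this store the worker must not touch *team again: the master
      // is free to delete it once it observes done == true.
      std::printf("worker copied %d\n", local);
    });
    while (!team->done.load(std::memory_order_acquire)) { /* spin */ }
    delete team; // safe: the worker promised not to touch team after publishing
    worker.join();
    return 0;
  }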
- __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In fork barrier where we could not get the object reliably (or + // ITTNOTIFY is disabled) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); + // Cancel wait on previous parallel region... + __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ // Early exit for reaping threads releasing forkjoin barrier if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; + return; - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); + // The worker thread may now assume that the team is valid. + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } else { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } - nproc = this_thr->th.th_team_nproc; - child_tid = (tid << branch_bits) + 1; - - if (child_tid < nproc) { - register kmp_info_t **other_threads = team->t.t_threads; - child = 1; - // Parent threads release all their children - do { - register kmp_info_t *child_thr = other_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, + ("__kmp_tree_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", gtid, + team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. 
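In the worker path just above, the thread waits for its b_go flag and then resets that flag to KMP_INIT_BARRIER_STATE itself, so the same cell is ready for the next barrier. A simplified model of that handshake follows (no sleeping, no ITT hooks; INIT and GO_BUMP are stand-in constants, not the runtime's values).

  #include <atomic>
  #include <cstdio>
  #include <thread>

  constexpr unsigned INIT = 0;    // stand-in for KMP_INIT_BARRIER_STATE
  constexpr unsigned GO_BUMP = 4; // stand-in for KMP_BARRIER_STATE_BUMP

  std::atomic<unsigned> b_go{INIT};

  void worker() {
    // Wait for the master to release us (simplified flag wait).
    while (b_go.load(std::memory_order_acquire) != GO_BUMP)
      std::this_thread::yield();
    // Reset our own flag so the next barrier starts from a known state.
    b_go.store(INIT, std::memory_order_relaxed);
    std::printf("worker released and reset its go flag\n");
  }

  int main() {
    std::thread t(worker);
    b_go.store(GO_BUMP, std::memory_order_release); // master releases the worker
    t.join();
    return 0;
  }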
+ } else { + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) master enter for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + } + nproc = this_thr->th.th_team_nproc; + child_tid = (tid << branch_bits) + 1; + + if (child_tid < nproc) { + register kmp_info_t **other_threads = team->t.t_threads; + child = 1; + // Parent threads release all their children + do { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; #if KMP_CACHE_MANAGE - // Prefetch next thread's go count - if (child+1 <= branch_factor && child_tid+1 < nproc) - KMP_CACHE_PREFETCH(&other_threads[child_tid+1]->th.th_bar[bt].bb.b_go); + // Prefetch next thread's go count + if (child + 1 <= branch_factor && child_tid + 1 < nproc) + KMP_CACHE_PREFETCH( + &other_threads[child_tid + 1]->th.th_bar[bt].bb.b_go); #endif /* KMP_CACHE_MANAGE */ #if KMP_BARRIER_ICV_PUSH - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[child_tid], - team, child_tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, - &team->t.t_implicit_task_taskdata[0].td_icvs); - } - } -#endif // KMP_BARRIER_ICV_PUSH - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - child++; - child_tid++; + { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); + if (propagate_icvs) { + __kmp_init_implicit_task(team->t.t_ident, + team->t.t_threads[child_tid], team, + child_tid, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[child_tid].td_icvs, + &team->t.t_implicit_task_taskdata[0].td_icvs); } - while (child <= branch_factor && child_tid < nproc); - } - KA_TRACE(20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + } +#endif // KMP_BARRIER_ICV_PUSH + KA_TRACE(20, + ("__kmp_tree_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" + "go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child from barrier + ANNOTATE_BARRIER_BEGIN(child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + child++; + child_tid++; + } while (child <= branch_factor && child_tid < nproc); + } + KA_TRACE( + 20, ("__kmp_tree_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } - // Hyper Barrier static void -__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - void (*reduce)(void *, void *) - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); - register kmp_team_t *team = this_thr->th.th_team; - register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - register kmp_info_t **other_threads = team->t.t_threads; - register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE; - register kmp_uint32 num_threads = this_thr->th.th_team_nproc; - register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; - 
register kmp_uint32 branch_factor = 1 << branch_bits; - register kmp_uint32 offset; - register kmp_uint32 level; - - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); +__kmp_hyper_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, int gtid, + int tid, void (*reduce)(void *, void *) + USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_gather); + register kmp_team_t *team = this_thr->th.th_team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_info_t **other_threads = team->t.t_threads; + register kmp_uint64 new_state = KMP_BARRIER_UNUSED_STATE; + register kmp_uint32 num_threads = this_thr->th.th_team_nproc; + register kmp_uint32 branch_bits = __kmp_barrier_gather_branch_bits[bt]; + register kmp_uint32 branch_factor = 1 << branch_bits; + register kmp_uint32 offset; + register kmp_uint32 level; + + KA_TRACE( + 20, + ("__kmp_hyper_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = __itt_get_timestamp(); - } + // Barrier imbalance - save arrive time to the thread + if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = this_thr->th.th_bar_min_time = + __itt_get_timestamp(); + } #endif - /* Perform a hypercube-embedded tree gather to wait until all of the threads have - arrived, and reduce any required data as we go. */ - kmp_flag_64 p_flag(&thr_bar->b_arrived); - for (level=0, offset=1; offset> level) & (branch_factor - 1)) != 0) { - register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) -1); - - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(parent_tid, team), team->t.t_id, parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, - thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); - // Mark arrival to parent thread - /* After performing this write (in the last iteration of the enclosing for loop), - a worker thread may not assume that the team is valid any more - it could be - deallocated by the master thread at any time. */ - ANNOTATE_BARRIER_BEGIN(this_thr); - p_flag.set_waiter(other_threads[parent_tid]); - p_flag.release(); - break; - } + /* Perform a hypercube-embedded tree gather to wait until all of the threads + have arrived, and reduce any required data as we go. 
*/ + kmp_flag_64 p_flag(&thr_bar->b_arrived); + for (level = 0, offset = 1; offset < num_threads; + level += branch_bits, offset <<= branch_bits) { + register kmp_uint32 child; + register kmp_uint32 child_tid; - // Parent threads wait for children to arrive - if (new_state == KMP_BARRIER_UNUSED_STATE) - new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - for (child=1, child_tid=tid+(1 << level); childth.th_bar[bt].bb; + if (((tid >> level) & (branch_factor - 1)) != 0) { + register kmp_int32 parent_tid = tid & ~((1 << (level + branch_bits)) - 1); + + KA_TRACE(20, + ("__kmp_hyper_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " + "arrived(%p): %llu => %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(parent_tid, team), + team->t.t_id, parent_tid, &thr_bar->b_arrived, + thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + // Mark arrival to parent thread + /* After performing this write (in the last iteration of the enclosing for + loop), a worker thread may not assume that the team is valid any more + - it could be deallocated by the master thread at any time. */ + ANNOTATE_BARRIER_BEGIN(this_thr); + p_flag.set_waiter(other_threads[parent_tid]); + p_flag.release(); + break; + } + + // Parent threads wait for children to arrive + if (new_state == KMP_BARRIER_UNUSED_STATE) + new_state = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + for (child = 1, child_tid = tid + (1 << level); + child < branch_factor && child_tid < num_threads; + child++, child_tid += (1 << level)) { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; #if KMP_CACHE_MANAGE - register kmp_uint32 next_child_tid = child_tid + (1 << level); - // Prefetch next thread's arrived count - if (child+1 < branch_factor && next_child_tid < num_threads) - KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); + register kmp_uint32 next_child_tid = child_tid + (1 << level); + // Prefetch next thread's arrived count + if (child + 1 < branch_factor && next_child_tid < num_threads) + KMP_CACHE_PREFETCH( + &other_threads[next_child_tid]->th.th_bar[bt].bb.b_arrived); #endif /* KMP_CACHE_MANAGE */ - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " - "arrived(%p) == %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, child_tid, - &child_bar->b_arrived, new_state)); - // Wait for child to arrive - kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); - c_flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(child_thr); + KA_TRACE(20, + ("__kmp_hyper_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%u) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); + // Wait for child to arrive + kmp_flag_64 c_flag(&child_bar->b_arrived, new_state); + c_flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(child_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - write min of the thread time and a child time to the thread. - if (__kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, - child_thr->th.th_bar_min_time); - } + // Barrier imbalance - write min of the thread time and a child time to + // the thread. 
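The hypercube gather above decides, level by level, whether a thread reports to a parent or keeps waiting for children, and the decision is pure mask-and-shift arithmetic: a thread whose low digit of tid >> level is non-zero signals the parent tid & ~((1 << (level + branch_bits)) - 1) and is done; otherwise its children at that level are tid + (1 << level), tid + 2*(1 << level), and so on, up to branch_factor - 1 of them. The stand-alone program below (illustrative only) enumerates those pairings for eight threads with branch factor 2.

  #include <cstdio>

  // Print, for every thread, whom it signals or waits for at each level of
  // the hypercube-embedded gather tree. Mirrors the loop bounds in
  // __kmp_hyper_barrier_gather; purely illustrative.
  int main() {
    const unsigned branch_bits = 1; // branch factor 2
    const unsigned branch_factor = 1u << branch_bits;
    const unsigned num_threads = 8;

    for (unsigned tid = 0; tid < num_threads; ++tid) {
      for (unsigned level = 0, offset = 1; offset < num_threads;
           level += branch_bits, offset <<= branch_bits) {
        if (((tid >> level) & (branch_factor - 1)) != 0) {
          unsigned parent = tid & ~((1u << (level + branch_bits)) - 1);
          std::printf("tid %u: level %u -> signals parent %u\n",
                      tid, level, parent);
          break; // like the runtime, stop climbing once we have reported
        }
        for (unsigned child = 1, child_tid = tid + (1u << level);
             child < branch_factor && child_tid < num_threads;
             ++child, child_tid += (1u << level))
          std::printf("tid %u: level %u <- waits for child %u\n",
                      tid, level, child_tid);
      }
    }
    return 0;
  }

Swapping the constants at the top reproduces the pairings for any power-of-two branch factor and team size.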
+ if (__kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_min_time = KMP_MIN(this_thr->th.th_bar_min_time, + child_thr->th.th_bar_min_time); + } #endif - if (reduce) { - KA_TRACE(100, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } - - if (KMP_MASTER_TID(tid)) { - // Need to update the team arrived pointer if we are the master thread - if (new_state == KMP_BARRIER_UNUSED_STATE) - team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; - else - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, team->t.t_id, - &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + if (reduce) { + KA_TRACE(100, + ("__kmp_hyper_barrier_gather: T#%d(%d:%d) += T#%d(%d:%u)\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid)); + ANNOTATE_REDUCE_AFTER(reduce); + (*reduce)(this_thr->th.th_local.reduce_data, + child_thr->th.th_local.reduce_data); + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); + } } - KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + } + + if (KMP_MASTER_TID(tid)) { + // Need to update the team arrived pointer if we are the master thread + if (new_state == KMP_BARRIER_UNUSED_STATE) + team->t.t_bar[bt].b_arrived += KMP_BARRIER_STATE_BUMP; + else + team->t.t_bar[bt].b_arrived = new_state; + KA_TRACE(20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) set team %d " + "arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, + &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + KA_TRACE( + 20, ("__kmp_hyper_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } // The reverse versions seem to beat the forward versions overall #define KMP_REVERSE_HYPER_BAR -static void -__kmp_hyper_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void *itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); - register kmp_team_t *team; - register kmp_bstate_t *thr_bar = & this_thr -> th.th_bar[ bt ].bb; - register kmp_info_t **other_threads; - register kmp_uint32 num_threads; - register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[ bt ]; - register kmp_uint32 branch_factor = 1 << branch_bits; - register kmp_uint32 child; - register kmp_uint32 child_tid; - register kmp_uint32 offset; - register kmp_uint32 level; - - /* Perform a hypercube-embedded tree release for all of the threads that have been gathered. - If KMP_REVERSE_HYPER_BAR is defined (default) the threads are released in the reverse - order of the corresponding gather, otherwise threads are released in the same order. 
*/ - if (KMP_MASTER_TID(tid)) { // master - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); +static void __kmp_hyper_barrier_release( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hyper_release); + register kmp_team_t *team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_info_t **other_threads; + register kmp_uint32 num_threads; + register kmp_uint32 branch_bits = __kmp_barrier_release_branch_bits[bt]; + register kmp_uint32 branch_factor = 1 << branch_bits; + register kmp_uint32 child; + register kmp_uint32 child_tid; + register kmp_uint32 offset; + register kmp_uint32 level; + + /* Perform a hypercube-embedded tree release for all of the threads that have + been gathered. If KMP_REVERSE_HYPER_BAR is defined (default) the threads + are released in the reverse order of the corresponding gather, otherwise + threads are released in the same order. */ + if (KMP_MASTER_TID(tid)) { // master + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) master enter for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); #if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { // master already has ICVs in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); - } -#endif + if (propagate_icvs) { // master already has ICVs in final destination; copy + copy_icvs(&thr_bar->th_fixed_icvs, + &team->t.t_implicit_task_taskdata[tid].td_icvs); } - else { // Handle fork barrier workers who aren't part of a team yet - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", - gtid, &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); - // Wait for parent thread to release us - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(this_thr); +#endif + } else { // Handle fork barrier workers who aren't part of a team yet + KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d wait go(%p) == %u\n", gtid, + &thr_bar->b_go, KMP_BARRIER_STATE_BUMP)); + // Wait for parent thread to release us + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(this_thr); #if USE_ITT_BUILD && USE_ITT_NOTIFY - if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { - // In fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); - // Cancel wait on previous parallel region... - __kmp_itt_task_starting(itt_sync_obj); - - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj != NULL) - // Call prepare as early as possible for "new" barrier - __kmp_itt_task_finished(itt_sync_obj); - } else + if ((__itt_sync_create_ptr && itt_sync_obj == NULL) || KMP_ITT_DEBUG) { + // In fork barrier where we could not get the object reliably + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 0, -1); + // Cancel wait on previous parallel region... 
+ __kmp_itt_task_starting(itt_sync_obj); + + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj != NULL) + // Call prepare as early as possible for "new" barrier + __kmp_itt_task_finished(itt_sync_obj); + } else #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ // Early exit for reaping threads releasing forkjoin barrier if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; + return; - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); + // The worker thread may now assume that the team is valid. + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); - TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. - } - num_threads = this_thr->th.th_team_nproc; - other_threads = team->t.t_threads; + TCW_4(thr_bar->b_go, KMP_INIT_BARRIER_STATE); + KA_TRACE(20, + ("__kmp_hyper_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } + num_threads = this_thr->th.th_team_nproc; + other_threads = team->t.t_threads; #ifdef KMP_REVERSE_HYPER_BAR - // Count up to correct level for parent - for (level=0, offset=1; offset>level) & (branch_factor-1)) == 0); - level+=branch_bits, offset<<=branch_bits); - - // Now go down from there - for (level-=branch_bits, offset>>=branch_bits; offset != 0; - level-=branch_bits, offset>>=branch_bits) + // Count up to correct level for parent + for (level = 0, offset = 1; + offset < num_threads && (((tid >> level) & (branch_factor - 1)) == 0); + level += branch_bits, offset <<= branch_bits) + ; + + // Now go down from there + for (level -= branch_bits, offset >>= branch_bits; offset != 0; + level -= branch_bits, offset >>= branch_bits) #else - // Go down the tree, level by level - for (level=0, offset=1; offset> ((level==0)?level:level-1); - for (child=(child=1; child--, child_tid-=(1<> ((level == 0) ? level : level - 1); + for (child = (child < branch_factor - 1) ? 
child : branch_factor - 1, + child_tid = tid + (child << level); + child >= 1; child--, child_tid -= (1 << level)) #else - if (((tid >> level) & (branch_factor - 1)) != 0) - // No need to go lower than this, since this is the level parent would be notified - break; - // Iterate through children on this level of the tree - for (child=1, child_tid=tid+(1<> level) & (branch_factor - 1)) != 0) + // No need to go lower than this, since this is the level parent would be + // notified + break; + // Iterate through children on this level of the tree + for (child = 1, child_tid = tid + (1 << level); + child < branch_factor && child_tid < num_threads; + child++, child_tid += (1 << level)) #endif // KMP_REVERSE_HYPER_BAR - { - if (child_tid >= num_threads) continue; // Child doesn't exist so keep going - else { - register kmp_info_t *child_thr = other_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + { + if (child_tid >= num_threads) + continue; // Child doesn't exist so keep going + else { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; #if KMP_CACHE_MANAGE - register kmp_uint32 next_child_tid = child_tid - (1 << level); - // Prefetch next thread's go count -# ifdef KMP_REVERSE_HYPER_BAR - if (child-1 >= 1 && next_child_tid < num_threads) -# else - if (child+1 < branch_factor && next_child_tid < num_threads) -# endif // KMP_REVERSE_HYPER_BAR - KMP_CACHE_PREFETCH(&other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); + register kmp_uint32 next_child_tid = child_tid - (1 << level); +// Prefetch next thread's go count +#ifdef KMP_REVERSE_HYPER_BAR + if (child - 1 >= 1 && next_child_tid < num_threads) +#else + if (child + 1 < branch_factor && next_child_tid < num_threads) +#endif // KMP_REVERSE_HYPER_BAR + KMP_CACHE_PREFETCH( + &other_threads[next_child_tid]->th.th_bar[bt].bb.b_go); #endif /* KMP_CACHE_MANAGE */ #if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) // push my fixed ICVs to my child - copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); + if (propagate_icvs) // push my fixed ICVs to my child + copy_icvs(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); #endif // KMP_BARRIER_ICV_PUSH - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" - "go(%p): %u => %u\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(child_tid, team), team->t.t_id, - child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child from barrier - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } + KA_TRACE( + 20, + ("__kmp_hyper_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%u)" + "go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child from barrier + ANNOTATE_BARRIER_BEGIN(child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } } + } #if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); - } + if (propagate_icvs && + !KMP_MASTER_TID(tid)) { // copy ICVs locally to final dest + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, + 
FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->th_fixed_icvs); + } #endif - KA_TRACE(20, ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + KA_TRACE( + 20, + ("__kmp_hyper_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } // Hierarchical Barrier // Initialize thread barrier data -/* Initializes/re-initializes the hierarchical barrier data stored on a thread. Performs the - minimum amount of initialization required based on how the team has changed. Returns true if - leaf children will require both on-core and traditional wake-up mechanisms. For example, if the - team size increases, threads already in the team will respond to on-core wakeup on their parent - thread, but threads newly added to the team will only be listening on the their local b_go. */ -static bool -__kmp_init_hierarchical_barrier_thread(enum barrier_type bt, kmp_bstate_t *thr_bar, kmp_uint32 nproc, - int gtid, int tid, kmp_team_t *team) -{ - // Checks to determine if (re-)initialization is needed - bool uninitialized = thr_bar->team == NULL; - bool team_changed = team != thr_bar->team; - bool team_sz_changed = nproc != thr_bar->nproc; - bool tid_changed = tid != thr_bar->old_tid; - bool retval = false; - - if (uninitialized || team_sz_changed) { - __kmp_get_hierarchy(nproc, thr_bar); - } - - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->my_level = thr_bar->depth-1; // default for master - thr_bar->parent_tid = -1; // default for master - if (!KMP_MASTER_TID(tid)) { // if not master, find parent thread in hierarchy - kmp_uint32 d=0; - while (ddepth) { // find parent based on level of thread in hierarchy, and note level - kmp_uint32 rem; - if (d == thr_bar->depth-2) { // reached level right below the master - thr_bar->parent_tid = 0; - thr_bar->my_level = d; - break; - } - else if ((rem = tid%thr_bar->skip_per_level[d+1]) != 0) { // TODO: can we make this op faster? - // thread is not a subtree root at next level, so this is max - thr_bar->parent_tid = tid - rem; - thr_bar->my_level = d; - break; - } - ++d; - } +/* Initializes/re-initializes the hierarchical barrier data stored on a thread. + Performs the minimum amount of initialization required based on how the team + has changed. Returns true if leaf children will require both on-core and + traditional wake-up mechanisms. For example, if the team size increases, + threads already in the team will respond to on-core wakeup on their parent + thread, but threads newly added to the team will only be listening on the + their local b_go. 
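
As a concrete reading of the parent search in __kmp_init_hierarchical_barrier_thread below, the sketch here mirrors the skip_per_level walk for an assumed 4-ary hierarchy; the real skip_per_level values come from __kmp_get_hierarchy() and the machine topology, and the team size is illustrative. It also prints the byte slot, offset = 7 - (tid - parent_tid - 1), that a leaf later uses on its parent's 64-bit flag word.

// Standalone sketch (not runtime code): parent/level search over an assumed
// hierarchy. skip_per_level[d] is the distance between subtree roots at
// level d (skip_per_level[0] == 1).
#include <cstdio>

int main() {
  const unsigned skip_per_level[] = {1, 4, 16, 64}; // assumed branching of 4
  const unsigned depth = 4;
  const unsigned nproc = 20; // illustrative team size

  for (unsigned tid = 1; tid < nproc; ++tid) { // tid 0 is the master
    unsigned parent_tid = 0, my_level = depth - 1;
    for (unsigned d = 0; d < depth; ++d) {
      unsigned rem;
      if (d == depth - 2) { // reached the level right below the master
        parent_tid = 0;
        my_level = d;
        break;
      } else if ((rem = tid % skip_per_level[d + 1]) != 0) {
        // Not a subtree root at the next level: the root of this subtree,
        // tid - rem, is the parent, and d is this thread's level.
        parent_tid = tid - rem;
        my_level = d;
        break;
      }
    }
    if (my_level == 0) // only leaves use the byte-flag ("oncore") path
      printf("T#%u: leaf, parent T#%u, byte offset %u\n", tid, parent_tid,
             7 - (tid - parent_tid - 1));
    else
      printf("T#%u: level %u, parent T#%u\n", tid, my_level, parent_tid);
  }
  return 0;
}
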
*/ +static bool __kmp_init_hierarchical_barrier_thread(enum barrier_type bt, + kmp_bstate_t *thr_bar, + kmp_uint32 nproc, int gtid, + int tid, kmp_team_t *team) { + // Checks to determine if (re-)initialization is needed + bool uninitialized = thr_bar->team == NULL; + bool team_changed = team != thr_bar->team; + bool team_sz_changed = nproc != thr_bar->nproc; + bool tid_changed = tid != thr_bar->old_tid; + bool retval = false; + + if (uninitialized || team_sz_changed) { + __kmp_get_hierarchy(nproc, thr_bar); + } + + if (uninitialized || team_sz_changed || tid_changed) { + thr_bar->my_level = thr_bar->depth - 1; // default for master + thr_bar->parent_tid = -1; // default for master + if (!KMP_MASTER_TID( + tid)) { // if not master, find parent thread in hierarchy + kmp_uint32 d = 0; + while (d < thr_bar->depth) { // find parent based on level of thread in + // hierarchy, and note level + kmp_uint32 rem; + if (d == thr_bar->depth - 2) { // reached level right below the master + thr_bar->parent_tid = 0; + thr_bar->my_level = d; + break; + } else if ((rem = tid % thr_bar->skip_per_level[d + 1]) != + 0) { // TODO: can we make this op faster? + // thread is not a subtree root at next level, so this is max + thr_bar->parent_tid = tid - rem; + thr_bar->my_level = d; + break; } - thr_bar->offset = 7-(tid-thr_bar->parent_tid-1); - thr_bar->old_tid = tid; - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - thr_bar->team = team; - thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - } - if (uninitialized || team_changed || tid_changed) { - thr_bar->team = team; - thr_bar->parent_bar = &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; - retval = true; - } - if (uninitialized || team_sz_changed || tid_changed) { - thr_bar->nproc = nproc; - thr_bar->leaf_kids = thr_bar->base_leaf_kids; - if (thr_bar->my_level == 0) thr_bar->leaf_kids=0; - if (thr_bar->leaf_kids && (kmp_uint32)tid+thr_bar->leaf_kids+1 > nproc) - thr_bar->leaf_kids = nproc - tid - 1; - thr_bar->leaf_state = 0; - for (int i=0; ileaf_kids; ++i) ((char *)&(thr_bar->leaf_state))[7-i] = 1; + ++d; + } } - return retval; + thr_bar->offset = 7 - (tid - thr_bar->parent_tid - 1); + thr_bar->old_tid = tid; + thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; + thr_bar->team = team; + thr_bar->parent_bar = + &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; + } + if (uninitialized || team_changed || tid_changed) { + thr_bar->team = team; + thr_bar->parent_bar = + &team->t.t_threads[thr_bar->parent_tid]->th.th_bar[bt].bb; + retval = true; + } + if (uninitialized || team_sz_changed || tid_changed) { + thr_bar->nproc = nproc; + thr_bar->leaf_kids = thr_bar->base_leaf_kids; + if (thr_bar->my_level == 0) + thr_bar->leaf_kids = 0; + if (thr_bar->leaf_kids && (kmp_uint32)tid + thr_bar->leaf_kids + 1 > nproc) + thr_bar->leaf_kids = nproc - tid - 1; + thr_bar->leaf_state = 0; + for (int i = 0; i < thr_bar->leaf_kids; ++i) + ((char *)&(thr_bar->leaf_state))[7 - i] = 1; + } + return retval; } -static void -__kmp_hierarchical_barrier_gather(enum barrier_type bt, kmp_info_t *this_thr, - int gtid, int tid, void (*reduce) (void *, void *) - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather); - register kmp_team_t *team = this_thr->th.th_team; - register kmp_bstate_t *thr_bar = & this_thr->th.th_bar[bt].bb; - register kmp_uint32 nproc = this_thr->th.th_team_nproc; - register kmp_info_t **other_threads = team->t.t_threads; - register kmp_uint64 new_state; - - int level = 
team->t.t_level; +static void __kmp_hierarchical_barrier_gather( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + void (*reduce)(void *, void *) USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_gather); + register kmp_team_t *team = this_thr->th.th_team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_uint32 nproc = this_thr->th.th_team_nproc; + register kmp_info_t **other_threads = team->t.t_threads; + register kmp_uint64 new_state; + + int level = team->t.t_level; #if OMP_40_ENABLED - if (other_threads[0]->th.th_teams_microtask) // are we inside the teams construct? - if (this_thr->th.th_teams_size.nteams > 1) - ++level; // level was not increased in teams construct for team_of_masters + if (other_threads[0] + ->th.th_teams_microtask) // are we inside the teams construct? + if (this_thr->th.th_teams_size.nteams > 1) + ++level; // level was not increased in teams construct for team_of_masters #endif - if (level == 1) thr_bar->use_oncore_barrier = 1; - else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested + if (level == 1) + thr_bar->use_oncore_barrier = 1; + else + thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) enter for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + KMP_DEBUG_ASSERT(this_thr == other_threads[this_thr->th.th_info.ds.ds_tid]); #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier imbalance - save arrive time to the thread - if(__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { - this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); - } + // Barrier imbalance - save arrive time to the thread + if (__kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 2) { + this_thr->th.th_bar_arrive_time = __itt_get_timestamp(); + } #endif - (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); - - if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) - register kmp_int32 child_tid; - new_state = (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { - if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on my b_arrived flag - kmp_uint64 leaf_state = KMP_MASTER_TID(tid) ? 
thr_bar->b_arrived | thr_bar->leaf_state : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting for leaf kids\n", - gtid, team->t.t_id, tid)); - kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - if (reduce) { - ANNOTATE_REDUCE_AFTER(reduce); - for (child_tid=tid+1; child_tid<=tid+thr_bar->leaf_kids; ++child_tid) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_BARRIER_END(other_threads[child_tid]); - (*reduce)(this_thr->th.th_local.reduce_data, other_threads[child_tid]->th.th_local.reduce_data); - } - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - (void) KMP_TEST_THEN_AND64((volatile kmp_int64 *)&thr_bar->b_arrived, ~(thr_bar->leaf_state)); // clear leaf_state bits - } - // Next, wait for higher level children on each child's b_arrived flag - for (kmp_uint32 d=1; dmy_level; ++d) { // gather lowest level threads first, but skip 0 - kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { - register kmp_info_t *child_thr = other_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(child_thr); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } + (void)__kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, + team); + + if (thr_bar->my_level) { // not a leaf (my_level==0 means leaf) + register kmp_int32 child_tid; + new_state = + (kmp_uint64)team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && + thr_bar->use_oncore_barrier) { + if (thr_bar->leaf_kids) { // First, wait for leaf children to check-in on + // my b_arrived flag + kmp_uint64 leaf_state = + KMP_MASTER_TID(tid) + ? 
thr_bar->b_arrived | thr_bar->leaf_state + : team->t.t_bar[bt].b_arrived | thr_bar->leaf_state; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) waiting " + "for leaf kids\n", + gtid, team->t.t_id, tid)); + kmp_flag_64 flag(&thr_bar->b_arrived, leaf_state); + flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + if (reduce) { + ANNOTATE_REDUCE_AFTER(reduce); + for (child_tid = tid + 1; child_tid <= tid + thr_bar->leaf_kids; + ++child_tid) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " + "T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid)); + ANNOTATE_BARRIER_END(other_threads[child_tid]); + (*reduce)(this_thr->th.th_local.reduce_data, + other_threads[child_tid]->th.th_local.reduce_data); + } + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); } - else { // Blocktime is not infinite - for (kmp_uint32 d=0; dmy_level; ++d) { // Gather lowest level threads first - kmp_uint32 last = tid+thr_bar->skip_per_level[d+1], skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { - register kmp_info_t *child_thr = other_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait T#%d(%d:%d) " - "arrived(%p) == %llu\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_arrived, new_state)); - kmp_flag_64 flag(&child_bar->b_arrived, new_state); - flag.wait(this_thr, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(child_thr); - if (reduce) { - KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += T#%d(%d:%d)\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid)); - ANNOTATE_REDUCE_AFTER(reduce); - (*reduce)(this_thr->th.th_local.reduce_data, child_thr->th.th_local.reduce_data); - ANNOTATE_REDUCE_BEFORE(reduce); - ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); - } - } - } - } - } - // All subordinates are gathered; now release parent if not master thread - - if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " - "arrived(%p): %llu => %llu\n", gtid, team->t.t_id, tid, - __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, thr_bar->parent_tid, - &thr_bar->b_arrived, thr_bar->b_arrived, thr_bar->b_arrived+KMP_BARRIER_STATE_BUMP)); - /* Mark arrival to parent: After performing this write, a worker thread may not assume that - the team is valid any more - it could be deallocated by the master thread at any time. 
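
The leaf check-in above packs one byte per leaf child into the parent's 64-bit b_arrived word: each leaf sets its own byte (at its offset), the parent waits until every leaf byte is set, then clears them with an atomic AND, which is what KMP_TEST_THEN_AND64(..., ~leaf_state) does. Below is a toy model using std::atomic, not the runtime's kmp_flag_oncore; the real flag also carries the barrier generation count in the same word, and leaf_kids here is an assumed value.

// Toy model (not the runtime code) of byte-per-leaf arrival flags in one
// 64-bit word, with the parent clearing the leaf bytes for reuse.
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const unsigned leaf_kids = 3; // e.g. leaves tid+1 .. tid+3 of one parent
  std::atomic<uint64_t> b_arrived{0};

  // leaf_state: one byte per leaf kid, filled from the top byte down.
  uint64_t leaf_state = 0;
  for (unsigned i = 0; i < leaf_kids; ++i)
    ((unsigned char *)&leaf_state)[7 - i] = 1;

  std::vector<std::thread> leaves;
  for (unsigned i = 0; i < leaf_kids; ++i)
    leaves.emplace_back([&b_arrived, i] {
      uint64_t my_byte = 0;
      ((unsigned char *)&my_byte)[7 - i] = 1;
      b_arrived.fetch_or(my_byte); // check in on the parent's word
    });

  // Parent: wait until every leaf byte is set, then clear them for reuse.
  while ((b_arrived.load() & leaf_state) != leaf_state)
    ;
  b_arrived.fetch_and(~leaf_state);
  printf("all %u leaf kids arrived; leaf bits cleared (word=0x%016llx)\n",
         leaf_kids, (unsigned long long)b_arrived.load());

  for (auto &t : leaves)
    t.join();
  return 0;
}

The release direction reuses the same packing: the parent can wake all of its leaves at once by OR-ing leaf_state onto its own b_go, which is what the "Release all children at once" branch further down does.
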
*/ - if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME - || !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived flag; release it - ANNOTATE_BARRIER_BEGIN(this_thr); - kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); - flag.release(); + (void)KMP_TEST_THEN_AND64( + (volatile kmp_int64 *)&thr_bar->b_arrived, + ~(thr_bar->leaf_state)); // clear leaf_state bits + } + // Next, wait for higher level children on each child's b_arrived flag + for (kmp_uint32 d = 1; d < thr_bar->my_level; + ++d) { // gather lowest level threads first, but skip 0 + kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], + skip = thr_bar->skip_per_level[d]; + if (last > nproc) + last = nproc; + for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " + "T#%d(%d:%d) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_arrived, new_state)); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(child_thr); + if (reduce) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " + "T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid)); + ANNOTATE_REDUCE_AFTER(reduce); + (*reduce)(this_thr->th.th_local.reduce_data, + child_thr->th.th_local.reduce_data); + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); + } } - else { // Leaf does special release on the "offset" bits of parent's b_arrived flag - thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset); - flag.set_waiter(other_threads[thr_bar->parent_tid]); - flag.release(); + } + } else { // Blocktime is not infinite + for (kmp_uint32 d = 0; d < thr_bar->my_level; + ++d) { // Gather lowest level threads first + kmp_uint32 last = tid + thr_bar->skip_per_level[d + 1], + skip = thr_bar->skip_per_level[d]; + if (last > nproc) + last = nproc; + for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { + register kmp_info_t *child_thr = other_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) wait " + "T#%d(%d:%d) " + "arrived(%p) == %llu\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_arrived, new_state)); + kmp_flag_64 flag(&child_bar->b_arrived, new_state); + flag.wait(this_thr, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(child_thr); + if (reduce) { + KA_TRACE(100, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) += " + "T#%d(%d:%d)\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid)); + ANNOTATE_REDUCE_AFTER(reduce); + (*reduce)(this_thr->th.th_local.reduce_data, + child_thr->th.th_local.reduce_data); + ANNOTATE_REDUCE_BEFORE(reduce); + ANNOTATE_REDUCE_BEFORE(&team->t.t_bar); + } } - } else { // Master thread needs to update the team's b_arrived value - team->t.t_bar[bt].b_arrived = new_state; - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d arrived(%p) = %llu\n", - gtid, team->t.t_id, tid, 
team->t.t_id, &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } } - // Is the team access below unsafe or just technically invalid? - KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + } + // All subordinates are gathered; now release parent if not master thread + + if (!KMP_MASTER_TID(tid)) { // worker threads release parent in hierarchy + KA_TRACE( + 20, + ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) releasing T#%d(%d:%d) " + "arrived(%p): %llu => %llu\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(thr_bar->parent_tid, team), team->t.t_id, + thr_bar->parent_tid, &thr_bar->b_arrived, thr_bar->b_arrived, + thr_bar->b_arrived + KMP_BARRIER_STATE_BUMP)); + /* Mark arrival to parent: After performing this write, a worker thread may + not assume that the team is valid any more - it could be deallocated by + the master thread at any time. */ + if (thr_bar->my_level || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || + !thr_bar->use_oncore_barrier) { // Parent is waiting on my b_arrived + // flag; release it + ANNOTATE_BARRIER_BEGIN(this_thr); + kmp_flag_64 flag(&thr_bar->b_arrived, other_threads[thr_bar->parent_tid]); + flag.release(); + } else { // Leaf does special release on the "offset" bits of parent's + // b_arrived flag + thr_bar->b_arrived = team->t.t_bar[bt].b_arrived + KMP_BARRIER_STATE_BUMP; + kmp_flag_oncore flag(&thr_bar->parent_bar->b_arrived, thr_bar->offset); + flag.set_waiter(other_threads[thr_bar->parent_tid]); + flag.release(); + } + } else { // Master thread needs to update the team's b_arrived value + team->t.t_bar[bt].b_arrived = new_state; + KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) set team %d " + "arrived(%p) = %llu\n", + gtid, team->t.t_id, tid, team->t.t_id, + &team->t.t_bar[bt].b_arrived, team->t.t_bar[bt].b_arrived)); + } + // Is the team access below unsafe or just technically invalid? 
+ KA_TRACE(20, ("__kmp_hierarchical_barrier_gather: T#%d(%d:%d) exit for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } -static void -__kmp_hierarchical_barrier_release(enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, - int propagate_icvs - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); - register kmp_team_t *team; - register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; - register kmp_uint32 nproc; - bool team_change = false; // indicates on-core barrier shouldn't be used - - if (KMP_MASTER_TID(tid)) { - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master entered barrier type %d\n", - gtid, team->t.t_id, tid, bt)); - } - else { // Worker threads - // Wait for parent thread to release me - if (!thr_bar->use_oncore_barrier || __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME - || thr_bar->my_level != 0 || thr_bar->team == NULL) { - // Use traditional method of waiting on my own b_go flag - thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; - kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - ANNOTATE_BARRIER_END(this_thr); - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } - else { // Thread barrier data is initialized, this is a leaf, blocktime is infinite, not nested - // Wait on my "offset" bits on parent's b_go flag - thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; - kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, thr_bar->offset, - bt, this_thr - USE_ITT_BUILD_ARG(itt_sync_obj) ); - flag.wait(this_thr, TRUE); - if (thr_bar->wait_flag == KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - } - else { // Reset my bits on parent's b_go flag - ((char*)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0; - } - } - thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; - // Early exit for reaping threads releasing forkjoin barrier - if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) - return; - // The worker thread may now assume that the team is valid. - team = __kmp_threads[gtid]->th.th_team; - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", - gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); - KMP_MB(); // Flush all pending memory write invalidates. 
+static void __kmp_hierarchical_barrier_release( + enum barrier_type bt, kmp_info_t *this_thr, int gtid, int tid, + int propagate_icvs USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_hier_release); + register kmp_team_t *team; + register kmp_bstate_t *thr_bar = &this_thr->th.th_bar[bt].bb; + register kmp_uint32 nproc; + bool team_change = false; // indicates on-core barrier shouldn't be used + + if (KMP_MASTER_TID(tid)) { + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) master " + "entered barrier type %d\n", + gtid, team->t.t_id, tid, bt)); + } else { // Worker threads + // Wait for parent thread to release me + if (!thr_bar->use_oncore_barrier || + __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME || thr_bar->my_level != 0 || + thr_bar->team == NULL) { + // Use traditional method of waiting on my own b_go flag + thr_bar->wait_flag = KMP_BARRIER_OWN_FLAG; + kmp_flag_64 flag(&thr_bar->b_go, KMP_BARRIER_STATE_BUMP); + flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + ANNOTATE_BARRIER_END(this_thr); + TCW_8(thr_bar->b_go, + KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + } else { // Thread barrier data is initialized, this is a leaf, blocktime is + // infinite, not nested + // Wait on my "offset" bits on parent's b_go flag + thr_bar->wait_flag = KMP_BARRIER_PARENT_FLAG; + kmp_flag_oncore flag(&thr_bar->parent_bar->b_go, KMP_BARRIER_STATE_BUMP, + thr_bar->offset, bt, + this_thr USE_ITT_BUILD_ARG(itt_sync_obj)); + flag.wait(this_thr, TRUE); + if (thr_bar->wait_flag == + KMP_BARRIER_SWITCHING) { // Thread was switched to own b_go + TCW_8(thr_bar->b_go, + KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + } else { // Reset my bits on parent's b_go flag + ((char *)&(thr_bar->parent_bar->b_go))[thr_bar->offset] = 0; + } } + thr_bar->wait_flag = KMP_BARRIER_NOT_WAITING; + // Early exit for reaping threads releasing forkjoin barrier + if (bt == bs_forkjoin_barrier && TCR_4(__kmp_global.g.g_done)) + return; + // The worker thread may now assume that the team is valid. + team = __kmp_threads[gtid]->th.th_team; + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); - nproc = this_thr->th.th_team_nproc; - int level = team->t.t_level; + KA_TRACE( + 20, + ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) set go(%p) = %u\n", + gtid, team->t.t_id, tid, &thr_bar->b_go, KMP_INIT_BARRIER_STATE)); + KMP_MB(); // Flush all pending memory write invalidates. + } + + nproc = this_thr->th.th_team_nproc; + int level = team->t.t_level; #if OMP_40_ENABLED - if (team->t.t_threads[0]->th.th_teams_microtask ) { // are we inside the teams construct? - if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && this_thr->th.th_teams_level == level) - ++level; // level was not increased in teams construct for team_of_workers - if( this_thr->th.th_teams_size.nteams > 1 ) - ++level; // level was not increased in teams construct for team_of_masters - } + if (team->t.t_threads[0] + ->th.th_teams_microtask) { // are we inside the teams construct? 
+ if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && + this_thr->th.th_teams_level == level) + ++level; // level was not increased in teams construct for team_of_workers + if (this_thr->th.th_teams_size.nteams > 1) + ++level; // level was not increased in teams construct for team_of_masters + } #endif - if (level == 1) thr_bar->use_oncore_barrier = 1; - else thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested - - // If the team size has increased, we still communicate with old leaves via oncore barrier. - unsigned short int old_leaf_kids = thr_bar->leaf_kids; - kmp_uint64 old_leaf_state = thr_bar->leaf_state; - team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, tid, team); - // But if the entire team changes, we won't use oncore barrier at all - if (team_change) old_leaf_kids = 0; + if (level == 1) + thr_bar->use_oncore_barrier = 1; + else + thr_bar->use_oncore_barrier = 0; // Do not use oncore barrier when nested + + // If the team size has increased, we still communicate with old leaves via + // oncore barrier. + unsigned short int old_leaf_kids = thr_bar->leaf_kids; + kmp_uint64 old_leaf_state = thr_bar->leaf_state; + team_change = __kmp_init_hierarchical_barrier_thread(bt, thr_bar, nproc, gtid, + tid, team); + // But if the entire team changes, we won't use oncore barrier at all + if (team_change) + old_leaf_kids = 0; #if KMP_BARRIER_ICV_PUSH - if (propagate_icvs) { - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - if (KMP_MASTER_TID(tid)) { // master already has copy in final destination; copy - copy_icvs(&thr_bar->th_fixed_icvs, &team->t.t_implicit_task_taskdata[tid].td_icvs); - } - else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { // optimization for inf blocktime - if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) - // leaves (on-core children) pull parent's fixed ICVs directly to local ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->parent_bar->th_fixed_icvs); - // non-leaves will get ICVs piggybacked with b_go via NGO store - } - else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs - if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can access - copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); - else // leaves copy parent's fixed ICVs directly to local ICV store - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &thr_bar->parent_bar->th_fixed_icvs); - } + if (propagate_icvs) { + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, + FALSE); + if (KMP_MASTER_TID( + tid)) { // master already has copy in final destination; copy + copy_icvs(&thr_bar->th_fixed_icvs, + &team->t.t_implicit_task_taskdata[tid].td_icvs); + } else if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && + thr_bar->use_oncore_barrier) { // optimization for inf blocktime + if (!thr_bar->my_level) // I'm a leaf in the hierarchy (my_level==0) + // leaves (on-core children) pull parent's fixed ICVs directly to local + // ICV store + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->parent_bar->th_fixed_icvs); + // non-leaves will get ICVs piggybacked with b_go via NGO store + } else { // blocktime is not infinite; pull ICVs from parent's fixed ICVs + if (thr_bar->my_level) // not a leaf; copy ICVs to my fixed ICVs child can + // access + copy_icvs(&thr_bar->th_fixed_icvs, &thr_bar->parent_bar->th_fixed_icvs); + else // 
leaves copy parent's fixed ICVs directly to local ICV store + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->parent_bar->th_fixed_icvs); } + } #endif // KMP_BARRIER_ICV_PUSH - // Now, release my children - if (thr_bar->my_level) { // not a leaf - register kmp_int32 child_tid; - kmp_uint32 last; - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && thr_bar->use_oncore_barrier) { - if (KMP_MASTER_TID(tid)) { // do a flat release - // Set local b_go to bump children via NGO store of the cache line containing IVCs and b_go. - thr_bar->b_go = KMP_BARRIER_STATE_BUMP; - // Use ngo stores if available; b_go piggybacks in the last 8 bytes of the cache line - ngo_load(&thr_bar->th_fixed_icvs); - // This loops over all the threads skipping only the leaf nodes in the hierarchy - for (child_tid=thr_bar->skip_per_level[1]; child_tid<(int)nproc; child_tid+=thr_bar->skip_per_level[1]) { - register kmp_bstate_t *child_bar = &team->t.t_threads[child_tid]->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" - " go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Use ngo store (if available) to both store ICVs and release child via child's b_go - ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); - } - ngo_sync(); - } - TCW_8(thr_bar->b_go, KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time - // Now, release leaf children - if (thr_bar->leaf_kids) { // if there are any - // We test team_change on the off-chance that the level 1 team changed. - if (team_change || old_leaf_kids < thr_bar->leaf_kids) { // some old leaf_kids, some new - if (old_leaf_kids) { // release old leaf kids - thr_bar->b_go |= old_leaf_state; - } - // Release new leaf kids - last = tid+thr_bar->skip_per_level[1]; - if (last > nproc) last = nproc; - for (child_tid=tid+1+old_leaf_kids; child_tid<(int)last; ++child_tid) { // skip_per_level[0]=1 - register kmp_info_t *child_thr = team->t.t_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" - " T#%d(%d:%d) go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } - else { // Release all children at once with leaf_state bits on my own b_go flag - thr_bar->b_go |= thr_bar->leaf_state; - } - } + // Now, release my children + if (thr_bar->my_level) { // not a leaf + register kmp_int32 child_tid; + kmp_uint32 last; + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME && + thr_bar->use_oncore_barrier) { + if (KMP_MASTER_TID(tid)) { // do a flat release + // Set local b_go to bump children via NGO store of the cache line + // containing IVCs and b_go. 
+ thr_bar->b_go = KMP_BARRIER_STATE_BUMP; + // Use ngo stores if available; b_go piggybacks in the last 8 bytes of + // the cache line + ngo_load(&thr_bar->th_fixed_icvs); + // This loops over all the threads skipping only the leaf nodes in the + // hierarchy + for (child_tid = thr_bar->skip_per_level[1]; child_tid < (int)nproc; + child_tid += thr_bar->skip_per_level[1]) { + register kmp_bstate_t *child_bar = + &team->t.t_threads[child_tid]->th.th_bar[bt].bb; + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " + "releasing T#%d(%d:%d)" + " go(%p): %u => %u\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Use ngo store (if available) to both store ICVs and release child + // via child's b_go + ngo_store_go(&child_bar->th_fixed_icvs, &thr_bar->th_fixed_icvs); } - else { // Blocktime is not infinite; do a simple hierarchical release - for (int d=thr_bar->my_level-1; d>=0; --d) { // Release highest level threads first - last = tid+thr_bar->skip_per_level[d+1]; - kmp_uint32 skip = thr_bar->skip_per_level[d]; - if (last > nproc) last = nproc; - for (child_tid=tid+skip; child_tid<(int)last; child_tid+=skip) { - register kmp_info_t *child_thr = team->t.t_threads[child_tid]; - register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing T#%d(%d:%d)" - " go(%p): %u => %u\n", - gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), - team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, - child_bar->b_go + KMP_BARRIER_STATE_BUMP)); - // Release child using child's b_go flag - ANNOTATE_BARRIER_BEGIN(child_thr); - kmp_flag_64 flag(&child_bar->b_go, child_thr); - flag.release(); - } - } + ngo_sync(); + } + TCW_8(thr_bar->b_go, + KMP_INIT_BARRIER_STATE); // Reset my b_go flag for next time + // Now, release leaf children + if (thr_bar->leaf_kids) { // if there are any + // We test team_change on the off-chance that the level 1 team changed. 
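
The NGO release above works because b_go is deliberately tucked into the last 8 bytes of the cache line that holds the fixed ICVs, so a single line-sized store both publishes the ICVs and flips the child's go flag. Below is a standalone sketch of that layout under assumed sizes; the struct and field names are illustrative, not kmp_bstate_t, and ngo_store_go() uses streaming stores when available while a plain line copy is modeled here.

// Standalone sketch (assumed layout, not the runtime's): ICV payload plus a
// go flag packed into one 64-byte cache line, released with one line store.
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstring>

constexpr std::size_t CACHE_LINE = 64;

struct alignas(CACHE_LINE) fixed_icvs_line {
  unsigned char icvs[CACHE_LINE - sizeof(uint64_t)]; // stand-in for the ICVs
  volatile uint64_t b_go; // tucked into the last 8 bytes of the line
};
static_assert(sizeof(fixed_icvs_line) == CACHE_LINE, "exactly one line");
static_assert(offsetof(fixed_icvs_line, b_go) == CACHE_LINE - 8,
              "b_go piggybacks in the last 8 bytes");

int main() {
  fixed_icvs_line parent = {}, child = {};
  std::memset(parent.icvs, 0xab, sizeof(parent.icvs)); // pretend ICV payload
  parent.b_go = 1; // stand-in for KMP_BARRIER_STATE_BUMP: "go" for the child

  // One line-sized store publishes the ICVs and flips the child's go flag.
  std::memcpy(&child, &parent, CACHE_LINE);

  printf("child released: b_go=%llu, first ICV byte=0x%02x\n",
         (unsigned long long)child.b_go, (unsigned)child.icvs[0]);
  return 0;
}

This is presumably also why ngo_sync() follows the store loop above: with streaming (non-temporal) stores, a fence is needed before the stores are guaranteed visible to the children.
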
+ if (team_change || + old_leaf_kids < thr_bar->leaf_kids) { // some old, some new + if (old_leaf_kids) { // release old leaf kids + thr_bar->b_go |= old_leaf_state; + } + // Release new leaf kids + last = tid + thr_bar->skip_per_level[1]; + if (last > nproc) + last = nproc; + for (child_tid = tid + 1 + old_leaf_kids; child_tid < (int)last; + ++child_tid) { // skip_per_level[0]=1 + register kmp_info_t *child_thr = team->t.t_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + KA_TRACE( + 20, + ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) releasing" + " T#%d(%d:%d) go(%p): %u => %u\n", + gtid, team->t.t_id, tid, __kmp_gtid_from_tid(child_tid, team), + team->t.t_id, child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child using child's b_go flag + ANNOTATE_BARRIER_BEGIN(child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } + } else { // Release all children at once with leaf_state bits on my own + // b_go flag + thr_bar->b_go |= thr_bar->leaf_state; } + } + } else { // Blocktime is not infinite; do a simple hierarchical release + for (int d = thr_bar->my_level - 1; d >= 0; + --d) { // Release highest level threads first + last = tid + thr_bar->skip_per_level[d + 1]; + kmp_uint32 skip = thr_bar->skip_per_level[d]; + if (last > nproc) + last = nproc; + for (child_tid = tid + skip; child_tid < (int)last; child_tid += skip) { + register kmp_info_t *child_thr = team->t.t_threads[child_tid]; + register kmp_bstate_t *child_bar = &child_thr->th.th_bar[bt].bb; + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) " + "releasing T#%d(%d:%d) go(%p): %u => %u\n", + gtid, team->t.t_id, tid, + __kmp_gtid_from_tid(child_tid, team), team->t.t_id, + child_tid, &child_bar->b_go, child_bar->b_go, + child_bar->b_go + KMP_BARRIER_STATE_BUMP)); + // Release child using child's b_go flag + ANNOTATE_BARRIER_BEGIN(child_thr); + kmp_flag_64 flag(&child_bar->b_go, child_thr); + flag.release(); + } + } + } #if KMP_BARRIER_ICV_PUSH - if (propagate_icvs && !KMP_MASTER_TID(tid)) // non-leaves copy ICVs from fixed ICVs to local dest - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, &thr_bar->th_fixed_icvs); + if (propagate_icvs && !KMP_MASTER_TID(tid)) + // non-leaves copy ICVs from fixed ICVs to local dest + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &thr_bar->th_fixed_icvs); #endif // KMP_BARRIER_ICV_PUSH - } - KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for barrier type %d\n", - gtid, team->t.t_id, tid, bt)); + } + KA_TRACE(20, ("__kmp_hierarchical_barrier_release: T#%d(%d:%d) exit for " + "barrier type %d\n", + gtid, team->t.t_id, tid, bt)); } -// ---------------------------- End of Barrier Algorithms ---------------------------- + +// End of Barrier Algorithms // Internal function to do a barrier. /* If is_split is true, do a split barrier, otherwise, do a plain barrier - If reduce is non-NULL, do a split reduction barrier, otherwise, do a split barrier + If reduce is non-NULL, do a split reduction barrier, otherwise, do a split + barrier Returns 0 if master thread, 1 if worker thread. 
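
A toy model of the split-barrier contract described in the comment just above: the gather and release halves are separable, so the thread that returns 0 (the master) can run a reduction epilogue between them while the threads that return 1 are already parked waiting for the deferred release, the role __kmp_end_split_barrier plays in the runtime. Everything below is illustrative and self-contained; it is not the runtime's code path, and all names are made up for the sketch.

// Toy model (not runtime code) of a split reduction barrier: gather, then a
// master-only reduction epilogue, then a deferred release.
#include <atomic>
#include <cstdio>
#include <thread>
#include <vector>

int main() {
  const int nthreads = 4;
  std::atomic<int> arrived{0};
  std::atomic<int> go{0}; // deferred release flag
  std::vector<long> contrib(nthreads);

  auto split_reduction_barrier = [&](int tid) {
    contrib[tid] = tid + 1; // this thread's share of the reduction
    arrived.fetch_add(1);   // gather: check in
    if (tid == 0) {
      // "returns 0": the master leaves the gather without releasing anyone.
      while (arrived.load() < nthreads)
        ;
      long sum = 0; // reduction epilogue runs between gather and release
      for (long c : contrib)
        sum += c;
      printf("master reduced to %ld, now releasing\n", sum);
      go.store(1); // the deferred release (__kmp_end_split_barrier's job)
    } else {
      // "returns 1": workers go straight into the release wait.
      while (go.load() == 0)
        ;
    }
  };

  std::vector<std::thread> team;
  for (int t = 0; t < nthreads; ++t)
    team.emplace_back(split_reduction_barrier, t);
  for (auto &t : team)
    t.join();
  return 0;
}
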
*/ -int -__kmp_barrier(enum barrier_type bt, int gtid, int is_split, size_t reduce_size, - void *reduce_data, void (*reduce)(void *, void *)) -{ - KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); - register int tid = __kmp_tid_from_gtid(gtid); - register kmp_info_t *this_thr = __kmp_threads[gtid]; - register kmp_team_t *team = this_thr->th.th_team; - register int status = 0; - ident_t *loc = __kmp_threads[gtid]->th.th_ident; +int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, + size_t reduce_size, void *reduce_data, + void (*reduce)(void *, void *)) { + KMP_TIME_PARTITIONED_BLOCK(OMP_plain_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); + register int tid = __kmp_tid_from_gtid(gtid); + register kmp_info_t *this_thr = __kmp_threads[gtid]; + register kmp_team_t *team = this_thr->th.th_team; + register int status = 0; + ident_t *loc = __kmp_threads[gtid]->th.th_ident; #if OMPT_SUPPORT - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; #endif - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); + KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) has arrived\n", gtid, + __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); + ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled) { #if OMPT_BLAME - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; + my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; #if OMPT_TRACE - if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_end)( - my_parallel_id, my_task_id); - } - } -#endif - if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - my_parallel_id, my_task_id); - } + if (this_thr->th.ompt_thread_info.state == ompt_state_wait_single) { + if (ompt_callbacks.ompt_callback(ompt_event_single_others_end)) { + ompt_callbacks.ompt_callback(ompt_event_single_others_end)( + my_parallel_id, my_task_id); + } + } #endif - // It is OK to report the barrier state after the barrier begin callback. - // According to the OMPT specification, a compliant implementation may - // even delay reporting this state until the barrier begins to wait. - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + if (ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_begin)(my_parallel_id, + my_task_id); } #endif + // It is OK to report the barrier state after the barrier begin callback. + // According to the OMPT specification, a compliant implementation may + // even delay reporting this state until the barrier begins to wait. + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + } +#endif - if (! team->t.t_serialized) { + if (!team->t.t_serialized) { #if USE_ITT_BUILD - // This value will be used in itt notify events below. - void *itt_sync_obj = NULL; -# if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); -# endif + // This value will be used in itt notify events below. 
+ void *itt_sync_obj = NULL; +#if USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); +#endif #endif /* USE_ITT_BUILD */ - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); - } + if (__kmp_tasking_mode == tskm_extra_barrier) { + __kmp_tasking_barrier(team, this_thr, gtid); + KA_TRACE(15, + ("__kmp_barrier: T#%d(%d:%d) past tasking barrier\n", gtid, + __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid))); + } - /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when - the team struct is not guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being performance-critical. - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + /* Copy the blocktime info to the thread, where __kmp_wait_template() can + access it when the team struct is not guaranteed to exist. */ + // See note about the corresponding code in __kmp_join_barrier() being + // performance-critical. + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { #if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + this_thr->th.th_team_bt_intervals = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; #else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); + this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); #endif - } + } #if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ #if USE_DEBUGGER - // Let the debugger know: the thread arrived to the barrier and waiting. - if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure. - team->t.t_bar[bt].b_master_arrived += 1; - } else { - this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; - } // if + // Let the debugger know: the thread arrived to the barrier and waiting. + if (KMP_MASTER_TID(tid)) { // Master counter is stored in team structure. 
+ team->t.t_bar[bt].b_master_arrived += 1; + } else { + this_thr->th.th_bar[bt].bb.b_worker_arrived += 1; + } // if #endif /* USE_DEBUGGER */ - if (reduce != NULL) { - //KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 - this_thr->th.th_local.reduce_data = reduce_data; - } + if (reduce != NULL) { + // KMP_DEBUG_ASSERT( is_split == TRUE ); // #C69956 + this_thr->th.th_local.reduce_data = reduce_data; + } - if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) - __kmp_task_team_setup(this_thr, team, 0); // use 0 to only setup the current team if nthreads > 1 + if (KMP_MASTER_TID(tid) && __kmp_tasking_mode != tskm_immediate_exec) + __kmp_task_team_setup( + this_thr, team, + 0); // use 0 to only setup the current team if nthreads > 1 - switch (__kmp_barrier_gather_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear - __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits to 0; use linear - __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, reduce - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } + switch (__kmp_barrier_gather_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits + // to 0; use linear + __kmp_hyper_barrier_gather(bt, this_thr, gtid, tid, + reduce USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_gather(bt, this_thr, gtid, tid, + reduce USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bt]); // don't set branch bits + // to 0; use linear + __kmp_tree_barrier_gather(bt, this_thr, gtid, tid, + reduce USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + default: { + __kmp_linear_barrier_gather(bt, this_thr, gtid, tid, + reduce USE_ITT_BUILD_ARG(itt_sync_obj)); + } + } - KMP_MB(); + KMP_MB(); - if (KMP_MASTER_TID(tid)) { - status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } + if (KMP_MASTER_TID(tid)) { + status = 0; + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); + } #if USE_DEBUGGER - // Let the debugger know: All threads are arrived and starting leaving the barrier. - team->t.t_bar[bt].b_team_arrived += 1; + // Let the debugger know: All threads are arrived and starting leaving the + // barrier. + team->t.t_bar[bt].b_team_arrived += 1; #endif #if OMP_40_ENABLED // Reset cancellation flag for worksharing constructs - if(team->t.t_cancel_request == cancel_loop || - team->t.t_cancel_request == cancel_sections ) { + if (team->t.t_cancel_request == cancel_loop || + team->t.t_cancel_request == cancel_sections) { team->t.t_cancel_request = cancel_noreq; } #endif #if USE_ITT_BUILD - /* TODO: In case of split reduction barrier, master thread may send acquired event early, - before the final summation into the shared variable is done (final summation can be a - long operation for array reductions). 
*/ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); + /* TODO: In case of split reduction barrier, master thread may send + acquired event early, before the final summation into the shared + variable is done (final summation can be a long operation for array + reductions). */ + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier - report frame end (only if active_level == 1) - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && + // Barrier - report frame end (only if active_level == 1) + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && + __kmp_forkjoin_frames_mode && #if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && + this_thr->th.th_teams_microtask == NULL && #endif - team->t.t_active_level == 1) - { - kmp_uint64 cur_time = __itt_get_timestamp(); - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch(__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - case 2: // AC 2015-01-19: currently does not work for hierarchical (to be fixed) - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); - break; - case 3: - if( __itt_metadata_add_ptr ) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below - this_thr->th.th_bar_arrive_time = 0; - for (i=1; ith.th_bar_arrive_time ); - other_threads[i]->th.th_bar_arrive_time = 0; - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, (kmp_uint64)( reduce != NULL)); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } + team->t.t_active_level == 1) { + kmp_uint64 cur_time = __itt_get_timestamp(); + kmp_info_t **other_threads = team->t.t_threads; + int nproc = this_thr->th.th_team_nproc; + int i; + switch (__kmp_forkjoin_frames_mode) { + case 1: + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, + loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + case 2: // AC 2015-01-19: currently does not work for hierarchical (to + // be fixed) + __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, + 1, loc, nproc); + break; + case 3: + if (__itt_metadata_add_ptr) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; + // Set arrive time to zero to be able to check it in + // __kmp_invoke_task(); the same is done inside the loop below + this_thr->th.th_bar_arrive_time = 0; + for (i = 1; i < nproc; ++i) { + delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); + other_threads[i]->th.th_bar_arrive_time = 0; } + __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, + cur_time, delta, + (kmp_uint64)(reduce != NULL)); + } + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, + loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + } + } #endif /* USE_ITT_BUILD */ - } else { - status = 1; + } else { + status = 1; #if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); + if 
(__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - } - if (status == 1 || ! is_split) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } - } + } + if (status == 1 || !is_split) { + switch (__kmp_barrier_release_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_release( + bt, this_thr, gtid, tid, FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_tree_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + default: { + __kmp_linear_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(itt_sync_obj)); + } + } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } + } #if USE_ITT_BUILD - /* GEH: TODO: Move this under if-condition above and also include in - __kmp_end_split_barrier(). This will more accurately represent the actual release time - of the threads for split barriers. */ - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); + /* GEH: TODO: Move this under if-condition above and also include in + __kmp_end_split_barrier(). This will more accurately represent the actual + release time of the threads for split barriers. */ + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - } else { // Team is serialized. - status = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) { + } else { // Team is serialized. 
+ status = 0; + if (__kmp_tasking_mode != tskm_immediate_exec) { #if OMP_45_ENABLED - if ( this_thr->th.th_task_team != NULL ) { - void *itt_sync_obj = NULL; + if (this_thr->th.th_task_team != NULL) { + void *itt_sync_obj = NULL; #if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); - __kmp_itt_barrier_starting(gtid, itt_sync_obj); - } + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + itt_sync_obj = __kmp_itt_barrier_object(gtid, bt, 1); + __kmp_itt_barrier_starting(gtid, itt_sync_obj); + } #endif - KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == TRUE); - __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj)); - __kmp_task_team_setup(this_thr, team, 0); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team->tt.tt_found_proxy_tasks == + TRUE); + __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); + __kmp_task_team_setup(this_thr, team, 0); #if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - } + } #else - // The task team should be NULL for serialized code (tasks will be executed immediately) - KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); + // The task team should be NULL for serialized code (tasks will be + // executed immediately) + KMP_DEBUG_ASSERT(team->t.t_task_team[this_thr->th.th_task_state] == NULL); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == NULL); #endif - } } - KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", - gtid, __kmp_team_from_gtid(gtid)->t.t_id, __kmp_tid_from_gtid(gtid), status)); + } + KA_TRACE(15, ("__kmp_barrier: T#%d(%d:%d) is leaving with return value %d\n", + gtid, __kmp_team_from_gtid(gtid)->t.t_id, + __kmp_tid_from_gtid(gtid), status)); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled) { #if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - my_parallel_id, my_task_id); - } -#endif - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_end)(my_parallel_id, + my_task_id); } #endif - ANNOTATE_BARRIER_END(&team->t.t_bar); + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + } +#endif + ANNOTATE_BARRIER_END(&team->t.t_bar); - return status; + return status; } - -void -__kmp_end_split_barrier(enum barrier_type bt, int gtid) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); - KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); - int tid = __kmp_tid_from_gtid(gtid); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = this_thr->th.th_team; - - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); - if (!team->t.t_serialized) { - if (KMP_MASTER_GTID(gtid)) { - switch (__kmp_barrier_release_pattern[bt]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL)); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); - __kmp_tree_barrier_release(bt, 
this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - break; - } - default: { - __kmp_linear_barrier_release(bt, this_thr, gtid, tid, FALSE - USE_ITT_BUILD_ARG(NULL) ); - } - } - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } // if - } +void __kmp_end_split_barrier(enum barrier_type bt, int gtid) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_end_split_barrier); + KMP_SET_THREAD_STATE_BLOCK(PLAIN_BARRIER); + int tid = __kmp_tid_from_gtid(gtid); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; + + ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); + if (!team->t.t_serialized) { + if (KMP_MASTER_GTID(gtid)) { + switch (__kmp_barrier_release_pattern[bt]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_hyper_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(NULL)); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(NULL)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bt]); + __kmp_tree_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(NULL)); + break; + } + default: { + __kmp_linear_barrier_release(bt, this_thr, gtid, tid, + FALSE USE_ITT_BUILD_ARG(NULL)); + } + } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } // if } - ANNOTATE_BARRIER_END(&team->t.t_bar); + } + ANNOTATE_BARRIER_END(&team->t.t_bar); } - -void -__kmp_join_barrier(int gtid) -{ - KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); - KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - register kmp_info_t *this_thr = __kmp_threads[gtid]; - register kmp_team_t *team; - register kmp_uint nproc; - kmp_info_t *master_thread; - int tid; +void __kmp_join_barrier(int gtid) { + KMP_TIME_PARTITIONED_BLOCK(OMP_join_barrier); + KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); + register kmp_info_t *this_thr = __kmp_threads[gtid]; + register kmp_team_t *team; + register kmp_uint nproc; + kmp_info_t *master_thread; + int tid; #ifdef KMP_DEBUG - int team_id; + int team_id; #endif /* KMP_DEBUG */ #if USE_ITT_BUILD - void *itt_sync_obj = NULL; -# if USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need - // Get object created at fork_barrier - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); -# endif + void *itt_sync_obj = NULL; +#if USE_ITT_NOTIFY + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) // Don't call routine without need + // Get object created at fork_barrier + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); +#endif #endif /* USE_ITT_BUILD */ - KMP_MB(); + KMP_MB(); - // Get current info - team = this_thr->th.th_team; - nproc = this_thr->th.th_team_nproc; - KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc); - tid = __kmp_tid_from_gtid(gtid); + // Get current info + team = this_thr->th.th_team; + nproc = this_thr->th.th_team_nproc; + KMP_DEBUG_ASSERT((int)nproc == team->t.t_nproc); + tid = __kmp_tid_from_gtid(gtid); #ifdef KMP_DEBUG - team_id = team->t.t_id; + team_id = team->t.t_id; #endif /* KMP_DEBUG */ - master_thread = this_thr->th.th_team_master; + master_thread = this_thr->th.th_team_master; #ifdef KMP_DEBUG - if (master_thread != team->t.t_threads[0]) { - __kmp_print_structure(); - } + if (master_thread != team->t.t_threads[0]) { + __kmp_print_structure(); + } #endif /* KMP_DEBUG */ - KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); - KMP_MB(); - - 
// Verify state - KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); - KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); - KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", gtid, team_id, tid)); - - ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); + KMP_DEBUG_ASSERT(master_thread == team->t.t_threads[0]); + KMP_MB(); + + // Verify state + KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); + KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_team)); + KMP_DEBUG_ASSERT(TCR_PTR(this_thr->th.th_root)); + KMP_DEBUG_ASSERT(this_thr == team->t.t_threads[tid]); + KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) arrived at join barrier\n", + gtid, team_id, tid)); + + ANNOTATE_BARRIER_BEGIN(&team->t.t_bar); #if OMPT_SUPPORT #if OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_barrier_begin)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } #endif - this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; #endif - if (__kmp_tasking_mode == tskm_extra_barrier) { - __kmp_tasking_barrier(team, this_thr, gtid); - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, team_id, tid)); - } -# ifdef KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec) { - KA_TRACE(20, ( "__kmp_join_barrier: T#%d, old team = %d, old task_team = %p, th_task_team = %p\n", - __kmp_gtid_from_thread(this_thr), team_id, team->t.t_task_team[this_thr->th.th_task_state], - this_thr->th.th_task_team)); - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == team->t.t_task_team[this_thr->th.th_task_state]); - } -# endif /* KMP_DEBUG */ + if (__kmp_tasking_mode == tskm_extra_barrier) { + __kmp_tasking_barrier(team, this_thr, gtid); + KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) past taking barrier\n", gtid, + team_id, tid)); + } +#ifdef KMP_DEBUG + if (__kmp_tasking_mode != tskm_immediate_exec) { + KA_TRACE(20, ("__kmp_join_barrier: T#%d, old team = %d, old task_team = " + "%p, th_task_team = %p\n", + __kmp_gtid_from_thread(this_thr), team_id, + team->t.t_task_team[this_thr->th.th_task_state], + this_thr->th.th_task_team)); + KMP_DEBUG_ASSERT(this_thr->th.th_task_team == + team->t.t_task_team[this_thr->th.th_task_state]); + } +#endif /* KMP_DEBUG */ - /* Copy the blocktime info to the thread, where __kmp_wait_template() can access it when the - team struct is not guaranteed to exist. Doing these loads causes a cache miss slows - down EPCC parallel by 2x. As a workaround, we do not perform the copy if blocktime=infinite, - since the values are not used by __kmp_wait_template() in that case. */ - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + /* Copy the blocktime info to the thread, where __kmp_wait_template() can + access it when the team struct is not guaranteed to exist. Doing these + loads causes a cache miss slows down EPCC parallel by 2x. As a workaround, + we do not perform the copy if blocktime=infinite, since the values are not + used by __kmp_wait_template() in that case. 
*/ + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { #if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + this_thr->th.th_team_bt_intervals = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; #else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); + this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); #endif - } + } #if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_starting(gtid, itt_sync_obj); + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, NULL - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } + switch (__kmp_barrier_gather_pattern[bs_forkjoin_barrier]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); + __kmp_hyper_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, + NULL USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, + NULL USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_gather_branch_bits[bs_forkjoin_barrier]); + __kmp_tree_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, + NULL USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + default: { + __kmp_linear_barrier_gather(bs_forkjoin_barrier, this_thr, gtid, tid, + NULL USE_ITT_BUILD_ARG(itt_sync_obj)); + } + } + + /* From this point on, the team data structure may be deallocated at any time + by the master thread - it is unsafe to reference it in any of the worker + threads. Any per-team data items that need to be referenced before the + end of the barrier should be moved to the kmp_task_team_t structs. */ + if (KMP_MASTER_TID(tid)) { + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); } - - /* From this point on, the team data structure may be deallocated at any time by the - master thread - it is unsafe to reference it in any of the worker threads. Any per-team - data items that need to be referenced before the end of the barrier should be moved to - the kmp_task_team_t structs. */ - if (KMP_MASTER_TID(tid)) { - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_wait(this_thr, team - USE_ITT_BUILD_ARG(itt_sync_obj) ); - } #if KMP_STATS_ENABLED - // Have master thread flag the workers to indicate they are now waiting for - // next parallel region, Also wake them up so they switch their timers to idle. 
- for (int i=0; it.t_nproc; ++i) { - kmp_info_t* team_thread = team->t.t_threads[i]; - if (team_thread == this_thr) - continue; - team_thread->th.th_stats->setIdleFlag(); - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && team_thread->th.th_sleep_loc != NULL) - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), team_thread->th.th_sleep_loc); - } + // Have master thread flag the workers to indicate they are now waiting for + // next parallel region, Also wake them up so they switch their timers to + // idle. + for (int i = 0; i < team->t.t_nproc; ++i) { + kmp_info_t *team_thread = team->t.t_threads[i]; + if (team_thread == this_thr) + continue; + team_thread->th.th_stats->setIdleFlag(); + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME && + team_thread->th.th_sleep_loc != NULL) + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(team_thread), + team_thread->th.th_sleep_loc); + } #endif #if USE_ITT_BUILD - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ -# if USE_ITT_BUILD && USE_ITT_NOTIFY - // Join barrier - report frame end - if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && __kmp_forkjoin_frames_mode && +#if USE_ITT_BUILD && USE_ITT_NOTIFY + // Join barrier - report frame end + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && + __kmp_forkjoin_frames_mode && #if OMP_40_ENABLED - this_thr->th.th_teams_microtask == NULL && + this_thr->th.th_teams_microtask == NULL && #endif - team->t.t_active_level == 1) - { - kmp_uint64 cur_time = __itt_get_timestamp(); - ident_t * loc = team->t.t_ident; - kmp_info_t **other_threads = team->t.t_threads; - int nproc = this_thr->th.th_team_nproc; - int i; - switch(__kmp_forkjoin_frames_mode) { - case 1: - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - break; - case 2: - __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, loc, nproc); - break; - case 3: - if( __itt_metadata_add_ptr ) { - // Initialize with master's wait time - kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; - // Set arrive time to zero to be able to check it in __kmp_invoke_task(); the same is done inside the loop below - this_thr->th.th_bar_arrive_time = 0; - for (i=1; ith.th_bar_arrive_time ); - other_threads[i]->th.th_bar_arrive_time = 0; - } - __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, cur_time, delta, 0); - } - __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, loc, nproc); - this_thr->th.th_frame_time = cur_time; - break; - } + team->t.t_active_level == 1) { + kmp_uint64 cur_time = __itt_get_timestamp(); + ident_t *loc = team->t.t_ident; + kmp_info_t **other_threads = team->t.t_threads; + int nproc = this_thr->th.th_team_nproc; + int i; + switch (__kmp_forkjoin_frames_mode) { + case 1: + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, + loc, nproc); + break; + case 2: + __kmp_itt_frame_submit(gtid, this_thr->th.th_bar_min_time, cur_time, 1, + loc, nproc); + break; + case 3: + if (__itt_metadata_add_ptr) { + // Initialize with master's wait time + kmp_uint64 delta = cur_time - this_thr->th.th_bar_arrive_time; + // Set arrive time to zero to be able to check it in + // __kmp_invoke_task(); the same is done inside the loop below + this_thr->th.th_bar_arrive_time = 0; + for (i = 1; i < nproc; ++i) { + delta += (cur_time - other_threads[i]->th.th_bar_arrive_time); + 
other_threads[i]->th.th_bar_arrive_time = 0; + } + __kmp_itt_metadata_imbalance(gtid, this_thr->th.th_frame_time, + cur_time, delta, 0); } -# endif /* USE_ITT_BUILD */ + __kmp_itt_frame_submit(gtid, this_thr->th.th_frame_time, cur_time, 0, + loc, nproc); + this_thr->th.th_frame_time = cur_time; + break; + } } +#endif /* USE_ITT_BUILD */ + } #if USE_ITT_BUILD - else { - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) - __kmp_itt_barrier_middle(gtid, itt_sync_obj); - } + else { + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) + __kmp_itt_barrier_middle(gtid, itt_sync_obj); + } #endif /* USE_ITT_BUILD */ #if KMP_DEBUG - if (KMP_MASTER_TID(tid)) { - KA_TRACE(15, ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", - gtid, team_id, tid, nproc)); - } + if (KMP_MASTER_TID(tid)) { + KA_TRACE( + 15, + ("__kmp_join_barrier: T#%d(%d:%d) says all %d team threads arrived\n", + gtid, team_id, tid, nproc)); + } #endif /* KMP_DEBUG */ - // TODO now, mark worker threads as done so they may be disbanded - KMP_MB(); // Flush all pending memory write invalidates. - KA_TRACE(10, ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); + // TODO now, mark worker threads as done so they may be disbanded + KMP_MB(); // Flush all pending memory write invalidates. + KA_TRACE(10, + ("__kmp_join_barrier: T#%d(%d:%d) leaving\n", gtid, team_id, tid)); #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled) { #if OMPT_BLAME - if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { - ompt_callbacks.ompt_callback(ompt_event_barrier_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + if (ompt_callbacks.ompt_callback(ompt_event_barrier_end)) { + ompt_callbacks.ompt_callback(ompt_event_barrier_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } #endif - // return to default state - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } + // return to default state + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } #endif - ANNOTATE_BARRIER_END(&team->t.t_bar); + ANNOTATE_BARRIER_END(&team->t.t_bar); } - -// TODO release worker threads' fork barriers as we are ready instead of all at once -void -__kmp_fork_barrier(int gtid, int tid) -{ - KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); - KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); - kmp_info_t *this_thr = __kmp_threads[gtid]; - kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; +// TODO release worker threads' fork barriers as we are ready instead of all at +// once +void __kmp_fork_barrier(int gtid, int tid) { + KMP_TIME_PARTITIONED_BLOCK(OMP_fork_barrier); + KMP_SET_THREAD_STATE_BLOCK(FORK_JOIN_BARRIER); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = (tid == 0) ? this_thr->th.th_team : NULL; #if USE_ITT_BUILD - void * itt_sync_obj = NULL; + void *itt_sync_obj = NULL; #endif /* USE_ITT_BUILD */ - if (team) - ANNOTATE_BARRIER_END(&team->t.t_bar); + if (team) + ANNOTATE_BARRIER_END(&team->t.t_bar); - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", - gtid, (team != NULL) ? team->t.t_id : -1, tid)); + KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) has arrived\n", gtid, + (team != NULL) ? 
team->t.t_id : -1, tid)); - // th_team pointer only valid for master thread here - if (KMP_MASTER_TID(tid)) { + // th_team pointer only valid for master thread here + if (KMP_MASTER_TID(tid)) { #if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - // Create itt barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); - __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing - } + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + // Create itt barrier object + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier, 1); + __kmp_itt_barrier_middle(gtid, itt_sync_obj); // Call acquired/releasing + } #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ #ifdef KMP_DEBUG - register kmp_info_t **other_threads = team->t.t_threads; - register int i; - - // Verify state - KMP_MB(); - - for(i=1; it.t_nproc; ++i) { - KA_TRACE(500, ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go == %u.\n", - gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, - team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, - other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); - KMP_DEBUG_ASSERT((TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) - & ~(KMP_BARRIER_SLEEP_STATE)) - == KMP_INIT_BARRIER_STATE); - KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); - } + register kmp_info_t **other_threads = team->t.t_threads; + register int i; + + // Verify state + KMP_MB(); + + for (i = 1; i < team->t.t_nproc; ++i) { + KA_TRACE(500, + ("__kmp_fork_barrier: T#%d(%d:0) checking T#%d(%d:%d) fork go " + "== %u.\n", + gtid, team->t.t_id, other_threads[i]->th.th_info.ds.ds_gtid, + team->t.t_id, other_threads[i]->th.th_info.ds.ds_tid, + other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go)); + KMP_DEBUG_ASSERT( + (TCR_4(other_threads[i]->th.th_bar[bs_forkjoin_barrier].bb.b_go) & + ~(KMP_BARRIER_SLEEP_STATE)) == KMP_INIT_BARRIER_STATE); + KMP_DEBUG_ASSERT(other_threads[i]->th.th_team == team); + } #endif - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_setup(this_thr, team, 0); // 0 indicates setup current task team if nthreads > 1 - } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_setup( + this_thr, team, + 0); // 0 indicates setup current task team if nthreads > 1 + } - /* The master thread may have changed its blocktime between the join barrier and the - fork barrier. Copy the blocktime info to the thread, where __kmp_wait_template() can - access it when the team struct is not guaranteed to exist. */ - // See note about the corresponding code in __kmp_join_barrier() being performance-critical - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + /* The master thread may have changed its blocktime between the join barrier + and the fork barrier. Copy the blocktime info to the thread, where + __kmp_wait_template() can access it when the team struct is not + guaranteed to exist. 
*/ + // See note about the corresponding code in __kmp_join_barrier() being + // performance-critical + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { #if KMP_USE_MONITOR - this_thr->th.th_team_bt_intervals = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; - this_thr->th.th_team_bt_set = team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; + this_thr->th.th_team_bt_intervals = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_intervals; + this_thr->th.th_team_bt_set = + team->t.t_implicit_task_taskdata[tid].td_icvs.bt_set; #else - this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); + this_thr->th.th_team_bt_intervals = KMP_BLOCKTIME_INTERVAL(); #endif - } - } // master - - switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { - case bp_hyper_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_hierarchical_bar: { - __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - case bp_tree_bar: { - KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); - __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); - break; - } - default: { - __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj) ); } - } - - // Early exit for reaping threads releasing forkjoin barrier - if (TCR_4(__kmp_global.g.g_done)) { - this_thr->th.th_task_team = NULL; + } // master + + switch (__kmp_barrier_release_pattern[bs_forkjoin_barrier]) { + case bp_hyper_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); + __kmp_hyper_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, + TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_hierarchical_bar: { + __kmp_hierarchical_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, + TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + case bp_tree_bar: { + KMP_ASSERT(__kmp_barrier_release_branch_bits[bs_forkjoin_barrier]); + __kmp_tree_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, + TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + break; + } + default: { + __kmp_linear_barrier_release(bs_forkjoin_barrier, this_thr, gtid, tid, + TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + } + } + + // Early exit for reaping threads releasing forkjoin barrier + if (TCR_4(__kmp_global.g.g_done)) { + this_thr->th.th_task_team = NULL; #if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - if (itt_sync_obj) - __kmp_itt_barrier_finished(gtid, itt_sync_obj); - } - } -#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); - return; + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + if (!KMP_MASTER_TID(tid)) { + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + if (itt_sync_obj) + __kmp_itt_barrier_finished(gtid, itt_sync_obj); + } } - - /* We can now assume that a valid team structure has been allocated by the master and - propagated to all worker threads. The current thread, however, may not be part of the - team, so we can't blindly assume that the team pointer is non-null. 
*/ - team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); - KMP_DEBUG_ASSERT(team != NULL); - tid = __kmp_tid_from_gtid(gtid); - +#endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ + KA_TRACE(10, ("__kmp_fork_barrier: T#%d is leaving early\n", gtid)); + return; + } + + /* We can now assume that a valid team structure has been allocated by the + master and propagated to all worker threads. The current thread, however, + may not be part of the team, so we can't blindly assume that the team + pointer is non-null. */ + team = (kmp_team_t *)TCR_PTR(this_thr->th.th_team); + KMP_DEBUG_ASSERT(team != NULL); + tid = __kmp_tid_from_gtid(gtid); #if KMP_BARRIER_ICV_PULL - /* Master thread's copy of the ICVs was set up on the implicit taskdata in - __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's implicit task has - this data before this function is called. We cannot modify __kmp_fork_call() to look at - the fixed ICVs in the master's thread struct, because it is not always the case that the - threads arrays have been allocated when __kmp_fork_call() is executed. */ - { - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); - if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs - // Copy the initial ICVs from the master's thread struct to the implicit task for this tid. - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); - __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, tid, FALSE); - copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, - &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs); - } + /* Master thread's copy of the ICVs was set up on the implicit taskdata in + __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's + implicit task has this data before this function is called. We cannot + modify __kmp_fork_call() to look at the fixed ICVs in the master's thread + struct, because it is not always the case that the threads arrays have + been allocated when __kmp_fork_call() is executed. */ + { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_icv_copy); + if (!KMP_MASTER_TID(tid)) { // master thread already has ICVs + // Copy the initial ICVs from the master's thread struct to the implicit + // task for this tid. 
+ KA_TRACE(10, + ("__kmp_fork_barrier: T#%d(%d) is PULLing ICVs\n", gtid, tid)); + __kmp_init_implicit_task(team->t.t_ident, team->t.t_threads[tid], team, + tid, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[tid].td_icvs, + &team->t.t_threads[0] + ->th.th_bar[bs_forkjoin_barrier] + .bb.th_fixed_icvs); } + } #endif // KMP_BARRIER_ICV_PULL - if (__kmp_tasking_mode != tskm_immediate_exec) { - __kmp_task_team_sync(this_thr, team); - } + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_task_team_sync(this_thr, team); + } #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - kmp_proc_bind_t proc_bind = team->t.t_proc_bind; - if (proc_bind == proc_bind_intel) { + kmp_proc_bind_t proc_bind = team->t.t_proc_bind; + if (proc_bind == proc_bind_intel) { #endif #if KMP_AFFINITY_SUPPORTED - // Call dynamic affinity settings - if(__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { - __kmp_balanced_affinity(tid, team->t.t_nproc); - } + // Call dynamic affinity settings + if (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) { + __kmp_balanced_affinity(tid, team->t.t_nproc); + } #endif // KMP_AFFINITY_SUPPORTED #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED + } else if (proc_bind != proc_bind_false) { + if (this_thr->th.th_new_place == this_thr->th.th_current_place) { + KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", + __kmp_gtid_from_thread(this_thr), + this_thr->th.th_current_place)); + } else { + __kmp_affinity_set_place(gtid); } - else if (proc_bind != proc_bind_false) { - if (this_thr->th.th_new_place == this_thr->th.th_current_place) { - KA_TRACE(100, ("__kmp_fork_barrier: T#%d already in correct place %d\n", - __kmp_gtid_from_thread(this_thr), this_thr->th.th_current_place)); - } - else { - __kmp_affinity_set_place(gtid); - } - } + } #endif #if USE_ITT_BUILD && USE_ITT_NOTIFY - if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { - if (!KMP_MASTER_TID(tid)) { - // Get correct barrier object - itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); - __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired - } // (prepare called inside barrier_release) - } + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + if (!KMP_MASTER_TID(tid)) { + // Get correct barrier object + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + __kmp_itt_barrier_finished(gtid, itt_sync_obj); // Workers call acquired + } // (prepare called inside barrier_release) + } #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - ANNOTATE_BARRIER_END(&team->t.t_bar); - KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, team->t.t_id, tid)); + ANNOTATE_BARRIER_END(&team->t.t_bar); + KA_TRACE(10, ("__kmp_fork_barrier: T#%d(%d:%d) is leaving\n", gtid, + team->t.t_id, tid)); } +void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, + kmp_internal_control_t *new_icvs, ident_t *loc) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); -void -__kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, kmp_internal_control_t *new_icvs, ident_t *loc ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_setup_icv_copy); - - KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); - KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); + KMP_DEBUG_ASSERT(team && new_nproc && new_icvs); + KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); - /* Master thread's copy of the ICVs was set up on the implicit taskdata in - __kmp_reinitialize_team. 
__kmp_fork_call() assumes the master thread's implicit task has
- this data before this function is called. */
+/* Master thread's copy of the ICVs was set up on the implicit taskdata in
+ __kmp_reinitialize_team. __kmp_fork_call() assumes the master thread's
+ implicit task has this data before this function is called. */
#if KMP_BARRIER_ICV_PULL
- /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains untouched), where
- all of the worker threads can access them and make their own copies after the barrier. */
- KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
- copy_icvs(&team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs, new_icvs);
- KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team));
+ /* Copy ICVs to master's thread structure into th_fixed_icvs (which remains
+ untouched), where all of the worker threads can access them and make their
+ own copies after the barrier. */
+ KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be
+ // allocated at this point
+ copy_icvs(
+ &team->t.t_threads[0]->th.th_bar[bs_forkjoin_barrier].bb.th_fixed_icvs,
+ new_icvs);
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PULL: T#%d this_thread=%p team=%p\n", 0,
+ team->t.t_threads[0], team));
#elif KMP_BARRIER_ICV_PUSH
- // The ICVs will be propagated in the fork barrier, so nothing needs to be done here.
- KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n",
- 0, team->t.t_threads[0], team));
+ // The ICVs will be propagated in the fork barrier, so nothing needs to be
+ // done here.
+ KF_TRACE(10, ("__kmp_setup_icv_copy: PUSH: T#%d this_thread=%p team=%p\n", 0,
+ team->t.t_threads[0], team));
#else
- // Copy the ICVs to each of the non-master threads. This takes O(nthreads) time.
- ngo_load(new_icvs);
- KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be allocated at this point
- for (int f=1; f<new_nproc; ++f) { // Skip the master thread
- // TODO: GEH - pass in better source location info since usually NULL here
- KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
- f, team->t.t_threads[f], team));
- __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE);
- ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs);
- KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n",
- f, team->t.t_threads[f], team));
- }
- ngo_sync();
+ // Copy the ICVs to each of the non-master threads. This takes O(nthreads)
+ // time.
+ ngo_load(new_icvs); + KMP_DEBUG_ASSERT(team->t.t_threads[0]); // The threads arrays should be + // allocated at this point + for (int f = 1; f < new_nproc; ++f) { // Skip the master thread + // TODO: GEH - pass in better source location info since usually NULL here + KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", + f, team->t.t_threads[f], team)); + __kmp_init_implicit_task(loc, team->t.t_threads[f], team, f, FALSE); + ngo_store_icvs(&team->t.t_implicit_task_taskdata[f].td_icvs, new_icvs); + KF_TRACE(10, ("__kmp_setup_icv_copy: LINEAR: T#%d this_thread=%p team=%p\n", + f, team->t.t_threads[f], team)); + } + ngo_sync(); #endif // KMP_BARRIER_ICV_PULL } diff --git a/openmp/runtime/src/kmp_cancel.cpp b/openmp/runtime/src/kmp_cancel.cpp index 5416049..f680d1a 100644 --- a/openmp/runtime/src/kmp_cancel.cpp +++ b/openmp/runtime/src/kmp_cancel.cpp @@ -22,76 +22,80 @@ @param gtid Global thread ID of encountering thread @param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) -@return returns true if the cancellation request has been activated and the execution thread -needs to proceed to the end of the canceled region. +@return returns true if the cancellation request has been activated and the +execution thread needs to proceed to the end of the canceled region. Request cancellation of the binding OpenMP region. */ -kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - - KC_TRACE( 10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { - //printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", - // this_team->t.t_cancel_request, &(this_team->t.t_cancel_request)); - // we do not have a cancellation request in this team or we do have one - // that matches the current request -> cancel - return 1 /* true */; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT( task ); - - taskgroup = task->td_taskgroup; - if (taskgroup) { - kmp_int32 old = KMP_COMPARE_AND_STORE_RET32(&(taskgroup->cancel_request), cancel_noreq, cncl_kind); - if (old == cancel_noreq || old == cncl_kind) { - // we do not have a cancellation request in this taskgroup or we do have one - // that matches the current request -> cancel - return 1 /* true */; - } - } - else { - // TODO: what needs to happen here? 
- // the specification disallows cancellation w/o taskgroups - // so we might do anything here, let's abort for now - KMP_ASSERT( 0 /* false */); - } - } - break; - default: - KMP_ASSERT (0 /* false */); +kmp_int32 __kmpc_cancel(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads[gtid]; + + KC_TRACE(10, ("__kmpc_cancel: T#%d request %d OMP_CANCELLATION=%d\n", gtid, + cncl_kind, __kmp_omp_cancellation)); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || + cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( + &(this_team->t.t_cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + // printf("__kmpc_cancel: this_team->t.t_cancel_request=%d @ %p\n", + // this_team->t.t_cancel_request, + // &(this_team->t.t_cancel_request)); + // we do not have a cancellation request in this team or we do have + // one that matches the current request -> cancel + return 1 /* true */; } + break; + } + case cancel_taskgroup: + // cancellation requests for a task group + // are handled through the taskgroup structure + { + kmp_taskdata_t *task; + kmp_taskgroup_t *taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT(task); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + kmp_int32 old = KMP_COMPARE_AND_STORE_RET32( + &(taskgroup->cancel_request), cancel_noreq, cncl_kind); + if (old == cancel_noreq || old == cncl_kind) { + // we do not have a cancellation request in this taskgroup or we do + // have one that matches the current request -> cancel + return 1 /* true */; + } + } else { + // TODO: what needs to happen here? + // the specification disallows cancellation w/o taskgroups + // so we might do anything here, let's abort for now + KMP_ASSERT(0 /* false */); + } + } + break; + default: + KMP_ASSERT(0 /* false */); } + } - // ICV OMP_CANCELLATION=false, so we ignored this cancel request - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; + // ICV OMP_CANCELLATION=false, so we ignored this cancel request + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; } /*! @@ -100,77 +104,77 @@ kmp_int32 __kmpc_cancel(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { @param gtid Global thread ID of encountering thread @param cncl_kind Cancellation kind (parallel, for, sections, taskgroup) -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. +@return returns true if a matching cancellation request has been flagged in the +RTL and the encountering thread has to cancel.. Cancellation point for the encountering thread. 
*/ -kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 cncl_kind) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - - KC_TRACE( 10, ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", gtid, cncl_kind, __kmp_omp_cancellation) ); - - KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); - KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || - cncl_kind == cancel_sections || cncl_kind == cancel_taskgroup); - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - if (__kmp_omp_cancellation) { - switch (cncl_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - // cancellation requests for parallel and worksharing constructs - // are handled through the team structure - { - kmp_team_t *this_team = this_thr->th.th_team; - KMP_DEBUG_ASSERT(this_team); - if (this_team->t.t_cancel_request) { - if (cncl_kind == this_team->t.t_cancel_request) { - // the request in the team structure matches the type of - // cancellation point so we can cancel - return 1 /* true */; - } - KMP_ASSERT( 0 /* false */); - } - else { - // we do not have a cancellation request pending, so we just - // ignore this cancellation point - return 0; - } - break; - } - case cancel_taskgroup: - // cancellation requests for a task group - // are handled through the taskgroup structure - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - - task = this_thr->th.th_current_task; - KMP_DEBUG_ASSERT( task ); - - taskgroup = task->td_taskgroup; - if (taskgroup) { - // return the current status of cancellation for the - // taskgroup - return !!taskgroup->cancel_request; - } - else { - // if a cancellation point is encountered by a task - // that does not belong to a taskgroup, it is OK - // to ignore it - return 0 /* false */; - } - } - default: - KMP_ASSERT (0 /* false */); +kmp_int32 __kmpc_cancellationpoint(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 cncl_kind) { + kmp_info_t *this_thr = __kmp_threads[gtid]; + + KC_TRACE(10, + ("__kmpc_cancellationpoint: T#%d request %d OMP_CANCELLATION=%d\n", + gtid, cncl_kind, __kmp_omp_cancellation)); + + KMP_DEBUG_ASSERT(cncl_kind != cancel_noreq); + KMP_DEBUG_ASSERT(cncl_kind == cancel_parallel || cncl_kind == cancel_loop || + cncl_kind == cancel_sections || + cncl_kind == cancel_taskgroup); + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + if (__kmp_omp_cancellation) { + switch (cncl_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: + // cancellation requests for parallel and worksharing constructs + // are handled through the team structure + { + kmp_team_t *this_team = this_thr->th.th_team; + KMP_DEBUG_ASSERT(this_team); + if (this_team->t.t_cancel_request) { + if (cncl_kind == this_team->t.t_cancel_request) { + // the request in the team structure matches the type of + // cancellation point so we can cancel + return 1 /* true */; + } + KMP_ASSERT(0 /* false */); + } else { + // we do not have a cancellation request pending, so we just + // ignore this cancellation point + return 0; + } + break; + } + case cancel_taskgroup: + // cancellation requests for a task group + // are handled through the taskgroup structure + { + kmp_taskdata_t *task; + kmp_taskgroup_t *taskgroup; + + task = this_thr->th.th_current_task; + KMP_DEBUG_ASSERT(task); + + taskgroup = task->td_taskgroup; + if (taskgroup) { + // return the current status of cancellation for the taskgroup + return !!taskgroup->cancel_request; + } else { + // if a cancellation point is encountered by a task that does not + // belong to 
a taskgroup, it is OK to ignore it + return 0 /* false */; } + } + default: + KMP_ASSERT(0 /* false */); } + } - // ICV OMP_CANCELLATION=false, so we ignore the cancellation point - KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); - return 0 /* false */; + // ICV OMP_CANCELLATION=false, so we ignore the cancellation point + KMP_DEBUG_ASSERT(!__kmp_omp_cancellation); + return 0 /* false */; } /*! @@ -178,63 +182,61 @@ kmp_int32 __kmpc_cancellationpoint(ident_t* loc_ref, kmp_int32 gtid, kmp_int32 c @param loc_ref location of the original task directive @param gtid Global thread ID of encountering thread -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. +@return returns true if a matching cancellation request has been flagged in the +RTL and the encountering thread has to cancel.. Barrier with cancellation point to send threads from the barrier to the end of the parallel region. Needs a special code pattern as documented in the design document for the cancellation feature. */ -kmp_int32 -__kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { - int ret = 0 /* false */; - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - kmp_team_t *this_team = this_thr->th.th_team; - - KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); - - // call into the standard barrier - __kmpc_barrier(loc, gtid); - - // if cancellation is active, check cancellation flag - if (__kmp_omp_cancellation) { - // depending on which construct to cancel, check the flag and - // reset the flag - switch (this_team->t.t_cancel_request) { - case cancel_parallel: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // the next barrier is the fork/join barrier, which - // synchronizes the threads leaving here - break; - case cancel_loop: - case cancel_sections: - ret = 1; - // ensure that threads have checked the flag, when - // leaving the above barrier - __kmpc_barrier(loc, gtid); - this_team->t.t_cancel_request = cancel_noreq; - // synchronize the threads again to make sure we - // do not have any run-away threads that cause a race - // on the cancellation flag - __kmpc_barrier(loc, gtid); - break; - case cancel_taskgroup: - // this case should not occur - KMP_ASSERT (0 /* false */ ); - break; - case cancel_noreq: - // do nothing - break; - default: - KMP_ASSERT ( 0 /* false */); - } +kmp_int32 __kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { + int ret = 0 /* false */; + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *this_team = this_thr->th.th_team; + + KMP_DEBUG_ASSERT(__kmp_get_gtid() == gtid); + + // call into the standard barrier + __kmpc_barrier(loc, gtid); + + // if cancellation is active, check cancellation flag + if (__kmp_omp_cancellation) { + // depending on which construct to cancel, check the flag and + // reset the flag + switch (this_team->t.t_cancel_request) { + case cancel_parallel: + ret = 1; + // ensure that threads have checked the flag, when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // the next barrier is the fork/join barrier, which + // synchronizes the threads leaving here + break; + case cancel_loop: + case cancel_sections: + ret = 1; + // ensure that threads have checked the flag, when + // leaving the above barrier + __kmpc_barrier(loc, gtid); + this_team->t.t_cancel_request = cancel_noreq; + // synchronize the threads again to make sure we 
do not have any run-away + // threads that cause a race on the cancellation flag + __kmpc_barrier(loc, gtid); + break; + case cancel_taskgroup: + // this case should not occur + KMP_ASSERT(0 /* false */); + break; + case cancel_noreq: + // do nothing + break; + default: + KMP_ASSERT(0 /* false */); } + } - return ret; + return ret; } /*! @@ -242,8 +244,8 @@ __kmpc_cancel_barrier(ident_t *loc, kmp_int32 gtid) { @param loc_ref location of the original task directive @param gtid Global thread ID of encountering thread -@return returns true if a matching cancellation request has been flagged in the RTL and the -encountering thread has to cancel.. +@return returns true if a matching cancellation request has been flagged in the +RTL and the encountering thread has to cancel.. Query function to query the current status of cancellation requests. Can be used to implement the following pattern: @@ -254,29 +256,27 @@ if (kmp_get_cancellation_status(kmp_cancel_parallel)) { } */ int __kmp_get_cancellation_status(int cancel_kind) { - if (__kmp_omp_cancellation) { - kmp_info_t *this_thr = __kmp_entry_thread(); - - switch (cancel_kind) { - case cancel_parallel: - case cancel_loop: - case cancel_sections: - { - kmp_team_t *this_team = this_thr->th.th_team; - return this_team->t.t_cancel_request == cancel_kind; - } - case cancel_taskgroup: - { - kmp_taskdata_t* task; - kmp_taskgroup_t* taskgroup; - task = this_thr->th.th_current_task; - taskgroup = task->td_taskgroup; - return taskgroup && taskgroup->cancel_request; - } - } + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_entry_thread(); + + switch (cancel_kind) { + case cancel_parallel: + case cancel_loop: + case cancel_sections: { + kmp_team_t *this_team = this_thr->th.th_team; + return this_team->t.t_cancel_request == cancel_kind; + } + case cancel_taskgroup: { + kmp_taskdata_t *task; + kmp_taskgroup_t *taskgroup; + task = this_thr->th.th_current_task; + taskgroup = task->td_taskgroup; + return taskgroup && taskgroup->cancel_request; + } } + } - return 0 /* false */; + return 0 /* false */; } #endif diff --git a/openmp/runtime/src/kmp_csupport.cpp b/openmp/runtime/src/kmp_csupport.cpp index 9718a1f..eb562b2 100644 --- a/openmp/runtime/src/kmp_csupport.cpp +++ b/openmp/runtime/src/kmp_csupport.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// -#include "omp.h" /* extern "C" declarations of user-visible routines */ +#include "omp.h" /* extern "C" declarations of user-visible routines */ #include "kmp.h" +#include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_itt.h" #include "kmp_lock.h" -#include "kmp_error.h" #include "kmp_stats.h" #if OMPT_SUPPORT @@ -28,11 +28,8 @@ #define MAX_MESSAGE 512 -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* flags will be used in future, e.g., to implement */ -/* openmp_strict library restrictions */ +// flags will be used in future, e.g. to implement openmp_strict library +// restrictions /*! * @ingroup STARTUP_SHUTDOWN @@ -41,44 +38,41 @@ * * Initialize the runtime library. This call is optional; if it is not made then * it will be implicitly called by attempts to use other library functions. - * */ -void -__kmpc_begin(ident_t *loc, kmp_int32 flags) -{ - // By default __kmpc_begin() is no-op. 
- char *env; - if ((env = getenv( "KMP_INITIAL_THREAD_BIND" )) != NULL && - __kmp_str_match_true( env )) { - __kmp_middle_initialize(); - KC_TRACE(10, ("__kmpc_begin: middle initialization called\n" )); - } else if (__kmp_ignore_mppbeg() == FALSE) { - // By default __kmp_ignore_mppbeg() returns TRUE. - __kmp_internal_begin(); - KC_TRACE( 10, ("__kmpc_begin: called\n" ) ); - } +void __kmpc_begin(ident_t *loc, kmp_int32 flags) { + // By default __kmpc_begin() is no-op. + char *env; + if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL && + __kmp_str_match_true(env)) { + __kmp_middle_initialize(); + KC_TRACE(10, ("__kmpc_begin: middle initialization called\n")); + } else if (__kmp_ignore_mppbeg() == FALSE) { + // By default __kmp_ignore_mppbeg() returns TRUE. + __kmp_internal_begin(); + KC_TRACE(10, ("__kmpc_begin: called\n")); + } } /*! * @ingroup STARTUP_SHUTDOWN * @param loc source location information * - * Shutdown the runtime library. This is also optional, and even if called will not - * do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to zero. - */ -void -__kmpc_end(ident_t *loc) -{ - // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() call no-op. - // However, this can be overridden with KMP_IGNORE_MPPEND environment variable. - // If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() returns FALSE and __kmpc_end() - // will unregister this root (it can cause library shut down). - if (__kmp_ignore_mppend() == FALSE) { - KC_TRACE( 10, ("__kmpc_end: called\n" ) ); - KA_TRACE( 30, ("__kmpc_end\n" )); - - __kmp_internal_end_thread( -1 ); - } + * Shutdown the runtime library. This is also optional, and even if called will + * not do anything unless the `KMP_IGNORE_MPPEND` environment variable is set to + * zero. + */ +void __kmpc_end(ident_t *loc) { + // By default, __kmp_ignore_mppend() returns TRUE which makes __kmpc_end() + // call no-op. However, this can be overridden with KMP_IGNORE_MPPEND + // environment variable. If KMP_IGNORE_MPPEND is 0, __kmp_ignore_mppend() + // returns FALSE and __kmpc_end() will unregister this root (it can cause + // library shut down). + if (__kmp_ignore_mppend() == FALSE) { + KC_TRACE(10, ("__kmpc_end: called\n")); + KA_TRACE(30, ("__kmpc_end\n")); + + __kmp_internal_end_thread(-1); + } } /*! @@ -89,8 +83,8 @@ __kmpc_end(ident_t *loc) This function can be called in any context. If the runtime has ony been entered at the outermost level from a -single (necessarily non-OpenMP*) thread, then the thread number is that -which would be returned by omp_get_thread_num() in the outermost +single (necessarily non-OpenMP*) thread, then the thread number is +that which would be returned by omp_get_thread_num() in the outermost active parallel construct. (Or zero if there is no active parallel construct, since the master thread is necessarily thread zero). @@ -98,16 +92,13 @@ If multiple non-OpenMP threads all enter an OpenMP construct then this will be a unique thread identifier among all the threads created by the OpenMP runtime (but the value cannote be defined in terms of OpenMP thread ids returned by omp_get_thread_num()). - */ -kmp_int32 -__kmpc_global_thread_num(ident_t *loc) -{ - kmp_int32 gtid = __kmp_entry_gtid(); +kmp_int32 __kmpc_global_thread_num(ident_t *loc) { + kmp_int32 gtid = __kmp_entry_gtid(); - KC_TRACE( 10, ("__kmpc_global_thread_num: T#%d\n", gtid ) ); + KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid)); - return gtid; + return gtid; } /*! 
@@ -116,32 +107,30 @@ __kmpc_global_thread_num(ident_t *loc) @return The number of threads under control of the OpenMP* runtime This function can be called in any context. -It returns the total number of threads under the control of the OpenMP runtime. That is -not a number that can be determined by any OpenMP standard calls, since the library may be -called from more than one non-OpenMP thread, and this reflects the total over all such calls. -Similarly the runtime maintains underlying threads even when they are not active (since the cost -of creating and destroying OS threads is high), this call counts all such threads even if they are not -waiting for work. +It returns the total number of threads under the control of the OpenMP runtime. +That is not a number that can be determined by any OpenMP standard calls, since +the library may be called from more than one non-OpenMP thread, and this +reflects the total over all such calls. Similarly the runtime maintains +underlying threads even when they are not active (since the cost of creating +and destroying OS threads is high), this call counts all such threads even if +they are not waiting for work. */ -kmp_int32 -__kmpc_global_num_threads(ident_t *loc) -{ - KC_TRACE(10,("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); +kmp_int32 __kmpc_global_num_threads(ident_t *loc) { + KC_TRACE(10, + ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth)); - return TCR_4(__kmp_all_nth); + return TCR_4(__kmp_all_nth); } /*! @ingroup THREAD_STATES @param loc Source location information. -@return The thread number of the calling thread in the innermost active parallel construct. - +@return The thread number of the calling thread in the innermost active parallel +construct. */ -kmp_int32 -__kmpc_bound_thread_num(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_bound_thread_num: called\n" ) ); - return __kmp_tid_from_gtid( __kmp_entry_gtid() ); +kmp_int32 __kmpc_bound_thread_num(ident_t *loc) { + KC_TRACE(10, ("__kmpc_bound_thread_num: called\n")); + return __kmp_tid_from_gtid(__kmp_entry_gtid()); } /*! @@ -149,12 +138,10 @@ __kmpc_bound_thread_num(ident_t *loc) @param loc Source location information. @return The number of threads in the innermost active parallel construct. */ -kmp_int32 -__kmpc_bound_num_threads(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_bound_num_threads: called\n" ) ); +kmp_int32 __kmpc_bound_num_threads(ident_t *loc) { + KC_TRACE(10, ("__kmpc_bound_num_threads: called\n")); - return __kmp_entry_thread() -> th.th_team -> t.t_nproc; + return __kmp_entry_thread()->th.th_team->t.t_nproc; } /*! @@ -163,74 +150,70 @@ __kmpc_bound_num_threads(ident_t *loc) * * This function need not be called. It always returns TRUE. 
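* With KMP_DEBUG builds the KMP_PAR_RANGE filter can make it return FALSE, so a
* caller that chooses to consult it might guard the fork roughly as in the
* sketch below; `outlined` and `shared` are illustrative placeholders for the
* compiler-generated microtask and its shared-variable argument, not names
* defined by this runtime.
* @code
* if (__kmpc_ok_to_fork(loc)) {
*   __kmpc_fork_call(loc, 1, (kmpc_micro)outlined, shared); // forked path
* } else {
*   // fall back to the serialized path, see __kmpc_serialized_parallel below
* }
* @endcode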
*/ -kmp_int32 -__kmpc_ok_to_fork(ident_t *loc) -{ +kmp_int32 __kmpc_ok_to_fork(ident_t *loc) { #ifndef KMP_DEBUG - return TRUE; + return TRUE; #else - const char *semi2; - const char *semi3; - int line_no; + const char *semi2; + const char *semi3; + int line_no; - if (__kmp_par_range == 0) { - return TRUE; - } - semi2 = loc->psource; - if (semi2 == NULL) { - return TRUE; - } - semi2 = strchr(semi2, ';'); - if (semi2 == NULL) { - return TRUE; + if (__kmp_par_range == 0) { + return TRUE; + } + semi2 = loc->psource; + if (semi2 == NULL) { + return TRUE; + } + semi2 = strchr(semi2, ';'); + if (semi2 == NULL) { + return TRUE; + } + semi2 = strchr(semi2 + 1, ';'); + if (semi2 == NULL) { + return TRUE; + } + if (__kmp_par_range_filename[0]) { + const char *name = semi2 - 1; + while ((name > loc->psource) && (*name != '/') && (*name != ';')) { + name--; } - semi2 = strchr(semi2 + 1, ';'); - if (semi2 == NULL) { - return TRUE; + if ((*name == '/') || (*name == ';')) { + name++; } - if (__kmp_par_range_filename[0]) { - const char *name = semi2 - 1; - while ((name > loc->psource) && (*name != '/') && (*name != ';')) { - name--; - } - if ((*name == '/') || (*name == ';')) { - name++; - } - if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { - return __kmp_par_range < 0; - } + if (strncmp(__kmp_par_range_filename, name, semi2 - name)) { + return __kmp_par_range < 0; } - semi3 = strchr(semi2 + 1, ';'); - if (__kmp_par_range_routine[0]) { - if ((semi3 != NULL) && (semi3 > semi2) - && (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { - return __kmp_par_range < 0; - } + } + semi3 = strchr(semi2 + 1, ';'); + if (__kmp_par_range_routine[0]) { + if ((semi3 != NULL) && (semi3 > semi2) && + (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) { + return __kmp_par_range < 0; } - if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { - if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { - return __kmp_par_range > 0; - } - return __kmp_par_range < 0; + } + if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) { + if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) { + return __kmp_par_range > 0; } - return TRUE; + return __kmp_par_range < 0; + } + return TRUE; #endif /* KMP_DEBUG */ - } /*! @ingroup THREAD_STATES @param loc Source location information. -@return 1 if this thread is executing inside an active parallel region, zero if not. +@return 1 if this thread is executing inside an active parallel region, zero if +not. */ -kmp_int32 -__kmpc_in_parallel( ident_t *loc ) -{ - return __kmp_entry_thread() -> th.th_root -> r.r_active; +kmp_int32 __kmpc_in_parallel(ident_t *loc) { + return __kmp_entry_thread()->th.th_root->r.r_active; } /*! @@ -242,115 +225,103 @@ __kmpc_in_parallel( ident_t *loc ) Set the number of threads to be used by the next fork spawned by this thread. This call is only required if the parallel construct has a `num_threads` clause. 
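As an illustration only (not the exact code any particular compiler emits), a
construct such as `#pragma omp parallel num_threads(4)` can be lowered into a
push followed by the fork entry point documented below; `body` and `a` are
illustrative names for the outlined microtask and a shared variable, and
`kmpc_micro` is the callback type declared in kmp.h.
@code
extern void body(kmp_int32 *gtid, kmp_int32 *btid, int *a); // outlined region
void run(ident_t *loc, int *a) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  __kmpc_push_num_threads(loc, gtid, 4); // from the num_threads(4) clause
  __kmpc_fork_call(loc, 1, (kmpc_micro)body, a);
}
@endcode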
*/ -void -__kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_threads ) -{ - KA_TRACE( 20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", - global_tid, num_threads ) ); +void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_threads) { + KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n", + global_tid, num_threads)); - __kmp_push_num_threads( loc, global_tid, num_threads ); + __kmp_push_num_threads(loc, global_tid, num_threads); } -void -__kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid ) -{ - KA_TRACE( 20, ("__kmpc_pop_num_threads: enter\n" ) ); +void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) { + KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n")); - /* the num_threads are automatically popped */ + /* the num_threads are automatically popped */ } - #if OMP_40_ENABLED -void -__kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, kmp_int32 proc_bind ) -{ - KA_TRACE( 20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", - global_tid, proc_bind ) ); +void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid, + kmp_int32 proc_bind) { + KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid, + proc_bind)); - __kmp_push_proc_bind( loc, global_tid, (kmp_proc_bind_t)proc_bind ); + __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind); } #endif /* OMP_40_ENABLED */ - /*! @ingroup PARALLEL @param loc source location information @param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined parallel construct +@param microtask pointer to callback routine consisting of outlined parallel +construct @param ... pointers to shared variables that aren't global Do the actual fork and call the microtask in the relevant number of threads. */ -void -__kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) -{ - int gtid = __kmp_entry_gtid(); +void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { + int gtid = __kmp_entry_gtid(); #if (KMP_STATS_ENABLED) int inParallel = __kmpc_in_parallel(loc); - if (inParallel) - { - KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); - } - else - { - KMP_COUNT_BLOCK(OMP_PARALLEL); + if (inParallel) { + KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL); + } else { + KMP_COUNT_BLOCK(OMP_PARALLEL); } #endif // maybe to save thr_state is enough here { - va_list ap; - va_start( ap, microtask ); + va_list ap; + va_start(ap, microtask); #if OMPT_SUPPORT - ompt_frame_t* ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled) { - kmp_info_t *master_th = __kmp_threads[ gtid ]; - kmp_team_t *parent_team = master_th->th.th_team; - ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; - if (lwt) - ompt_frame = &(lwt->ompt_task_info.frame); - else - { - int tid = __kmp_tid_from_gtid( gtid ); - ompt_frame = &(parent_team->t.t_implicit_task_taskdata[tid]. 
- ompt_task_info.frame); - } - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + kmp_info_t *master_th = __kmp_threads[gtid]; + kmp_team_t *parent_team = master_th->th.th_team; + ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info; + if (lwt) + ompt_frame = &(lwt->ompt_task_info.frame); + else { + int tid = __kmp_tid_from_gtid(gtid); + ompt_frame = &( + parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); + } + ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); } #endif #if INCLUDE_SSC_MARKS SSC_MARK_FORKING(); #endif - __kmp_fork_call( loc, gtid, fork_context_intel, - argc, + __kmp_fork_call(loc, gtid, fork_context_intel, argc, #if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task + VOLATILE_CAST(void *) microtask, // "unwrapped" task #endif - VOLATILE_CAST(microtask_t) microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, + VOLATILE_CAST(microtask_t) microtask, // "wrapped" task + VOLATILE_CAST(launch_t) __kmp_invoke_task_func, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap + &ap #else - ap + ap #endif - ); + ); #if INCLUDE_SSC_MARKS SSC_MARK_JOINING(); #endif - __kmp_join_call( loc, gtid + __kmp_join_call(loc, gtid #if OMPT_SUPPORT - , fork_context_intel + , + fork_context_intel #endif - ); - - va_end( ap ); + ); + va_end(ap); } } @@ -366,93 +337,90 @@ Set the number of teams to be used by the teams construct. This call is only required if the teams construct has a `num_teams` clause or a `thread_limit` clause (or both). */ -void -__kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_teams, kmp_int32 num_threads ) -{ - KA_TRACE( 20, ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", - global_tid, num_teams, num_threads ) ); +void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid, + kmp_int32 num_teams, kmp_int32 num_threads) { + KA_TRACE(20, + ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n", + global_tid, num_teams, num_threads)); - __kmp_push_num_teams( loc, global_tid, num_teams, num_threads ); + __kmp_push_num_teams(loc, global_tid, num_teams, num_threads); } /*! @ingroup PARALLEL @param loc source location information @param argc total number of arguments in the ellipsis -@param microtask pointer to callback routine consisting of outlined teams construct +@param microtask pointer to callback routine consisting of outlined teams +construct @param ... pointers to shared variables that aren't global Do the actual fork and call the microtask in the relevant number of threads. */ -void -__kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) -{ - int gtid = __kmp_entry_gtid(); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - va_list ap; - va_start( ap, microtask ); +void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, + ...) 
{ + int gtid = __kmp_entry_gtid(); + kmp_info_t *this_thr = __kmp_threads[gtid]; + va_list ap; + va_start(ap, microtask); - KMP_COUNT_BLOCK(OMP_TEAMS); + KMP_COUNT_BLOCK(OMP_TEAMS); - // remember teams entry point and nesting level - this_thr->th.th_teams_microtask = microtask; - this_thr->th.th_teams_level = this_thr->th.th_team->t.t_level; // AC: can be >0 on host + // remember teams entry point and nesting level + this_thr->th.th_teams_microtask = microtask; + this_thr->th.th_teams_level = + this_thr->th.th_team->t.t_level; // AC: can be >0 on host #if OMPT_SUPPORT - kmp_team_t *parent_team = this_thr->th.th_team; - int tid = __kmp_tid_from_gtid( gtid ); - if (ompt_enabled) { - parent_team->t.t_implicit_task_taskdata[tid]. - ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1); - } + kmp_team_t *parent_team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + if (ompt_enabled) { + parent_team->t.t_implicit_task_taskdata[tid] + .ompt_task_info.frame.reenter_runtime_frame = + __builtin_frame_address(1); + } #endif - // check if __kmpc_push_num_teams called, set default number of teams otherwise - if ( this_thr->th.th_teams_size.nteams == 0 ) { - __kmp_push_num_teams( loc, gtid, 0, 0 ); - } - KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); - KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); + // check if __kmpc_push_num_teams called, set default number of teams + // otherwise + if (this_thr->th.th_teams_size.nteams == 0) { + __kmp_push_num_teams(loc, gtid, 0, 0); + } + KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1); + KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1); + KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1); - __kmp_fork_call( loc, gtid, fork_context_intel, - argc, + __kmp_fork_call(loc, gtid, fork_context_intel, argc, #if OMPT_SUPPORT - VOLATILE_CAST(void *) microtask, // "unwrapped" task + VOLATILE_CAST(void *) microtask, // "unwrapped" task #endif - VOLATILE_CAST(microtask_t) __kmp_teams_master, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, + VOLATILE_CAST(microtask_t) + __kmp_teams_master, // "wrapped" task + VOLATILE_CAST(launch_t) __kmp_invoke_teams_master, #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap + &ap #else - ap + ap #endif - ); - __kmp_join_call( loc, gtid + ); + __kmp_join_call(loc, gtid #if OMPT_SUPPORT - , fork_context_intel + , + fork_context_intel #endif - ); + ); - this_thr->th.th_teams_microtask = NULL; - this_thr->th.th_teams_level = 0; - *(kmp_int64*)(&this_thr->th.th_teams_size) = 0L; - va_end( ap ); + this_thr->th.th_teams_microtask = NULL; + this_thr->th.th_teams_level = 0; + *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L; + va_end(ap); } #endif /* OMP_40_ENABLED */ - -// // I don't think this function should ever have been exported. // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated // openmp code ever called it, but it's been exported from the RTL for so // long that I'm afraid to remove the definition. -// -int -__kmpc_invoke_task_func( int gtid ) -{ - return __kmp_invoke_task_func( gtid ); -} +int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); } /*! @ingroup PARALLEL @@ -466,13 +434,11 @@ conditional parallel region, like this, @endcode when the condition is false. 
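In that case the compiler can skip the fork entirely and bracket a direct call
to the outlined routine with this pair of entry points, roughly as sketched
below; `outlined` and `shared` are illustrative placeholders, and the bound
thread id of the serialized (single-thread) team is 0.
@code
kmp_int32 gtid = __kmpc_global_thread_num(loc);
kmp_int32 bound = 0;
__kmpc_serialized_parallel(loc, gtid);
outlined(&gtid, &bound, shared); // run the region body on this thread
__kmpc_end_serialized_parallel(loc, gtid);
@endcode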
*/ -void -__kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - // The implementation is now in kmp_runtime.cpp so that it can share static - // functions with kmp_fork_call since the tasks to be done are similar in - // each case. - __kmp_serialized_parallel(loc, global_tid); +void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { + // The implementation is now in kmp_runtime.cpp so that it can share static + // functions with kmp_fork_call since the tasks to be done are similar in + // each case. + __kmp_serialized_parallel(loc, global_tid); } /*! @@ -482,108 +448,114 @@ __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) Leave a serialized parallel construct. */ -void -__kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - kmp_internal_control_t *top; - kmp_info_t *this_thr; - kmp_team_t *serial_team; - - KC_TRACE( 10, ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid ) ); - - /* skip all this code for autopar serialized loops since it results in - unacceptable overhead */ - if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) - return; +void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { + kmp_internal_control_t *top; + kmp_info_t *this_thr; + kmp_team_t *serial_team; - // Not autopar code - if( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); + KC_TRACE(10, + ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid)); - this_thr = __kmp_threads[ global_tid ]; - serial_team = this_thr->th.th_serial_team; + /* skip all this code for autopar serialized loops since it results in + unacceptable overhead */ + if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) + return; - #if OMP_45_ENABLED - kmp_task_team_t * task_team = this_thr->th.th_task_team; + // Not autopar code + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - // we need to wait for the proxy tasks before finishing the thread - if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) - __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL) ); // is an ITT object needed here? 
- #endif + this_thr = __kmp_threads[global_tid]; + serial_team = this_thr->th.th_serial_team; - KMP_MB(); - KMP_DEBUG_ASSERT( serial_team ); - KMP_ASSERT( serial_team -> t.t_serialized ); - KMP_DEBUG_ASSERT( this_thr -> th.th_team == serial_team ); - KMP_DEBUG_ASSERT( serial_team != this_thr->th.th_root->r.r_root_team ); - KMP_DEBUG_ASSERT( serial_team -> t.t_threads ); - KMP_DEBUG_ASSERT( serial_team -> t.t_threads[0] == this_thr ); - - /* If necessary, pop the internal control stack values and replace the team values */ - top = serial_team -> t.t_control_stack_top; - if ( top && top -> serial_nesting_level == serial_team -> t.t_serialized ) { - copy_icvs( &serial_team -> t.t_threads[0] -> th.th_current_task -> td_icvs, top ); - serial_team -> t.t_control_stack_top = top -> next; - __kmp_free(top); - } +#if OMP_45_ENABLED + kmp_task_team_t *task_team = this_thr->th.th_task_team; + + // we need to wait for the proxy tasks before finishing the thread + if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) + __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL)); +#endif + + KMP_MB(); + KMP_DEBUG_ASSERT(serial_team); + KMP_ASSERT(serial_team->t.t_serialized); + KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); + KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team); + KMP_DEBUG_ASSERT(serial_team->t.t_threads); + KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); + + /* If necessary, pop the internal control stack values and replace the team + * values */ + top = serial_team->t.t_control_stack_top; + if (top && top->serial_nesting_level == serial_team->t.t_serialized) { + copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top); + serial_team->t.t_control_stack_top = top->next; + __kmp_free(top); + } - //if( serial_team -> t.t_serialized > 1 ) - serial_team -> t.t_level--; + // if( serial_team -> t.t_serialized > 1 ) + serial_team->t.t_level--; - /* pop dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); - { - dispatch_private_info_t * disp_buffer = serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = - serial_team->t.t_dispatch->th_disp_buffer->next; - __kmp_free( disp_buffer ); - } + /* pop dispatch buffers stack */ + KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer); + { + dispatch_private_info_t *disp_buffer = + serial_team->t.t_dispatch->th_disp_buffer; + serial_team->t.t_dispatch->th_disp_buffer = + serial_team->t.t_dispatch->th_disp_buffer->next; + __kmp_free(disp_buffer); + } - -- serial_team -> t.t_serialized; - if ( serial_team -> t.t_serialized == 0 ) { + --serial_team->t.t_serialized; + if (serial_team->t.t_serialized == 0) { - /* return to the parallel section */ +/* return to the parallel section */ #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - if ( __kmp_inherit_fp_control && serial_team->t.t_fp_control_saved ) { - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &serial_team->t.t_x87_fpu_control_word ); - __kmp_load_mxcsr( &serial_team->t.t_mxcsr ); - } + if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) { + __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word); + __kmp_load_mxcsr(&serial_team->t.t_mxcsr); + } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - this_thr -> th.th_team = serial_team -> t.t_parent; - this_thr -> th.th_info.ds.ds_tid = serial_team -> t.t_master_tid; + this_thr->th.th_team = serial_team->t.t_parent; + 
this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid; - /* restore values cached in the thread */ - this_thr -> th.th_team_nproc = serial_team -> t.t_parent -> t.t_nproc; /* JPH */ - this_thr -> th.th_team_master = serial_team -> t.t_parent -> t.t_threads[0]; /* JPH */ - this_thr -> th.th_team_serialized = this_thr -> th.th_team -> t.t_serialized; + /* restore values cached in the thread */ + this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */ + this_thr->th.th_team_master = + serial_team->t.t_parent->t.t_threads[0]; /* JPH */ + this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized; - /* TODO the below shouldn't need to be adjusted for serialized teams */ - this_thr -> th.th_dispatch = & this_thr -> th.th_team -> - t.t_dispatch[ serial_team -> t.t_master_tid ]; + /* TODO the below shouldn't need to be adjusted for serialized teams */ + this_thr->th.th_dispatch = + &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid]; - __kmp_pop_current_task_from_thread( this_thr ); + __kmp_pop_current_task_from_thread(this_thr); - KMP_ASSERT( this_thr -> th.th_current_task -> td_flags.executing == 0 ); - this_thr -> th.th_current_task -> td_flags.executing = 1; + KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0); + this_thr->th.th_current_task->td_flags.executing = 1; - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Copy the task team from the new child / old parent team to the thread. - this_thr->th.th_task_team = this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; - KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d restoring task_team %p / team %p\n", - global_tid, this_thr -> th.th_task_team, this_thr -> th.th_team ) ); - } - } else { - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ( "__kmpc_end_serialized_parallel: T#%d decreasing nesting depth of serial team %p to %d\n", - global_tid, serial_team, serial_team -> t.t_serialized ) ); - } + if (__kmp_tasking_mode != tskm_immediate_exec) { + // Copy the task team from the new child / old parent team to the thread. + this_thr->th.th_task_team = + this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]; + KA_TRACE(20, + ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / " + "team %p\n", + global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); + } + } else { + if (__kmp_tasking_mode != tskm_immediate_exec) { + KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting " + "depth of serial team %p to %d\n", + global_tid, serial_team, serial_team->t.t_serialized)); } + } - if ( __kmp_env_consistency_check ) - __kmp_pop_parallel( global_tid, NULL ); + if (__kmp_env_consistency_check) + __kmp_pop_parallel(global_tid, NULL); } /*! @@ -594,67 +566,62 @@ Execute flush. This is implemented as a full memory fence. (Though depending on the memory ordering convention obeyed by the compiler even that may not be necessary). */ -void -__kmpc_flush(ident_t *loc) -{ - KC_TRACE( 10, ("__kmpc_flush: called\n" ) ); - - /* need explicit __mf() here since use volatile instead in library */ - KMP_MB(); /* Flush all pending memory write invalidates. */ - - #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) - #if KMP_MIC - // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. 
- // We shouldn't need it, though, since the ABI rules require that - // * If the compiler generates NGO stores it also generates the fence - // * If users hand-code NGO stores they should insert the fence - // therefore no incomplete unordered stores should be visible. - #else - // C74404 - // This is to address non-temporal store instructions (sfence needed). - // The clflush instruction is addressed either (mfence needed). - // Probably the non-temporal load monvtdqa instruction should also be addressed. - // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. - if ( ! __kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( & __kmp_cpuinfo ); - }; // if - if ( ! __kmp_cpuinfo.sse2 ) { - // CPU cannot execute SSE2 instructions. - } else { - #if KMP_COMPILER_ICC - _mm_mfence(); - #elif KMP_COMPILER_MSVC - MemoryBarrier(); - #else - __sync_synchronize(); - #endif // KMP_COMPILER_ICC - }; // if - #endif // KMP_MIC - #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64) - // Nothing to see here move along - #elif KMP_ARCH_PPC64 - // Nothing needed here (we have a real MB above). - #if KMP_OS_CNK - // The flushing thread needs to yield here; this prevents a - // busy-waiting thread from saturating the pipeline. flush is - // often used in loops like this: - // while (!flag) { - // #pragma omp flush(flag) - // } - // and adding the yield here is good for at least a 10x speedup - // when running >2 threads per core (on the NAS LU benchmark). - __kmp_yield(TRUE); - #endif - #else - #error Unknown or unsupported architecture - #endif - +void __kmpc_flush(ident_t *loc) { + KC_TRACE(10, ("__kmpc_flush: called\n")); + + /* need explicit __mf() here since use volatile instead in library */ + KMP_MB(); /* Flush all pending memory write invalidates. */ + +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) +#if KMP_MIC +// fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used. +// We shouldn't need it, though, since the ABI rules require that +// * If the compiler generates NGO stores it also generates the fence +// * If users hand-code NGO stores they should insert the fence +// therefore no incomplete unordered stores should be visible. +#else + // C74404 + // This is to address non-temporal store instructions (sfence needed). + // The clflush instruction is addressed either (mfence needed). + // Probably the non-temporal load monvtdqa instruction should also be + // addressed. + // mfence is a SSE2 instruction. Do not execute it if CPU is not SSE2. + if (!__kmp_cpuinfo.initialized) { + __kmp_query_cpuid(&__kmp_cpuinfo); + }; // if + if (!__kmp_cpuinfo.sse2) { + // CPU cannot execute SSE2 instructions. + } else { +#if KMP_COMPILER_ICC + _mm_mfence(); +#elif KMP_COMPILER_MSVC + MemoryBarrier(); +#else + __sync_synchronize(); +#endif // KMP_COMPILER_ICC + }; // if +#endif // KMP_MIC +#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64) +// Nothing to see here move along +#elif KMP_ARCH_PPC64 +// Nothing needed here (we have a real MB above). +#if KMP_OS_CNK + // The flushing thread needs to yield here; this prevents a + // busy-waiting thread from saturating the pipeline. flush is + // often used in loops like this: + // while (!flag) { + // #pragma omp flush(flag) + // } + // and adding the yield here is good for at least a 10x speedup + // when running >2 threads per core (on the NAS LU benchmark). 
+ __kmp_yield(TRUE); +#endif +#else +#error Unknown or unsupported architecture +#endif } /* -------------------------------------------------------------------------- */ - -/* -------------------------------------------------------------------------- */ - /*! @ingroup SYNCHRONIZATION @param loc source location information @@ -662,44 +629,42 @@ __kmpc_flush(ident_t *loc) Execute a barrier. */ -void -__kmpc_barrier(ident_t *loc, kmp_int32 global_tid) -{ - KMP_COUNT_BLOCK(OMP_BARRIER); - KC_TRACE( 10, ("__kmpc_barrier: called T#%d\n", global_tid ) ); +void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { + KMP_COUNT_BLOCK(OMP_BARRIER); + KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid)); - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user? - }; // if + if (__kmp_env_consistency_check) { + if (loc == 0) { + KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? + }; // if - __kmp_check_barrier( global_tid, ct_barrier, loc ); - } + __kmp_check_barrier(global_tid, ct_barrier, loc); + } #if OMPT_SUPPORT && OMPT_TRACE - ompt_frame_t * ompt_frame; - if (ompt_enabled ) { - ompt_frame = __ompt_get_task_frame_internal(0); - if ( ompt_frame->reenter_runtime_frame == NULL ) - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); - } + ompt_frame_t *ompt_frame; + if (ompt_enabled) { + ompt_frame = __ompt_get_task_frame_internal(0); + if (ompt_frame->reenter_runtime_frame == NULL) + ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + } #endif - __kmp_threads[ global_tid ]->th.th_ident = loc; - // TODO: explicit barrier_wait_id: - // this function is called when 'barrier' directive is present or - // implicit barrier at the end of a worksharing construct. - // 1) better to add a per-thread barrier counter to a thread data structure - // 2) set to 0 when a new team is created - // 4) no sync is required + __kmp_threads[global_tid]->th.th_ident = loc; + // TODO: explicit barrier_wait_id: + // this function is called when 'barrier' directive is present or + // implicit barrier at the end of a worksharing construct. + // 1) better to add a per-thread barrier counter to a thread data structure + // 2) set to 0 when a new team is created + // 4) no sync is required - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled ) { - ompt_frame->reenter_runtime_frame = NULL; - } + if (ompt_enabled) { + ompt_frame->reenter_runtime_frame = NULL; + } #endif } @@ -710,52 +675,49 @@ __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) @param global_tid global thread number . @return 1 if this thread should execute the master block, 0 otherwise. */ -kmp_int32 -__kmpc_master(ident_t *loc, kmp_int32 global_tid) -{ - int status = 0; +kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) { + int status = 0; - KC_TRACE( 10, ("__kmpc_master: called T#%d\n", global_tid ) ); + KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid)); - if( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - if( KMP_MASTER_GTID( global_tid )) { - KMP_COUNT_BLOCK(OMP_MASTER); - KMP_PUSH_PARTITIONED_TIMER(OMP_master); - status = 1; - } + if (KMP_MASTER_GTID(global_tid)) { + KMP_COUNT_BLOCK(OMP_MASTER); + KMP_PUSH_PARTITIONED_TIMER(OMP_master); + status = 1; + } #if OMPT_SUPPORT && OMPT_TRACE - if (status) { - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_master_begin)) { - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - - int tid = __kmp_tid_from_gtid( global_tid ); - ompt_callbacks.ompt_callback(ompt_event_master_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + if (status) { + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_begin)) { + kmp_info_t *this_thr = __kmp_threads[global_tid]; + kmp_team_t *team = this_thr->th.th_team; + + int tid = __kmp_tid_from_gtid(global_tid); + ompt_callbacks.ompt_callback(ompt_event_master_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); } + } #endif - if ( __kmp_env_consistency_check ) { + if (__kmp_env_consistency_check) { #if KMP_USE_DYNAMIC_LOCK - if (status) - __kmp_push_sync( global_tid, ct_master, loc, NULL, 0 ); - else - __kmp_check_sync( global_tid, ct_master, loc, NULL, 0 ); + if (status) + __kmp_push_sync(global_tid, ct_master, loc, NULL, 0); + else + __kmp_check_sync(global_tid, ct_master, loc, NULL, 0); #else - if (status) - __kmp_push_sync( global_tid, ct_master, loc, NULL ); - else - __kmp_check_sync( global_tid, ct_master, loc, NULL ); + if (status) + __kmp_push_sync(global_tid, ct_master, loc, NULL); + else + __kmp_check_sync(global_tid, ct_master, loc, NULL); #endif - } + } - return status; + return status; } /*! @@ -763,36 +725,33 @@ __kmpc_master(ident_t *loc, kmp_int32 global_tid) @param loc source location information. @param global_tid global thread number . -Mark the end of a master region. This should only be called by the thread -that executes the master region. +Mark the end of a master region. This should only be called by the +thread that executes the master region. 
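Taken together with __kmpc_master() above, a `#pragma omp master` block is
conventionally guarded as in this sketch (the body is a placeholder); note that
__kmpc_end_master() is only reached when __kmpc_master() returned 1.
@code
if (__kmpc_master(loc, gtid)) {
  // ... body of the master region, executed by the master thread only ...
  __kmpc_end_master(loc, gtid);
}
@endcode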
*/ -void -__kmpc_end_master(ident_t *loc, kmp_int32 global_tid) -{ - KC_TRACE( 10, ("__kmpc_end_master: called T#%d\n", global_tid ) ); +void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) { + KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid)); - KMP_DEBUG_ASSERT( KMP_MASTER_GTID( global_tid )); - KMP_POP_PARTITIONED_TIMER(); + KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid)); + KMP_POP_PARTITIONED_TIMER(); #if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_master_end)) { - int tid = __kmp_tid_from_gtid( global_tid ); - ompt_callbacks.ompt_callback(ompt_event_master_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + kmp_info_t *this_thr = __kmp_threads[global_tid]; + kmp_team_t *team = this_thr->th.th_team; + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_master_end)) { + int tid = __kmp_tid_from_gtid(global_tid); + ompt_callbacks.ompt_callback(ompt_event_master_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } #endif - if ( __kmp_env_consistency_check ) { - if( global_tid < 0 ) - KMP_WARNING( ThreadIdentInvalid ); + if (__kmp_env_consistency_check) { + if (global_tid < 0) + KMP_WARNING(ThreadIdentInvalid); - if( KMP_MASTER_GTID( global_tid )) - __kmp_pop_sync( global_tid, ct_master, loc ); - } + if (KMP_MASTER_GTID(global_tid)) + __kmp_pop_sync(global_tid, ct_master, loc); + } } /*! @@ -802,60 +761,58 @@ __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) Start execution of an ordered construct. */ -void -__kmpc_ordered( ident_t * loc, kmp_int32 gtid ) -{ - int cid = 0; - kmp_info_t *th; - KMP_DEBUG_ASSERT( __kmp_init_serial ); +void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { + int cid = 0; + kmp_info_t *th; + KMP_DEBUG_ASSERT(__kmp_init_serial); - KC_TRACE( 10, ("__kmpc_ordered: called T#%d\n", gtid )); + KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid)); - if (! 
TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); #if USE_ITT_BUILD - __kmp_itt_ordered_prep( gtid ); - // TODO: ordered_wait_id + __kmp_itt_ordered_prep(gtid); +// TODO: ordered_wait_id #endif /* USE_ITT_BUILD */ - th = __kmp_threads[ gtid ]; + th = __kmp_threads[gtid]; #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - /* OMPT state update */ - th->th.ompt_thread_info.wait_id = (uint64_t) loc; - th->th.ompt_thread_info.state = ompt_state_wait_ordered; - - /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_wait_ordered)( - th->th.ompt_thread_info.wait_id); - } + if (ompt_enabled) { + /* OMPT state update */ + th->th.ompt_thread_info.wait_id = (uint64_t)loc; + th->th.ompt_thread_info.state = ompt_state_wait_ordered; + + /* OMPT event callback */ + if (ompt_callbacks.ompt_callback(ompt_event_wait_ordered)) { + ompt_callbacks.ompt_callback(ompt_event_wait_ordered)( + th->th.ompt_thread_info.wait_id); } + } #endif - if ( th -> th.th_dispatch -> th_deo_fcn != 0 ) - (*th->th.th_dispatch->th_deo_fcn)( & gtid, & cid, loc ); - else - __kmp_parallel_deo( & gtid, & cid, loc ); + if (th->th.th_dispatch->th_deo_fcn != 0) + (*th->th.th_dispatch->th_deo_fcn)(>id, &cid, loc); + else + __kmp_parallel_deo(>id, &cid, loc); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - /* OMPT state update */ - th->th.ompt_thread_info.state = ompt_state_work_parallel; - th->th.ompt_thread_info.wait_id = 0; - - /* OMPT event callback */ - if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)( - th->th.ompt_thread_info.wait_id); - } + if (ompt_enabled) { + /* OMPT state update */ + th->th.ompt_thread_info.state = ompt_state_work_parallel; + th->th.ompt_thread_info.wait_id = 0; + + /* OMPT event callback */ + if (ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)) { + ompt_callbacks.ompt_callback(ompt_event_acquired_ordered)( + th->th.ompt_thread_info.wait_id); } + } #endif #if USE_ITT_BUILD - __kmp_itt_ordered_start( gtid ); + __kmp_itt_ordered_start(gtid); #endif /* USE_ITT_BUILD */ } @@ -866,216 +823,231 @@ __kmpc_ordered( ident_t * loc, kmp_int32 gtid ) End execution of an ordered construct. 
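Paired with __kmpc_ordered() above. Inside a chunk of a worksharing loop that
carries an `ordered` clause, each iteration's ordered block is typically
bracketed as in the sketch below (loop setup and dispatch calls omitted).
@code
__kmpc_ordered(loc, gtid);
// ... body of the ordered region, executed in iteration order ...
__kmpc_end_ordered(loc, gtid);
@endcode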
*/ -void -__kmpc_end_ordered( ident_t * loc, kmp_int32 gtid ) -{ - int cid = 0; - kmp_info_t *th; +void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { + int cid = 0; + kmp_info_t *th; - KC_TRACE( 10, ("__kmpc_end_ordered: called T#%d\n", gtid ) ); + KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid)); #if USE_ITT_BUILD - __kmp_itt_ordered_end( gtid ); - // TODO: ordered_wait_id + __kmp_itt_ordered_end(gtid); +// TODO: ordered_wait_id #endif /* USE_ITT_BUILD */ - th = __kmp_threads[ gtid ]; + th = __kmp_threads[gtid]; - if ( th -> th.th_dispatch -> th_dxo_fcn != 0 ) - (*th->th.th_dispatch->th_dxo_fcn)( & gtid, & cid, loc ); - else - __kmp_parallel_dxo( & gtid, & cid, loc ); + if (th->th.th_dispatch->th_dxo_fcn != 0) + (*th->th.th_dispatch->th_dxo_fcn)(>id, &cid, loc); + else + __kmp_parallel_dxo(>id, &cid, loc); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - th->th.ompt_thread_info.wait_id); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { + ompt_callbacks.ompt_callback(ompt_event_release_ordered)( + th->th.ompt_thread_info.wait_id); + } #endif } #if KMP_USE_DYNAMIC_LOCK static __forceinline void -__kmp_init_indirect_csptr(kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid, kmp_indirect_locktag_t tag) -{ - // Pointer to the allocated indirect lock is written to crit, while indexing is ignored. - void *idx; - kmp_indirect_lock_t **lck; - lck = (kmp_indirect_lock_t **)crit; - kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); - KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); - KMP_SET_I_LOCK_LOCATION(ilk, loc); - KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); - KA_TRACE(20, ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); +__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc, + kmp_int32 gtid, kmp_indirect_locktag_t tag) { + // Pointer to the allocated indirect lock is written to crit, while indexing + // is ignored. + void *idx; + kmp_indirect_lock_t **lck; + lck = (kmp_indirect_lock_t **)crit; + kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag); + KMP_I_LOCK_FUNC(ilk, init)(ilk->lock); + KMP_SET_I_LOCK_LOCATION(ilk, loc); + KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section); + KA_TRACE(20, + ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag)); #if USE_ITT_BUILD - __kmp_itt_critical_creating(ilk->lock, loc); + __kmp_itt_critical_creating(ilk->lock, loc); #endif - int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk); - if (status == 0) { + int status = KMP_COMPARE_AND_STORE_PTR(lck, 0, ilk); + if (status == 0) { #if USE_ITT_BUILD - __kmp_itt_critical_destroyed(ilk->lock); + __kmp_itt_critical_destroyed(ilk->lock); #endif - // We don't really need to destroy the unclaimed lock here since it will be cleaned up at program exit. - //KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); - } - KMP_DEBUG_ASSERT(*lck != NULL); + // We don't really need to destroy the unclaimed lock here since it will be + // cleaned up at program exit. + // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx); + } + KMP_DEBUG_ASSERT(*lck != NULL); } // Fast-path acquire tas lock -#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) { \ - kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ - if (l->lk.poll != KMP_LOCK_FREE(tas) || \ - ! 
KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE(l); \ - KMP_INIT_YIELD(spins); \ - if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - kmp_backoff_t backoff = __kmp_spin_backoff_params; \ - while (l->lk.poll != KMP_LOCK_FREE(tas) || \ - ! KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas))) { \ - __kmp_spin_backoff(&backoff); \ - if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ - KMP_YIELD(TRUE); \ - } else { \ - KMP_YIELD_SPIN(spins); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED(l); \ -} +#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \ + { \ + kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ + if (l->lk.poll != KMP_LOCK_FREE(tas) || \ + !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \ + KMP_LOCK_BUSY(gtid + 1, tas))) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(l); \ + KMP_INIT_YIELD(spins); \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + kmp_backoff_t backoff = __kmp_spin_backoff_params; \ + while (l->lk.poll != KMP_LOCK_FREE(tas) || \ + !KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \ + KMP_LOCK_BUSY(gtid + 1, tas))) { \ + __kmp_spin_backoff(&backoff); \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + } \ + } \ + KMP_FSYNC_ACQUIRED(l); \ + } // Fast-path test tas lock -#define KMP_TEST_TAS_LOCK(lock, gtid, rc) { \ - kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ - rc = l->lk.poll == KMP_LOCK_FREE(tas) && \ - KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas)); \ -} +#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \ + { \ + kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \ + rc = l->lk.poll == KMP_LOCK_FREE(tas) && \ + KMP_COMPARE_AND_STORE_ACQ32(&(l->lk.poll), KMP_LOCK_FREE(tas), \ + KMP_LOCK_BUSY(gtid + 1, tas)); \ + } // Fast-path release tas lock -#define KMP_RELEASE_TAS_LOCK(lock, gtid) { \ - TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \ - KMP_MB(); \ -} +#define KMP_RELEASE_TAS_LOCK(lock, gtid) \ + { \ + TCW_4(((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); \ + KMP_MB(); \ + } #if KMP_USE_FUTEX -# include -# include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif +#include +#include +#ifndef FUTEX_WAIT +#define FUTEX_WAIT 0 +#endif +#ifndef FUTEX_WAKE +#define FUTEX_WAKE 1 +#endif // Fast-path acquire futex lock -#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - kmp_int32 gtid_code = (gtid+1) << 1; \ - KMP_MB(); \ - KMP_FSYNC_PREPARE(ftx); \ - kmp_int32 poll_val; \ - while ((poll_val = KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ - KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ - kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ - if (!cond) { \ - if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, poll_val | KMP_LOCK_BUSY(1, futex))) { \ - continue; \ - } \ - poll_val |= KMP_LOCK_BUSY(1, futex); \ - } \ - kmp_int32 rc; \ - if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, NULL, NULL, 0)) != 0) { \ - continue; \ - } \ - gtid_code |= 1; \ - } \ - 
KMP_FSYNC_ACQUIRED(ftx); \ -} +#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \ + { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + kmp_int32 gtid_code = (gtid + 1) << 1; \ + KMP_MB(); \ + KMP_FSYNC_PREPARE(ftx); \ + kmp_int32 poll_val; \ + while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \ + &(ftx->lk.poll), KMP_LOCK_FREE(futex), \ + KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \ + kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \ + if (!cond) { \ + if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \ + poll_val | \ + KMP_LOCK_BUSY(1, futex))) { \ + continue; \ + } \ + poll_val |= KMP_LOCK_BUSY(1, futex); \ + } \ + kmp_int32 rc; \ + if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \ + NULL, NULL, 0)) != 0) { \ + continue; \ + } \ + gtid_code |= 1; \ + } \ + KMP_FSYNC_ACQUIRED(ftx); \ + } // Fast-path test futex lock -#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY(gtid+1 << 1, futex))) { \ - KMP_FSYNC_ACQUIRED(ftx); \ - rc = TRUE; \ - } else { \ - rc = FALSE; \ - } \ -} +#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \ + { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \ + KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \ + KMP_FSYNC_ACQUIRED(ftx); \ + rc = TRUE; \ + } else { \ + rc = FALSE; \ + } \ + } // Fast-path release futex lock -#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) { \ - kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ - KMP_MB(); \ - KMP_FSYNC_RELEASING(ftx); \ - kmp_int32 poll_val = KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ - if (KMP_LOCK_STRIP(poll_val) & 1) { \ - syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ - } \ - KMP_MB(); \ - KMP_YIELD(TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \ -} +#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \ + { \ + kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \ + KMP_MB(); \ + KMP_FSYNC_RELEASING(ftx); \ + kmp_int32 poll_val = \ + KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \ + if (KMP_LOCK_STRIP(poll_val) & 1) { \ + syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \ + KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \ + } \ + KMP_MB(); \ + KMP_YIELD(TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); \ + } #endif // KMP_USE_FUTEX #else // KMP_USE_DYNAMIC_LOCK -static kmp_user_lock_p -__kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, kmp_int32 gtid ) -{ - kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; - - // - // Because of the double-check, the following load - // doesn't need to be volatile. - // - kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR( *lck_pp ); - - if ( lck == NULL ) { - void * idx; - - // Allocate & initialize the lock. 
- // Remember allocated locks in table in order to free them in __kmp_cleanup() - lck = __kmp_user_lock_allocate( &idx, gtid, kmp_lf_critical_section ); - __kmp_init_user_lock_with_checks( lck ); - __kmp_set_user_lock_location( lck, loc ); +static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit, + ident_t const *loc, + kmp_int32 gtid) { + kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit; + + // Because of the double-check, the following load doesn't need to be volatile + kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); + + if (lck == NULL) { + void *idx; + + // Allocate & initialize the lock. + // Remember alloc'ed locks in table in order to free them in __kmp_cleanup() + lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section); + __kmp_init_user_lock_with_checks(lck); + __kmp_set_user_lock_location(lck, loc); #if USE_ITT_BUILD - __kmp_itt_critical_creating( lck ); - // __kmp_itt_critical_creating() should be called *before* the first usage of underlying - // lock. It is the only place where we can guarantee it. There are chances the lock will - // destroyed with no usage, but it is not a problem, because this is not real event seen - // by user but rather setting name for object (lock). See more details in kmp_itt.h. + __kmp_itt_critical_creating(lck); +// __kmp_itt_critical_creating() should be called *before* the first usage +// of underlying lock. It is the only place where we can guarantee it. There +// are chances the lock will destroyed with no usage, but it is not a +// problem, because this is not real event seen by user but rather setting +// name for object (lock). See more details in kmp_itt.h. #endif /* USE_ITT_BUILD */ - // - // Use a cmpxchg instruction to slam the start of the critical - // section with the lock pointer. If another thread beat us - // to it, deallocate the lock, and use the lock that the other - // thread allocated. - // - int status = KMP_COMPARE_AND_STORE_PTR( lck_pp, 0, lck ); + // Use a cmpxchg instruction to slam the start of the critical section with + // the lock pointer. If another thread beat us to it, deallocate the lock, + // and use the lock that the other thread allocated. + int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck); - if ( status == 0 ) { - // Deallocate the lock and reload the value. + if (status == 0) { +// Deallocate the lock and reload the value. #if USE_ITT_BUILD - __kmp_itt_critical_destroyed( lck ); - // Let ITT know the lock is destroyed and the same memory location may be reused for - // another purpose. + __kmp_itt_critical_destroyed(lck); +// Let ITT know the lock is destroyed and the same memory location may be reused +// for another purpose. #endif /* USE_ITT_BUILD */ - __kmp_destroy_user_lock_with_checks( lck ); - __kmp_user_lock_free( &idx, gtid, lck ); - lck = (kmp_user_lock_p)TCR_PTR( *lck_pp ); - KMP_DEBUG_ASSERT( lck != NULL ); - } + __kmp_destroy_user_lock_with_checks(lck); + __kmp_user_lock_free(&idx, gtid, lck); + lck = (kmp_user_lock_p)TCR_PTR(*lck_pp); + KMP_DEBUG_ASSERT(lck != NULL); } - return lck; + } + return lck; } #endif // KMP_USE_DYNAMIC_LOCK @@ -1084,183 +1056,186 @@ __kmp_get_critical_section_ptr( kmp_critical_name * crit, ident_t const * loc, k @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number . -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or -some other suitably unique value. +@param crit identity of the critical section. 
This could be a pointer to a lock +associated with the critical section, or some other suitably unique value. Enter code protected by a `critical` construct. This function blocks until the executing thread can enter the critical section. */ -void -__kmpc_critical( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) -{ +void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *crit) { #if KMP_USE_DYNAMIC_LOCK - __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); + __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none); #else - KMP_COUNT_BLOCK(OMP_CRITICAL); - KMP_TIME_PARTITIONED_BLOCK(OMP_critical_wait); /* Time spent waiting to enter the critical section */ - kmp_user_lock_p lck; + KMP_COUNT_BLOCK(OMP_CRITICAL); + KMP_TIME_PARTITIONED_BLOCK( + OMP_critical_wait); /* Time spent waiting to enter the critical section */ + kmp_user_lock_p lck; - KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) ); + KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); - //TODO: add THR_OVHD_STATE + // TODO: add THR_OVHD_STATE - KMP_CHECK_USER_LOCK_INIT(); + KMP_CHECK_USER_LOCK_INIT(); - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { + lck = (kmp_user_lock_p)crit; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { + lck = (kmp_user_lock_p)crit; + } #endif - else { // ticket, queuing or drdpa - lck = __kmp_get_critical_section_ptr( crit, loc, global_tid ); - } + else { // ticket, queuing or drdpa + lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); + } - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_critical, loc, lck ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_critical, loc, lck); - /* since the critical directive binds to all threads, not just - * the current team we have to check this even if we are in a - * serialized team */ - /* also, even if we are the uber thread, we still have to conduct the lock, - * as we have to contend with sibling threads */ +// since the critical directive binds to all threads, not just the current +// team we have to check this even if we are in a serialized team. +// also, even if we are the uber thread, we still have to conduct the lock, +// as we have to contend with sibling threads. #if USE_ITT_BUILD - __kmp_itt_critical_acquiring( lck ); + __kmp_itt_critical_acquiring(lck); #endif /* USE_ITT_BUILD */ - // Value of 'crit' should be good for using as a critical_id of the critical section directive. - __kmp_acquire_user_lock_with_checks( lck, global_tid ); + // Value of 'crit' should be good for using as a critical_id of the critical + // section directive. 
+ __kmp_acquire_user_lock_with_checks(lck, global_tid); #if USE_ITT_BUILD - __kmp_itt_critical_acquired( lck ); + __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ - KMP_START_EXPLICIT_TIMER(OMP_critical); - KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid )); + KMP_START_EXPLICIT_TIMER(OMP_critical); + KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); #endif // KMP_USE_DYNAMIC_LOCK } #if KMP_USE_DYNAMIC_LOCK // Converts the given hint to an internal lock implementation -static __forceinline kmp_dyna_lockseq_t -__kmp_map_hint_to_lock(uintptr_t hint) -{ +static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) { #if KMP_USE_TSX -# define KMP_TSX_LOCK(seq) lockseq_##seq +#define KMP_TSX_LOCK(seq) lockseq_##seq #else -# define KMP_TSX_LOCK(seq) __kmp_user_lock_seq +#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) +#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm) #else -# define KMP_CPUINFO_RTM 0 +#define KMP_CPUINFO_RTM 0 #endif - // Hints that do not require further logic - if (hint & kmp_lock_hint_hle) - return KMP_TSX_LOCK(hle); - if (hint & kmp_lock_hint_rtm) - return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm): __kmp_user_lock_seq; - if (hint & kmp_lock_hint_adaptive) - return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive): __kmp_user_lock_seq; + // Hints that do not require further logic + if (hint & kmp_lock_hint_hle) + return KMP_TSX_LOCK(hle); + if (hint & kmp_lock_hint_rtm) + return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq; + if (hint & kmp_lock_hint_adaptive) + return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq; - // Rule out conflicting hints first by returning the default lock - if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) - return __kmp_user_lock_seq; - if ((hint & omp_lock_hint_speculative) && (hint & omp_lock_hint_nonspeculative)) - return __kmp_user_lock_seq; + // Rule out conflicting hints first by returning the default lock + if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended)) + return __kmp_user_lock_seq; + if ((hint & omp_lock_hint_speculative) && + (hint & omp_lock_hint_nonspeculative)) + return __kmp_user_lock_seq; - // Do not even consider speculation when it appears to be contended - if (hint & omp_lock_hint_contended) - return lockseq_queuing; + // Do not even consider speculation when it appears to be contended + if (hint & omp_lock_hint_contended) + return lockseq_queuing; - // Uncontended lock without speculation - if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) - return lockseq_tas; + // Uncontended lock without speculation + if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative)) + return lockseq_tas; - // HLE lock for speculation - if (hint & omp_lock_hint_speculative) - return KMP_TSX_LOCK(hle); + // HLE lock for speculation + if (hint & omp_lock_hint_speculative) + return KMP_TSX_LOCK(hle); - return __kmp_user_lock_seq; + return __kmp_user_lock_seq; } /*! @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number. -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, -or some other suitably unique value. +@param crit identity of the critical section. This could be a pointer to a lock +associated with the critical section, or some other suitably unique value. @param hint the lock hint. 
-Enter code protected by a `critical` construct with a hint. The hint value is used to suggest a lock implementation. -This function blocks until the executing thread can enter the critical section unless the hint suggests use of +Enter code protected by a `critical` construct with a hint. The hint value is +used to suggest a lock implementation. This function blocks until the executing +thread can enter the critical section unless the hint suggests use of speculative execution and the hardware supports it. */ -void -__kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit, uintptr_t hint ) -{ - KMP_COUNT_BLOCK(OMP_CRITICAL); - kmp_user_lock_p lck; - - KC_TRACE( 10, ("__kmpc_critical: called T#%d\n", global_tid ) ); - - kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; - // Check if it is initialized. - if (*lk == 0) { - kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); - if (KMP_IS_D_LOCK(lckseq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(lckseq)); - } else { - __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); - } - } - // Branch for accessing the actual lock object and set operation. This branching is inevitable since - // this lock initialization does not follow the normal dispatch path (lock table is not used). - if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint)); - } -# if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -# endif -# if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(lck, global_tid); - } else -# elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); - } else -# endif - { - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); - } +void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *crit, uintptr_t hint) { + KMP_COUNT_BLOCK(OMP_CRITICAL); + kmp_user_lock_p lck; + + KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid)); + + kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; + // Check if it is initialized. + if (*lk == 0) { + kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint); + if (KMP_IS_D_LOCK(lckseq)) { + KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, + KMP_GET_D_TAG(lckseq)); } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_map_hint_to_lock(hint)); - } -# if USE_ITT_BUILD - __kmp_itt_critical_acquiring(lck); -# endif - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); + __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq)); + } + } + // Branch for accessing the actual lock object and set operation. This + // branching is inevitable since this lock initialization does not follow the + // normal dispatch path (lock table is not used). 
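  // A nonzero direct (D) tag means the lock object lives inline in *crit
  // itself; a zero tag means *crit holds a pointer to an indirect (I) lock.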
+ if (KMP_EXTRACT_D_TAG(lk) != 0) { + lck = (kmp_user_lock_p)lk; + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, + __kmp_map_hint_to_lock(hint)); + } +#if USE_ITT_BUILD + __kmp_itt_critical_acquiring(lck); +#endif +#if KMP_USE_INLINED_TAS + if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { + KMP_ACQUIRE_TAS_LOCK(lck, global_tid); + } else +#elif KMP_USE_INLINED_FUTEX + if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { + KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid); + } else +#endif + { + KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); } + } else { + kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); + lck = ilk->lock; + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, + __kmp_map_hint_to_lock(hint)); + } +#if USE_ITT_BUILD + __kmp_itt_critical_acquiring(lck); +#endif + KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); + } #if USE_ITT_BUILD - __kmp_itt_critical_acquired( lck ); + __kmp_itt_critical_acquired(lck); #endif /* USE_ITT_BUILD */ - KMP_PUSH_PARTITIONED_TIMER(OMP_critical); - KA_TRACE( 15, ("__kmpc_critical: done T#%d\n", global_tid )); + KMP_PUSH_PARTITIONED_TIMER(OMP_critical); + KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid)); } // __kmpc_critical_with_hint #endif // KMP_USE_DYNAMIC_LOCK @@ -1269,91 +1244,91 @@ __kmpc_critical_with_hint( ident_t * loc, kmp_int32 global_tid, kmp_critical_nam @ingroup WORK_SHARING @param loc source location information. @param global_tid global thread number . -@param crit identity of the critical section. This could be a pointer to a lock associated with the critical section, or -some other suitably unique value. +@param crit identity of the critical section. This could be a pointer to a lock +associated with the critical section, or some other suitably unique value. Leave a critical section, releasing any lock that was held during its execution. 
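For reference, a minimal sketch of how a compiler might pair this call with
@ref __kmpc_critical when lowering a `critical` construct (the `ident_t` and
the zero-initialized `kmp_critical_name` are assumed here to be
compiler-emitted statics; actual code generation is compiler-specific):
@code
static kmp_critical_name crit_name;  // identifies this critical region
extern ident_t loc_info;             // source location descriptor (assumed)

void lowered_critical_region(kmp_int32 global_tid) {
  __kmpc_critical(&loc_info, global_tid, &crit_name); // blocks until entry
  // ... user code protected by the critical construct ...
  __kmpc_end_critical(&loc_info, global_tid, &crit_name);
}
@endcode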
*/ -void -__kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) -{ - kmp_user_lock_p lck; +void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *crit) { + kmp_user_lock_p lck; - KC_TRACE( 10, ("__kmpc_end_critical: called T#%d\n", global_tid )); + KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid)); #if KMP_USE_DYNAMIC_LOCK - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - KMP_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -# if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); -# endif -# if KMP_USE_INLINED_TAS - if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(lck, global_tid); - } else -# elif KMP_USE_INLINED_FUTEX - if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(lck, global_tid); - } else -# endif - { - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } - } else { - kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - KMP_ASSERT(ilk != NULL); - lck = ilk->lock; - if (__kmp_env_consistency_check) { - __kmp_pop_sync(global_tid, ct_critical, loc); - } -# if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); -# endif - KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); + if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + KMP_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_pop_sync(global_tid, ct_critical, loc); } +#if USE_ITT_BUILD + __kmp_itt_critical_releasing(lck); +#endif +#if KMP_USE_INLINED_TAS + if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) { + KMP_RELEASE_TAS_LOCK(lck, global_tid); + } else +#elif KMP_USE_INLINED_FUTEX + if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) { + KMP_RELEASE_FUTEX_LOCK(lck, global_tid); + } else +#endif + { + KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); + } + } else { + kmp_indirect_lock_t *ilk = + (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); + KMP_ASSERT(ilk != NULL); + lck = ilk->lock; + if (__kmp_env_consistency_check) { + __kmp_pop_sync(global_tid, ct_critical, loc); + } +#if USE_ITT_BUILD + __kmp_itt_critical_releasing(lck); +#endif + KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid); + } #else // KMP_USE_DYNAMIC_LOCK - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) { + lck = (kmp_user_lock_p)crit; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_CRITICAL_SIZE ) ) { - lck = (kmp_user_lock_p)crit; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) { + lck = (kmp_user_lock_p)crit; + } #endif - else { // ticket, queuing or drdpa - lck = (kmp_user_lock_p) TCR_PTR(*((kmp_user_lock_p *)crit)); - } + else { // ticket, queuing or drdpa + lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit)); + } - KMP_ASSERT(lck != NULL); + KMP_ASSERT(lck != NULL); - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_critical, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); #if USE_ITT_BUILD - __kmp_itt_critical_releasing( lck ); + __kmp_itt_critical_releasing(lck); #endif /* 
USE_ITT_BUILD */ - // Value of 'crit' should be good for using as a critical_id of the critical section directive. - __kmp_release_user_lock_with_checks( lck, global_tid ); + // Value of 'crit' should be good for using as a critical_id of the critical + // section directive. + __kmp_release_user_lock_with_checks(lck, global_tid); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_critical)) { - ompt_callbacks.ompt_callback(ompt_event_release_critical)( - (uint64_t) lck); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_critical)) { + ompt_callbacks.ompt_callback(ompt_event_release_critical)((uint64_t)lck); + } #endif #endif // KMP_USE_DYNAMIC_LOCK - KMP_POP_PARTITIONED_TIMER(); - KA_TRACE( 15, ("__kmpc_end_critical: done T#%d\n", global_tid )); + KMP_POP_PARTITIONED_TIMER(); + KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid)); } /*! @@ -1362,27 +1337,26 @@ __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, kmp_critical_name *crit) @param global_tid thread id. @return one if the thread should execute the master block, zero otherwise -Start execution of a combined barrier and master. The barrier is executed inside this function. +Start execution of a combined barrier and master. The barrier is executed inside +this function. */ -kmp_int32 -__kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) -{ - int status; +kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { + int status; - KC_TRACE( 10, ("__kmpc_barrier_master: called T#%d\n", global_tid ) ); + KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid)); - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - if ( __kmp_env_consistency_check ) - __kmp_check_barrier( global_tid, ct_barrier, loc ); + if (__kmp_env_consistency_check) + __kmp_check_barrier(global_tid, ct_barrier, loc); #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + __kmp_threads[global_tid]->th.th_ident = loc; #endif - status = __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL ); + status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); - return (status != 0) ? 0 : 1; + return (status != 0) ? 0 : 1; } /*! @@ -1394,12 +1368,10 @@ Complete the execution of a combined barrier and master. This function should only be called at the completion of the master code. Other threads will still be waiting at the barrier and this call releases them. */ -void -__kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) -{ - KC_TRACE( 10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid )); +void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) { + KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid)); - __kmp_end_split_barrier ( bs_plain_barrier, global_tid ); + __kmp_end_split_barrier(bs_plain_barrier, global_tid); } /*! @@ -1412,46 +1384,44 @@ Start execution of a combined barrier and master(nowait) construct. The barrier is executed inside this function. There is no equivalent "end" function, since the */ -kmp_int32 -__kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid ) -{ - kmp_int32 ret; +kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { + kmp_int32 ret; - KC_TRACE( 10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid )); + KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid)); - if (! 
TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); // ??? What does it mean for the user? - } - __kmp_check_barrier( global_tid, ct_barrier, loc ); + if (__kmp_env_consistency_check) { + if (loc == 0) { + KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user? } + __kmp_check_barrier(global_tid, ct_barrier, loc); + } #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + __kmp_threads[global_tid]->th.th_ident = loc; #endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); - ret = __kmpc_master (loc, global_tid); + ret = __kmpc_master(loc, global_tid); - if ( __kmp_env_consistency_check ) { - /* there's no __kmpc_end_master called; so the (stats) */ - /* actions of __kmpc_end_master are done here */ + if (__kmp_env_consistency_check) { + /* there's no __kmpc_end_master called; so the (stats) */ + /* actions of __kmpc_end_master are done here */ - if ( global_tid < 0 ) { - KMP_WARNING( ThreadIdentInvalid ); - } - if (ret) { - /* only one thread should do the pop since only */ - /* one did the push (see __kmpc_master()) */ + if (global_tid < 0) { + KMP_WARNING(ThreadIdentInvalid); + } + if (ret) { + /* only one thread should do the pop since only */ + /* one did the push (see __kmpc_master()) */ - __kmp_pop_sync( global_tid, ct_master, loc ); - } + __kmp_pop_sync(global_tid, ct_master, loc); } + } - return (ret); + return (ret); } /* The BARRIER for a SINGLE process section is always explicit */ @@ -1462,46 +1432,44 @@ __kmpc_barrier_master_nowait( ident_t * loc, kmp_int32 global_tid ) @return One if this thread should execute the single construct, zero otherwise. Test whether to execute a single construct. -There are no implicit barriers in the two "single" calls, rather the compiler should -introduce an explicit barrier if it is required. +There are no implicit barriers in the two "single" calls, rather the compiler +should introduce an explicit barrier if it is required. */ -kmp_int32 -__kmpc_single(ident_t *loc, kmp_int32 global_tid) -{ - kmp_int32 rc = __kmp_enter_single( global_tid, loc, TRUE ); +kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) { + kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE); - if (rc) { - // We are going to execute the single statement, so we should count it. - KMP_COUNT_BLOCK(OMP_SINGLE); - KMP_PUSH_PARTITIONED_TIMER(OMP_single); - } + if (rc) { + // We are going to execute the single statement, so we should count it. 
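    // (Only the thread that won __kmp_enter_single reaches this branch; the
    // other threads get rc == 0 and skip the single block entirely.)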
+ KMP_COUNT_BLOCK(OMP_SINGLE); + KMP_PUSH_PARTITIONED_TIMER(OMP_single); + } #if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - int tid = __kmp_tid_from_gtid( global_tid ); + kmp_info_t *this_thr = __kmp_threads[global_tid]; + kmp_team_t *team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(global_tid); - if (ompt_enabled) { - if (rc) { - if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, - team->t.ompt_team_info.microtask); - } - } else { - if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { - ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } - this_thr->th.ompt_thread_info.state = ompt_state_wait_single; - } + if (ompt_enabled) { + if (rc) { + if (ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)) { + ompt_callbacks.ompt_callback(ompt_event_single_in_block_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id, + team->t.ompt_team_info.microtask); + } + } else { + if (ompt_callbacks.ompt_callback(ompt_event_single_others_begin)) { + ompt_callbacks.ompt_callback(ompt_event_single_others_begin)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } + this_thr->th.ompt_thread_info.state = ompt_state_wait_single; } + } #endif - return rc; + return rc; } /*! @@ -1513,23 +1481,21 @@ Mark the end of a single construct. This function should only be called by the thread that executed the block of code protected by the `single` construct. */ -void -__kmpc_end_single(ident_t *loc, kmp_int32 global_tid) -{ - __kmp_exit_single( global_tid ); - KMP_POP_PARTITIONED_TIMER(); +void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) { + __kmp_exit_single(global_tid); + KMP_POP_PARTITIONED_TIMER(); #if OMPT_SUPPORT && OMPT_TRACE - kmp_info_t *this_thr = __kmp_threads[ global_tid ]; - kmp_team_t *team = this_thr -> th.th_team; - int tid = __kmp_tid_from_gtid( global_tid ); - - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { - ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( - team->t.ompt_team_info.parallel_id, - team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); - } + kmp_info_t *this_thr = __kmp_threads[global_tid]; + kmp_team_t *team = this_thr->th.th_team; + int tid = __kmp_tid_from_gtid(global_tid); + + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)) { + ompt_callbacks.ompt_callback(ompt_event_single_in_block_end)( + team->t.ompt_team_info.parallel_id, + team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id); + } #endif } @@ -1540,182 +1506,144 @@ __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) Mark the end of a statically scheduled loop. 
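A rough sketch of where this call sits in compiler-lowered code (the
per-thread bounds, location descriptor, and thread id below are assumptions
for illustration; the bounds are assumed to come from the matching static-init
entry point, which is not part of this routine):
@code
extern kmp_int32 lower, upper;  // this thread's chunk, assumed to be produced
                                // by the compiler-emitted static-init call
extern ident_t loc_info;        // source location descriptor (assumed)

void lowered_static_loop_tail(kmp_int32 global_tid) {
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // ... loop body ...
  }
  __kmpc_for_static_fini(&loc_info, global_tid); // close the worksharing region
}
@endcode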
*/ -void -__kmpc_for_static_fini( ident_t *loc, kmp_int32 global_tid ) -{ - KE_TRACE( 10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); +void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) { + KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid)); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_end)) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_end)( - team_info->parallel_id, task_info->task_id); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_loop_end)(team_info->parallel_id, + task_info->task_id); + } #endif - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_pdo, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_workshare(global_tid, ct_pdo, loc); } -/* - * User routines which take C-style arguments (call by value) - * different from the Fortran equivalent routines - */ +// User routines which take C-style arguments (call by value) +// different from the Fortran equivalent routines -void -ompc_set_num_threads( int arg ) -{ -// !!!!! TODO: check the per-task binding - __kmp_set_num_threads( arg, __kmp_entry_gtid() ); +void ompc_set_num_threads(int arg) { + // !!!!! TODO: check the per-task binding + __kmp_set_num_threads(arg, __kmp_entry_gtid()); } -void -ompc_set_dynamic( int flag ) -{ - kmp_info_t *thread; +void ompc_set_dynamic(int flag) { + kmp_info_t *thread; - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); + /* For the thread-private implementation of the internal controls */ + thread = __kmp_entry_thread(); - __kmp_save_internal_controls( thread ); + __kmp_save_internal_controls(thread); - set__dynamic( thread, flag ? TRUE : FALSE ); + set__dynamic(thread, flag ? TRUE : FALSE); } -void -ompc_set_nested( int flag ) -{ - kmp_info_t *thread; +void ompc_set_nested(int flag) { + kmp_info_t *thread; - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); + /* For the thread-private internal controls implementation */ + thread = __kmp_entry_thread(); - __kmp_save_internal_controls( thread ); + __kmp_save_internal_controls(thread); - set__nested( thread, flag ? TRUE : FALSE ); + set__nested(thread, flag ? TRUE : FALSE); } -void -ompc_set_max_active_levels( int max_active_levels ) -{ - /* TO DO */ - /* we want per-task implementation of this internal control */ +void ompc_set_max_active_levels(int max_active_levels) { + /* TO DO */ + /* we want per-task implementation of this internal control */ - /* For the per-thread internal controls implementation */ - __kmp_set_max_active_levels( __kmp_entry_gtid(), max_active_levels ); + /* For the per-thread internal controls implementation */ + __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels); } -void -ompc_set_schedule( omp_sched_t kind, int modifier ) -{ -// !!!!! TODO: check the per-task binding - __kmp_set_schedule( __kmp_entry_gtid(), ( kmp_sched_t ) kind, modifier ); +void ompc_set_schedule(omp_sched_t kind, int modifier) { + // !!!!! 
TODO: check the per-task binding + __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier); } -int -ompc_get_ancestor_thread_num( int level ) -{ - return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), level ); +int ompc_get_ancestor_thread_num(int level) { + return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level); } -int -ompc_get_team_size( int level ) -{ - return __kmp_get_team_size( __kmp_entry_gtid(), level ); +int ompc_get_team_size(int level) { + return __kmp_get_team_size(__kmp_entry_gtid(), level); } -void -kmpc_set_stacksize( int arg ) -{ - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( arg ); +void kmpc_set_stacksize(int arg) { + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize(arg); } -void -kmpc_set_stacksize_s( size_t arg ) -{ - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( arg ); +void kmpc_set_stacksize_s(size_t arg) { + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize(arg); } -void -kmpc_set_blocktime( int arg ) -{ - int gtid, tid; - kmp_info_t *thread; +void kmpc_set_blocktime(int arg) { + int gtid, tid; + kmp_info_t *thread; - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); - __kmp_aux_set_blocktime( arg, thread, tid ); + __kmp_aux_set_blocktime(arg, thread, tid); } -void -kmpc_set_library( int arg ) -{ - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( (enum library_type)arg ); +void kmpc_set_library(int arg) { + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library((enum library_type)arg); } -void -kmpc_set_defaults( char const * str ) -{ - // __kmp_aux_set_defaults initializes the library if needed - __kmp_aux_set_defaults( str, KMP_STRLEN( str ) ); +void kmpc_set_defaults(char const *str) { + // __kmp_aux_set_defaults initializes the library if needed + __kmp_aux_set_defaults(str, KMP_STRLEN(str)); } -void -kmpc_set_disp_num_buffers( int arg ) -{ - // ignore after initialization because some teams have already - // allocated dispatch buffers - if( __kmp_init_serial == 0 && arg > 0 ) - __kmp_dispatch_num_buffers = arg; +void kmpc_set_disp_num_buffers(int arg) { + // ignore after initialization because some teams have already + // allocated dispatch buffers + if (__kmp_init_serial == 0 && arg > 0) + __kmp_dispatch_num_buffers = arg; } -int -kmpc_set_affinity_mask_proc( int proc, void **mask ) -{ +int kmpc_set_affinity_mask_proc(int proc, void **mask) { #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; + return -1; #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc( proc, mask ); + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity_mask_proc(proc, mask); #endif } -int -kmpc_unset_affinity_mask_proc( int proc, void **mask ) -{ +int kmpc_unset_affinity_mask_proc(int proc, void **mask) { #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; + return -1; #else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc( proc, mask ); + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_unset_affinity_mask_proc(proc, mask); #endif } -int -kmpc_get_affinity_mask_proc( int proc, void **mask ) -{ +int kmpc_get_affinity_mask_proc(int proc, void **mask) { #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; + return -1; #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc( proc, mask ); + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity_mask_proc(proc, mask); #endif } - /* -------------------------------------------------------------------------- */ /*! @ingroup THREADPRIVATE @@ -1726,29 +1654,33 @@ kmpc_get_affinity_mask_proc( int proc, void **mask ) @param cpy_func helper function to call for copying data @param didit flag variable: 1=single thread; 0=not single thread -__kmpc_copyprivate implements the interface for the private data broadcast needed for -the copyprivate clause associated with a single region in an OpenMP* program (both C and Fortran). +__kmpc_copyprivate implements the interface for the private data broadcast +needed for the copyprivate clause associated with a single region in an +OpenMP* program (both C and Fortran). All threads participating in the parallel region call this routine. -One of the threads (called the single thread) should have the didit variable set to 1 -and all other threads should have that variable set to 0. +One of the threads (called the single thread) should have the didit +variable set to 1 and all other threads should have that variable set to 0. All threads pass a pointer to a data buffer (cpy_data) that they have built. -The OpenMP specification forbids the use of nowait on the single region when a copyprivate -clause is present. However, @ref __kmpc_copyprivate implements a barrier internally to avoid -race conditions, so the code generation for the single region should avoid generating a barrier -after the call to @ref __kmpc_copyprivate. +The OpenMP specification forbids the use of nowait on the single region when a +copyprivate clause is present. However, @ref __kmpc_copyprivate implements a +barrier internally to avoid race conditions, so the code generation for the +single region should avoid generating a barrier after the call to @ref +__kmpc_copyprivate. The gtid parameter is the global thread id for the current thread. The loc parameter is a pointer to source location information. -Internal implementation: The single thread will first copy its descriptor address (cpy_data) -to a team-private location, then the other threads will each call the function pointed to by -the parameter cpy_func, which carries out the copy by copying the data using the cpy_data buffer. +Internal implementation: The single thread will first copy its descriptor +address (cpy_data) to a team-private location, then the other threads will each +call the function pointed to by the parameter cpy_func, which carries out the +copy by copying the data using the cpy_data buffer. -The cpy_func routine used for the copy and the contents of the data area defined by cpy_data -and cpy_size may be built in any fashion that will allow the copy to be done. For instance, -the cpy_data buffer can hold the actual data to be copied or it may hold a list of pointers -to the data. 
The cpy_func routine must interpret the cpy_data buffer appropriately. +The cpy_func routine used for the copy and the contents of the data area defined +by cpy_data and cpy_size may be built in any fashion that will allow the copy +to be done. For instance, the cpy_data buffer can hold the actual data to be +copied or it may hold a list of pointers to the data. The cpy_func routine must +interpret the cpy_data buffer appropriately. The interface to cpy_func is as follows: @code @@ -1757,891 +1689,886 @@ void cpy_func( void *destination, void *source ) where void *destination is the cpy_data pointer for the thread being copied to and void *source is the cpy_data pointer for the thread being copied from. */ -void -__kmpc_copyprivate( ident_t *loc, kmp_int32 gtid, size_t cpy_size, void *cpy_data, void(*cpy_func)(void*,void*), kmp_int32 didit ) -{ - void **data_ptr; +void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, + void *cpy_data, void (*cpy_func)(void *, void *), + kmp_int32 didit) { + void **data_ptr; - KC_TRACE( 10, ("__kmpc_copyprivate: called T#%d\n", gtid )); + KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid)); - KMP_MB(); + KMP_MB(); - data_ptr = & __kmp_team_from_gtid( gtid )->t.t_copypriv_data; + data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data; - if ( __kmp_env_consistency_check ) { - if ( loc == 0 ) { - KMP_WARNING( ConstructIdentInvalid ); - } + if (__kmp_env_consistency_check) { + if (loc == 0) { + KMP_WARNING(ConstructIdentInvalid); } + } - /* ToDo: Optimize the following two barriers into some kind of split barrier */ + // ToDo: Optimize the following two barriers into some kind of split barrier - if (didit) *data_ptr = cpy_data; + if (didit) + *data_ptr = cpy_data; - /* This barrier is not a barrier region boundary */ +/* This barrier is not a barrier region boundary */ #if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; + __kmp_threads[gtid]->th.th_ident = loc; #endif - __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - if (! didit) (*cpy_func)( cpy_data, *data_ptr ); + if (!didit) + (*cpy_func)(cpy_data, *data_ptr); - /* Consider next barrier the user-visible barrier for barrier region boundaries */ - /* Nesting checks are already handled by the single construct checks */ +// Consider next barrier a user-visible barrier for barrier region boundaries +// Nesting checks are already handled by the single construct checks #if USE_ITT_NOTIFY - __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. tasks can overwrite the location) + __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g. 
+// tasks can overwrite the location) #endif - __kmp_barrier( bs_plain_barrier, gtid, FALSE , 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); } /* -------------------------------------------------------------------------- */ -#define INIT_LOCK __kmp_init_user_lock_with_checks -#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks -#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks -#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed -#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks -#define ACQUIRE_NESTED_LOCK_TIMED __kmp_acquire_nested_user_lock_with_checks_timed -#define RELEASE_LOCK __kmp_release_user_lock_with_checks -#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks -#define TEST_LOCK __kmp_test_user_lock_with_checks -#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks -#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks -#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks - - -/* - * TODO: Make check abort messages use location info & pass it - * into with_checks routines - */ +#define INIT_LOCK __kmp_init_user_lock_with_checks +#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks +#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks +#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed +#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks +#define ACQUIRE_NESTED_LOCK_TIMED \ + __kmp_acquire_nested_user_lock_with_checks_timed +#define RELEASE_LOCK __kmp_release_user_lock_with_checks +#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks +#define TEST_LOCK __kmp_test_user_lock_with_checks +#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks +#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks +#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks + +// TODO: Make check abort messages use location info & pass it into +// with_checks routines #if KMP_USE_DYNAMIC_LOCK // internal lock initializer -static __forceinline void -__kmp_init_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) -{ - if (KMP_IS_D_LOCK(seq)) { - KMP_INIT_D_LOCK(lock, seq); +static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock, + kmp_dyna_lockseq_t seq) { + if (KMP_IS_D_LOCK(seq)) { + KMP_INIT_D_LOCK(lock, seq); #if USE_ITT_BUILD - __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); + __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL); #endif - } else { - KMP_INIT_I_LOCK(lock, seq); + } else { + KMP_INIT_I_LOCK(lock, seq); #if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __kmp_itt_lock_creating(ilk->lock, loc); #endif - } + } } // internal nest lock initializer static __forceinline void -__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, kmp_dyna_lockseq_t seq) -{ +__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock, + kmp_dyna_lockseq_t seq) { #if KMP_USE_TSX - // Don't have nested lock implementation for speculative locks - if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) - seq = __kmp_user_lock_seq; -#endif - switch (seq) { - case lockseq_tas: - seq = lockseq_nested_tas; - break; + // Don't have nested lock implementation for speculative locks + if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive) + seq = __kmp_user_lock_seq; +#endif + switch (seq) { + 
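  // Map the base lock sequence to its nested counterpart; sequences without a
  // dedicated nested variant fall back to nested queuing locks.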
case lockseq_tas: + seq = lockseq_nested_tas; + break; #if KMP_USE_FUTEX - case lockseq_futex: - seq = lockseq_nested_futex; - break; -#endif - case lockseq_ticket: - seq = lockseq_nested_ticket; - break; - case lockseq_queuing: - seq = lockseq_nested_queuing; - break; - case lockseq_drdpa: - seq = lockseq_nested_drdpa; - break; - default: - seq = lockseq_nested_queuing; - } - KMP_INIT_I_LOCK(lock, seq); + case lockseq_futex: + seq = lockseq_nested_futex; + break; +#endif + case lockseq_ticket: + seq = lockseq_nested_ticket; + break; + case lockseq_queuing: + seq = lockseq_nested_queuing; + break; + case lockseq_drdpa: + seq = lockseq_nested_drdpa; + break; + default: + seq = lockseq_nested_queuing; + } + KMP_INIT_I_LOCK(lock, seq); #if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __kmp_itt_lock_creating(ilk->lock, loc); + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __kmp_itt_lock_creating(ilk->lock, loc); #endif } /* initialize the lock with a hint */ -void -__kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) -{ - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); - } +void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, + uintptr_t hint) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint"); + } - __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); } /* initialize the lock with a hint */ -void -__kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, uintptr_t hint) -{ - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); - } +void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, + void **user_lock, uintptr_t hint) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint"); + } - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); + __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint)); } #endif // KMP_USE_DYNAMIC_LOCK /* initialize the lock */ -void -__kmpc_init_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_lock"); - } - __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); + + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_lock"); + } + __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); #else // KMP_USE_DYNAMIC_LOCK - static char const * const func = "omp_init_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT( __kmp_init_serial ); + static char const *const func = "omp_init_lock"; + kmp_user_lock_p lck; + KMP_DEBUG_ASSERT(__kmp_init_serial); - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } + if (__kmp_env_consistency_check) { + if (user_lock == NULL) { + 
KMP_FATAL(LockIsUninitialized, func); } + } - KMP_CHECK_USER_LOCK_INIT(); + KMP_CHECK_USER_LOCK_INIT(); - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); - } - INIT_LOCK( lck ); - __kmp_set_user_lock_location( lck, loc ); + else { + lck = __kmp_user_lock_allocate(user_lock, gtid, 0); + } + INIT_LOCK(lck); + __kmp_set_user_lock_location(lck, loc); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_init_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_lock)) { + ompt_callbacks.ompt_callback(ompt_event_init_lock)((uint64_t)lck); + } #endif #if USE_ITT_BUILD - __kmp_itt_lock_creating( lck ); + __kmp_itt_lock_creating(lck); #endif /* USE_ITT_BUILD */ #endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_init_lock /* initialize the lock */ -void -__kmpc_init_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK - KMP_DEBUG_ASSERT(__kmp_init_serial); - if (__kmp_env_consistency_check && user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); - } - __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); + KMP_DEBUG_ASSERT(__kmp_init_serial); + if (__kmp_env_consistency_check && user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock"); + } + __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq); #else // KMP_USE_DYNAMIC_LOCK - static char const * const func = "omp_init_nest_lock"; - kmp_user_lock_p lck; - KMP_DEBUG_ASSERT( __kmp_init_serial ); + static char const *const func = "omp_init_nest_lock"; + kmp_user_lock_p lck; + KMP_DEBUG_ASSERT(__kmp_init_serial); - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } + if (__kmp_env_consistency_check) { + if (user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, func); } + } - KMP_CHECK_USER_LOCK_INIT(); + KMP_CHECK_USER_LOCK_INIT(); - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_user_lock_allocate( user_lock, gtid, 0 ); - } + else { + lck = 
__kmp_user_lock_allocate(user_lock, gtid, 0); + } - INIT_NESTED_LOCK( lck ); - __kmp_set_user_lock_location( lck, loc ); + INIT_NESTED_LOCK(lck); + __kmp_set_user_lock_location(lck, loc); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)) { + ompt_callbacks.ompt_callback(ompt_event_init_nest_lock)((uint64_t)lck); + } #endif #if USE_ITT_BUILD - __kmp_itt_lock_creating( lck ); + __kmp_itt_lock_creating(lck); #endif /* USE_ITT_BUILD */ #endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_init_nest_lock -void -__kmpc_destroy_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK -# if USE_ITT_BUILD - kmp_user_lock_p lck; - if (KMP_EXTRACT_D_TAG(user_lock) == 0) { - lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; - } else { - lck = (kmp_user_lock_p)user_lock; - } - __kmp_itt_lock_destroyed(lck); -# endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); +#if USE_ITT_BUILD + kmp_user_lock_p lck; + if (KMP_EXTRACT_D_TAG(user_lock) == 0) { + lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock; + } else { + lck = (kmp_user_lock_p)user_lock; + } + __kmp_itt_lock_destroyed(lck); +#endif + KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else - kmp_user_lock_p lck; + kmp_user_lock_p lck; - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock"); + } #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_destroy_lock)) { + ompt_callbacks.ompt_callback(ompt_event_destroy_lock)((uint64_t)lck); + } #endif #if USE_ITT_BUILD - __kmp_itt_lock_destroyed( lck ); + __kmp_itt_lock_destroyed(lck); #endif /* USE_ITT_BUILD */ - DESTROY_LOCK( lck ); + DESTROY_LOCK(lck); - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - ; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { + ; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - ; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + ; + } #endif - else { - __kmp_user_lock_free( user_lock, gtid, lck ); - } + else { + __kmp_user_lock_free(user_lock, gtid, lck); + } #endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_destroy_lock /* destroy the lock */ -void -__kmpc_destroy_nest_lock( ident_t * loc, kmp_int32 
gtid, void ** user_lock ) { +void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK -# if USE_ITT_BUILD - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); - __kmp_itt_lock_destroyed(ilk->lock); -# endif - KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); +#if USE_ITT_BUILD + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock); + __kmp_itt_lock_destroyed(ilk->lock); +#endif + KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; + kmp_user_lock_p lck; - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_destroy_nest_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock"); + } #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { - ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t) lck); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)) { + ompt_callbacks.ompt_callback(ompt_event_destroy_nest_lock)((uint64_t)lck); + } #endif #if USE_ITT_BUILD - __kmp_itt_lock_destroyed( lck ); + __kmp_itt_lock_destroyed(lck); #endif /* USE_ITT_BUILD */ - DESTROY_NESTED_LOCK( lck ); + DESTROY_NESTED_LOCK(lck); - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - ; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + ; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - ; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + ; + } #endif - else { - __kmp_user_lock_free( user_lock, gtid, lck ); - } + else { + __kmp_user_lock_free(user_lock, gtid, lck); + } #endif // KMP_USE_DYNAMIC_LOCK } // __kmpc_destroy_nest_lock -void -__kmpc_set_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { - KMP_COUNT_BLOCK(OMP_set_lock); +void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { + KMP_COUNT_BLOCK(OMP_set_lock); #if KMP_USE_DYNAMIC_LOCK - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); // itt function will get to the right lock object. 
-# endif -# if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); - } else -# endif - { - __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -# endif + int tag = KMP_EXTRACT_D_TAG(user_lock); +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring( + (kmp_user_lock_p) + user_lock); // itt function will get to the right lock object. +#endif +#if KMP_USE_INLINED_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + KMP_ACQUIRE_TAS_LOCK(user_lock, gtid); + } else +#elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid); + } else +#endif + { + __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } +#if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +#endif #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; + kmp_user_lock_p lck; - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_set_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); + __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ - ACQUIRE_LOCK( lck, gtid ); + ACQUIRE_LOCK(lck, gtid); #if USE_ITT_BUILD - __kmp_itt_lock_acquired( lck ); + __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { - ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_acquired_lock)) { + ompt_callbacks.ompt_callback(ompt_event_acquired_lock)((uint64_t)lck); + } #endif #endif // KMP_USE_DYNAMIC_LOCK } -void -__kmpc_set_nest_lock( ident_t * loc, kmp_int32 gtid, void ** user_lock ) { +void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif - KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +#endif + KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid); +#if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); #endif #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - // missing support here: need to know whether acquired first or not - } + if (ompt_enabled) { + // missing support here: need to know whether acquired first or not + } #endif #else // KMP_USE_DYNAMIC_LOCK - int acquire_status; - kmp_user_lock_p lck; + int acquire_status; + kmp_user_lock_p lck; - 
if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_set_nest_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); + __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ - ACQUIRE_NESTED_LOCK( lck, gtid, &acquire_status ); + ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status); #if USE_ITT_BUILD - __kmp_itt_lock_acquired( lck ); + __kmp_itt_lock_acquired(lck); #endif /* USE_ITT_BUILD */ #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { - if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)((uint64_t) lck); - } else { - if(ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) - ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)((uint64_t) lck); - } + if (ompt_enabled) { + if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) { + if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)) + ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_first)( + (uint64_t)lck); + } else { + if (ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)) + ompt_callbacks.ompt_callback(ompt_event_acquired_nest_lock_next)( + (uint64_t)lck); } + } #endif #endif // KMP_USE_DYNAMIC_LOCK } -void -__kmpc_unset_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ +void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -# endif -# if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_RELEASE_TAS_LOCK(user_lock, gtid); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); - } else -# endif - { - __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } + int tag = KMP_EXTRACT_D_TAG(user_lock); +#if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +#endif +#if KMP_USE_INLINED_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + KMP_RELEASE_TAS_LOCK(user_lock, gtid); + } else +#elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_RELEASE_FUTEX_LOCK(user_lock, gtid); + } else +#endif + { + __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; + kmp_user_lock_p lck; - /* Can't use serial interval since not block structured */ - /* release the lock */ + /* Can't use serial interval since not block structured */ + /* release the lock */ - if ( ( __kmp_user_lock_kind == 
lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // "fast" path implemented to fix customer performance issue + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) +// "fast" path implemented to fix customer performance issue #if USE_ITT_BUILD - __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif /* USE_ITT_BUILD */ - TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); - KMP_MB(); - return; + TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0); + KMP_MB(); + return; #else - lck = (kmp_user_lock_p)user_lock; + lck = (kmp_user_lock_p)user_lock; #endif - } + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_unset_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_releasing( lck ); + __kmp_itt_lock_releasing(lck); #endif /* USE_ITT_BUILD */ - RELEASE_LOCK( lck, gtid ); + RELEASE_LOCK(lck, gtid); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_lock)) { - ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t) lck); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_release_lock)) { + ompt_callbacks.ompt_callback(ompt_event_release_lock)((uint64_t)lck); + } #endif #endif // KMP_USE_DYNAMIC_LOCK } /* release the lock */ -void -__kmpc_unset_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ +void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK -# if USE_ITT_BUILD - __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); -# endif - KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); +#if USE_ITT_BUILD + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); +#endif + KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid); #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; + kmp_user_lock_p lck; - /* Can't use serial interval since not block structured */ + /* Can't use serial interval since not block structured */ - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - // "fast" path implemented to fix customer performance issue - kmp_tas_lock_t *tl = (kmp_tas_lock_t*)user_lock; + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + // "fast" path implemented to fix customer performance issue + kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock; #if USE_ITT_BUILD - __kmp_itt_lock_releasing( (kmp_user_lock_p)user_lock ); + __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock); #endif /* USE_ITT_BUILD */ - if ( --(tl->lk.depth_locked) == 0 ) { - TCW_4(tl->lk.poll, 0); - } - KMP_MB(); - 
return; + if (--(tl->lk.depth_locked) == 0) { + TCW_4(tl->lk.poll, 0); + } + KMP_MB(); + return; #else - lck = (kmp_user_lock_p)user_lock; + lck = (kmp_user_lock_p)user_lock; #endif - } + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_unset_nest_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_releasing( lck ); + __kmp_itt_lock_releasing(lck); #endif /* USE_ITT_BUILD */ - int release_status; - release_status = RELEASE_NESTED_LOCK( lck, gtid ); + int release_status; + release_status = RELEASE_NESTED_LOCK(lck, gtid); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled) { - if (release_status == KMP_LOCK_RELEASED) { - if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( - (uint64_t) lck); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)) { - ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( - (uint64_t) lck); - } + if (ompt_enabled) { + if (release_status == KMP_LOCK_RELEASED) { + if (ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)) { + ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_last)( + (uint64_t)lck); + } + } else if (ompt_callbacks.ompt_callback( + ompt_event_release_nest_lock_prev)) { + ompt_callbacks.ompt_callback(ompt_event_release_nest_lock_prev)( + (uint64_t)lck); } + } #endif #endif // KMP_USE_DYNAMIC_LOCK } /* try to acquire the lock */ -int -__kmpc_test_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ - KMP_COUNT_BLOCK(OMP_test_lock); +int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { + KMP_COUNT_BLOCK(OMP_test_lock); #if KMP_USE_DYNAMIC_LOCK - int rc; - int tag = KMP_EXTRACT_D_TAG(user_lock); -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif -# if KMP_USE_INLINED_TAS - if (tag == locktag_tas && !__kmp_env_consistency_check) { - KMP_TEST_TAS_LOCK(user_lock, gtid, rc); - } else -# elif KMP_USE_INLINED_FUTEX - if (tag == locktag_futex && !__kmp_env_consistency_check) { - KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); - } else -# endif - { - rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); - } - if (rc) { -# if USE_ITT_BUILD - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); -# endif - return FTN_TRUE; - } else { -# if USE_ITT_BUILD - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); -# endif - return FTN_FALSE; - } + int rc; + int tag = KMP_EXTRACT_D_TAG(user_lock); +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +#endif +#if KMP_USE_INLINED_TAS + if (tag == locktag_tas && !__kmp_env_consistency_check) { + KMP_TEST_TAS_LOCK(user_lock, gtid, rc); + } else +#elif KMP_USE_INLINED_FUTEX + if (tag == locktag_futex && !__kmp_env_consistency_check) { + KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc); + } else +#endif + { + rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid); + } + if (rc) { +#if USE_ITT_BUILD + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); +#endif + return FTN_TRUE; + } else { +#if USE_ITT_BUILD + 
__kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); +#endif + return FTN_FALSE; + } #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; - int rc; + kmp_user_lock_p lck; + int rc; - if ( ( __kmp_user_lock_kind == lk_tas ) - && ( sizeof( lck->tas.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) <= OMP_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_test_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); + __kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ - rc = TEST_LOCK( lck, gtid ); + rc = TEST_LOCK(lck, gtid); #if USE_ITT_BUILD - if ( rc ) { - __kmp_itt_lock_acquired( lck ); - } else { - __kmp_itt_lock_cancelled( lck ); - } + if (rc) { + __kmp_itt_lock_acquired(lck); + } else { + __kmp_itt_lock_cancelled(lck); + } #endif /* USE_ITT_BUILD */ - return ( rc ? FTN_TRUE : FTN_FALSE ); + return (rc ? FTN_TRUE : FTN_FALSE); - /* Can't use serial interval since not block structured */ +/* Can't use serial interval since not block structured */ #endif // KMP_USE_DYNAMIC_LOCK } /* try to acquire the lock */ -int -__kmpc_test_nest_lock( ident_t *loc, kmp_int32 gtid, void **user_lock ) -{ +int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if KMP_USE_DYNAMIC_LOCK - int rc; -# if USE_ITT_BUILD - __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); -# endif - rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); -# if USE_ITT_BUILD - if (rc) { - __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); - } else { - __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); - } -# endif - return rc; + int rc; +#if USE_ITT_BUILD + __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock); +#endif + rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid); +#if USE_ITT_BUILD + if (rc) { + __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock); + } else { + __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock); + } +#endif + return rc; #else // KMP_USE_DYNAMIC_LOCK - kmp_user_lock_p lck; - int rc; + kmp_user_lock_p lck; + int rc; - if ( ( __kmp_user_lock_kind == lk_tas ) && ( sizeof( lck->tas.lk.poll ) - + sizeof( lck->tas.lk.depth_locked ) <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + if ((__kmp_user_lock_kind == lk_tas) && + (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #if KMP_USE_FUTEX - else if ( ( __kmp_user_lock_kind == lk_futex ) - && ( sizeof( lck->futex.lk.poll ) + sizeof( lck->futex.lk.depth_locked ) - <= OMP_NEST_LOCK_T_SIZE ) ) { - lck = (kmp_user_lock_p)user_lock; - } + else if ((__kmp_user_lock_kind == lk_futex) && + (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <= + OMP_NEST_LOCK_T_SIZE)) { + lck = (kmp_user_lock_p)user_lock; + } #endif - else { - lck = __kmp_lookup_user_lock( user_lock, "omp_test_nest_lock" ); - } + else { + lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock"); + } #if USE_ITT_BUILD - __kmp_itt_lock_acquiring( lck ); + 
__kmp_itt_lock_acquiring(lck); #endif /* USE_ITT_BUILD */ - rc = TEST_NESTED_LOCK( lck, gtid ); + rc = TEST_NESTED_LOCK(lck, gtid); #if USE_ITT_BUILD - if ( rc ) { - __kmp_itt_lock_acquired( lck ); - } else { - __kmp_itt_lock_cancelled( lck ); - } + if (rc) { + __kmp_itt_lock_acquired(lck); + } else { + __kmp_itt_lock_cancelled(lck); + } #endif /* USE_ITT_BUILD */ - return rc; + return rc; - /* Can't use serial interval since not block structured */ +/* Can't use serial interval since not block structured */ #endif // KMP_USE_DYNAMIC_LOCK } +// Interface to fast scalable reduce methods routines -/*--------------------------------------------------------------------------------------------------------------------*/ - -/* - * Interface to fast scalable reduce methods routines - */ - -// keep the selected method in a thread local structure for cross-function usage: will be used in __kmpc_end_reduce* functions; -// another solution: to re-determine the method one more time in __kmpc_end_reduce* functions (new prototype required then) +// keep the selected method in a thread local structure for cross-function +// usage: will be used in __kmpc_end_reduce* functions; +// another solution: to re-determine the method one more time in +// __kmpc_end_reduce* functions (new prototype required then) // AT: which solution is better? -#define __KMP_SET_REDUCTION_METHOD(gtid,rmethod) \ - ( ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) = ( rmethod ) ) - -#define __KMP_GET_REDUCTION_METHOD(gtid) \ - ( __kmp_threads[ ( gtid ) ] -> th.th_local.packed_reduction_method ) +#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \ + ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod)) -// description of the packed_reduction_method variable: look at the macros in kmp.h +#define __KMP_GET_REDUCTION_METHOD(gtid) \ + (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) +// description of the packed_reduction_method variable: look at the macros in +// kmp.h // used in a critical section reduce block static __forceinline void -__kmp_enter_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { +__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *crit) { - // this lock was visible to a customer and to the threading profile tool as a serial overhead span - // (although it's used for an internal purpose only) - // why was it visible in previous implementation? - // should we keep it visible in new reduce block? - kmp_user_lock_p lck; + // this lock was visible to a customer and to the threading profile tool as a + // serial overhead span (although it's used for an internal purpose only) + // why was it visible in previous implementation? + // should we keep it visible in new reduce block? + kmp_user_lock_p lck; #if KMP_USE_DYNAMIC_LOCK - kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; - // Check if it is initialized. - if (*lk == 0) { - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, KMP_GET_D_TAG(__kmp_user_lock_seq)); - } else { - __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(__kmp_user_lock_seq)); - } - } - // Branch for accessing the actual lock object and set operation. This branching is inevitable since - // this lock initialization does not follow the normal dispatch path (lock table is not used). 
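As an aside on the lazy lock-word initialization a few lines above: only the thread that wins the compare-and-swap on the zeroed critical-section word installs the lock tag, everyone else just observes a non-zero word. A minimal standalone C11 analogue of that one-time CAS pattern is sketched below; MY_LOCK_TAG and lock_word are illustrative stand-ins, not the kmp macros or types.

  /* One-time, CAS-based initialization of a lock word, analogous in spirit to
     the KMP_COMPARE_AND_STORE_ACQ32 call above. */
  #include <stdatomic.h>
  #include <stdio.h>

  #define MY_LOCK_TAG 2            /* stand-in for the real lock tag value */
  static atomic_int lock_word;     /* 0 means "not yet initialized"        */

  static void ensure_initialized(void) {
    int expected = 0;
    /* Exactly one thread wins the exchange and installs the tag; the rest
       see a non-zero word and fall through. */
    atomic_compare_exchange_strong(&lock_word, &expected, MY_LOCK_TAG);
  }

  int main(void) {
    ensure_initialized();
    printf("lock word = %d\n", atomic_load(&lock_word));
    return 0;
  }
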
- if (KMP_EXTRACT_D_TAG(lk) != 0) { - lck = (kmp_user_lock_p)lk; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); + kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit; + // Check if it is initialized. + if (*lk == 0) { + if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { + KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0, + KMP_GET_D_TAG(__kmp_user_lock_seq)); } else { - kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); - lck = ilk->lock; - KMP_DEBUG_ASSERT(lck != NULL); - if (__kmp_env_consistency_check) { - __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); - } - KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); + __kmp_init_indirect_csptr(crit, loc, global_tid, + KMP_GET_I_TAG(__kmp_user_lock_seq)); } + } + // Branch for accessing the actual lock object and set operation. This + // branching is inevitable since this lock initialization does not follow the + // normal dispatch path (lock table is not used). + if (KMP_EXTRACT_D_TAG(lk) != 0) { + lck = (kmp_user_lock_p)lk; + KMP_DEBUG_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } + KMP_D_LOCK_FUNC(lk, set)(lk, global_tid); + } else { + kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk); + lck = ilk->lock; + KMP_DEBUG_ASSERT(lck != NULL); + if (__kmp_env_consistency_check) { + __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq); + } + KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid); + } #else // KMP_USE_DYNAMIC_LOCK - // We know that the fast reduction code is only emitted by Intel compilers - // with 32 byte critical sections. If there isn't enough space, then we - // have to use a pointer. - if ( __kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE ) { - lck = (kmp_user_lock_p)crit; - } - else { - lck = __kmp_get_critical_section_ptr( crit, loc, global_tid ); - } - KMP_DEBUG_ASSERT( lck != NULL ); + // We know that the fast reduction code is only emitted by Intel compilers + // with 32 byte critical sections. If there isn't enough space, then we + // have to use a pointer. 
+ if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) { + lck = (kmp_user_lock_p)crit; + } else { + lck = __kmp_get_critical_section_ptr(crit, loc, global_tid); + } + KMP_DEBUG_ASSERT(lck != NULL); - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_critical, loc, lck ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_critical, loc, lck); - __kmp_acquire_user_lock_with_checks( lck, global_tid ); + __kmp_acquire_user_lock_with_checks(lck, global_tid); #endif // KMP_USE_DYNAMIC_LOCK } // used in a critical section reduce block static __forceinline void -__kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, kmp_critical_name * crit ) { +__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *crit) { - kmp_user_lock_p lck; + kmp_user_lock_p lck; #if KMP_USE_DYNAMIC_LOCK - if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { - lck = (kmp_user_lock_p)crit; - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); - } else { - kmp_indirect_lock_t *ilk = (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); - if (__kmp_env_consistency_check) - __kmp_pop_sync(global_tid, ct_critical, loc); - KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); - } + if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) { + lck = (kmp_user_lock_p)crit; + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid); + } else { + kmp_indirect_lock_t *ilk = + (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit)); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); + KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid); + } #else // KMP_USE_DYNAMIC_LOCK - // We know that the fast reduction code is only emitted by Intel compilers with 32 byte critical - // sections. If there isn't enough space, then we have to use a pointer. - if ( __kmp_base_user_lock_size > 32 ) { - lck = *( (kmp_user_lock_p *) crit ); - KMP_ASSERT( lck != NULL ); - } else { - lck = (kmp_user_lock_p) crit; - } + // We know that the fast reduction code is only emitted by Intel compilers + // with 32 byte critical sections. If there isn't enough space, then we have + // to use a pointer. + if (__kmp_base_user_lock_size > 32) { + lck = *((kmp_user_lock_p *)crit); + KMP_ASSERT(lck != NULL); + } else { + lck = (kmp_user_lock_p)crit; + } - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_critical, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_critical, loc); - __kmp_release_user_lock_with_checks( lck, global_tid ); + __kmp_release_user_lock_with_checks(lck, global_tid); #endif // KMP_USE_DYNAMIC_LOCK } // __kmp_end_critical_section_reduce_block - /* 2.a.i. Reduce Block without a terminating barrier */ /*! 
@ingroup SYNCHRONIZATION @@ -2650,141 +2577,165 @@ __kmp_end_critical_section_reduce_block( ident_t * loc, kmp_int32 global_tid, km @param num_vars number of items (variables) to be reduced @param reduce_size size of data in bytes to be reduced @param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data +@param reduce_func callback function providing reduction operation on two +operands and returning result of reduction in lhs_data @param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed +@result 1 for the master thread, 0 for all other team threads, 2 for all team +threads if atomic reduction needed The nowait version is used for a reduce clause with the nowait argument. */ kmp_int32 -__kmpc_reduce_nowait( - ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) { - - KMP_COUNT_BLOCK(REDUCE_nowait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; +__kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, + size_t reduce_size, void *reduce_data, + void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck) { + + KMP_COUNT_BLOCK(REDUCE_nowait); + int retval = 0; + PACKED_REDUCTION_METHOD_T packed_reduction_method; #if OMP_40_ENABLED - kmp_team_t *team; - kmp_info_t *th; - int teams_swapped = 0, task_state; + kmp_team_t *team; + kmp_info_t *th; + int teams_swapped = 0, task_state; #endif - KA_TRACE( 10, ( "__kmpc_reduce_nowait() enter: called T#%d\n", global_tid ) ); + KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid)); - // why do we need this initialization here at all? - // Reduction clause can not be used as a stand-alone directive. + // why do we need this initialization here at all? + // Reduction clause can not be used as a stand-alone directive. - // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); + // do not call __kmp_serial_initialize(), it will be called by + // __kmp_parallel_initialize() if needed + // possible detection of false-positive race by the threadchecker ??? + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - // check correctness of reduce block nesting +// check correctness of reduce block nesting #if KMP_USE_DYNAMIC_LOCK - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); #else - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_reduce, loc, NULL); #endif #if OMP_40_ENABLED - th = __kmp_thread_from_gtid(global_tid); - if( th->th.th_teams_microtask ) { // AC: check if we are inside the teams construct? 
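For orientation, this is roughly the kind of user code a reduce-nowait entry point serves. Whether a given compiler lowers it through __kmpc_reduce_nowait / __kmpc_end_reduce_nowait, an atomic update, or a tree barrier is its own decision (compare the @result values documented above), so the sketch only illustrates the construct, not a fixed lowering.

  /* User-level OpenMP reduction with "nowait"; plain C, and the pragmas are
     simply ignored in a serial build, so the result is the same either way. */
  #include <stdio.h>

  int main(void) {
    int sum = 0;
  #pragma omp parallel
    {
  #pragma omp for reduction(+ : sum) nowait
      for (int i = 0; i < 1000; ++i)
        sum += i;
    }
    printf("sum = %d\n", sum); /* 499500 */
    return 0;
  }
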
- team = th->th.th_team; - if( team->t.t_level == th->th.th_teams_level ) { - // this is reduction at teams construct - KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 - // Let's swap teams temporarily for the reduction barrier - teams_swapped = 1; - th->th.th_info.ds.ds_tid = team->t.t_master_tid; - th->th.th_team = team->t.t_parent; - th->th.th_team_nproc = th->th.th_team->t.t_nproc; - th->th.th_task_team = th->th.th_team->t.t_task_team[0]; - task_state = th->th.th_task_state; - th->th.th_task_state = 0; - } + th = __kmp_thread_from_gtid(global_tid); + if (th->th.th_teams_microtask) { // AC: check if we are inside the teams + // construct? + team = th->th.th_team; + if (team->t.t_level == th->th.th_teams_level) { + // this is reduction at teams construct + KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0 + // Let's swap teams temporarily for the reduction barrier + teams_swapped = 1; + th->th.th_info.ds.ds_tid = team->t.t_master_tid; + th->th.th_team = team->t.t_parent; + th->th.th_team_nproc = th->th.th_team->t.t_nproc; + th->th.th_task_team = th->th.th_team->t.t_task_team[0]; + task_state = th->th.th_task_state; + th->th.th_task_state = 0; } + } #endif // OMP_40_ENABLED - // packed_reduction_method value will be reused by __kmp_end_reduce* function, the value should be kept in a variable - // the variable should be either a construct-specific or thread-specific property, not a team specific property - // (a thread can reach the next reduce block on the next construct, reduce method may differ on the next construct) - // an ident_t "loc" parameter could be used as a construct-specific property (what if loc == 0?) - // (if both construct-specific and team-specific variables were shared, then unness extra syncs should be needed) - // a thread-specific variable is better regarding two issues above (next construct and extra syncs) - // a thread-specific "th_local.reduction_method" variable is used currently - // each thread executes 'determine' and 'set' lines (no need to execute by one thread, to avoid unness extra syncs) - - packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); - __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); - - if( packed_reduction_method == critical_reduce_block ) { - - __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); - retval = 1; - - } else if( packed_reduction_method == empty_reduce_block ) { - - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) - retval = 1; - - } else if( packed_reduction_method == atomic_reduce_block ) { - - retval = 2; - - // all threads should do this pop here (because __kmpc_end_reduce_nowait() won't be called by the code gen) - // (it's not quite good, because the checking block has been closed by this 'pop', - // but atomic operation has not been executed yet, will be executed slightly later, literally on next instruction) - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { - - //AT: performance issue: a real barrier here - //AT: (if master goes slow, other threads are blocked here waiting for the master to come and release them) - //AT: (it's not what a customer might expect specifying NOWAIT clause) - //AT: (specifying NOWAIT won't result in improvement of performance, it'll be confusing to a customer) - //AT: another 
implementation of *barrier_gather*nowait() (or some other design) might go faster - // and be more in line with sense of NOWAIT - //AT: TO DO: do epcc test and compare times - - // this barrier should be invisible to a customer and to the threading profile tool - // (it's neither a terminating barrier nor customer's code, it's used for an internal purpose) + // packed_reduction_method value will be reused by __kmp_end_reduce* function, + // the value should be kept in a variable + // the variable should be either a construct-specific or thread-specific + // property, not a team specific property + // (a thread can reach the next reduce block on the next construct, reduce + // method may differ on the next construct) + // an ident_t "loc" parameter could be used as a construct-specific property + // (what if loc == 0?) + // (if both construct-specific and team-specific variables were shared, + // then unness extra syncs should be needed) + // a thread-specific variable is better regarding two issues above (next + // construct and extra syncs) + // a thread-specific "th_local.reduction_method" variable is used currently + // each thread executes 'determine' and 'set' lines (no need to execute by one + // thread, to avoid unness extra syncs) + + packed_reduction_method = __kmp_determine_reduction_method( + loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); + __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); + + if (packed_reduction_method == critical_reduce_block) { + + __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); + retval = 1; + + } else if (packed_reduction_method == empty_reduce_block) { + + // usage: if team size == 1, no synchronization is required ( Intel + // platforms only ) + retval = 1; + + } else if (packed_reduction_method == atomic_reduce_block) { + + retval = 2; + + // all threads should do this pop here (because __kmpc_end_reduce_nowait() + // won't be called by the code gen) + // (it's not quite good, because the checking block has been closed by + // this 'pop', + // but atomic operation has not been executed yet, will be executed + // slightly later, literally on next instruction) + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_reduce, loc); + + } else if (TEST_REDUCTION_METHOD(packed_reduction_method, + tree_reduce_block)) { + +// AT: performance issue: a real barrier here +// AT: (if master goes slow, other threads are blocked here waiting for the +// master to come and release them) +// AT: (it's not what a customer might expect specifying NOWAIT clause) +// AT: (specifying NOWAIT won't result in improvement of performance, it'll +// be confusing to a customer) +// AT: another implementation of *barrier_gather*nowait() (or some other design) +// might go faster and be more in line with sense of NOWAIT +// AT: TO DO: do epcc test and compare times + +// this barrier should be invisible to a customer and to the threading profile +// tool (it's neither a terminating barrier nor customer's code, it's +// used for an internal purpose) #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + __kmp_threads[global_tid]->th.th_ident = loc; #endif - retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, FALSE, reduce_size, reduce_data, reduce_func ); - retval = ( retval != 0 ) ? ( 0 ) : ( 1 ); + retval = + __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), + global_tid, FALSE, reduce_size, reduce_data, reduce_func); + retval = (retval != 0) ? 
(0) : (1); - // all other workers except master should do this pop here - // ( none of other workers will get to __kmpc_end_reduce_nowait() ) - if ( __kmp_env_consistency_check ) { - if( retval == 0 ) { - __kmp_pop_sync( global_tid, ct_reduce, loc ); - } - } - - } else { + // all other workers except master should do this pop here + // ( none of other workers will get to __kmpc_end_reduce_nowait() ) + if (__kmp_env_consistency_check) { + if (retval == 0) { + __kmp_pop_sync(global_tid, ct_reduce, loc); + } + } - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" + } else { - } + // should never reach this block + KMP_ASSERT(0); // "unexpected method" + } #if OMP_40_ENABLED - if( teams_swapped ) { - // Restore thread structure - th->th.th_info.ds.ds_tid = 0; - th->th.th_team = team; - th->th.th_team_nproc = team->t.t_nproc; - th->th.th_task_team = team->t.t_task_team[task_state]; - th->th.th_task_state = task_state; - } + if (teams_swapped) { + // Restore thread structure + th->th.th_info.ds.ds_tid = 0; + th->th.th_team = team; + th->th.th_team_nproc = team->t.t_nproc; + th->th.th_task_team = team->t.t_task_team[task_state]; + th->th.th_task_state = task_state; + } #endif - KA_TRACE( 10, ( "__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); + KA_TRACE( + 10, + ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n", + global_tid, packed_reduction_method, retval)); - return retval; + return retval; } /*! @@ -2795,47 +2746,49 @@ __kmpc_reduce_nowait( Finish the execution of a reduce nowait. */ -void -__kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { - - PACKED_REDUCTION_METHOD_T packed_reduction_method; +void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *lck) { - KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid ) ); + PACKED_REDUCTION_METHOD_T packed_reduction_method; - packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); + KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid)); - if( packed_reduction_method == critical_reduce_block ) { + packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); - __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); + if (packed_reduction_method == critical_reduce_block) { - } else if( packed_reduction_method == empty_reduce_block ) { + __kmp_end_critical_section_reduce_block(loc, global_tid, lck); - // usage: if team size == 1, no synchronization is required ( on Intel platforms only ) + } else if (packed_reduction_method == empty_reduce_block) { - } else if( packed_reduction_method == atomic_reduce_block ) { + // usage: if team size == 1, no synchronization is required ( on Intel + // platforms only ) - // neither master nor other workers should get here - // (code gen does not generate this call in case 2: atomic reduce block) - // actually it's better to remove this elseif at all; - // after removal this value will checked by the 'else' and will assert + } else if (packed_reduction_method == atomic_reduce_block) { - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + // neither master nor other workers should get here + // (code gen does not generate this call in case 2: atomic reduce block) + // actually it's better to remove this elseif at all; + // after removal this value will checked by the 'else' and will assert - // only master gets here + } else if 
(TEST_REDUCTION_METHOD(packed_reduction_method, + tree_reduce_block)) { - } else { + // only master gets here - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" + } else { - } + // should never reach this block + KMP_ASSERT(0); // "unexpected method" + } - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_reduce, loc); - KA_TRACE( 10, ( "__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); + KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n", + global_tid, packed_reduction_method)); - return; + return; } /* 2.a.ii. Reduce Block with a terminating barrier */ @@ -2847,88 +2800,95 @@ __kmpc_end_reduce_nowait( ident_t *loc, kmp_int32 global_tid, kmp_critical_name @param num_vars number of items (variables) to be reduced @param reduce_size size of data in bytes to be reduced @param reduce_data pointer to data to be reduced -@param reduce_func callback function providing reduction operation on two operands and returning result of reduction in lhs_data +@param reduce_func callback function providing reduction operation on two +operands and returning result of reduction in lhs_data @param lck pointer to the unique lock data structure -@result 1 for the master thread, 0 for all other team threads, 2 for all team threads if atomic reduction needed +@result 1 for the master thread, 0 for all other team threads, 2 for all team +threads if atomic reduction needed A blocking reduce that includes an implicit barrier. */ -kmp_int32 -__kmpc_reduce( - ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, - void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) -{ - KMP_COUNT_BLOCK(REDUCE_wait); - int retval = 0; - PACKED_REDUCTION_METHOD_T packed_reduction_method; - - KA_TRACE( 10, ( "__kmpc_reduce() enter: called T#%d\n", global_tid ) ); - - // why do we need this initialization here at all? - // Reduction clause can not be a stand-alone directive. - - // do not call __kmp_serial_initialize(), it will be called by __kmp_parallel_initialize() if needed - // possible detection of false-positive race by the threadchecker ??? - if( ! TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); - - // check correctness of reduce block nesting +kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, + size_t reduce_size, void *reduce_data, + void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck) { + KMP_COUNT_BLOCK(REDUCE_wait); + int retval = 0; + PACKED_REDUCTION_METHOD_T packed_reduction_method; + + KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid)); + + // why do we need this initialization here at all? + // Reduction clause can not be a stand-alone directive. + + // do not call __kmp_serial_initialize(), it will be called by + // __kmp_parallel_initialize() if needed + // possible detection of false-positive race by the threadchecker ??? 
+ if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + +// check correctness of reduce block nesting #if KMP_USE_DYNAMIC_LOCK - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL, 0 ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0); #else - if ( __kmp_env_consistency_check ) - __kmp_push_sync( global_tid, ct_reduce, loc, NULL ); + if (__kmp_env_consistency_check) + __kmp_push_sync(global_tid, ct_reduce, loc, NULL); #endif - packed_reduction_method = __kmp_determine_reduction_method( loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck ); - __KMP_SET_REDUCTION_METHOD( global_tid, packed_reduction_method ); + packed_reduction_method = __kmp_determine_reduction_method( + loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck); + __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method); - if( packed_reduction_method == critical_reduce_block ) { + if (packed_reduction_method == critical_reduce_block) { - __kmp_enter_critical_section_reduce_block( loc, global_tid, lck ); - retval = 1; + __kmp_enter_critical_section_reduce_block(loc, global_tid, lck); + retval = 1; - } else if( packed_reduction_method == empty_reduce_block ) { + } else if (packed_reduction_method == empty_reduce_block) { - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) - retval = 1; + // usage: if team size == 1, no synchronization is required ( Intel + // platforms only ) + retval = 1; - } else if( packed_reduction_method == atomic_reduce_block ) { + } else if (packed_reduction_method == atomic_reduce_block) { - retval = 2; + retval = 2; - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + } else if (TEST_REDUCTION_METHOD(packed_reduction_method, + tree_reduce_block)) { - //case tree_reduce_block: - // this barrier should be visible to a customer and to the threading profile tool - // (it's a terminating barrier on constructs if NOWAIT not specified) +// case tree_reduce_block: +// this barrier should be visible to a customer and to the threading profile +// tool (it's a terminating barrier on constructs if NOWAIT not specified) #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; // needed for correct notification of frames + __kmp_threads[global_tid]->th.th_ident = + loc; // needed for correct notification of frames #endif - retval = __kmp_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid, TRUE, reduce_size, reduce_data, reduce_func ); - retval = ( retval != 0 ) ? ( 0 ) : ( 1 ); - - // all other workers except master should do this pop here - // ( none of other workers except master will enter __kmpc_end_reduce() ) - if ( __kmp_env_consistency_check ) { - if( retval == 0 ) { // 0: all other workers; 1: master - __kmp_pop_sync( global_tid, ct_reduce, loc ); - } - } + retval = + __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), + global_tid, TRUE, reduce_size, reduce_data, reduce_func); + retval = (retval != 0) ? 
(0) : (1); - } else { + // all other workers except master should do this pop here + // ( none of other workers except master will enter __kmpc_end_reduce() ) + if (__kmp_env_consistency_check) { + if (retval == 0) { // 0: all other workers; 1: master + __kmp_pop_sync(global_tid, ct_reduce, loc); + } + } - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" + } else { - } + // should never reach this block + KMP_ASSERT(0); // "unexpected method" + } - KA_TRACE( 10, ( "__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", global_tid, packed_reduction_method, retval ) ); + KA_TRACE(10, + ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n", + global_tid, packed_reduction_method, retval)); - return retval; + return retval; } /*! @@ -2938,103 +2898,103 @@ __kmpc_reduce( @param lck pointer to the unique lock data structure Finish the execution of a blocking reduce. -The lck pointer must be the same as that used in the corresponding start function. +The lck pointer must be the same as that used in the corresponding +start function. */ -void -__kmpc_end_reduce( ident_t *loc, kmp_int32 global_tid, kmp_critical_name *lck ) { +void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, + kmp_critical_name *lck) { - PACKED_REDUCTION_METHOD_T packed_reduction_method; + PACKED_REDUCTION_METHOD_T packed_reduction_method; - KA_TRACE( 10, ( "__kmpc_end_reduce() enter: called T#%d\n", global_tid ) ); + KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid)); - packed_reduction_method = __KMP_GET_REDUCTION_METHOD( global_tid ); + packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid); - // this barrier should be visible to a customer and to the threading profile tool - // (it's a terminating barrier on constructs if NOWAIT not specified) + // this barrier should be visible to a customer and to the threading profile + // tool (it's a terminating barrier on constructs if NOWAIT not specified) - if( packed_reduction_method == critical_reduce_block ) { + if (packed_reduction_method == critical_reduce_block) { - __kmp_end_critical_section_reduce_block( loc, global_tid, lck ); + __kmp_end_critical_section_reduce_block(loc, global_tid, lck); - // TODO: implicit barrier: should be exposed +// TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + __kmp_threads[global_tid]->th.th_ident = loc; #endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); - } else if( packed_reduction_method == empty_reduce_block ) { + } else if (packed_reduction_method == empty_reduce_block) { - // usage: if team size == 1, no synchronization is required ( Intel platforms only ) +// usage: if team size==1, no synchronization is required (Intel platforms only) - // TODO: implicit barrier: should be exposed +// TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + __kmp_threads[global_tid]->th.th_ident = loc; #endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); + __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); - } else if( packed_reduction_method == atomic_reduce_block ) { + } else if (packed_reduction_method == atomic_reduce_block) { - // TODO: implicit barrier: should be exposed +// TODO: implicit barrier: should be exposed #if USE_ITT_NOTIFY - __kmp_threads[global_tid]->th.th_ident = loc; + 
__kmp_threads[global_tid]->th.th_ident = loc; #endif - __kmp_barrier( bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL ); - - } else if( TEST_REDUCTION_METHOD( packed_reduction_method, tree_reduce_block ) ) { + __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); - // only master executes here (master releases all other workers) - __kmp_end_split_barrier( UNPACK_REDUCTION_BARRIER( packed_reduction_method ), global_tid ); + } else if (TEST_REDUCTION_METHOD(packed_reduction_method, + tree_reduce_block)) { - } else { + // only master executes here (master releases all other workers) + __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method), + global_tid); - // should never reach this block - KMP_ASSERT( 0 ); // "unexpected method" + } else { - } + // should never reach this block + KMP_ASSERT(0); // "unexpected method" + } - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( global_tid, ct_reduce, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_sync(global_tid, ct_reduce, loc); - KA_TRACE( 10, ( "__kmpc_end_reduce() exit: called T#%d: method %08x\n", global_tid, packed_reduction_method ) ); + KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n", + global_tid, packed_reduction_method)); - return; + return; } #undef __KMP_GET_REDUCTION_METHOD #undef __KMP_SET_REDUCTION_METHOD -/*-- end of interface to fast scalable reduce routines ---------------------------------------------------------------*/ +/* end of interface to fast scalable reduce routines */ -kmp_uint64 -__kmpc_get_taskid() { +kmp_uint64 __kmpc_get_taskid() { - kmp_int32 gtid; - kmp_info_t * thread; + kmp_int32 gtid; + kmp_info_t *thread; - gtid = __kmp_get_gtid(); - if ( gtid < 0 ) { - return 0; - }; // if - thread = __kmp_thread_from_gtid( gtid ); - return thread->th.th_current_task->td_task_id; + gtid = __kmp_get_gtid(); + if (gtid < 0) { + return 0; + }; // if + thread = __kmp_thread_from_gtid(gtid); + return thread->th.th_current_task->td_task_id; } // __kmpc_get_taskid +kmp_uint64 __kmpc_get_parent_taskid() { -kmp_uint64 -__kmpc_get_parent_taskid() { - - kmp_int32 gtid; - kmp_info_t * thread; - kmp_taskdata_t * parent_task; + kmp_int32 gtid; + kmp_info_t *thread; + kmp_taskdata_t *parent_task; - gtid = __kmp_get_gtid(); - if ( gtid < 0 ) { - return 0; - }; // if - thread = __kmp_thread_from_gtid( gtid ); - parent_task = thread->th.th_current_task->td_parent; - return ( parent_task == NULL ? 0 : parent_task->td_task_id ); + gtid = __kmp_get_gtid(); + if (gtid < 0) { + return 0; + }; // if + thread = __kmp_thread_from_gtid(gtid); + parent_task = thread->th.th_current_task->td_parent; + return (parent_task == NULL ? 0 : parent_task->td_task_id); } // __kmpc_get_parent_taskid @@ -3050,282 +3010,292 @@ Initialize doacross loop information. Expect compiler send us inclusive bounds, e.g. for(i=2;i<9;i+=2) lo=2, up=8, st=2. 
*/ -void -__kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, struct kmp_dim * dims) -{ - int j, idx; - kmp_int64 last, trace_count; - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_uint32 *flags; - kmp_disp_t *pr_buf = th->th.th_dispatch; - dispatch_shared_info_t *sh_buf; - - KA_TRACE(20,("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", - gtid, num_dims, !team->t.t_serialized)); - KMP_DEBUG_ASSERT(dims != NULL); - KMP_DEBUG_ASSERT(num_dims > 0); - - if( team->t.t_serialized ) { - KA_TRACE(20,("__kmpc_doacross_init() exit: serialized team\n")); - return; // no dependencies if team is serialized - } - KMP_DEBUG_ASSERT(team->t.t_nproc > 1); - idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for the next loop - sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; - - // Save bounds info into allocated private buffer - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); - pr_buf->th_doacross_info = - (kmp_int64*)__kmp_thread_malloc(th, sizeof(kmp_int64)*(4 * num_dims + 1)); - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - pr_buf->th_doacross_info[0] = (kmp_int64)num_dims; // first element is number of dimensions - // Save also address of num_done in order to access it later without knowing the buffer index - pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; - pr_buf->th_doacross_info[2] = dims[0].lo; - pr_buf->th_doacross_info[3] = dims[0].up; - pr_buf->th_doacross_info[4] = dims[0].st; - last = 5; - for( j = 1; j < num_dims; ++j ) { - kmp_int64 range_length; // To keep ranges of all dimensions but the first dims[0] - if( dims[j].st == 1 ) { // most common case - // AC: should we care of ranges bigger than LLONG_MAX? (not for now) - range_length = dims[j].up - dims[j].lo + 1; - } else { - if( dims[j].st > 0 ) { - KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); - range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; - } else { // negative increment - KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); - range_length = (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; - } - } - pr_buf->th_doacross_info[last++] = range_length; - pr_buf->th_doacross_info[last++] = dims[j].lo; - pr_buf->th_doacross_info[last++] = dims[j].up; - pr_buf->th_doacross_info[last++] = dims[j].st; - } - - // Compute total trip count. - // Start with range of dims[0] which we don't need to keep in the buffer. 
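A quick standalone check of the inclusive-bounds trip-count arithmetic used here, applied to the example from the doc comment (for(i=2;i<9;i+=2), i.e. lo=2, up=8, st=2). The helper below only mirrors the three stride cases handled in the surrounding code and is not part of the runtime.

  #include <stdio.h>

  /* Trip count for inclusive bounds [lo, up] with stride st. */
  static long long trip_count(long long lo, long long up, long long st) {
    if (st == 1)
      return up - lo + 1;            /* most common case          */
    if (st > 0)
      return (up - lo) / st + 1;     /* positive stride, up > lo  */
    return (lo - up) / (-st) + 1;    /* negative stride, lo > up  */
  }

  int main(void) {
    printf("%lld\n", trip_count(2, 8, 2)); /* prints 4 */
    return 0;
  }
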
- if( dims[0].st == 1 ) { // most common case - trace_count = dims[0].up - dims[0].lo + 1; - } else if( dims[0].st > 0 ) { - KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); - trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; - } else { // negative increment - KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); - trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; - } - for( j = 1; j < num_dims; ++j ) { - trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges - } - KMP_DEBUG_ASSERT(trace_count > 0); +void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims, + struct kmp_dim *dims) { + int j, idx; + kmp_int64 last, trace_count; + kmp_info_t *th = __kmp_threads[gtid]; + kmp_team_t *team = th->th.th_team; + kmp_uint32 *flags; + kmp_disp_t *pr_buf = th->th.th_dispatch; + dispatch_shared_info_t *sh_buf; + + KA_TRACE( + 20, + ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n", + gtid, num_dims, !team->t.t_serialized)); + KMP_DEBUG_ASSERT(dims != NULL); + KMP_DEBUG_ASSERT(num_dims > 0); + + if (team->t.t_serialized) { + KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n")); + return; // no dependencies if team is serialized + } + KMP_DEBUG_ASSERT(team->t.t_nproc > 1); + idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for + // the next loop + sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; + + // Save bounds info into allocated private buffer + KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL); + pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc( + th, sizeof(kmp_int64) * (4 * num_dims + 1)); + KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); + pr_buf->th_doacross_info[0] = + (kmp_int64)num_dims; // first element is number of dimensions + // Save also address of num_done in order to access it later without knowing + // the buffer index + pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done; + pr_buf->th_doacross_info[2] = dims[0].lo; + pr_buf->th_doacross_info[3] = dims[0].up; + pr_buf->th_doacross_info[4] = dims[0].st; + last = 5; + for (j = 1; j < num_dims; ++j) { + kmp_int64 + range_length; // To keep ranges of all dimensions but the first dims[0] + if (dims[j].st == 1) { // most common case + // AC: should we care of ranges bigger than LLONG_MAX? (not for now) + range_length = dims[j].up - dims[j].lo + 1; + } else { + if (dims[j].st > 0) { + KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo); + range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1; + } else { // negative increment + KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up); + range_length = + (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1; + } + } + pr_buf->th_doacross_info[last++] = range_length; + pr_buf->th_doacross_info[last++] = dims[j].lo; + pr_buf->th_doacross_info[last++] = dims[j].up; + pr_buf->th_doacross_info[last++] = dims[j].st; + } - // Check if shared buffer is not occupied by other loop (idx - __kmp_dispatch_num_buffers) - if( idx != sh_buf->doacross_buf_idx ) { - // Shared buffer is occupied, wait for it to be free - __kmp_wait_yield_4( (kmp_uint32*)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, NULL ); - } - // Check if we are the first thread. After the CAS the first thread gets 0, - // others get 1 if initialization is in progress, allocated pointer otherwise. 
- flags = (kmp_uint32*)KMP_COMPARE_AND_STORE_RET64( - (kmp_int64*)&sh_buf->doacross_flags,NULL,(kmp_int64)1); - if( flags == NULL ) { - // we are the first thread, allocate the array of flags - kmp_int64 size = trace_count / 8 + 8; // in bytes, use single bit per iteration - sh_buf->doacross_flags = (kmp_uint32*)__kmp_thread_calloc(th, size, 1); - } else if( (kmp_int64)flags == 1 ) { - // initialization is still in progress, need to wait - while( (volatile kmp_int64)sh_buf->doacross_flags == 1 ) { - KMP_YIELD(TRUE); - } - } - KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > 1); // check value of pointer - pr_buf->th_doacross_flags = sh_buf->doacross_flags; // save private copy in order to not - // touch shared buffer on each iteration - KA_TRACE(20,("__kmpc_doacross_init() exit: T#%d\n", gtid)); -} - -void -__kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) -{ - kmp_int32 shft, num_dims, i; - kmp_uint32 flag; - kmp_int64 iter_number; // iteration number of "collapsed" loop nest - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_disp_t *pr_buf; - kmp_int64 lo, up, st; - - KA_TRACE(20,("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); - if( team->t.t_serialized ) { - KA_TRACE(20,("__kmpc_doacross_wait() exit: serialized team\n")); - return; // no dependencies if team is serialized + // Compute total trip count. + // Start with range of dims[0] which we don't need to keep in the buffer. + if (dims[0].st == 1) { // most common case + trace_count = dims[0].up - dims[0].lo + 1; + } else if (dims[0].st > 0) { + KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo); + trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1; + } else { // negative increment + KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up); + trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1; + } + for (j = 1; j < num_dims; ++j) { + trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges + } + KMP_DEBUG_ASSERT(trace_count > 0); + + // Check if shared buffer is not occupied by other loop (idx - + // __kmp_dispatch_num_buffers) + if (idx != sh_buf->doacross_buf_idx) { + // Shared buffer is occupied, wait for it to be free + __kmp_wait_yield_4((kmp_uint32 *)&sh_buf->doacross_buf_idx, idx, __kmp_eq_4, + NULL); + } + // Check if we are the first thread. After the CAS the first thread gets 0, + // others get 1 if initialization is in progress, allocated pointer otherwise. 
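The flag array allocated here packs one bit per loop iteration (hence the size of trace_count / 8 bytes plus padding), and the post/wait paths address it with 32-bit granularity: word index = iteration >> 5, bit = iteration % 32. A stripped-down, single-threaded sketch of that indexing follows; the real code sets the bit atomically (KMP_TEST_THEN_OR32) and spins in the wait path, which this sketch deliberately omits.

  #include <stdint.h>
  #include <stdio.h>

  /* Mark iteration 'iter' as finished: one bit per iteration, 32 per word. */
  static void post_iteration(uint32_t *flags, long long iter) {
    flags[iter >> 5] |= (uint32_t)1 << (iter % 32);
  }

  /* Check whether iteration 'iter' has been posted. */
  static int iteration_done(const uint32_t *flags, long long iter) {
    return (flags[iter >> 5] >> (iter % 32)) & 1u;
  }

  int main(void) {
    uint32_t flags[4] = {0};  /* enough for 128 iterations */
    post_iteration(flags, 37);
    printf("%d %d\n", iteration_done(flags, 37), iteration_done(flags, 38));
    return 0;
  }
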
+ flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64( + (kmp_int64 *)&sh_buf->doacross_flags, NULL, (kmp_int64)1); + if (flags == NULL) { + // we are the first thread, allocate the array of flags + kmp_int64 size = + trace_count / 8 + 8; // in bytes, use single bit per iteration + sh_buf->doacross_flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1); + } else if ((kmp_int64)flags == 1) { + // initialization is still in progress, need to wait + while ((volatile kmp_int64)sh_buf->doacross_flags == 1) { + KMP_YIELD(TRUE); } + } + KMP_DEBUG_ASSERT((kmp_int64)sh_buf->doacross_flags > + 1); // check value of pointer + pr_buf->th_doacross_flags = + sh_buf->doacross_flags; // save private copy in order to not + // touch shared buffer on each iteration + KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid)); +} + +void __kmpc_doacross_wait(ident_t *loc, int gtid, long long *vec) { + kmp_int32 shft, num_dims, i; + kmp_uint32 flag; + kmp_int64 iter_number; // iteration number of "collapsed" loop nest + kmp_info_t *th = __kmp_threads[gtid]; + kmp_team_t *team = th->th.th_team; + kmp_disp_t *pr_buf; + kmp_int64 lo, up, st; + + KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid)); + if (team->t.t_serialized) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n")); + return; // no dependencies if team is serialized + } - // calculate sequential iteration number and check out-of-bounds condition - pr_buf = th->th.th_dispatch; - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; - lo = pr_buf->th_doacross_info[2]; - up = pr_buf->th_doacross_info[3]; - st = pr_buf->th_doacross_info[4]; - if( st == 1 ) { // most common case - if( vec[0] < lo || vec[0] > up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[0], lo, up)); - return; - } - iter_number = vec[0] - lo; - } else if( st > 0 ) { - if( vec[0] < lo || vec[0] > up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[0], lo, up)); - return; - } - iter_number = (kmp_uint64)(vec[0] - lo) / st; - } else { // negative increment - if( vec[0] > lo || vec[0] < up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[0], lo, up)); - return; - } - iter_number = (kmp_uint64)(lo - vec[0]) / (-st); - } - for( i = 1; i < num_dims; ++i ) { - kmp_int64 iter, ln; - kmp_int32 j = i * 4; - ln = pr_buf->th_doacross_info[j + 1]; - lo = pr_buf->th_doacross_info[j + 2]; - up = pr_buf->th_doacross_info[j + 3]; - st = pr_buf->th_doacross_info[j + 4]; - if( st == 1 ) { - if( vec[i] < lo || vec[i] > up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = vec[i] - lo; - } else if( st > 0 ) { - if( vec[i] < lo || vec[i] > up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = (kmp_uint64)(vec[i] - lo) / st; - } else { // st < 0 - if( vec[i] > lo || vec[i] < up ) { - KA_TRACE(20,( - "__kmpc_doacross_wait() exit: T#%d iter %lld is out of bounds [%lld,%lld]\n", - gtid, vec[i], lo, up)); - return; - } - iter = (kmp_uint64)(lo - vec[i]) / (-st); - } - iter_number = iter + ln * iter_number; - } - shft = iter_number % 32; // use 32-bit granularity - iter_number >>= 5; // divided by 32 - flag = 1 << shft; - while( (flag & 
pr_buf->th_doacross_flags[iter_number]) == 0 ) { - KMP_YIELD(TRUE); - } - KA_TRACE(20,("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", - gtid, (iter_number<<5)+shft)); -} - -void -__kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) -{ - kmp_int32 shft, num_dims, i; - kmp_uint32 flag; - kmp_int64 iter_number; // iteration number of "collapsed" loop nest - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_disp_t *pr_buf; - kmp_int64 lo, st; - - KA_TRACE(20,("__kmpc_doacross_post() enter: called T#%d\n", gtid)); - if( team->t.t_serialized ) { - KA_TRACE(20,("__kmpc_doacross_post() exit: serialized team\n")); - return; // no dependencies if team is serialized + // calculate sequential iteration number and check out-of-bounds condition + pr_buf = th->th.th_dispatch; + KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); + num_dims = pr_buf->th_doacross_info[0]; + lo = pr_buf->th_doacross_info[2]; + up = pr_buf->th_doacross_info[3]; + st = pr_buf->th_doacross_info[4]; + if (st == 1) { // most common case + if (vec[0] < lo || vec[0] > up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[0], lo, up)); + return; + } + iter_number = vec[0] - lo; + } else if (st > 0) { + if (vec[0] < lo || vec[0] > up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[0], lo, up)); + return; + } + iter_number = (kmp_uint64)(vec[0] - lo) / st; + } else { // negative increment + if (vec[0] > lo || vec[0] < up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[0], lo, up)); + return; + } + iter_number = (kmp_uint64)(lo - vec[0]) / (-st); + } + for (i = 1; i < num_dims; ++i) { + kmp_int64 iter, ln; + kmp_int32 j = i * 4; + ln = pr_buf->th_doacross_info[j + 1]; + lo = pr_buf->th_doacross_info[j + 2]; + up = pr_buf->th_doacross_info[j + 3]; + st = pr_buf->th_doacross_info[j + 4]; + if (st == 1) { + if (vec[i] < lo || vec[i] > up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[i], lo, up)); + return; + } + iter = vec[i] - lo; + } else if (st > 0) { + if (vec[i] < lo || vec[i] > up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[i], lo, up)); + return; + } + iter = (kmp_uint64)(vec[i] - lo) / st; + } else { // st < 0 + if (vec[i] > lo || vec[i] < up) { + KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of " + "bounds [%lld,%lld]\n", + gtid, vec[i], lo, up)); + return; + } + iter = (kmp_uint64)(lo - vec[i]) / (-st); } + iter_number = iter + ln * iter_number; + } + shft = iter_number % 32; // use 32-bit granularity + iter_number >>= 5; // divided by 32 + flag = 1 << shft; + while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) { + KMP_YIELD(TRUE); + } + KA_TRACE(20, + ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n", + gtid, (iter_number << 5) + shft)); +} + +void __kmpc_doacross_post(ident_t *loc, int gtid, long long *vec) { + kmp_int32 shft, num_dims, i; + kmp_uint32 flag; + kmp_int64 iter_number; // iteration number of "collapsed" loop nest + kmp_info_t *th = __kmp_threads[gtid]; + kmp_team_t *team = th->th.th_team; + kmp_disp_t *pr_buf; + kmp_int64 lo, st; + + KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid)); + if (team->t.t_serialized) { + KA_TRACE(20, ("__kmpc_doacross_post() exit: 
serialized team\n")); + return; // no dependencies if team is serialized + } - // calculate sequential iteration number (same as in "wait" but no out-of-bounds checks) - pr_buf = th->th.th_dispatch; - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); - num_dims = pr_buf->th_doacross_info[0]; - lo = pr_buf->th_doacross_info[2]; - st = pr_buf->th_doacross_info[4]; - if( st == 1 ) { // most common case - iter_number = vec[0] - lo; - } else if( st > 0 ) { - iter_number = (kmp_uint64)(vec[0] - lo) / st; - } else { // negative increment - iter_number = (kmp_uint64)(lo - vec[0]) / (-st); - } - for( i = 1; i < num_dims; ++i ) { - kmp_int64 iter, ln; - kmp_int32 j = i * 4; - ln = pr_buf->th_doacross_info[j + 1]; - lo = pr_buf->th_doacross_info[j + 2]; - st = pr_buf->th_doacross_info[j + 4]; - if( st == 1 ) { - iter = vec[i] - lo; - } else if( st > 0 ) { - iter = (kmp_uint64)(vec[i] - lo) / st; - } else { // st < 0 - iter = (kmp_uint64)(lo - vec[i]) / (-st); - } - iter_number = iter + ln * iter_number; - } - shft = iter_number % 32; // use 32-bit granularity - iter_number >>= 5; // divided by 32 - flag = 1 << shft; - if( (flag & pr_buf->th_doacross_flags[iter_number]) == 0 ) - KMP_TEST_THEN_OR32( (kmp_int32*)&pr_buf->th_doacross_flags[iter_number], (kmp_int32)flag ); - KA_TRACE(20,("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", - gtid, (iter_number<<5)+shft)); -} - -void -__kmpc_doacross_fini(ident_t *loc, int gtid) -{ - kmp_int64 num_done; - kmp_info_t *th = __kmp_threads[gtid]; - kmp_team_t *team = th->th.th_team; - kmp_disp_t *pr_buf = th->th.th_dispatch; - - KA_TRACE(20,("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); - if( team->t.t_serialized ) { - KA_TRACE(20,("__kmpc_doacross_fini() exit: serialized team %p\n", team)); - return; // nothing to do - } - num_done = KMP_TEST_THEN_INC64((kmp_int64*)pr_buf->th_doacross_info[1]) + 1; - if( num_done == th->th.th_team_nproc ) { - // we are the last thread, need to free shared resources - int idx = pr_buf->th_doacross_buf_idx - 1; - dispatch_shared_info_t *sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; - KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == (kmp_int64)&sh_buf->doacross_num_done); - KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done); - KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); - __kmp_thread_free(th, (void*)sh_buf->doacross_flags); - sh_buf->doacross_flags = NULL; - sh_buf->doacross_num_done = 0; - sh_buf->doacross_buf_idx += __kmp_dispatch_num_buffers; // free buffer for future re-use - } - // free private resources (need to keep buffer index forever) - __kmp_thread_free(th, (void*)pr_buf->th_doacross_info); - pr_buf->th_doacross_info = NULL; - KA_TRACE(20,("__kmpc_doacross_fini() exit: T#%d\n", gtid)); + // calculate sequential iteration number (same as in "wait" but no + // out-of-bounds checks) + pr_buf = th->th.th_dispatch; + KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL); + num_dims = pr_buf->th_doacross_info[0]; + lo = pr_buf->th_doacross_info[2]; + st = pr_buf->th_doacross_info[4]; + if (st == 1) { // most common case + iter_number = vec[0] - lo; + } else if (st > 0) { + iter_number = (kmp_uint64)(vec[0] - lo) / st; + } else { // negative increment + iter_number = (kmp_uint64)(lo - vec[0]) / (-st); + } + for (i = 1; i < num_dims; ++i) { + kmp_int64 iter, ln; + kmp_int32 j = i * 4; + ln = pr_buf->th_doacross_info[j + 1]; + lo = pr_buf->th_doacross_info[j + 2]; + st = pr_buf->th_doacross_info[j + 4]; + if (st == 1) { + iter = vec[i] - lo; + } else if (st > 0) { + iter = 
(kmp_uint64)(vec[i] - lo) / st; + } else { // st < 0 + iter = (kmp_uint64)(lo - vec[i]) / (-st); + } + iter_number = iter + ln * iter_number; + } + shft = iter_number % 32; // use 32-bit granularity + iter_number >>= 5; // divided by 32 + flag = 1 << shft; + if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) + KMP_TEST_THEN_OR32((kmp_int32 *)&pr_buf->th_doacross_flags[iter_number], + (kmp_int32)flag); + KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid, + (iter_number << 5) + shft)); +} + +void __kmpc_doacross_fini(ident_t *loc, int gtid) { + kmp_int64 num_done; + kmp_info_t *th = __kmp_threads[gtid]; + kmp_team_t *team = th->th.th_team; + kmp_disp_t *pr_buf = th->th.th_dispatch; + + KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid)); + if (team->t.t_serialized) { + KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team)); + return; // nothing to do + } + num_done = KMP_TEST_THEN_INC64((kmp_int64 *)pr_buf->th_doacross_info[1]) + 1; + if (num_done == th->th.th_team_nproc) { + // we are the last thread, need to free shared resources + int idx = pr_buf->th_doacross_buf_idx - 1; + dispatch_shared_info_t *sh_buf = + &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers]; + KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] == + (kmp_int64)&sh_buf->doacross_num_done); + KMP_DEBUG_ASSERT(num_done == (kmp_int64)sh_buf->doacross_num_done); + KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx); + __kmp_thread_free(th, (void *)sh_buf->doacross_flags); + sh_buf->doacross_flags = NULL; + sh_buf->doacross_num_done = 0; + sh_buf->doacross_buf_idx += + __kmp_dispatch_num_buffers; // free buffer for future re-use + } + // free private resources (need to keep buffer index forever) + __kmp_thread_free(th, (void *)pr_buf->th_doacross_info); + pr_buf->th_doacross_info = NULL; + KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid)); } #endif // end of file // - diff --git a/openmp/runtime/src/kmp_debug.cpp b/openmp/runtime/src/kmp_debug.cpp index fd7b905..6db0774 100644 --- a/openmp/runtime/src/kmp_debug.cpp +++ b/openmp/runtime/src/kmp_debug.cpp @@ -19,124 +19,116 @@ #include "kmp_io.h" #ifdef KMP_DEBUG -void -__kmp_debug_printf_stdout( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); +void __kmp_debug_printf_stdout(char const *format, ...) { + va_list ap; + va_start(ap, format); - __kmp_vprintf( kmp_out, format, ap ); + __kmp_vprintf(kmp_out, format, ap); - va_end(ap); + va_end(ap); } #endif -void -__kmp_debug_printf( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); +void __kmp_debug_printf(char const *format, ...) { + va_list ap; + va_start(ap, format); - __kmp_vprintf( kmp_err, format, ap ); + __kmp_vprintf(kmp_err, format, ap); - va_end( ap ); + va_end(ap); } #ifdef KMP_USE_ASSERT - int - __kmp_debug_assert( - char const * msg, - char const * file, - int line - ) { - - if ( file == NULL ) { - file = KMP_I18N_STR( UnknownFile ); - } else { - // Remove directories from path, leave only file name. File name is enough, there is no need - // in bothering developers and customers with full paths. 
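For reference, the __kmpc_doacross_wait/post routines above track each collapsed iteration with one bit, packed 32 bits per kmp_uint32 word of th_doacross_flags: the word index is iter_number >> 5 and the bit mask is 1 << (iter_number % 32). A minimal standalone sketch of that indexing follows; doacross_bit_set/doacross_bit_test and the std::vector storage are illustrative stand-ins, not runtime entry points, and the real runtime sets the bit atomically via KMP_TEST_THEN_OR32 rather than a plain |=.

#include <cassert>
#include <cstdint>
#include <vector>

// One completion bit per iteration, 32 bits per word, mirroring
// th_doacross_flags. Single-threaded sketch only (no atomics).
static void doacross_bit_set(std::vector<std::uint32_t> &flags,
                             std::uint64_t iter) {
  std::uint32_t bit = 1u << (iter % 32); // flag = 1 << shft
  flags[iter >> 5] |= bit;               // iter_number >>= 5 picks the word
}

static bool doacross_bit_test(const std::vector<std::uint32_t> &flags,
                              std::uint64_t iter) {
  return (flags[iter >> 5] & (1u << (iter % 32))) != 0;
}

int main() {
  // Sized as in __kmpc_doacross_init above: trace_count/8 + 8 bytes.
  std::uint64_t trace_count = 1000;
  std::vector<std::uint32_t> flags(
      (trace_count / 8 + 8) / sizeof(std::uint32_t), 0);
  doacross_bit_set(flags, 37); // lands in word 1, bit 5
  assert(doacross_bit_test(flags, 37));
  assert(!doacross_bit_test(flags, 36));
  return 0;
}
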
- char const * slash = strrchr( file, '/' ); - if ( slash != NULL ) { - file = slash + 1; - }; // if - }; // if - - #ifdef KMP_DEBUG - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_debug_printf( "Assertion failure at %s(%d): %s.\n", file, line, msg ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); - #ifdef USE_ASSERT_BREAK - #if KMP_OS_WINDOWS - DebugBreak(); - #endif - #endif // USE_ASSERT_BREAK - #ifdef USE_ASSERT_STALL - /* __kmp_infinite_loop(); */ - for(;;); - #endif // USE_ASSERT_STALL - #ifdef USE_ASSERT_SEG - { - int volatile * ZERO = (int*) 0; - ++ (*ZERO); - } - #endif // USE_ASSERT_SEG - #endif - - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( AssertionFailure, file, line ), - KMP_HNT( SubmitBugReport ), - __kmp_msg_null - ); - - return 0; - - } // __kmp_debug_assert +int __kmp_debug_assert(char const *msg, char const *file, int line) { + + if (file == NULL) { + file = KMP_I18N_STR(UnknownFile); + } else { + // Remove directories from path, leave only file name. File name is enough, + // there is no need in bothering developers and customers with full paths. + char const *slash = strrchr(file, '/'); + if (slash != NULL) { + file = slash + 1; + }; // if + }; // if + +#ifdef KMP_DEBUG + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_debug_printf("Assertion failure at %s(%d): %s.\n", file, line, msg); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); +#ifdef USE_ASSERT_BREAK +#if KMP_OS_WINDOWS + DebugBreak(); +#endif +#endif // USE_ASSERT_BREAK +#ifdef USE_ASSERT_STALL + /* __kmp_infinite_loop(); */ + for (;;) + ; +#endif // USE_ASSERT_STALL +#ifdef USE_ASSERT_SEG + { + int volatile *ZERO = (int *)0; + ++(*ZERO); + } +#endif // USE_ASSERT_SEG +#endif + + __kmp_msg(kmp_ms_fatal, KMP_MSG(AssertionFailure, file, line), + KMP_HNT(SubmitBugReport), __kmp_msg_null); + + return 0; + +} // __kmp_debug_assert #endif // KMP_USE_ASSERT /* Dump debugging buffer to stderr */ -void -__kmp_dump_debug_buffer( void ) -{ - if ( __kmp_debug_buffer != NULL ) { - int i; - int dc = __kmp_debug_count; - char *db = & __kmp_debug_buffer[ (dc % __kmp_debug_buf_lines) * __kmp_debug_buf_chars ]; - char *db_end = & __kmp_debug_buffer[ __kmp_debug_buf_lines * __kmp_debug_buf_chars ]; - char *db2; - - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_printf_no_lock( "\nStart dump of debugging buffer (entry=%d):\n", - dc % __kmp_debug_buf_lines ); - - for ( i = 0; i < __kmp_debug_buf_lines; i++ ) { - - if ( *db != '\0' ) { - /* Fix up where no carriage return before string termination char */ - for ( db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2 ++) { - if ( *db2 == '\0' ) { - if ( *(db2-1) != '\n' ) { *db2 = '\n'; *(db2+1) = '\0'; } - break; - } - } - /* Handle case at end by shortening the printed message by one char if necessary */ - if ( db2 == db + __kmp_debug_buf_chars - 1 && - *db2 == '\0' && *(db2-1) != '\n' ) { - *(db2-1) = '\n'; - } - - __kmp_printf_no_lock( "%4d: %.*s", i, __kmp_debug_buf_chars, db ); - *db = '\0'; /* only let it print once! 
*/ +void __kmp_dump_debug_buffer(void) { + if (__kmp_debug_buffer != NULL) { + int i; + int dc = __kmp_debug_count; + char *db = &__kmp_debug_buffer[(dc % __kmp_debug_buf_lines) * + __kmp_debug_buf_chars]; + char *db_end = + &__kmp_debug_buffer[__kmp_debug_buf_lines * __kmp_debug_buf_chars]; + char *db2; + + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_printf_no_lock("\nStart dump of debugging buffer (entry=%d):\n", + dc % __kmp_debug_buf_lines); + + for (i = 0; i < __kmp_debug_buf_lines; i++) { + + if (*db != '\0') { + /* Fix up where no carriage return before string termination char */ + for (db2 = db + 1; db2 < db + __kmp_debug_buf_chars - 1; db2++) { + if (*db2 == '\0') { + if (*(db2 - 1) != '\n') { + *db2 = '\n'; + *(db2 + 1) = '\0'; } - - db += __kmp_debug_buf_chars; - if ( db >= db_end ) - db = __kmp_debug_buffer; + break; + } + } + /* Handle case at end by shortening the printed message by one char if + * necessary */ + if (db2 == db + __kmp_debug_buf_chars - 1 && *db2 == '\0' && + *(db2 - 1) != '\n') { + *(db2 - 1) = '\n'; } - __kmp_printf_no_lock( "End dump of debugging buffer (entry=%d).\n\n", - ( dc+i-1 ) % __kmp_debug_buf_lines ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_printf_no_lock("%4d: %.*s", i, __kmp_debug_buf_chars, db); + *db = '\0'; /* only let it print once! */ + } + + db += __kmp_debug_buf_chars; + if (db >= db_end) + db = __kmp_debug_buffer; } + + __kmp_printf_no_lock("End dump of debugging buffer (entry=%d).\n\n", + (dc + i - 1) % __kmp_debug_buf_lines); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); + } } diff --git a/openmp/runtime/src/kmp_debug.h b/openmp/runtime/src/kmp_debug.h index e4e3d8d..9080a4a 100644 --- a/openmp/runtime/src/kmp_debug.h +++ b/openmp/runtime/src/kmp_debug.h @@ -19,94 +19,155 @@ #include #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Build-time assertion. -// ------------------------------------------------------------------------------------------------- // New C++11 style build assert -#define KMP_BUILD_ASSERT( expr ) static_assert(expr, "Build condition error") +#define KMP_BUILD_ASSERT(expr) static_assert(expr, "Build condition error") -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Run-time assertions. -// ------------------------------------------------------------------------------------------------- -extern void __kmp_dump_debug_buffer( void ); +extern void __kmp_dump_debug_buffer(void); #ifdef KMP_USE_ASSERT - extern int __kmp_debug_assert( char const * expr, char const * file, int line ); - #ifdef KMP_DEBUG - #define KMP_ASSERT( cond ) ( (cond) ? 0 : __kmp_debug_assert( #cond, __FILE__, __LINE__ ) ) - #define KMP_ASSERT2( cond, msg ) ( (cond) ? 0 : __kmp_debug_assert( (msg), __FILE__, __LINE__ ) ) - #define KMP_DEBUG_ASSERT( cond ) KMP_ASSERT( cond ) - #define KMP_DEBUG_ASSERT2( cond, msg ) KMP_ASSERT2( cond, msg ) - #else - // Do not expose condition in release build. Use "assertion failure". - #define KMP_ASSERT( cond ) ( (cond) ? 
0 : __kmp_debug_assert( "assertion failure", __FILE__, __LINE__ ) ) - #define KMP_ASSERT2( cond, msg ) KMP_ASSERT( cond ) - #define KMP_DEBUG_ASSERT( cond ) 0 - #define KMP_DEBUG_ASSERT2( cond, msg ) 0 - #endif // KMP_DEBUG +extern int __kmp_debug_assert(char const *expr, char const *file, int line); +#ifdef KMP_DEBUG +#define KMP_ASSERT(cond) \ + ((cond) ? 0 : __kmp_debug_assert(#cond, __FILE__, __LINE__)) +#define KMP_ASSERT2(cond, msg) \ + ((cond) ? 0 : __kmp_debug_assert((msg), __FILE__, __LINE__)) +#define KMP_DEBUG_ASSERT(cond) KMP_ASSERT(cond) +#define KMP_DEBUG_ASSERT2(cond, msg) KMP_ASSERT2(cond, msg) +#else +// Do not expose condition in release build. Use "assertion failure". +#define KMP_ASSERT(cond) \ + ((cond) ? 0 : __kmp_debug_assert("assertion failure", __FILE__, __LINE__)) +#define KMP_ASSERT2(cond, msg) KMP_ASSERT(cond) +#define KMP_DEBUG_ASSERT(cond) 0 +#define KMP_DEBUG_ASSERT2(cond, msg) 0 +#endif // KMP_DEBUG #else - #define KMP_ASSERT( cond ) 0 - #define KMP_ASSERT2( cond, msg ) 0 - #define KMP_DEBUG_ASSERT( cond ) 0 - #define KMP_DEBUG_ASSERT2( cond, msg ) 0 +#define KMP_ASSERT(cond) 0 +#define KMP_ASSERT2(cond, msg) 0 +#define KMP_DEBUG_ASSERT(cond) 0 +#define KMP_DEBUG_ASSERT2(cond, msg) 0 #endif // KMP_USE_ASSERT #ifdef KMP_DEBUG - extern void __kmp_debug_printf_stdout( char const * format, ... ); +extern void __kmp_debug_printf_stdout(char const *format, ...); #endif -extern void __kmp_debug_printf( char const * format, ... ); +extern void __kmp_debug_printf(char const *format, ...); #ifdef KMP_DEBUG - extern int kmp_a_debug; - extern int kmp_b_debug; - extern int kmp_c_debug; - extern int kmp_d_debug; - extern int kmp_e_debug; - extern int kmp_f_debug; - extern int kmp_diag; - - #define KA_TRACE(d,x) if (kmp_a_debug >= d) { __kmp_debug_printf x ; } - #define KB_TRACE(d,x) if (kmp_b_debug >= d) { __kmp_debug_printf x ; } - #define KC_TRACE(d,x) if (kmp_c_debug >= d) { __kmp_debug_printf x ; } - #define KD_TRACE(d,x) if (kmp_d_debug >= d) { __kmp_debug_printf x ; } - #define KE_TRACE(d,x) if (kmp_e_debug >= d) { __kmp_debug_printf x ; } - #define KF_TRACE(d,x) if (kmp_f_debug >= d) { __kmp_debug_printf x ; } - #define K_DIAG(d,x) {if (kmp_diag == d) { __kmp_debug_printf_stdout x ; } } - - #define KA_DUMP(d,x) if (kmp_a_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KB_DUMP(d,x) if (kmp_b_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KC_DUMP(d,x) if (kmp_c_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KD_DUMP(d,x) if (kmp_d_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KE_DUMP(d,x) if (kmp_e_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } - #define KF_DUMP(d,x) if (kmp_f_debug >= d) { int ks; __kmp_disable(&ks); (x) ; __kmp_enable(ks); } +extern int kmp_a_debug; +extern int kmp_b_debug; +extern int kmp_c_debug; +extern int kmp_d_debug; +extern int kmp_e_debug; +extern int kmp_f_debug; +extern int kmp_diag; + +#define KA_TRACE(d, x) \ + if (kmp_a_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define KB_TRACE(d, x) \ + if (kmp_b_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define KC_TRACE(d, x) \ + if (kmp_c_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define KD_TRACE(d, x) \ + if (kmp_d_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define KE_TRACE(d, x) \ + if (kmp_e_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define KF_TRACE(d, x) \ + if (kmp_f_debug >= d) { \ + __kmp_debug_printf x; \ + } +#define 
K_DIAG(d, x) \ + { \ + if (kmp_diag == d) { \ + __kmp_debug_printf_stdout x; \ + } \ + } + +#define KA_DUMP(d, x) \ + if (kmp_a_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } +#define KB_DUMP(d, x) \ + if (kmp_b_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } +#define KC_DUMP(d, x) \ + if (kmp_c_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } +#define KD_DUMP(d, x) \ + if (kmp_d_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } +#define KE_DUMP(d, x) \ + if (kmp_e_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } +#define KF_DUMP(d, x) \ + if (kmp_f_debug >= d) { \ + int ks; \ + __kmp_disable(&ks); \ + (x); \ + __kmp_enable(ks); \ + } #else - #define KA_TRACE(d,x) /* nothing to do */ - #define KB_TRACE(d,x) /* nothing to do */ - #define KC_TRACE(d,x) /* nothing to do */ - #define KD_TRACE(d,x) /* nothing to do */ - #define KE_TRACE(d,x) /* nothing to do */ - #define KF_TRACE(d,x) /* nothing to do */ - #define K_DIAG(d,x) {}/* nothing to do */ - - #define KA_DUMP(d,x) /* nothing to do */ - #define KB_DUMP(d,x) /* nothing to do */ - #define KC_DUMP(d,x) /* nothing to do */ - #define KD_DUMP(d,x) /* nothing to do */ - #define KE_DUMP(d,x) /* nothing to do */ - #define KF_DUMP(d,x) /* nothing to do */ +#define KA_TRACE(d, x) /* nothing to do */ +#define KB_TRACE(d, x) /* nothing to do */ +#define KC_TRACE(d, x) /* nothing to do */ +#define KD_TRACE(d, x) /* nothing to do */ +#define KE_TRACE(d, x) /* nothing to do */ +#define KF_TRACE(d, x) /* nothing to do */ +#define K_DIAG(d, x) \ + {} /* nothing to do */ + +#define KA_DUMP(d, x) /* nothing to do */ +#define KB_DUMP(d, x) /* nothing to do */ +#define KC_DUMP(d, x) /* nothing to do */ +#define KD_DUMP(d, x) /* nothing to do */ +#define KE_DUMP(d, x) /* nothing to do */ +#define KF_DUMP(d, x) /* nothing to do */ #endif // KMP_DEBUG #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif // __cplusplus #endif /* KMP_DEBUG_H */ diff --git a/openmp/runtime/src/kmp_debugger.cpp b/openmp/runtime/src/kmp_debugger.cpp index 76fe1e0..4394a19 100644 --- a/openmp/runtime/src/kmp_debugger.cpp +++ b/openmp/runtime/src/kmp_debugger.cpp @@ -1,6 +1,6 @@ #if USE_DEBUGGER /* - * kmp_debugger.c -- debugger support. + * kmp_debugger.cpp -- debugger support. */ @@ -19,47 +19,36 @@ #include "kmp_omp.h" #include "kmp_str.h" -/* - NOTE: All variable names are known to the debugger, do not change! -*/ +// NOTE: All variable names are known to the debugger, do not change! #ifdef __cplusplus - extern "C" { - extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info; - } // extern "C" +extern "C" { +extern kmp_omp_struct_info_t __kmp_omp_debug_struct_info; +} // extern "C" #endif // __cplusplus -int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL. +int __kmp_debugging = FALSE; // Boolean whether currently debugging OpenMP RTL. 
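The offset_and_size_of and addr_and_size_of macros reformatted just below describe each runtime field of interest to an attached debugger as an (offset, size) or (address, size) pair, so the debugger can decode kmp_* structures without compiling against the runtime headers. A small self-contained sketch of the same offsetof/sizeof idiom on made-up types (debug_field_t, sample_t and FIELD_OFFSET_AND_SIZE are illustrative, not part of the runtime):

#include <cstddef>
#include <cstdio>

// Illustrative only: an (offset, size) pair like the debugger interface uses.
struct debug_field_t {
  long offset;
  long size;
};

struct sample_t {
  int id;
  double payload;
};

// Same shape as offset_and_size_of(structure, field) in kmp_debugger.cpp.
#define FIELD_OFFSET_AND_SIZE(structure, field)                               \
  { (long)offsetof(structure, field), (long)sizeof(((structure *)0)->field) }

static const debug_field_t sample_fields[] = {
    FIELD_OFFSET_AND_SIZE(sample_t, id),
    FIELD_OFFSET_AND_SIZE(sample_t, payload),
};

int main() {
  for (const debug_field_t &f : sample_fields)
    std::printf("offset=%ld size=%ld\n", f.offset, f.size);
  return 0;
}

The ((structure *)0)->field inside sizeof mirrors the runtime macro; it is an unevaluated operand, so the null pointer is never dereferenced.
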
-#define offset_and_size_of( structure, field ) \ - { \ - offsetof( structure, field ), \ - sizeof( ( (structure *) NULL)->field ) \ - } +#define offset_and_size_of(structure, field) \ + { offsetof(structure, field), sizeof(((structure *)NULL)->field) } -#define offset_and_size_not_available \ - { -1, -1 } +#define offset_and_size_not_available \ + { -1, -1 } -#define addr_and_size_of( var ) \ - { \ - (kmp_uint64)( & var ), \ - sizeof( var ) \ - } +#define addr_and_size_of(var) \ + { (kmp_uint64)(&var), sizeof(var) } #define nthr_buffer_size 1024 -static kmp_int32 -kmp_omp_nthr_info_buffer[ nthr_buffer_size ] = - { nthr_buffer_size * sizeof( kmp_int32 ) }; +static kmp_int32 kmp_omp_nthr_info_buffer[nthr_buffer_size] = { + nthr_buffer_size * sizeof(kmp_int32)}; /* TODO: Check punctuation for various platforms here */ -static char func_microtask[] = "__kmp_invoke_microtask"; -static char func_fork[] = "__kmpc_fork_call"; -static char func_fork_teams[] = "__kmpc_fork_teams"; - +static char func_microtask[] = "__kmp_invoke_microtask"; +static char func_fork[] = "__kmpc_fork_call"; +static char func_fork_teams[] = "__kmpc_fork_teams"; // Various info about runtime structures: addresses, field offsets, sizes, etc. -kmp_omp_struct_info_t -__kmp_omp_debug_struct_info = { +kmp_omp_struct_info_t __kmp_omp_debug_struct_info = { /* Change this only if you make a fundamental data structure change here */ KMP_OMP_VERSION, @@ -67,166 +56,167 @@ __kmp_omp_debug_struct_info = { /* sanity check. Only should be checked if versions are identical * This is also used for backward compatibility to get the runtime * structure size if it the runtime is older than the interface */ - sizeof( kmp_omp_struct_info_t ), + sizeof(kmp_omp_struct_info_t), /* OpenMP RTL version info. */ - addr_and_size_of( __kmp_version_major ), - addr_and_size_of( __kmp_version_minor ), - addr_and_size_of( __kmp_version_build ), - addr_and_size_of( __kmp_openmp_version ), - { (kmp_uint64)( __kmp_copyright ) + KMP_VERSION_MAGIC_LEN, 0 }, // Skip magic prefix. + addr_and_size_of(__kmp_version_major), + addr_and_size_of(__kmp_version_minor), + addr_and_size_of(__kmp_version_build), + addr_and_size_of(__kmp_openmp_version), + {(kmp_uint64)(__kmp_copyright) + KMP_VERSION_MAGIC_LEN, + 0}, // Skip magic prefix. /* Various globals. */ - addr_and_size_of( __kmp_threads ), - addr_and_size_of( __kmp_root ), - addr_and_size_of( __kmp_threads_capacity ), - addr_and_size_of( __kmp_monitor ), -#if ! 
KMP_USE_DYNAMIC_LOCK - addr_and_size_of( __kmp_user_lock_table ), + addr_and_size_of(__kmp_threads), + addr_and_size_of(__kmp_root), + addr_and_size_of(__kmp_threads_capacity), + addr_and_size_of(__kmp_monitor), +#if !KMP_USE_DYNAMIC_LOCK + addr_and_size_of(__kmp_user_lock_table), #endif - addr_and_size_of( func_microtask ), - addr_and_size_of( func_fork ), - addr_and_size_of( func_fork_teams ), - addr_and_size_of( __kmp_team_counter ), - addr_and_size_of( __kmp_task_counter ), - addr_and_size_of( kmp_omp_nthr_info_buffer ), - sizeof( void * ), + addr_and_size_of(func_microtask), + addr_and_size_of(func_fork), + addr_and_size_of(func_fork_teams), + addr_and_size_of(__kmp_team_counter), + addr_and_size_of(__kmp_task_counter), + addr_and_size_of(kmp_omp_nthr_info_buffer), + sizeof(void *), OMP_LOCK_T_SIZE < sizeof(void *), bs_last_barrier, INITIAL_TASK_DEQUE_SIZE, // thread structure information - sizeof( kmp_base_info_t ), - offset_and_size_of( kmp_base_info_t, th_info ), - offset_and_size_of( kmp_base_info_t, th_team ), - offset_and_size_of( kmp_base_info_t, th_root ), - offset_and_size_of( kmp_base_info_t, th_serial_team ), - offset_and_size_of( kmp_base_info_t, th_ident ), - offset_and_size_of( kmp_base_info_t, th_spin_here ), - offset_and_size_of( kmp_base_info_t, th_next_waiting ), - offset_and_size_of( kmp_base_info_t, th_task_team ), - offset_and_size_of( kmp_base_info_t, th_current_task ), - offset_and_size_of( kmp_base_info_t, th_task_state ), - offset_and_size_of( kmp_base_info_t, th_bar ), - offset_and_size_of( kmp_bstate_t, b_worker_arrived ), + sizeof(kmp_base_info_t), + offset_and_size_of(kmp_base_info_t, th_info), + offset_and_size_of(kmp_base_info_t, th_team), + offset_and_size_of(kmp_base_info_t, th_root), + offset_and_size_of(kmp_base_info_t, th_serial_team), + offset_and_size_of(kmp_base_info_t, th_ident), + offset_and_size_of(kmp_base_info_t, th_spin_here), + offset_and_size_of(kmp_base_info_t, th_next_waiting), + offset_and_size_of(kmp_base_info_t, th_task_team), + offset_and_size_of(kmp_base_info_t, th_current_task), + offset_and_size_of(kmp_base_info_t, th_task_state), + offset_and_size_of(kmp_base_info_t, th_bar), + offset_and_size_of(kmp_bstate_t, b_worker_arrived), #if OMP_40_ENABLED // teams information - offset_and_size_of( kmp_base_info_t, th_teams_microtask), - offset_and_size_of( kmp_base_info_t, th_teams_level), - offset_and_size_of( kmp_teams_size_t, nteams ), - offset_and_size_of( kmp_teams_size_t, nth ), + offset_and_size_of(kmp_base_info_t, th_teams_microtask), + offset_and_size_of(kmp_base_info_t, th_teams_level), + offset_and_size_of(kmp_teams_size_t, nteams), + offset_and_size_of(kmp_teams_size_t, nth), #endif // kmp_desc structure (for info field above) - sizeof( kmp_desc_base_t ), - offset_and_size_of( kmp_desc_base_t, ds_tid ), - offset_and_size_of( kmp_desc_base_t, ds_gtid ), - // On Windows* OS, ds_thread contains a thread /handle/, which is not usable, while thread /id/ - // is in ds_thread_id. - #if KMP_OS_WINDOWS - offset_and_size_of( kmp_desc_base_t, ds_thread_id), - #else - offset_and_size_of( kmp_desc_base_t, ds_thread), - #endif + sizeof(kmp_desc_base_t), + offset_and_size_of(kmp_desc_base_t, ds_tid), + offset_and_size_of(kmp_desc_base_t, ds_gtid), +// On Windows* OS, ds_thread contains a thread /handle/, which is not usable, +// while thread /id/ is in ds_thread_id. 
+#if KMP_OS_WINDOWS + offset_and_size_of(kmp_desc_base_t, ds_thread_id), +#else + offset_and_size_of(kmp_desc_base_t, ds_thread), +#endif // team structure information - sizeof( kmp_base_team_t ), - offset_and_size_of( kmp_base_team_t, t_master_tid ), - offset_and_size_of( kmp_base_team_t, t_ident ), - offset_and_size_of( kmp_base_team_t, t_parent ), - offset_and_size_of( kmp_base_team_t, t_nproc ), - offset_and_size_of( kmp_base_team_t, t_threads ), - offset_and_size_of( kmp_base_team_t, t_serialized ), - offset_and_size_of( kmp_base_team_t, t_id ), - offset_and_size_of( kmp_base_team_t, t_pkfn ), - offset_and_size_of( kmp_base_team_t, t_task_team ), - offset_and_size_of( kmp_base_team_t, t_implicit_task_taskdata ), + sizeof(kmp_base_team_t), + offset_and_size_of(kmp_base_team_t, t_master_tid), + offset_and_size_of(kmp_base_team_t, t_ident), + offset_and_size_of(kmp_base_team_t, t_parent), + offset_and_size_of(kmp_base_team_t, t_nproc), + offset_and_size_of(kmp_base_team_t, t_threads), + offset_and_size_of(kmp_base_team_t, t_serialized), + offset_and_size_of(kmp_base_team_t, t_id), + offset_and_size_of(kmp_base_team_t, t_pkfn), + offset_and_size_of(kmp_base_team_t, t_task_team), + offset_and_size_of(kmp_base_team_t, t_implicit_task_taskdata), #if OMP_40_ENABLED - offset_and_size_of( kmp_base_team_t, t_cancel_request ), + offset_and_size_of(kmp_base_team_t, t_cancel_request), #endif - offset_and_size_of( kmp_base_team_t, t_bar ), - offset_and_size_of( kmp_balign_team_t, b_master_arrived ), - offset_and_size_of( kmp_balign_team_t, b_team_arrived ), + offset_and_size_of(kmp_base_team_t, t_bar), + offset_and_size_of(kmp_balign_team_t, b_master_arrived), + offset_and_size_of(kmp_balign_team_t, b_team_arrived), // root structure information - sizeof( kmp_base_root_t ), - offset_and_size_of( kmp_base_root_t, r_root_team ), - offset_and_size_of( kmp_base_root_t, r_hot_team ), - offset_and_size_of( kmp_base_root_t, r_uber_thread ), + sizeof(kmp_base_root_t), + offset_and_size_of(kmp_base_root_t, r_root_team), + offset_and_size_of(kmp_base_root_t, r_hot_team), + offset_and_size_of(kmp_base_root_t, r_uber_thread), offset_and_size_not_available, // ident structure information - sizeof( ident_t ), - offset_and_size_of( ident_t, psource ), - offset_and_size_of( ident_t, flags ), + sizeof(ident_t), + offset_and_size_of(ident_t, psource), + offset_and_size_of(ident_t, flags), // lock structure information - sizeof( kmp_base_queuing_lock_t ), - offset_and_size_of( kmp_base_queuing_lock_t, initialized ), - offset_and_size_of( kmp_base_queuing_lock_t, location ), - offset_and_size_of( kmp_base_queuing_lock_t, tail_id ), - offset_and_size_of( kmp_base_queuing_lock_t, head_id ), - offset_and_size_of( kmp_base_queuing_lock_t, next_ticket ), - offset_and_size_of( kmp_base_queuing_lock_t, now_serving ), - offset_and_size_of( kmp_base_queuing_lock_t, owner_id ), - offset_and_size_of( kmp_base_queuing_lock_t, depth_locked ), - offset_and_size_of( kmp_base_queuing_lock_t, flags ), - -#if ! 
KMP_USE_DYNAMIC_LOCK + sizeof(kmp_base_queuing_lock_t), + offset_and_size_of(kmp_base_queuing_lock_t, initialized), + offset_and_size_of(kmp_base_queuing_lock_t, location), + offset_and_size_of(kmp_base_queuing_lock_t, tail_id), + offset_and_size_of(kmp_base_queuing_lock_t, head_id), + offset_and_size_of(kmp_base_queuing_lock_t, next_ticket), + offset_and_size_of(kmp_base_queuing_lock_t, now_serving), + offset_and_size_of(kmp_base_queuing_lock_t, owner_id), + offset_and_size_of(kmp_base_queuing_lock_t, depth_locked), + offset_and_size_of(kmp_base_queuing_lock_t, flags), + +#if !KMP_USE_DYNAMIC_LOCK /* Lock table. */ - sizeof( kmp_lock_table_t ), - offset_and_size_of( kmp_lock_table_t, used ), - offset_and_size_of( kmp_lock_table_t, allocated ), - offset_and_size_of( kmp_lock_table_t, table ), + sizeof(kmp_lock_table_t), + offset_and_size_of(kmp_lock_table_t, used), + offset_and_size_of(kmp_lock_table_t, allocated), + offset_and_size_of(kmp_lock_table_t, table), #endif // Task team structure information. - sizeof( kmp_base_task_team_t ), - offset_and_size_of( kmp_base_task_team_t, tt_threads_data ), - offset_and_size_of( kmp_base_task_team_t, tt_found_tasks ), - offset_and_size_of( kmp_base_task_team_t, tt_nproc ), - offset_and_size_of( kmp_base_task_team_t, tt_unfinished_threads ), - offset_and_size_of( kmp_base_task_team_t, tt_active ), + sizeof(kmp_base_task_team_t), + offset_and_size_of(kmp_base_task_team_t, tt_threads_data), + offset_and_size_of(kmp_base_task_team_t, tt_found_tasks), + offset_and_size_of(kmp_base_task_team_t, tt_nproc), + offset_and_size_of(kmp_base_task_team_t, tt_unfinished_threads), + offset_and_size_of(kmp_base_task_team_t, tt_active), // task_data_t. - sizeof( kmp_taskdata_t ), - offset_and_size_of( kmp_taskdata_t, td_task_id ), - offset_and_size_of( kmp_taskdata_t, td_flags ), - offset_and_size_of( kmp_taskdata_t, td_team ), - offset_and_size_of( kmp_taskdata_t, td_parent ), - offset_and_size_of( kmp_taskdata_t, td_level ), - offset_and_size_of( kmp_taskdata_t, td_ident ), - offset_and_size_of( kmp_taskdata_t, td_allocated_child_tasks ), - offset_and_size_of( kmp_taskdata_t, td_incomplete_child_tasks ), - - offset_and_size_of( kmp_taskdata_t, td_taskwait_ident ), - offset_and_size_of( kmp_taskdata_t, td_taskwait_counter ), - offset_and_size_of( kmp_taskdata_t, td_taskwait_thread ), + sizeof(kmp_taskdata_t), + offset_and_size_of(kmp_taskdata_t, td_task_id), + offset_and_size_of(kmp_taskdata_t, td_flags), + offset_and_size_of(kmp_taskdata_t, td_team), + offset_and_size_of(kmp_taskdata_t, td_parent), + offset_and_size_of(kmp_taskdata_t, td_level), + offset_and_size_of(kmp_taskdata_t, td_ident), + offset_and_size_of(kmp_taskdata_t, td_allocated_child_tasks), + offset_and_size_of(kmp_taskdata_t, td_incomplete_child_tasks), + + offset_and_size_of(kmp_taskdata_t, td_taskwait_ident), + offset_and_size_of(kmp_taskdata_t, td_taskwait_counter), + offset_and_size_of(kmp_taskdata_t, td_taskwait_thread), #if OMP_40_ENABLED - offset_and_size_of( kmp_taskdata_t, td_taskgroup ), - offset_and_size_of( kmp_taskgroup_t, count ), - offset_and_size_of( kmp_taskgroup_t, cancel_request ), - - offset_and_size_of( kmp_taskdata_t, td_depnode ), - offset_and_size_of( kmp_depnode_list_t, node ), - offset_and_size_of( kmp_depnode_list_t, next ), - offset_and_size_of( kmp_base_depnode_t, successors ), - offset_and_size_of( kmp_base_depnode_t, task ), - offset_and_size_of( kmp_base_depnode_t, npredecessors ), - offset_and_size_of( kmp_base_depnode_t, nrefs ), + 
offset_and_size_of(kmp_taskdata_t, td_taskgroup), + offset_and_size_of(kmp_taskgroup_t, count), + offset_and_size_of(kmp_taskgroup_t, cancel_request), + + offset_and_size_of(kmp_taskdata_t, td_depnode), + offset_and_size_of(kmp_depnode_list_t, node), + offset_and_size_of(kmp_depnode_list_t, next), + offset_and_size_of(kmp_base_depnode_t, successors), + offset_and_size_of(kmp_base_depnode_t, task), + offset_and_size_of(kmp_base_depnode_t, npredecessors), + offset_and_size_of(kmp_base_depnode_t, nrefs), #endif - offset_and_size_of( kmp_task_t, routine ), + offset_and_size_of(kmp_task_t, routine), // thread_data_t. - sizeof( kmp_thread_data_t ), - offset_and_size_of( kmp_base_thread_data_t, td_deque ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_size ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_head ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_tail ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_ntasks ), - offset_and_size_of( kmp_base_thread_data_t, td_deque_last_stolen ), + sizeof(kmp_thread_data_t), + offset_and_size_of(kmp_base_thread_data_t, td_deque), + offset_and_size_of(kmp_base_thread_data_t, td_deque_size), + offset_and_size_of(kmp_base_thread_data_t, td_deque_head), + offset_and_size_of(kmp_base_thread_data_t, td_deque_tail), + offset_and_size_of(kmp_base_thread_data_t, td_deque_ntasks), + offset_and_size_of(kmp_base_thread_data_t, td_deque_last_stolen), // The last field. KMP_OMP_VERSION, @@ -236,80 +226,66 @@ __kmp_omp_debug_struct_info = { #undef offset_and_size_of #undef addr_and_size_of -/* - Intel compiler on IA-32 architecture issues a warning "conversion +/* Intel compiler on IA-32 architecture issues a warning "conversion from "unsigned long long" to "char *" may lose significant bits" when 64-bit value is assigned to 32-bit pointer. Use this function - to suppress the warning. -*/ -static inline -void * -__kmp_convert_to_ptr( - kmp_uint64 addr -) { - #if KMP_COMPILER_ICC - #pragma warning( push ) - #pragma warning( disable: 810 ) // conversion from "unsigned long long" to "char *" may lose significant bits - #pragma warning( disable: 1195 ) // conversion from integer to smaller pointer - #endif // KMP_COMPILER_ICC - return (void *) addr; - #if KMP_COMPILER_ICC - #pragma warning( pop ) - #endif // KMP_COMPILER_ICC + to suppress the warning. */ +static inline void *__kmp_convert_to_ptr(kmp_uint64 addr) { +#if KMP_COMPILER_ICC +#pragma warning(push) +#pragma warning(disable : 810) // conversion from "unsigned long long" to "char +// *" may lose significant bits +#pragma warning(disable : 1195) // conversion from integer to smaller pointer +#endif // KMP_COMPILER_ICC + return (void *)addr; +#if KMP_COMPILER_ICC +#pragma warning(pop) +#endif // KMP_COMPILER_ICC } // __kmp_convert_to_ptr +static int kmp_location_match(kmp_str_loc_t *loc, kmp_omp_nthr_item_t *item) { -static int -kmp_location_match( - kmp_str_loc_t * loc, - kmp_omp_nthr_item_t * item -) { - - int file_match = 0; - int func_match = 0; - int line_match = 0; - - char * file = (char *) __kmp_convert_to_ptr( item->file ); - char * func = (char *) __kmp_convert_to_ptr( item->func ); - file_match = __kmp_str_fname_match( & loc->fname, file ); - func_match = - item->func == 0 // If item->func is NULL, it allows any func name. - || - strcmp( func, "*" ) == 0 - || - ( loc->func != NULL && strcmp( loc->func, func ) == 0 ); - line_match = - item->begin <= loc->line - && - ( item->end <= 0 || loc->line <= item->end ); // if item->end <= 0, it means "end of file". 
- - return ( file_match && func_match && line_match ); + int file_match = 0; + int func_match = 0; + int line_match = 0; -} // kmp_location_match + char *file = (char *)__kmp_convert_to_ptr(item->file); + char *func = (char *)__kmp_convert_to_ptr(item->func); + file_match = __kmp_str_fname_match(&loc->fname, file); + func_match = + item->func == 0 // If item->func is NULL, it allows any func name. + || strcmp(func, "*") == 0 || + (loc->func != NULL && strcmp(loc->func, func) == 0); + line_match = + item->begin <= loc->line && + (item->end <= 0 || + loc->line <= item->end); // if item->end <= 0, it means "end of file". + + return (file_match && func_match && line_match); +} // kmp_location_match -int -__kmp_omp_num_threads( - ident_t const * ident -) { - - int num_threads = 0; - - kmp_omp_nthr_info_t * info = - (kmp_omp_nthr_info_t *) __kmp_convert_to_ptr( __kmp_omp_debug_struct_info.nthr_info.addr ); - if ( info->num > 0 && info->array != 0 ) { - kmp_omp_nthr_item_t * items = (kmp_omp_nthr_item_t *) __kmp_convert_to_ptr( info->array ); - kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 1 ); - int i; - for ( i = 0; i < info->num; ++ i ) { - if ( kmp_location_match( & loc, & items[ i ] ) ) { - num_threads = items[ i ].num_threads; - }; // if - }; // for - __kmp_str_loc_free( & loc ); - }; // if - - return num_threads;; +int __kmp_omp_num_threads(ident_t const *ident) { + + int num_threads = 0; + + kmp_omp_nthr_info_t *info = (kmp_omp_nthr_info_t *)__kmp_convert_to_ptr( + __kmp_omp_debug_struct_info.nthr_info.addr); + if (info->num > 0 && info->array != 0) { + kmp_omp_nthr_item_t *items = + (kmp_omp_nthr_item_t *)__kmp_convert_to_ptr(info->array); + kmp_str_loc_t loc = __kmp_str_loc_init(ident->psource, 1); + int i; + for (i = 0; i < info->num; ++i) { + if (kmp_location_match(&loc, &items[i])) { + num_threads = items[i].num_threads; + }; // if + }; // for + __kmp_str_loc_free(&loc); + }; // if + + return num_threads; + ; } // __kmp_omp_num_threads #endif /* USE_DEBUGGER */ diff --git a/openmp/runtime/src/kmp_debugger.h b/openmp/runtime/src/kmp_debugger.h index ff0ad96..4713d88 100644 --- a/openmp/runtime/src/kmp_debugger.h +++ b/openmp/runtime/src/kmp_debugger.h @@ -18,34 +18,34 @@ #define KMP_DEBUGGER_H #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -/* * This external variable can be set by any debugger to flag to the runtime that we - are currently executing inside a debugger. This will allow the debugger to override - the number of threads spawned in a parallel region by using __kmp_omp_num_threads() (below). - * When __kmp_debugging is TRUE, each team and each task gets a unique integer identifier - that can be used by debugger to conveniently identify teams and tasks. - * The debugger has access to __kmp_omp_debug_struct_info which contains information - about the OpenMP library's important internal structures. This access will allow the debugger - to read detailed information from the typical OpenMP constructs (teams, threads, tasking, etc. ) - during a debugging session and offer detailed and useful information which the user can probe - about the OpenMP portion of their code. - */ -extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */ +/* This external variable can be set by any debugger to flag to the runtime + that we are currently executing inside a debugger. This will allow the + debugger to override the number of threads spawned in a parallel region by + using __kmp_omp_num_threads() (below). 
+ * When __kmp_debugging is TRUE, each team and each task gets a unique integer + identifier that can be used by debugger to conveniently identify teams and + tasks. + * The debugger has access to __kmp_omp_debug_struct_info which contains + information about the OpenMP library's important internal structures. This + access will allow the debugger to read detailed information from the typical + OpenMP constructs (teams, threads, tasking, etc. ) during a debugging + session and offer detailed and useful information which the user can probe + about the OpenMP portion of their code. */ +extern int __kmp_debugging; /* Boolean whether currently debugging OpenMP RTL */ // Return number of threads specified by the debugger for given parallel region. -/* The ident field, which represents a source file location, is used to check if the - debugger has changed the number of threads for the parallel region at source file - location ident. This way, specific parallel regions' number of threads can be changed - at the debugger's request. - */ -int __kmp_omp_num_threads( ident_t const * ident ); +/* The ident field, which represents a source file location, is used to check if + the debugger has changed the number of threads for the parallel region at + source file location ident. This way, specific parallel regions' number of + threads can be changed at the debugger's request. */ +int __kmp_omp_num_threads(ident_t const *ident); #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif // __cplusplus - #endif // KMP_DEBUGGER_H #endif // USE_DEBUGGER diff --git a/openmp/runtime/src/kmp_dispatch.cpp b/openmp/runtime/src/kmp_dispatch.cpp index 49bb216..5439599 100644 --- a/openmp/runtime/src/kmp_dispatch.cpp +++ b/openmp/runtime/src/kmp_dispatch.cpp @@ -13,32 +13,28 @@ //===----------------------------------------------------------------------===// -/* - * Dynamic scheduling initialization and dispatch. +/* Dynamic scheduling initialization and dispatch. * * NOTE: __kmp_nth is a constant inside of any dispatch loop, however * it may change values between parallel regions. __kmp_max_nth * is the largest value __kmp_nth may take, 1 is the smallest. 
- * */ -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -// Need to raise Win version from XP to Vista here for support of InterlockedExchange64 +// Need to raise Win version from XP to Vista here for support of +// InterlockedExchange64 #if defined(_WIN32_WINNT) && defined(_M_IX86) #undef _WIN32_WINNT #define _WIN32_WINNT 0x0502 #endif #include "kmp.h" +#include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_error.h" #include "kmp_stats.h" +#include "kmp_str.h" #if KMP_OS_WINDOWS && KMP_ARCH_X86 - #include +#include #endif #if OMPT_SUPPORT @@ -47,2350 +43,2338 @@ #endif /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ #if KMP_STATIC_STEAL_ENABLED - // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types - template< typename T > - struct dispatch_private_infoXX_template { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - UT count; // unsigned - T ub; - /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ - T lb; - ST st; // signed - UT tc; // unsigned - T static_steal_counter; // for static_steal only; maybe better to put after ub - - /* parm[1-4] are used in different ways by different scheduling algorithms */ - - // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) - // a) parm3 is properly aligned and - // b) all parm1-4 are in the same cache line. - // Because of parm1-4 are used together, performance seems to be better - // if they are in the same line (not measured though). - - struct KMP_ALIGN( 32 ) { // compiler does not accept sizeof(T)*4 - T parm1; - T parm2; - T parm3; - T parm4; - }; - - UT ordered_lower; // unsigned - UT ordered_upper; // unsigned - #if KMP_OS_WINDOWS - T last_upper; - #endif /* KMP_OS_WINDOWS */ - }; +// replaces dispatch_private_info{32,64} structures and +// dispatch_private_info{32,64}_t types +template struct dispatch_private_infoXX_template { + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + UT count; // unsigned + T ub; + /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */ + T lb; + ST st; // signed + UT tc; // unsigned + T static_steal_counter; // for static_steal only; maybe better to put after ub + + /* parm[1-4] are used in different ways by different scheduling algorithms */ + + // KMP_ALIGN( 32 ) ensures ( if the KMP_ALIGN macro is turned on ) + // a) parm3 is properly aligned and + // b) all parm1-4 are in the same cache line. + // Because of parm1-4 are used together, performance seems to be better + // if they are in the same line (not measured though). 
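The comment above states the main layout constraint in dispatch_private_infoXX_template: parm1-4 are accessed together, so the KMP_ALIGN(32) group that follows keeps all four in a single cache line. A standalone sketch of how that property can be expressed in standard C++ follows; demo_private_info is a made-up stand-in, and the 64-byte line size is an assumption typical of x86, not something the patch specifies.

#include <cstddef>
#include <cstdint>

// Hypothetical stand-in for dispatch_private_infoXX_template<kmp_int64>.
struct demo_private_info {
  std::uint64_t count;
  std::int64_t ub, lb, st, tc;
  // Group the four parms and align the group, as KMP_ALIGN(32) does,
  // so they cannot straddle a 64-byte cache line.
  struct alignas(32) {
    std::int64_t parm1, parm2, parm3, parm4;
  } parms;
};

// With 8-byte members, a 32-byte aligned group of four always fits in one
// 64-byte line: its offset mod 64 is 0 or 32, and 32 + 4*8 == 64.
static_assert(offsetof(demo_private_info, parms) % 32 == 0,
              "parm group must be 32-byte aligned");
static_assert(sizeof(std::int64_t) * 4 == 32, "four parms fill 32 bytes");

int main() { return 0; }
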
+ + struct KMP_ALIGN(32) { // compiler does not accept sizeof(T)*4 + T parm1; + T parm2; + T parm3; + T parm4; + }; + + UT ordered_lower; // unsigned + UT ordered_upper; // unsigned +#if KMP_OS_WINDOWS + T last_upper; +#endif /* KMP_OS_WINDOWS */ +}; #else /* KMP_STATIC_STEAL_ENABLED */ - // replaces dispatch_private_info{32,64} structures and dispatch_private_info{32,64}_t types - template< typename T > - struct dispatch_private_infoXX_template { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - T lb; - T ub; - ST st; // signed - UT tc; // unsigned - - T parm1; - T parm2; - T parm3; - T parm4; - - UT count; // unsigned - - UT ordered_lower; // unsigned - UT ordered_upper; // unsigned - #if KMP_OS_WINDOWS - T last_upper; - #endif /* KMP_OS_WINDOWS */ - }; +// replaces dispatch_private_info{32,64} structures and +// dispatch_private_info{32,64}_t types +template struct dispatch_private_infoXX_template { + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + T lb; + T ub; + ST st; // signed + UT tc; // unsigned + + T parm1; + T parm2; + T parm3; + T parm4; + + UT count; // unsigned + + UT ordered_lower; // unsigned + UT ordered_upper; // unsigned +#if KMP_OS_WINDOWS + T last_upper; +#endif /* KMP_OS_WINDOWS */ +}; #endif /* KMP_STATIC_STEAL_ENABLED */ // replaces dispatch_private_info structure and dispatch_private_info_t type -template< typename T > -struct KMP_ALIGN_CACHE dispatch_private_info_template { - // duplicate alignment here, otherwise size of structure is not correct in our compiler - union KMP_ALIGN_CACHE private_info_tmpl { - dispatch_private_infoXX_template< T > p; - dispatch_private_info64_t p64; - } u; - enum sched_type schedule; /* scheduling algorithm */ - kmp_uint32 ordered; /* ordered clause specified */ - kmp_uint32 ordered_bumped; - kmp_int32 ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size after making order - dispatch_private_info * next; /* stack of buffers for nest of serial regions */ - kmp_uint32 nomerge; /* don't merge iters if serialized */ - kmp_uint32 type_size; - enum cons_type pushed_ws; +template struct KMP_ALIGN_CACHE dispatch_private_info_template { + // duplicate alignment here, otherwise size of structure is not correct in our + // compiler + union KMP_ALIGN_CACHE private_info_tmpl { + dispatch_private_infoXX_template p; + dispatch_private_info64_t p64; + } u; + enum sched_type schedule; /* scheduling algorithm */ + kmp_uint32 ordered; /* ordered clause specified */ + kmp_uint32 ordered_bumped; + // To retain the structure size after making ordered_iteration scalar + kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3]; + dispatch_private_info *next; /* stack of buffers for nest of serial regions */ + kmp_uint32 nomerge; /* don't merge iters if serialized */ + kmp_uint32 type_size; + enum cons_type pushed_ws; }; - -// replaces dispatch_shared_info{32,64} structures and dispatch_shared_info{32,64}_t types -template< typename UT > -struct dispatch_shared_infoXX_template { - /* chunk index under dynamic, number of idle threads under static-steal; - iteration index otherwise */ - volatile UT iteration; - volatile UT num_done; - volatile UT ordered_iteration; - UT ordered_dummy[KMP_MAX_ORDERED-3]; // to retain the structure size making ordered_iteration scalar +// replaces dispatch_shared_info{32,64} structures and +// dispatch_shared_info{32,64}_t types +template struct dispatch_shared_infoXX_template { + /* chunk index under dynamic, number of idle threads under 
static-steal; + iteration index otherwise */ + volatile UT iteration; + volatile UT num_done; + volatile UT ordered_iteration; + // to retain the structure size making ordered_iteration scalar + UT ordered_dummy[KMP_MAX_ORDERED - 3]; }; // replaces dispatch_shared_info structure and dispatch_shared_info_t type -template< typename UT > -struct dispatch_shared_info_template { - // we need union here to keep the structure size - union shared_info_tmpl { - dispatch_shared_infoXX_template< UT > s; - dispatch_shared_info64_t s64; - } u; - volatile kmp_uint32 buffer_index; +template struct dispatch_shared_info_template { + // we need union here to keep the structure size + union shared_info_tmpl { + dispatch_shared_infoXX_template s; + dispatch_shared_info64_t s64; + } u; + volatile kmp_uint32 buffer_index; #if OMP_45_ENABLED - volatile kmp_int32 doacross_buf_idx; // teamwise index - kmp_uint32 *doacross_flags; // array of iteration flags (0/1) - kmp_int32 doacross_num_done; // count finished threads + volatile kmp_int32 doacross_buf_idx; // teamwise index + kmp_uint32 *doacross_flags; // array of iteration flags (0/1) + kmp_int32 doacross_num_done; // count finished threads #endif #if KMP_USE_HWLOC - // When linking with libhwloc, the ORDERED EPCC test slowsdown on big - // machines (> 48 cores). Performance analysis showed that a cache thrash - // was occurring and this padding helps alleviate the problem. - char padding[64]; + // When linking with libhwloc, the ORDERED EPCC test slowsdown on big + // machines (> 48 cores). Performance analysis showed that a cache thrash + // was occurring and this padding helps alleviate the problem. + char padding[64]; #endif }; /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ #undef USE_TEST_LOCKS // test_then_add template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_add( volatile T *p, T d ); - -template<> -__forceinline kmp_int32 -test_then_add< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_ADD32( p, d ); - return r; +template static __forceinline T test_then_add(volatile T *p, T d); + +template <> +__forceinline kmp_int32 test_then_add(volatile kmp_int32 *p, + kmp_int32 d) { + kmp_int32 r; + r = KMP_TEST_THEN_ADD32(p, d); + return r; } -template<> -__forceinline kmp_int64 -test_then_add< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_ADD64( p, d ); - return r; +template <> +__forceinline kmp_int64 test_then_add(volatile kmp_int64 *p, + kmp_int64 d) { + kmp_int64 r; + r = KMP_TEST_THEN_ADD64(p, d); + return r; } // test_then_inc_acq template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_inc_acq( volatile T *p ); - -template<> -__forceinline kmp_int32 -test_then_inc_acq< kmp_int32 >( volatile kmp_int32 *p ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_INC_ACQ32( p ); - return r; +template static __forceinline T test_then_inc_acq(volatile T *p); + +template <> +__forceinline kmp_int32 test_then_inc_acq(volatile kmp_int32 *p) { + kmp_int32 r; + r = KMP_TEST_THEN_INC_ACQ32(p); + return r; } -template<> -__forceinline kmp_int64 -test_then_inc_acq< kmp_int64 >( volatile kmp_int64 *p ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_INC_ACQ64( p ); - return r; +template <> +__forceinline kmp_int64 test_then_inc_acq(volatile kmp_int64 *p) { + kmp_int64 r; + r = KMP_TEST_THEN_INC_ACQ64(p); + 
return r; } // test_then_inc template (general template should NOT be used) -template< typename T > -static __forceinline T -test_then_inc( volatile T *p ); - -template<> -__forceinline kmp_int32 -test_then_inc< kmp_int32 >( volatile kmp_int32 *p ) -{ - kmp_int32 r; - r = KMP_TEST_THEN_INC32( p ); - return r; +template static __forceinline T test_then_inc(volatile T *p); + +template <> +__forceinline kmp_int32 test_then_inc(volatile kmp_int32 *p) { + kmp_int32 r; + r = KMP_TEST_THEN_INC32(p); + return r; } -template<> -__forceinline kmp_int64 -test_then_inc< kmp_int64 >( volatile kmp_int64 *p ) -{ - kmp_int64 r; - r = KMP_TEST_THEN_INC64( p ); - return r; +template <> +__forceinline kmp_int64 test_then_inc(volatile kmp_int64 *p) { + kmp_int64 r; + r = KMP_TEST_THEN_INC64(p); + return r; } // compare_and_swap template (general template should NOT be used) -template< typename T > -static __forceinline kmp_int32 -compare_and_swap( volatile T *p, T c, T s ); - -template<> -__forceinline kmp_int32 -compare_and_swap< kmp_int32 >( volatile kmp_int32 *p, kmp_int32 c, kmp_int32 s ) -{ - return KMP_COMPARE_AND_STORE_REL32( p, c, s ); +template +static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s); + +template <> +__forceinline kmp_int32 compare_and_swap(volatile kmp_int32 *p, + kmp_int32 c, kmp_int32 s) { + return KMP_COMPARE_AND_STORE_REL32(p, c, s); } -template<> -__forceinline kmp_int32 -compare_and_swap< kmp_int64 >( volatile kmp_int64 *p, kmp_int64 c, kmp_int64 s ) -{ - return KMP_COMPARE_AND_STORE_REL64( p, c, s ); +template <> +__forceinline kmp_int32 compare_and_swap(volatile kmp_int64 *p, + kmp_int64 c, kmp_int64 s) { + return KMP_COMPARE_AND_STORE_REL64(p, c, s); } -/* - Spin wait loop that first does pause, then yield. +/* Spin wait loop that first does pause, then yield. Waits until function returns non-zero when called with *spinner and check. Does NOT put threads to sleep. #if USE_ITT_BUILD Arguments: - obj -- is higher-level synchronization object to report to ittnotify. It is used to report - locks consistently. For example, if lock is acquired immediately, its address is - reported to ittnotify via KMP_FSYNC_ACQUIRED(). However, it lock cannot be acquired - immediately and lock routine calls to KMP_WAIT_YIELD(), the later should report the same - address, not an address of low-level spinner. + obj -- is higher-level synchronization object to report to ittnotify. + It is used to report locks consistently. For example, if lock is + acquired immediately, its address is reported to ittnotify via + KMP_FSYNC_ACQUIRED(). However, it lock cannot be acquired immediately + and lock routine calls to KMP_WAIT_YIELD(), the later should report the + same address, not an address of low-level spinner. #endif // USE_ITT_BUILD */ -template< typename UT > +template // ToDo: make inline function (move to header file for icl) -static UT // unsigned 4- or 8-byte type -__kmp_wait_yield( volatile UT * spinner, - UT checker, - kmp_uint32 (* pred)( UT, UT ) - USE_ITT_BUILD_ARG(void * obj) // Higher-level synchronization object, or NULL. 
- ) -{ - // note: we may not belong to a team at this point - register volatile UT * spin = spinner; - register UT check = checker; - register kmp_uint32 spins; - register kmp_uint32 (*f) ( UT, UT ) = pred; - register UT r; - - KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while(!f(r = *spin, check)) - { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* GEH - remove this since it was accidentally introduced when kmp_wait was split. - It causes problems with infinite recursion because of exit lock */ - /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) - __kmp_abort_thread(); */ - - // if we are oversubscribed, - // or have waited a bit (and KMP_LIBRARY=throughput, then yield - // pause is in the following code - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); - return r; +static UT // unsigned 4- or 8-byte type + __kmp_wait_yield( + volatile UT *spinner, UT checker, + kmp_uint32 (*pred)(UT, UT) USE_ITT_BUILD_ARG( + void *obj) // Higher-level synchronization object, or NULL. + ) { + // note: we may not belong to a team at this point + register volatile UT *spin = spinner; + register UT check = checker; + register kmp_uint32 spins; + register kmp_uint32 (*f)(UT, UT) = pred; + register UT r; + + KMP_FSYNC_SPIN_INIT(obj, (void *)spin); + KMP_INIT_YIELD(spins); + // main wait spin loop + while (!f(r = *spin, check)) { + KMP_FSYNC_SPIN_PREPARE(obj); + /* GEH - remove this since it was accidentally introduced when kmp_wait was + split. It causes problems with infinite recursion because of exit lock */ + /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) + __kmp_abort_thread(); */ + + // if we are oversubscribed, or have waited a bit (and + // KMP_LIBRARY=throughput, then yield. 
pause is in the following code + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_YIELD_SPIN(spins); + } + KMP_FSYNC_SPIN_ACQUIRED(obj); + return r; } -template< typename UT > -static kmp_uint32 __kmp_eq( UT value, UT checker) { - return value == checker; +template static kmp_uint32 __kmp_eq(UT value, UT checker) { + return value == checker; } -template< typename UT > -static kmp_uint32 __kmp_neq( UT value, UT checker) { - return value != checker; +template static kmp_uint32 __kmp_neq(UT value, UT checker) { + return value != checker; } -template< typename UT > -static kmp_uint32 __kmp_lt( UT value, UT checker) { - return value < checker; +template static kmp_uint32 __kmp_lt(UT value, UT checker) { + return value < checker; } -template< typename UT > -static kmp_uint32 __kmp_ge( UT value, UT checker) { - return value >= checker; +template static kmp_uint32 __kmp_ge(UT value, UT checker) { + return value >= checker; } -template< typename UT > -static kmp_uint32 __kmp_le( UT value, UT checker) { - return value <= checker; +template static kmp_uint32 __kmp_le(UT value, UT checker) { + return value <= checker; } - -/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ -static void -__kmp_dispatch_deo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - kmp_info_t *th; +static void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, + ident_t *loc_ref) { + kmp_info_t *th; - KMP_DEBUG_ASSERT( gtid_ref ); + KMP_DEBUG_ASSERT(gtid_ref); - if ( __kmp_env_consistency_check ) { - th = __kmp_threads[*gtid_ref]; - if ( th -> th.th_root -> r.r_active - && ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) ) { + if (__kmp_env_consistency_check) { + th = __kmp_threads[*gtid_ref]; + if (th->th.th_root->r.r_active && + (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none)) { #if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0 ); + __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL, 0); #else - __kmp_push_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref, NULL ); + __kmp_push_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref, NULL); #endif - } } + } } -template< typename UT > -static void -__kmp_dispatch_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - typedef typename traits_t< UT >::signed_t ST; - dispatch_private_info_template< UT > * pr; - - int gtid = *gtid_ref; -// int cid = *cid_ref; - kmp_info_t *th = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( th -> th.th_dispatch ); - - KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid ) ); - if ( __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - if ( pr -> pushed_ws != ct_none ) { +template +static void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + typedef typename traits_t::signed_t ST; + dispatch_private_info_template *pr; + + int gtid = *gtid_ref; + // int cid = *cid_ref; + kmp_info_t *th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_dispatch); + + KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid)); + if (__kmp_env_consistency_check) { + pr = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + if (pr->pushed_ws != ct_none) { #if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_pdo, loc_ref, NULL, 0 ); + __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0); #else - __kmp_push_sync( gtid, 
ct_ordered_in_pdo, loc_ref, NULL ); + __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL); #endif - } } + } - if ( ! th -> th.th_team -> t.t_serialized ) { - dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_sh_current ); - UT lower; + if (!th->th.th_team->t.t_serialized) { + dispatch_shared_info_template *sh = + reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_sh_current); + UT lower; - if ( ! __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - } - lower = pr->u.p.ordered_lower; - - #if ! defined( KMP_GOMP_COMPAT ) - if ( __kmp_env_consistency_check ) { - if ( pr->ordered_bumped ) { - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - __kmp_error_construct2( - kmp_i18n_msg_CnsMultipleNesting, - ct_ordered_in_pdo, loc_ref, - & p->stack_data[ p->w_top ] - ); - } - } - #endif /* !defined(KMP_GOMP_COMPAT) */ + if (!__kmp_env_consistency_check) { + pr = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + } + lower = pr->u.p.ordered_lower; + +#if !defined(KMP_GOMP_COMPAT) + if (__kmp_env_consistency_check) { + if (pr->ordered_bumped) { + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting, + ct_ordered_in_pdo, loc_ref, + &p->stack_data[p->w_top]); + } + } +#endif /* !defined(KMP_GOMP_COMPAT) */ - KMP_MB(); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_deo: T#%%d before wait: ordered_iter:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif + KMP_MB(); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: " + "ordered_iter:%%%s lower:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); + __kmp_str_free(&buff); + } +#endif - __kmp_wait_yield< UT >( &sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG( NULL ) - ); - KMP_MB(); /* is this necessary? */ - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_deo: T#%%d after wait: ordered_iter:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif + __kmp_wait_yield(&sh->u.s.ordered_iteration, lower, + __kmp_ge USE_ITT_BUILD_ARG(NULL)); + KMP_MB(); /* is this necessary? 
*/ +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: " + "ordered_iter:%%%s lower:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); + __kmp_str_free(&buff); } - KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid ) ); +#endif + } + KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid)); } -static void -__kmp_dispatch_dxo_error( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - kmp_info_t *th; - - if ( __kmp_env_consistency_check ) { - th = __kmp_threads[*gtid_ref]; - if ( th -> th.th_dispatch -> th_dispatch_pr_current -> pushed_ws != ct_none ) { - __kmp_pop_sync( *gtid_ref, ct_ordered_in_pdo, loc_ref ); - } +static void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, + ident_t *loc_ref) { + kmp_info_t *th; + + if (__kmp_env_consistency_check) { + th = __kmp_threads[*gtid_ref]; + if (th->th.th_dispatch->th_dispatch_pr_current->pushed_ws != ct_none) { + __kmp_pop_sync(*gtid_ref, ct_ordered_in_pdo, loc_ref); } + } } -template< typename UT > -static void -__kmp_dispatch_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - typedef typename traits_t< UT >::signed_t ST; - dispatch_private_info_template< UT > * pr; - - int gtid = *gtid_ref; -// int cid = *cid_ref; - kmp_info_t *th = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( th -> th.th_dispatch ); - - KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid ) ); - if ( __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - if ( pr -> pushed_ws != ct_none ) { - __kmp_pop_sync( gtid, ct_ordered_in_pdo, loc_ref ); - } +template +static void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + typedef typename traits_t::signed_t ST; + dispatch_private_info_template *pr; + + int gtid = *gtid_ref; + // int cid = *cid_ref; + kmp_info_t *th = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(th->th.th_dispatch); + + KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid)); + if (__kmp_env_consistency_check) { + pr = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + if (pr->pushed_ws != ct_none) { + __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref); } + } - if ( ! th -> th.th_team -> t.t_serialized ) { - dispatch_shared_info_template< UT > * sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_sh_current ); + if (!th->th.th_team->t.t_serialized) { + dispatch_shared_info_template *sh = + reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_sh_current); - if ( ! __kmp_env_consistency_check ) { - pr = reinterpret_cast< dispatch_private_info_template< UT >* > - ( th -> th.th_dispatch -> th_dispatch_pr_current ); - } + if (!__kmp_env_consistency_check) { + pr = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + } - KMP_FSYNC_RELEASING( & sh->u.s.ordered_iteration ); - #if ! defined( KMP_GOMP_COMPAT ) - if ( __kmp_env_consistency_check ) { - if ( pr->ordered_bumped != 0 ) { - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - /* How to test it? 
- OM */ - __kmp_error_construct2( - kmp_i18n_msg_CnsMultipleNesting, - ct_ordered_in_pdo, loc_ref, - & p->stack_data[ p->w_top ] - ); - } - } - #endif /* !defined(KMP_GOMP_COMPAT) */ + KMP_FSYNC_RELEASING(&sh->u.s.ordered_iteration); +#if !defined(KMP_GOMP_COMPAT) + if (__kmp_env_consistency_check) { + if (pr->ordered_bumped != 0) { + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + /* How to test it? - OM */ + __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting, + ct_ordered_in_pdo, loc_ref, + &p->stack_data[p->w_top]); + } + } +#endif /* !defined(KMP_GOMP_COMPAT) */ - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - pr->ordered_bumped += 1; + pr->ordered_bumped += 1; - KD_TRACE(1000, ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n", - gtid, pr->ordered_bumped ) ); + KD_TRACE(1000, + ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n", + gtid, pr->ordered_bumped)); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - /* TODO use general release procedure? */ - test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); + /* TODO use general release procedure? */ + test_then_inc((volatile ST *)&sh->u.s.ordered_iteration); - KMP_MB(); /* Flush all pending memory write invalidates. */ - } - KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid ) ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + } + KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid)); } -/* Computes and returns x to the power of y, where y must a non-negative integer */ -template< typename UT > -static __forceinline long double -__kmp_pow(long double x, UT y) { - long double s=1.0L; - - KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0); - //KMP_DEBUG_ASSERT(y >= 0); // y is unsigned - while(y) { - if ( y & 1 ) - s *= x; - x *= x; - y >>= 1; - } - return s; +// Computes and returns x to the power of y, where y must a non-negative integer +template +static __forceinline long double __kmp_pow(long double x, UT y) { + long double s = 1.0L; + + KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0); + // KMP_DEBUG_ASSERT(y >= 0); // y is unsigned + while (y) { + if (y & 1) + s *= x; + x *= x; + y >>= 1; + } + return s; } -/* Computes and returns the number of unassigned iterations after idx chunks have been assigned - (the total number of unassigned iterations in chunks with index greater than or equal to idx). - __forceinline seems to be broken so that if we __forceinline this function, the behavior is wrong - (one of the unit tests, sch_guided_analytical_basic.cpp, fails) -*/ -template< typename T > -static __inline typename traits_t< T >::unsigned_t -__kmp_dispatch_guided_remaining( - T tc, - typename traits_t< T >::floating_t base, - typename traits_t< T >::unsigned_t idx -) { - /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at - least for ICL 8.1, long double arithmetic may not really have - long double precision, even with /Qlong_double. Currently, we - workaround that in the caller code, by manipulating the FPCW for - Windows* OS on IA-32 architecture. The lack of precision is not - expected to be a correctness issue, though. 
- */ - typedef typename traits_t< T >::unsigned_t UT; - - long double x = tc * __kmp_pow< UT >(base, idx); - UT r = (UT) x; - if ( x == r ) - return r; - return r + 1; +/* Computes and returns the number of unassigned iterations after idx chunks + have been assigned (the total number of unassigned iterations in chunks with + index greater than or equal to idx). __forceinline seems to be broken so that + if we __forceinline this function, the behavior is wrong + (one of the unit tests, sch_guided_analytical_basic.cpp, fails) */ +template +static __inline typename traits_t::unsigned_t +__kmp_dispatch_guided_remaining(T tc, typename traits_t::floating_t base, + typename traits_t::unsigned_t idx) { + /* Note: On Windows* OS on IA-32 architecture and Intel(R) 64, at least for + ICL 8.1, long double arithmetic may not really have long double precision, + even with /Qlong_double. Currently, we workaround that in the caller code, + by manipulating the FPCW for Windows* OS on IA-32 architecture. The lack + of precision is not expected to be a correctness issue, though. */ + typedef typename traits_t::unsigned_t UT; + + long double x = tc * __kmp_pow(base, idx); + UT r = (UT)x; + if (x == r) + return r; + return r + 1; } // Parameters of the guided-iterative algorithm: // p2 = n * nproc * ( chunk + 1 ) // point of switching to dynamic // p3 = 1 / ( n * nproc ) // remaining iterations multiplier -// by default n = 2. For example with n = 3 the chunks distribution will be more flat. +// by default n = 2. For example with n = 3 the chunks distribution will be more +// flat. // With n = 1 first chunk is the same as for static schedule, e.g. trip / nproc. static int guided_int_param = 2; -static double guided_flt_param = 0.5;// = 1.0 / guided_int_param; +static double guided_flt_param = 0.5; // = 1.0 / guided_int_param; // UT - unsigned flavor of T, ST - signed flavor of T, // DBL - double if sizeof(T)==4, or long double if sizeof(T)==8 -template< typename T > +template static void -__kmp_dispatch_init( - ident_t * loc, - int gtid, - enum sched_type schedule, - T lb, - T ub, - typename traits_t< T >::signed_t st, - typename traits_t< T >::signed_t chunk, - int push_ws -) { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - typedef typename traits_t< T >::floating_t DBL; - - int active; - T tc; - kmp_info_t * th; - kmp_team_t * team; - kmp_uint32 my_buffer_index; - dispatch_private_info_template< T > * pr; - dispatch_shared_info_template< UT > volatile * sh; - - KMP_BUILD_ASSERT( sizeof( dispatch_private_info_template< T > ) == sizeof( dispatch_private_info ) ); - KMP_BUILD_ASSERT( sizeof( dispatch_shared_info_template< UT > ) == sizeof( dispatch_shared_info ) ); - - if ( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); +__kmp_dispatch_init(ident_t *loc, int gtid, enum sched_type schedule, T lb, + T ub, typename traits_t::signed_t st, + typename traits_t::signed_t chunk, int push_ws) { + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + typedef typename traits_t::floating_t DBL; + + int active; + T tc; + kmp_info_t *th; + kmp_team_t *team; + kmp_uint32 my_buffer_index; + dispatch_private_info_template *pr; + dispatch_shared_info_template volatile *sh; + + KMP_BUILD_ASSERT(sizeof(dispatch_private_info_template) == + sizeof(dispatch_private_info)); + KMP_BUILD_ASSERT(sizeof(dispatch_shared_info_template) == + sizeof(dispatch_shared_info)); + + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); #if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_INIT(); + SSC_MARK_DISPATCH_INIT(); #endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d called: schedule:%%d chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", - traits_t< ST >::spec, traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk, lb, ub, st ) ); - __kmp_str_free( &buff ); - } - #endif - /* setup data */ - th = __kmp_threads[ gtid ]; - team = th -> th.th_team; - active = ! team -> t.t_serialized; - th->th.th_ident = loc; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_init: T#%%d called: schedule:%%d " + "chunk:%%%s lb:%%%s ub:%%%s st:%%%s\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(10, (buff, gtid, schedule, chunk, lb, ub, st)); + __kmp_str_free(&buff); + } +#endif + /* setup data */ + th = __kmp_threads[gtid]; + team = th->th.th_team; + active = !team->t.t_serialized; + th->th.th_ident = loc; #if USE_ITT_BUILD - kmp_uint64 cur_chunk = chunk; - int itt_need_metadata_reporting = __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && - KMP_MASTER_GTID(gtid) && + kmp_uint64 cur_chunk = chunk; + int itt_need_metadata_reporting = __itt_metadata_add_ptr && + __kmp_forkjoin_frames_mode == 3 && + KMP_MASTER_GTID(gtid) && #if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && + th->th.th_teams_microtask == NULL && #endif - team->t.t_active_level == 1; + team->t.t_active_level == 1; #endif - if ( ! active ) { - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ - } else { - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - my_buffer_index = th->th.th_dispatch->th_disp_index ++; - - /* What happens when number of threads changes, need to resize buffer? 
*/ - pr = reinterpret_cast< dispatch_private_info_template< T > * > - ( &th -> th.th_dispatch -> th_disp_buffer[ my_buffer_index % __kmp_dispatch_num_buffers ] ); - sh = reinterpret_cast< dispatch_shared_info_template< UT > volatile * > - ( &team -> t.t_disp_buffer[ my_buffer_index % __kmp_dispatch_num_buffers ] ); - } - - #if ( KMP_STATIC_STEAL_ENABLED ) - if ( SCHEDULE_HAS_NONMONOTONIC(schedule) ) - // AC: we now have only one implementation of stealing, so use it - schedule = kmp_sch_static_steal; - else - #endif - schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); - - /* Pick up the nomerge/ordered bits from the scheduling type */ - if ( (schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper) ) { - pr->nomerge = TRUE; - schedule = (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); - } else { - pr->nomerge = FALSE; - } - pr->type_size = traits_t::type_size; // remember the size of variables - if ( kmp_ord_lower & schedule ) { - pr->ordered = TRUE; - schedule = (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); - } else { - pr->ordered = FALSE; - } - - if ( schedule == kmp_sch_static ) { + if (!active) { + pr = reinterpret_cast *>( + th->th.th_dispatch->th_disp_buffer); /* top of the stack */ + } else { + KMP_DEBUG_ASSERT(th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); + + my_buffer_index = th->th.th_dispatch->th_disp_index++; + + /* What happens when number of threads changes, need to resize buffer? */ + pr = reinterpret_cast *>( + &th->th.th_dispatch + ->th_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); + sh = reinterpret_cast volatile *>( + &team->t.t_disp_buffer[my_buffer_index % __kmp_dispatch_num_buffers]); + } + +#if (KMP_STATIC_STEAL_ENABLED) + if (SCHEDULE_HAS_NONMONOTONIC(schedule)) + // AC: we now have only one implementation of stealing, so use it + schedule = kmp_sch_static_steal; + else +#endif + schedule = SCHEDULE_WITHOUT_MODIFIERS(schedule); + + /* Pick up the nomerge/ordered bits from the scheduling type */ + if ((schedule >= kmp_nm_lower) && (schedule < kmp_nm_upper)) { + pr->nomerge = TRUE; + schedule = + (enum sched_type)(((int)schedule) - (kmp_nm_lower - kmp_sch_lower)); + } else { + pr->nomerge = FALSE; + } + pr->type_size = traits_t::type_size; // remember the size of variables + if (kmp_ord_lower & schedule) { + pr->ordered = TRUE; + schedule = + (enum sched_type)(((int)schedule) - (kmp_ord_lower - kmp_sch_lower)); + } else { + pr->ordered = FALSE; + } + + if (schedule == kmp_sch_static) { + schedule = __kmp_static; + } else { + if (schedule == kmp_sch_runtime) { + // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if + // not specified) + schedule = team->t.t_sched.r_sched_type; + // Detail the schedule if needed (global controls are differentiated + // appropriately) + if (schedule == kmp_sch_guided_chunked) { + schedule = __kmp_guided; + } else if (schedule == kmp_sch_static) { schedule = __kmp_static; - } else { - if ( schedule == kmp_sch_runtime ) { - // Use the scheduling specified by OMP_SCHEDULE (or __kmp_sch_default if not specified) - schedule = team -> t.t_sched.r_sched_type; - // Detail the schedule if needed (global controls are differentiated appropriately) - if ( schedule == kmp_sch_guided_chunked ) { - schedule = __kmp_guided; - } else if ( schedule == kmp_sch_static ) { - schedule = __kmp_static; - } - // Use the chunk size specified by OMP_SCHEDULE (or default if not specified) - chunk = team -> t.t_sched.chunk; + } + // Use the chunk size 
specified by OMP_SCHEDULE (or default if not + // specified) + chunk = team->t.t_sched.chunk; #if USE_ITT_BUILD - cur_chunk = chunk; + cur_chunk = chunk; #endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n", - traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - } else { - if ( schedule == kmp_sch_guided_chunked ) { - schedule = __kmp_guided; - } - if ( chunk <= 0 ) { - chunk = KMP_DEFAULT_CHUNK; - } - } +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: T#%%d new: schedule:%%d chunk:%%%s\n", + traits_t::spec); + KD_TRACE(10, (buff, gtid, schedule, chunk)); + __kmp_str_free(&buff); + } +#endif + } else { + if (schedule == kmp_sch_guided_chunked) { + schedule = __kmp_guided; + } + if (chunk <= 0) { + chunk = KMP_DEFAULT_CHUNK; + } + } - if ( schedule == kmp_sch_auto ) { - // mapping and differentiation: in the __kmp_do_serial_initialize() - schedule = __kmp_auto; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: kmp_sch_auto: T#%%d new: schedule:%%d chunk:%%%s\n", - traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, schedule, chunk ) ); - __kmp_str_free( &buff ); - } - #endif - } + if (schedule == kmp_sch_auto) { + // mapping and differentiation: in the __kmp_do_serial_initialize() + schedule = __kmp_auto; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_init: kmp_sch_auto: T#%%d new: " + "schedule:%%d chunk:%%%s\n", + traits_t::spec); + KD_TRACE(10, (buff, gtid, schedule, chunk)); + __kmp_str_free(&buff); + } +#endif + } - /* guided analytical not safe for too many threads */ - if ( schedule == kmp_sch_guided_analytical_chunked && th->th.th_team_nproc > 1<<20 ) { - schedule = kmp_sch_guided_iterative_chunked; - KMP_WARNING( DispatchManyThreads ); - } - pr->u.p.parm1 = chunk; + /* guided analytical not safe for too many threads */ + if (schedule == kmp_sch_guided_analytical_chunked && + th->th.th_team_nproc > 1 << 20) { + schedule = kmp_sch_guided_iterative_chunked; + KMP_WARNING(DispatchManyThreads); } - KMP_ASSERT2( (kmp_sch_lower < schedule && schedule < kmp_sch_upper), - "unknown scheduling type" ); + pr->u.p.parm1 = chunk; + } + KMP_ASSERT2((kmp_sch_lower < schedule && schedule < kmp_sch_upper), + "unknown scheduling type"); - pr->u.p.count = 0; + pr->u.p.count = 0; - if ( __kmp_env_consistency_check ) { - if ( st == 0 ) { - __kmp_error_construct( - kmp_i18n_msg_CnsLoopIncrZeroProhibited, - ( pr->ordered ? ct_pdo_ordered : ct_pdo ), loc - ); - } + if (__kmp_env_consistency_check) { + if (st == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, + (pr->ordered ? 
ct_pdo_ordered : ct_pdo), loc); } - // compute trip count - if ( st == 1 ) { // most common case - if ( ub >= lb ) { - tc = ub - lb + 1; - } else { // ub < lb - tc = 0; // zero-trip - } - } else if ( st < 0 ) { - if ( lb >= ub ) { - // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), - // where the division needs to be unsigned regardless of the result type - tc = (UT)(lb - ub) / (-st) + 1; - } else { // lb < ub - tc = 0; // zero-trip - } - } else { // st > 0 - if ( ub >= lb ) { - // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B), - // where the division needs to be unsigned regardless of the result type - tc = (UT)(ub - lb) / st + 1; - } else { // ub < lb - tc = 0; // zero-trip - } + } + // compute trip count + if (st == 1) { // most common case + if (ub >= lb) { + tc = ub - lb + 1; + } else { // ub < lb + tc = 0; // zero-trip } - - // Any half-decent optimizer will remove this test when the blocks are empty since the macros expand to nothing - // when statistics are disabled. - if (schedule == __kmp_static) - { - KMP_COUNT_BLOCK(OMP_FOR_static); - KMP_COUNT_VALUE(FOR_static_iterations, tc); + } else if (st < 0) { + if (lb >= ub) { + // AC: cast to unsigned is needed for loops like (i=2B; i>-2B; i-=1B), + // where the division needs to be unsigned regardless of the result type + tc = (UT)(lb - ub) / (-st) + 1; + } else { // lb < ub + tc = 0; // zero-trip } - else - { - KMP_COUNT_BLOCK(OMP_FOR_dynamic); - KMP_COUNT_VALUE(FOR_dynamic_iterations, tc); + } else { // st > 0 + if (ub >= lb) { + // AC: cast to unsigned is needed for loops like (i=-2B; i<2B; i+=1B), + // where the division needs to be unsigned regardless of the result type + tc = (UT)(ub - lb) / st + 1; + } else { // ub < lb + tc = 0; // zero-trip } + } + + // Any half-decent optimizer will remove this test when the blocks are empty + // since the macros expand to nothing when statistics are disabled. + if (schedule == __kmp_static) { + KMP_COUNT_BLOCK(OMP_FOR_static); + KMP_COUNT_VALUE(FOR_static_iterations, tc); + } else { + KMP_COUNT_BLOCK(OMP_FOR_dynamic); + KMP_COUNT_VALUE(FOR_dynamic_iterations, tc); + } + + pr->u.p.lb = lb; + pr->u.p.ub = ub; + pr->u.p.st = st; + pr->u.p.tc = tc; - pr->u.p.lb = lb; - pr->u.p.ub = ub; - pr->u.p.st = st; - pr->u.p.tc = tc; +#if KMP_OS_WINDOWS + pr->u.p.last_upper = ub + st; +#endif /* KMP_OS_WINDOWS */ - #if KMP_OS_WINDOWS - pr->u.p.last_upper = ub + st; - #endif /* KMP_OS_WINDOWS */ + /* NOTE: only the active parallel region(s) has active ordered sections */ - /* NOTE: only the active parallel region(s) has active ordered sections */ + if (active) { + if (pr->ordered == 0) { + th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo_error; + th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo_error; + } else { + pr->ordered_bumped = 0; - if ( active ) { - if ( pr->ordered == 0 ) { - th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo_error; - th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo_error; - } else { - pr->ordered_bumped = 0; + pr->u.p.ordered_lower = 1; + pr->u.p.ordered_upper = 0; - pr->u.p.ordered_lower = 1; - pr->u.p.ordered_upper = 0; + th->th.th_dispatch->th_deo_fcn = __kmp_dispatch_deo; + th->th.th_dispatch->th_dxo_fcn = __kmp_dispatch_dxo; + } + } - th -> th.th_dispatch -> th_deo_fcn = __kmp_dispatch_deo< UT >; - th -> th.th_dispatch -> th_dxo_fcn = __kmp_dispatch_dxo< UT >; - } + if (__kmp_env_consistency_check) { + enum cons_type ws = pr->ordered ? 
ct_pdo_ordered : ct_pdo; + if (push_ws) { + __kmp_push_workshare(gtid, ws, loc); + pr->pushed_ws = ws; + } else { + __kmp_check_workshare(gtid, ws, loc); + pr->pushed_ws = ct_none; } + } + + switch (schedule) { +#if (KMP_STATIC_STEAL_ENABLED) + case kmp_sch_static_steal: { + T nproc = th->th.th_team_nproc; + T ntc, init; + + KD_TRACE(100, + ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid)); + + ntc = (tc % chunk ? 1 : 0) + tc / chunk; + if (nproc > 1 && ntc >= nproc) { + KMP_COUNT_BLOCK(OMP_FOR_static_steal); + T id = __kmp_tid_from_gtid(gtid); + T small_chunk, extras; + + small_chunk = ntc / nproc; + extras = ntc % nproc; + + init = id * small_chunk + (id < extras ? id : extras); + pr->u.p.count = init; + pr->u.p.ub = init + small_chunk + (id < extras ? 1 : 0); + + pr->u.p.parm2 = lb; + // pr->pfields.parm3 = 0; // it's not used in static_steal + pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid + pr->u.p.st = st; + if (traits_t::type_size > 4) { + // AC: TODO: check if 16-byte CAS available and use it to + // improve performance (probably wait for explicit request + // before spending time on this). + // For now use dynamically allocated per-thread lock, + // free memory in __kmp_dispatch_next when status==0. + KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL); + th->th.th_dispatch->th_steal_lock = + (kmp_lock_t *)__kmp_allocate(sizeof(kmp_lock_t)); + __kmp_init_lock(th->th.th_dispatch->th_steal_lock); + } + break; + } else { + KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to " + "kmp_sch_static_balanced\n", + gtid)); + schedule = kmp_sch_static_balanced; + /* too few iterations: fall-through to kmp_sch_static_balanced */ + } // if + /* FALL-THROUGH to static balanced */ + } // case +#endif + case kmp_sch_static_balanced: { + T nproc = th->th.th_team_nproc; + T init, limit; - if ( __kmp_env_consistency_check ) { - enum cons_type ws = pr->ordered ? ct_pdo_ordered : ct_pdo; - if ( push_ws ) { - __kmp_push_workshare( gtid, ws, loc ); - pr->pushed_ws = ws; + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n", + gtid)); + + if (nproc > 1) { + T id = __kmp_tid_from_gtid(gtid); + + if (tc < nproc) { + if (id < tc) { + init = id; + limit = id; + pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ } else { - __kmp_check_workshare( gtid, ws, loc ); - pr->pushed_ws = ct_none; + pr->u.p.count = 1; /* means no more chunks to execute */ + pr->u.p.parm1 = FALSE; + break; } + } else { + T small_chunk = tc / nproc; + T extras = tc % nproc; + init = id * small_chunk + (id < extras ? id : extras); + limit = init + small_chunk - (id < extras ? 0 : 1); + pr->u.p.parm1 = (id == nproc - 1); + } + } else { + if (tc > 0) { + init = 0; + limit = tc - 1; + pr->u.p.parm1 = TRUE; + } else { // zero trip count + pr->u.p.count = 1; /* means no more chunks to execute */ + pr->u.p.parm1 = FALSE; + break; + } } - - switch ( schedule ) { - #if ( KMP_STATIC_STEAL_ENABLED ) - case kmp_sch_static_steal: - { - T nproc = th->th.th_team_nproc; - T ntc, init; - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_steal case\n", gtid ) ); - - ntc = (tc % chunk ? 1 : 0) + tc / chunk; - if ( nproc > 1 && ntc >= nproc ) { - KMP_COUNT_BLOCK(OMP_FOR_static_steal); - T id = __kmp_tid_from_gtid(gtid); - T small_chunk, extras; - - small_chunk = ntc / nproc; - extras = ntc % nproc; - - init = id * small_chunk + ( id < extras ? id : extras ); - pr->u.p.count = init; - pr->u.p.ub = init + small_chunk + ( id < extras ? 
1 : 0 ); - - pr->u.p.parm2 = lb; - //pr->pfields.parm3 = 0; // it's not used in static_steal - pr->u.p.parm4 = (id + 1) % nproc; // remember neighbour tid - pr->u.p.st = st; - if ( traits_t::type_size > 4 ) { - // AC: TODO: check if 16-byte CAS available and use it to - // improve performance (probably wait for explicit request - // before spending time on this). - // For now use dynamically allocated per-thread lock, - // free memory in __kmp_dispatch_next when status==0. - KMP_DEBUG_ASSERT(th->th.th_dispatch->th_steal_lock == NULL); - th->th.th_dispatch->th_steal_lock = - (kmp_lock_t*)__kmp_allocate(sizeof(kmp_lock_t)); - __kmp_init_lock(th->th.th_dispatch->th_steal_lock); - } - break; - } else { - KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_balanced\n", - gtid ) ); - schedule = kmp_sch_static_balanced; - /* too few iterations: fall-through to kmp_sch_static_balanced */ - } // if - /* FALL-THROUGH to static balanced */ - } // case - #endif - case kmp_sch_static_balanced: - { - T nproc = th->th.th_team_nproc; - T init, limit; - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_balanced case\n", - gtid ) ); - - if ( nproc > 1 ) { - T id = __kmp_tid_from_gtid(gtid); - - if ( tc < nproc ) { - if ( id < tc ) { - init = id; - limit = id; - pr->u.p.parm1 = (id == tc - 1); /* parm1 stores *plastiter */ - } else { - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } else { - T small_chunk = tc / nproc; - T extras = tc % nproc; - init = id * small_chunk + (id < extras ? id : extras); - limit = init + small_chunk - (id < extras ? 0 : 1); - pr->u.p.parm1 = (id == nproc - 1); - } - } else { - if ( tc > 0 ) { - init = 0; - limit = tc - 1; - pr->u.p.parm1 = TRUE; - } else { - // zero trip count - pr->u.p.count = 1; /* means no more chunks to execute */ - pr->u.p.parm1 = FALSE; - break; - } - } #if USE_ITT_BUILD - // Calculate chunk for metadata report - if ( itt_need_metadata_reporting ) - cur_chunk = limit - init + 1; + // Calculate chunk for metadata report + if (itt_need_metadata_reporting) + cur_chunk = limit - init + 1; #endif - if ( st == 1 ) { - pr->u.p.lb = lb + init; - pr->u.p.ub = lb + limit; - } else { - T ub_tmp = lb + limit * st; // calculated upper bound, "ub" is user-defined upper bound - pr->u.p.lb = lb + init * st; - // adjust upper bound to "ub" if needed, so that MS lastprivate will match it exactly - if ( st > 0 ) { - pr->u.p.ub = ( ub_tmp + st > ub ? ub : ub_tmp ); - } else { - pr->u.p.ub = ( ub_tmp + st < ub ? ub : ub_tmp ); - } - } - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - } - break; - } // case - case kmp_sch_guided_iterative_chunked : - { - T nproc = th->th.th_team_nproc; - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked case\n",gtid)); + if (st == 1) { + pr->u.p.lb = lb + init; + pr->u.p.ub = lb + limit; + } else { + // calculated upper bound, "ub" is user-defined upper bound + T ub_tmp = lb + limit * st; + pr->u.p.lb = lb + init * st; + // adjust upper bound to "ub" if needed, so that MS lastprivate will match + // it exactly + if (st > 0) { + pr->u.p.ub = (ub_tmp + st > ub ? ub : ub_tmp); + } else { + pr->u.p.ub = (ub_tmp + st < ub ? 
ub : ub_tmp); + } + } + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; + } + break; + } // case + case kmp_sch_guided_iterative_chunked: { + T nproc = th->th.th_team_nproc; + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_iterative_chunked" + " case\n", + gtid)); + + if (nproc > 1) { + if ((2L * chunk + 1) * nproc >= tc) { + /* chunk size too large, switch to dynamic */ + schedule = kmp_sch_dynamic_chunked; + } else { + // when remaining iters become less than parm2 - switch to dynamic + pr->u.p.parm2 = guided_int_param * nproc * (chunk + 1); + *(double *)&pr->u.p.parm3 = + guided_flt_param / nproc; // may occupy parm3 and parm4 + } + } else { + KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to " + "kmp_sch_static_greedy\n", + gtid)); + schedule = kmp_sch_static_greedy; + /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n", + gtid)); + pr->u.p.parm1 = tc; + } // if + } // case + break; + case kmp_sch_guided_analytical_chunked: { + T nproc = th->th.th_team_nproc; + KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked" + " case\n", + gtid)); + if (nproc > 1) { + if ((2L * chunk + 1) * nproc >= tc) { + /* chunk size too large, switch to dynamic */ + schedule = kmp_sch_dynamic_chunked; + } else { + /* commonly used term: (2 nproc - 1)/(2 nproc) */ + DBL x; - if ( nproc > 1 ) { - if ( (2L * chunk + 1 ) * nproc >= tc ) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - // when remaining iters become less than parm2 - switch to dynamic - pr->u.p.parm2 = guided_int_param * nproc * ( chunk + 1 ); - *(double*)&pr->u.p.parm3 = guided_flt_param / nproc; // may occupy parm3 and parm4 - } - } else { - KD_TRACE(100,("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n",gtid)); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_guided_analytical_chunked: - { - T nproc = th->th.th_team_nproc; - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_guided_analytical_chunked case\n", gtid)); +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* Linux* OS already has 64-bit computation by default for long double, + and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On + Windows* OS on IA-32 architecture, we need to set precision to 64-bit + instead of the default 53-bit. Even though long double doesn't work + on Windows* OS on Intel(R) 64, the resulting lack of precision is not + expected to impact the correctness of the algorithm, but this has not + been mathematically proven. 
*/ + // save original FPCW and set precision to 64-bit, as + // Windows* OS on IA-32 architecture defaults to 53-bit + unsigned int oldFpcw = _control87(0, 0); + _control87(_PC_64, _MCW_PC); // 0,0x30000 +#endif + /* value used for comparison in solver for cross-over point */ + long double target = ((long double)chunk * 2 + 1) * nproc / tc; + + /* crossover point--chunk indexes equal to or greater than + this point switch to dynamic-style scheduling */ + UT cross; + + /* commonly used term: (2 nproc - 1)/(2 nproc) */ + x = (long double)1.0 - (long double)0.5 / nproc; + +#ifdef KMP_DEBUG + { // test natural alignment + struct _test_a { + char a; + union { + char b; + DBL d; + }; + } t; + ptrdiff_t natural_alignment = + (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; + //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long + // long)natural_alignment ); + KMP_DEBUG_ASSERT( + (((ptrdiff_t)&pr->u.p.parm3) & (natural_alignment)) == 0); + } +#endif // KMP_DEBUG - if ( nproc > 1 ) { - if ( (2L * chunk + 1 ) * nproc >= tc ) { - /* chunk size too large, switch to dynamic */ - schedule = kmp_sch_dynamic_chunked; - } else { - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - DBL x; - - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* Linux* OS already has 64-bit computation by default for - long double, and on Windows* OS on Intel(R) 64, - /Qlong_double doesn't work. On Windows* OS - on IA-32 architecture, we need to set precision to - 64-bit instead of the default 53-bit. Even though long - double doesn't work on Windows* OS on Intel(R) 64, the - resulting lack of precision is not expected to impact - the correctness of the algorithm, but this has not been - mathematically proven. - */ - // save original FPCW and set precision to 64-bit, as - // Windows* OS on IA-32 architecture defaults to 53-bit - unsigned int oldFpcw = _control87(0,0); - _control87(_PC_64,_MCW_PC); // 0,0x30000 - #endif - /* value used for comparison in solver for cross-over point */ - long double target = ((long double)chunk * 2 + 1) * nproc / tc; - - /* crossover point--chunk indexes equal to or greater than - this point switch to dynamic-style scheduling */ - UT cross; - - /* commonly used term: (2 nproc - 1)/(2 nproc) */ - x = (long double)1.0 - (long double)0.5 / nproc; - - #ifdef KMP_DEBUG - { // test natural alignment - struct _test_a { - char a; - union { - char b; - DBL d; - }; - } t; - ptrdiff_t natural_alignment = (ptrdiff_t)&t.b - (ptrdiff_t)&t - (ptrdiff_t)1; - //__kmp_warn( " %llx %llx %lld", (long long)&t.d, (long long)&t, (long long)natural_alignment ); - KMP_DEBUG_ASSERT( ( ( (ptrdiff_t)&pr->u.p.parm3 ) & ( natural_alignment ) ) == 0 ); - } - #endif // KMP_DEBUG - - /* save the term in thread private dispatch structure */ - *(DBL*)&pr->u.p.parm3 = x; - - /* solve for the crossover point to the nearest integer i for which C_i <= chunk */ - { - UT left, right, mid; - long double p; - - /* estimate initial upper and lower bound */ - - /* doesn't matter what value right is as long as it is positive, but - it affects performance of the solver - */ - right = 229; - p = __kmp_pow< UT >(x,right); - if ( p > target ) { - do{ - p *= p; - right <<= 1; - } while(p>target && right < (1<<27)); - left = right >> 1; /* lower bound is previous (failed) estimate of upper bound */ - } else { - left = 0; - } - - /* bisection root-finding method */ - while ( left + 1 < right ) { - mid = (left + right) / 2; - if ( __kmp_pow< UT >(x,mid) > target ) { - left = mid; - } else { - right = mid; - } - } // while - cross = right; - } 
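// Editorial sketch (not part of the patch above; illustrative only).
// The guided_analytical_chunked branch shown here searches for the smallest
// chunk index "cross" such that x^cross <= target, where x = 1 - 0.5/nproc
// and target = (2*chunk + 1) * nproc / tc (both lie in (0,1) on this path).
// The standalone helpers below (hypothetical names pow_by_squaring and
// find_crossover, not runtime APIs) mirror the same bracketing-plus-bisection
// search so it can be read or tested in isolation.
#include <cassert>
#include <cstdint>

static long double pow_by_squaring(long double x, uint64_t y) {
  long double s = 1.0L; // exponentiation by squaring, as in __kmp_pow
  while (y) {
    if (y & 1)
      s *= x;
    x *= x;
    y >>= 1;
  }
  return s;
}

static uint64_t find_crossover(long double x, long double target) {
  uint64_t left = 0, right = 229; // any positive start works; 229 matches the code
  long double p = pow_by_squaring(x, right);
  if (p > target) {
    // exponential search: keep squaring until the target is undershot
    do {
      p *= p;
      right <<= 1;
    } while (p > target && right < (1 << 27));
    left = right >> 1; // previous (failed) estimate becomes the lower bound
  } else {
    left = 0; // x^0 == 1 > target, so 0 is a valid lower bound
  }
  // bisection on the exponent; x^left stays > target, x^right stays <= target
  while (left + 1 < right) {
    uint64_t mid = (left + right) / 2;
    if (pow_by_squaring(x, mid) > target)
      left = mid;
    else
      right = mid;
  }
  // same sanity check as the KMP_ASSERT in the patch
  assert(right > 0 && pow_by_squaring(x, right - 1) > target &&
         pow_by_squaring(x, right) <= target);
  return right;
}
// Worked example: nproc = 8, chunk = 4, tc = 10000 give x = 0.9375 and
// target = 0.0072; find_crossover(0.9375L, 0.0072L) returns 77.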
- /* assert sanity of computed crossover point */ - KMP_ASSERT(cross && __kmp_pow< UT >(x, cross - 1) > target && __kmp_pow< UT >(x, cross) <= target); - - /* save the crossover point in thread private dispatch structure */ - pr->u.p.parm2 = cross; - - // C75803 - #if ( ( KMP_OS_LINUX || KMP_OS_WINDOWS ) && KMP_ARCH_X86 ) && ( ! defined( KMP_I8 ) ) - #define GUIDED_ANALYTICAL_WORKAROUND (*( DBL * )&pr->u.p.parm3) - #else - #define GUIDED_ANALYTICAL_WORKAROUND (x) - #endif - /* dynamic-style scheduling offset */ - pr->u.p.count = tc - __kmp_dispatch_guided_remaining(tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk; - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - // restore FPCW - _control87(oldFpcw,_MCW_PC); - #endif - } // if + /* save the term in thread private dispatch structure */ + *(DBL *)&pr->u.p.parm3 = x; + + /* solve for the crossover point to the nearest integer i for which C_i + <= chunk */ + { + UT left, right, mid; + long double p; + + /* estimate initial upper and lower bound */ + + /* doesn't matter what value right is as long as it is positive, but + it affects performance of the solver */ + right = 229; + p = __kmp_pow(x, right); + if (p > target) { + do { + p *= p; + right <<= 1; + } while (p > target && right < (1 << 27)); + /* lower bound is previous (failed) estimate of upper bound */ + left = right >> 1; + } else { + left = 0; + } + + /* bisection root-finding method */ + while (left + 1 < right) { + mid = (left + right) / 2; + if (__kmp_pow(x, mid) > target) { + left = mid; } else { - KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to kmp_sch_static_greedy\n", - gtid ) ); - schedule = kmp_sch_static_greedy; - /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ - pr->u.p.parm1 = tc; - } // if - } // case - break; - case kmp_sch_static_greedy: - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n",gtid)); - pr->u.p.parm1 = ( th->th.th_team_nproc > 1 ) ? 
- ( tc + th->th.th_team_nproc - 1 ) / th->th.th_team_nproc : - tc; - break; - case kmp_sch_static_chunked : - case kmp_sch_dynamic_chunked : - if ( pr->u.p.parm1 <= 0 ) { - pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + right = mid; + } + } // while + cross = right; } - KD_TRACE(100,("__kmp_dispatch_init: T#%d kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", gtid)); - break; - case kmp_sch_trapezoidal : - { - /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ - - T parm1, parm2, parm3, parm4; - KD_TRACE(100, ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid ) ); + /* assert sanity of computed crossover point */ + KMP_ASSERT(cross && __kmp_pow(x, cross - 1) > target && + __kmp_pow(x, cross) <= target); - parm1 = chunk; + /* save the crossover point in thread private dispatch structure */ + pr->u.p.parm2 = cross; - /* F : size of the first cycle */ - parm2 = ( tc / (2 * th->th.th_team_nproc) ); +// C75803 +#if ((KMP_OS_LINUX || KMP_OS_WINDOWS) && KMP_ARCH_X86) && (!defined(KMP_I8)) +#define GUIDED_ANALYTICAL_WORKAROUND (*(DBL *)&pr->u.p.parm3) +#else +#define GUIDED_ANALYTICAL_WORKAROUND (x) +#endif + /* dynamic-style scheduling offset */ + pr->u.p.count = tc - __kmp_dispatch_guided_remaining( + tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - + cross * chunk; +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + // restore FPCW + _control87(oldFpcw, _MCW_PC); +#endif + } // if + } else { + KD_TRACE(100, ("__kmp_dispatch_init: T#%d falling-through to " + "kmp_sch_static_greedy\n", + gtid)); + schedule = kmp_sch_static_greedy; + /* team->t.t_nproc == 1: fall-through to kmp_sch_static_greedy */ + pr->u.p.parm1 = tc; + } // if + } // case + break; + case kmp_sch_static_greedy: + KD_TRACE(100, + ("__kmp_dispatch_init: T#%d kmp_sch_static_greedy case\n", gtid)); + pr->u.p.parm1 = (th->th.th_team_nproc > 1) + ? (tc + th->th.th_team_nproc - 1) / th->th.th_team_nproc + : tc; + break; + case kmp_sch_static_chunked: + case kmp_sch_dynamic_chunked: + if (pr->u.p.parm1 <= 0) { + pr->u.p.parm1 = KMP_DEFAULT_CHUNK; + } + KD_TRACE(100, ("__kmp_dispatch_init: T#%d " + "kmp_sch_static_chunked/kmp_sch_dynamic_chunked cases\n", + gtid)); + break; + case kmp_sch_trapezoidal: { + /* TSS: trapezoid self-scheduling, minimum chunk_size = parm1 */ - if ( parm2 < 1 ) { - parm2 = 1; - } + T parm1, parm2, parm3, parm4; + KD_TRACE(100, + ("__kmp_dispatch_init: T#%d kmp_sch_trapezoidal case\n", gtid)); - /* L : size of the last cycle. Make sure the last cycle - * is not larger than the first cycle. - */ - if ( parm1 < 1 ) { - parm1 = 1; - } else if ( parm1 > parm2 ) { - parm1 = parm2; - } + parm1 = chunk; - /* N : number of cycles */ - parm3 = ( parm2 + parm1 ); - parm3 = ( 2 * tc + parm3 - 1) / parm3; + /* F : size of the first cycle */ + parm2 = (tc / (2 * th->th.th_team_nproc)); - if ( parm3 < 2 ) { - parm3 = 2; - } + if (parm2 < 1) { + parm2 = 1; + } - /* sigma : decreasing incr of the trapezoid */ - parm4 = ( parm3 - 1 ); - parm4 = ( parm2 - parm1 ) / parm4; + /* L : size of the last cycle. Make sure the last cycle is not larger + than the first cycle. 
*/ + if (parm1 < 1) { + parm1 = 1; + } else if (parm1 > parm2) { + parm1 = parm2; + } - // pointless check, because parm4 >= 0 always - //if ( parm4 < 0 ) { - // parm4 = 0; - //} + /* N : number of cycles */ + parm3 = (parm2 + parm1); + parm3 = (2 * tc + parm3 - 1) / parm3; - pr->u.p.parm1 = parm1; - pr->u.p.parm2 = parm2; - pr->u.p.parm3 = parm3; - pr->u.p.parm4 = parm4; - } // case - break; + if (parm3 < 2) { + parm3 = 2; + } - default: - { - __kmp_msg( - kmp_ms_fatal, // Severity - KMP_MSG( UnknownSchedTypeDetected ), // Primary message - KMP_HNT( GetNewerLibrary ), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } - break; - } // switch - pr->schedule = schedule; - if ( active ) { - /* The name of this buffer should be my_buffer_index when it's free to use it */ - - KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index) ); - __kmp_wait_yield< kmp_uint32 >( & sh->buffer_index, my_buffer_index, __kmp_eq< kmp_uint32 > - USE_ITT_BUILD_ARG( NULL ) - ); - // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and my_buffer_index are - // *always* 32-bit integers. - KMP_MB(); /* is this necessary? */ - KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d sh->buffer_index:%d\n", - gtid, my_buffer_index, sh->buffer_index) ); - - th -> th.th_dispatch -> th_dispatch_pr_current = (dispatch_private_info_t*) pr; - th -> th.th_dispatch -> th_dispatch_sh_current = (dispatch_shared_info_t*) sh; + /* sigma : decreasing incr of the trapezoid */ + parm4 = (parm3 - 1); + parm4 = (parm2 - parm1) / parm4; + + // pointless check, because parm4 >= 0 always + // if ( parm4 < 0 ) { + // parm4 = 0; + //} + + pr->u.p.parm1 = parm1; + pr->u.p.parm2 = parm2; + pr->u.p.parm3 = parm3; + pr->u.p.parm4 = parm4; + } // case + break; + + default: { + __kmp_msg(kmp_ms_fatal, // Severity + KMP_MSG(UnknownSchedTypeDetected), // Primary message + KMP_HNT(GetNewerLibrary), // Hint + __kmp_msg_null // Variadic argument list terminator + ); + } break; + } // switch + pr->schedule = schedule; + if (active) { + /* The name of this buffer should be my_buffer_index when it's free to use + * it */ + + KD_TRACE(100, ("__kmp_dispatch_init: T#%d before wait: my_buffer_index:%d " + "sh->buffer_index:%d\n", + gtid, my_buffer_index, sh->buffer_index)); + __kmp_wait_yield(&sh->buffer_index, my_buffer_index, + __kmp_eq USE_ITT_BUILD_ARG(NULL)); + // Note: KMP_WAIT_YIELD() cannot be used there: buffer index and + // my_buffer_index are *always* 32-bit integers. + KMP_MB(); /* is this necessary? 
*/ + KD_TRACE(100, ("__kmp_dispatch_init: T#%d after wait: my_buffer_index:%d " + "sh->buffer_index:%d\n", + gtid, my_buffer_index, sh->buffer_index)); + + th->th.th_dispatch->th_dispatch_pr_current = (dispatch_private_info_t *)pr; + th->th.th_dispatch->th_dispatch_sh_current = (dispatch_shared_info_t *)sh; #if USE_ITT_BUILD - if ( pr->ordered ) { - __kmp_itt_ordered_init( gtid ); - }; // if - // Report loop metadata - if ( itt_need_metadata_reporting ) { - // Only report metadata by master of active team at level 1 - kmp_uint64 schedtype = 0; - switch ( schedule ) { - case kmp_sch_static_chunked: - case kmp_sch_static_balanced:// Chunk is calculated in the switch above - break; - case kmp_sch_static_greedy: - cur_chunk = pr->u.p.parm1; - break; - case kmp_sch_dynamic_chunked: - schedtype = 1; - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - schedtype = 2; - break; - default: -// Should we put this case under "static"? -// case kmp_sch_static_steal: - schedtype = 3; - break; - } - __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk); - } -#endif /* USE_ITT_BUILD */ + if (pr->ordered) { + __kmp_itt_ordered_init(gtid); }; // if - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s lb:%%%s ub:%%%s" \ - " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" \ - " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", - traits_t< UT >::spec, traits_t< T >::spec, traits_t< T >::spec, - traits_t< ST >::spec, traits_t< UT >::spec, traits_t< UT >::spec, - traits_t< UT >::spec, traits_t< UT >::spec, traits_t< T >::spec, - traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec ); - KD_TRACE(10, ( buff, - gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub, - pr->u.p.st, pr->u.p.tc, pr->u.p.count, - pr->u.p.ordered_lower, pr->u.p.ordered_upper, pr->u.p.parm1, - pr->u.p.parm2, pr->u.p.parm3, pr->u.p.parm4 ) ); - __kmp_str_free( &buff ); - } - #endif - #if ( KMP_STATIC_STEAL_ENABLED ) - // It cannot be guaranteed that after execution of a loop with some other schedule kind - // all the parm3 variables will contain the same value. - // Even if all parm3 will be the same, it still exists a bad case like using 0 and 1 - // rather than program life-time increment. - // So the dedicated variable is required. The 'static_steal_counter' is used. - if( schedule == kmp_sch_static_steal ) { - // Other threads will inspect this variable when searching for a victim. - // This is a flag showing that other threads may steal from this thread since then. - volatile T * p = &pr->u.p.static_steal_counter; - *p = *p + 1; + // Report loop metadata + if (itt_need_metadata_reporting) { + // Only report metadata by master of active team at level 1 + kmp_uint64 schedtype = 0; + switch (schedule) { + case kmp_sch_static_chunked: + case kmp_sch_static_balanced: // Chunk is calculated in the switch above + break; + case kmp_sch_static_greedy: + cur_chunk = pr->u.p.parm1; + break; + case kmp_sch_dynamic_chunked: + schedtype = 1; + break; + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + schedtype = 2; + break; + default: + // Should we put this case under "static"? 
+ // case kmp_sch_static_steal: + schedtype = 3; + break; } - #endif // ( KMP_STATIC_STEAL_ENABLED ) + __kmp_itt_metadata_loop(loc, schedtype, tc, cur_chunk); + } +#endif /* USE_ITT_BUILD */ + }; // if + +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_init: T#%%d returning: schedule:%%d ordered:%%%s " + "lb:%%%s ub:%%%s" + " st:%%%s tc:%%%s count:%%%s\n\tordered_lower:%%%s ordered_upper:%%%s" + " parm1:%%%s parm2:%%%s parm3:%%%s parm4:%%%s\n", + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec, traits_t::spec); + KD_TRACE(10, (buff, gtid, pr->schedule, pr->ordered, pr->u.p.lb, pr->u.p.ub, + pr->u.p.st, pr->u.p.tc, pr->u.p.count, pr->u.p.ordered_lower, + pr->u.p.ordered_upper, pr->u.p.parm1, pr->u.p.parm2, + pr->u.p.parm3, pr->u.p.parm4)); + __kmp_str_free(&buff); + } +#endif +#if (KMP_STATIC_STEAL_ENABLED) + // It cannot be guaranteed that after execution of a loop with some other + // schedule kind all the parm3 variables will contain the same value. Even if + // all parm3 will be the same, it still exists a bad case like using 0 and 1 + // rather than program life-time increment. So the dedicated variable is + // required. The 'static_steal_counter' is used. + if (schedule == kmp_sch_static_steal) { + // Other threads will inspect this variable when searching for a victim. + // This is a flag showing that other threads may steal from this thread + // since then. + volatile T *p = &pr->u.p.static_steal_counter; + *p = *p + 1; + } +#endif // ( KMP_STATIC_STEAL_ENABLED ) #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } #endif } -/* - * For ordered loops, either __kmp_dispatch_finish() should be called after +/* For ordered loops, either __kmp_dispatch_finish() should be called after * every iteration, or __kmp_dispatch_finish_chunk() should be called after * every chunk of iterations. If the ordered section(s) were not executed * for this iteration (or every iteration in this chunk), we need to set the - * ordered iteration counters so that the next thread can proceed. - */ -template< typename UT > -static void -__kmp_dispatch_finish( int gtid, ident_t *loc ) -{ - typedef typename traits_t< UT >::signed_t ST; - kmp_info_t *th = __kmp_threads[ gtid ]; - - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid ) ); - if ( ! 
th -> th.th_team -> t.t_serialized ) { - - dispatch_private_info_template< UT > * pr = - reinterpret_cast< dispatch_private_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - dispatch_shared_info_template< UT > volatile * sh = - reinterpret_cast< dispatch_shared_info_template< UT >volatile* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( pr ); - KMP_DEBUG_ASSERT( sh ); - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - if ( pr->ordered_bumped ) { - KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; - } else { - UT lower = pr->u.p.ordered_lower; + * ordered iteration counters so that the next thread can proceed. */ +template +static void __kmp_dispatch_finish(int gtid, ident_t *loc) { + typedef typename traits_t::signed_t ST; + kmp_info_t *th = __kmp_threads[gtid]; + + KD_TRACE(100, ("__kmp_dispatch_finish: T#%d called\n", gtid)); + if (!th->th.th_team->t.t_serialized) { + + dispatch_private_info_template *pr = + reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + dispatch_shared_info_template volatile *sh = + reinterpret_cast volatile *>( + th->th.th_dispatch->th_dispatch_sh_current); + KMP_DEBUG_ASSERT(pr); + KMP_DEBUG_ASSERT(sh); + KMP_DEBUG_ASSERT(th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); + + if (pr->ordered_bumped) { + KD_TRACE( + 1000, + ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", + gtid)); + pr->ordered_bumped = 0; + } else { + UT lower = pr->u.p.ordered_lower; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish: T#%%d before wait: ordered_iteration:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d before wait: " + "ordered_iteration:%%%s lower:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); + __kmp_str_free(&buff); + } +#endif - __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG(NULL) - ); - KMP_MB(); /* is this necessary? */ - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish: T#%%d after wait: ordered_iteration:%%%s lower:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower ) ); - __kmp_str_free( &buff ); - } - #endif + __kmp_wait_yield(&sh->u.s.ordered_iteration, lower, + __kmp_ge USE_ITT_BUILD_ARG(NULL)); + KMP_MB(); /* is this necessary? 
*/ +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_finish: T#%%d after wait: " + "ordered_iteration:%%%s lower:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower)); + __kmp_str_free(&buff); + } +#endif - test_then_inc< ST >( (volatile ST *) & sh->u.s.ordered_iteration ); - } // if + test_then_inc((volatile ST *)&sh->u.s.ordered_iteration); } // if - KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid ) ); + } // if + KD_TRACE(100, ("__kmp_dispatch_finish: T#%d returned\n", gtid)); } #ifdef KMP_GOMP_COMPAT -template< typename UT > -static void -__kmp_dispatch_finish_chunk( int gtid, ident_t *loc ) -{ - typedef typename traits_t< UT >::signed_t ST; - kmp_info_t *th = __kmp_threads[ gtid ]; - - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid ) ); - if ( ! th -> th.th_team -> t.t_serialized ) { -// int cid; - dispatch_private_info_template< UT > * pr = - reinterpret_cast< dispatch_private_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - dispatch_shared_info_template< UT > volatile * sh = - reinterpret_cast< dispatch_shared_info_template< UT >volatile* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( pr ); - KMP_DEBUG_ASSERT( sh ); - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - -// for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { - UT lower = pr->u.p.ordered_lower; - UT upper = pr->u.p.ordered_upper; - UT inc = upper - lower + 1; - - if ( pr->ordered_bumped == inc ) { - KD_TRACE(1000, ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; - } else { - inc -= pr->ordered_bumped; - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d before wait: " \ - "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, lower, upper ) ); - __kmp_str_free( &buff ); - } - #endif +template +static void __kmp_dispatch_finish_chunk(int gtid, ident_t *loc) { + typedef typename traits_t::signed_t ST; + kmp_info_t *th = __kmp_threads[gtid]; + + KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d called\n", gtid)); + if (!th->th.th_team->t.t_serialized) { + // int cid; + dispatch_private_info_template *pr = + reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + dispatch_shared_info_template volatile *sh = + reinterpret_cast volatile *>( + th->th.th_dispatch->th_dispatch_sh_current); + KMP_DEBUG_ASSERT(pr); + KMP_DEBUG_ASSERT(sh); + KMP_DEBUG_ASSERT(th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); + + // for (cid = 0; cid < KMP_MAX_ORDERED; ++cid) { + UT lower = pr->u.p.ordered_lower; + UT upper = pr->u.p.ordered_upper; + UT inc = upper - lower + 1; + + if (pr->ordered_bumped == inc) { + KD_TRACE( + 1000, + ("__kmp_dispatch_finish: T#%d resetting ordered_bumped to zero\n", + gtid)); + pr->ordered_bumped = 0; + } else { + inc -= pr->ordered_bumped; + +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish_chunk: T#%%d before wait: " + "ordered_iteration:%%%s lower:%%%s upper:%%%s\n", + traits_t::spec, traits_t::spec, traits_t::spec); 
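// Editorial sketch (not part of the patch above; illustrative only).
// __kmp_dispatch_finish_chunk implements a ticket-style handshake for ordered
// sections: a thread waits until the shared ordered_iteration counter reaches
// the lower bound of its chunk, then advances the counter by the iterations it
// still owns so the next thread can proceed. The standalone analogue below
// uses std::atomic and a hypothetical helper name; the runtime itself uses
// __kmp_wait_yield and test_then_add, not this code.
#include <atomic>
#include <cstdint>

static void finish_ordered_chunk(std::atomic<uint64_t> &ordered_iteration,
                                 uint64_t lower, uint64_t upper,
                                 uint64_t already_bumped) {
  uint64_t inc = upper - lower + 1; // iterations covered by this chunk
  if (already_bumped == inc)
    return; // every iteration already passed through its ordered section
  inc -= already_bumped; // only release what has not been released yet
  // wait for our turn; the real code yields/pauses inside __kmp_wait_yield
  while (ordered_iteration.load(std::memory_order_acquire) < lower) {
  }
  // release the remaining iterations of this chunk in one step
  ordered_iteration.fetch_add(inc, std::memory_order_release);
}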
+ KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower, upper)); + __kmp_str_free(&buff); + } +#endif - __kmp_wait_yield< UT >(&sh->u.s.ordered_iteration, lower, __kmp_ge< UT > - USE_ITT_BUILD_ARG(NULL) - ); + __kmp_wait_yield(&sh->u.s.ordered_iteration, lower, + __kmp_ge USE_ITT_BUILD_ARG(NULL)); - KMP_MB(); /* is this necessary? */ - KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting ordered_bumped to zero\n", - gtid ) ); - pr->ordered_bumped = 0; + KMP_MB(); /* is this necessary? */ + KD_TRACE(1000, ("__kmp_dispatch_finish_chunk: T#%d resetting " + "ordered_bumped to zero\n", + gtid)); + pr->ordered_bumped = 0; //!!!!! TODO check if the inc should be unsigned, or signed??? - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_finish_chunk: T#%%d after wait: " \ - "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper ) ); - __kmp_str_free( &buff ); - } - #endif +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_finish_chunk: T#%%d after wait: " + "ordered_iteration:%%%s inc:%%%s lower:%%%s upper:%%%s\n", + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec); + KD_TRACE(1000, + (buff, gtid, sh->u.s.ordered_iteration, inc, lower, upper)); + __kmp_str_free(&buff); + } +#endif - test_then_add< ST >( (volatile ST *) & sh->u.s.ordered_iteration, inc); - } -// } + test_then_add((volatile ST *)&sh->u.s.ordered_iteration, inc); } - KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid ) ); + // } + } + KD_TRACE(100, ("__kmp_dispatch_finish_chunk: T#%d returned\n", gtid)); } #endif /* KMP_GOMP_COMPAT */ -/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 - * (no more work), then tell OMPT the loop is over. In some cases - * kmp_dispatch_fini() is not called. */ +/* Define a macro for exiting __kmp_dispatch_next(). If status is 0 (no more + work), then tell OMPT the loop is over. In some cases kmp_dispatch_fini() + is not called. 
*/ #if OMPT_SUPPORT && OMPT_TRACE #define OMPT_LOOP_END \ - if (status == 0) { \ - if (ompt_enabled && \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ - ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ - team_info->parallel_id, task_info->task_id); \ - } \ - } + if (status == 0) { \ + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_end)) { \ + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); \ + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); \ + ompt_callbacks.ompt_callback(ompt_event_loop_end)( \ + team_info->parallel_id, task_info->task_id); \ + } \ + } #else #define OMPT_LOOP_END // no-op #endif -template< typename T > -static int -__kmp_dispatch_next( - ident_t *loc, int gtid, kmp_int32 *p_last, T *p_lb, T *p_ub, typename traits_t< T >::signed_t *p_st -) { - - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - typedef typename traits_t< T >::floating_t DBL; +template +static int __kmp_dispatch_next(ident_t *loc, int gtid, kmp_int32 *p_last, + T *p_lb, T *p_ub, + typename traits_t::signed_t *p_st) { + + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + typedef typename traits_t::floating_t DBL; + + // This is potentially slightly misleading, schedule(runtime) will appear here + // even if the actual runtme schedule is static. (Which points out a + // disadavantage of schedule(runtime): even when static scheduling is used it + // costs more than a compile time choice to use static scheduling would.) + KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling); + + int status; + dispatch_private_info_template *pr; + kmp_info_t *th = __kmp_threads[gtid]; + kmp_team_t *team = th->th.th_team; + + KMP_DEBUG_ASSERT(p_lb && p_ub && p_st); // AC: these cannot be NULL +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d called p_lb:%%%s " + "p_ub:%%%s p_st:%%%s p_last: %%p\n", + traits_t::spec, traits_t::spec, + traits_t::spec); + KD_TRACE(1000, (buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last)); + __kmp_str_free(&buff); + } +#endif - // This is potentially slightly misleading, schedule(runtime) will appear here even if the actual runtme schedule - // is static. (Which points out a disadavantage of schedule(runtime): even when static scheduling is used it costs - // more than a compile time choice to use static scheduling would.) 
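As the comment above notes, any loop declared schedule(runtime) is lowered to this dispatch_init/dispatch_next interface even when OMP_SCHEDULE later resolves to a static schedule, so it never gets the cheaper compile-time static lowering. A small user-side illustration, assuming a clang/libomp toolchain:

// User-side view of the comment above: schedule(runtime) always goes through
// the dynamic dispatch entry points (__kmpc_dispatch_init_*/next_*), even if
// OMP_SCHEDULE resolves to a static schedule at run time, whereas
// schedule(static) is lowered to the one-shot __kmpc_for_static_init path.
// Build with e.g. clang++ -fopenmp; try OMP_SCHEDULE=static,16 at run time.
#include <cstdio>
#include <omp.h>

int main() {
  const int n = 1000;
  long sum = 0;
#pragma omp parallel for schedule(runtime) reduction(+ : sum)
  for (int i = 0; i < n; ++i)
    sum += i; // each chunk is handed out by the dispatch_next machinery
  std::printf("sum = %ld\n", sum); // 499500
}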
- KMP_TIME_PARTITIONED_BLOCK(FOR_dynamic_scheduling); + if (team->t.t_serialized) { + /* NOTE: serialize this dispatch becase we are not at the active level */ + pr = reinterpret_cast *>( + th->th.th_dispatch->th_disp_buffer); /* top of the stack */ + KMP_DEBUG_ASSERT(pr); + + if ((status = (pr->u.p.tc != 0)) == 0) { + *p_lb = 0; + *p_ub = 0; + // if ( p_last != NULL ) + // *p_last = 0; + if (p_st != NULL) + *p_st = 0; + if (__kmp_env_consistency_check) { + if (pr->pushed_ws != ct_none) { + pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); + } + } + } else if (pr->nomerge) { + kmp_int32 last; + T start; + UT limit, trip, init; + ST incr; + T chunk = pr->u.p.parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", + gtid)); + + init = chunk * pr->u.p.count++; + trip = pr->u.p.tc - 1; + + if ((status = (init <= trip)) == 0) { + *p_lb = 0; + *p_ub = 0; + // if ( p_last != NULL ) + // *p_last = 0; + if (p_st != NULL) + *p_st = 0; + if (__kmp_env_consistency_check) { + if (pr->pushed_ws != ct_none) { + pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); + } + } + } else { + start = pr->u.p.lb; + limit = chunk + init - 1; + incr = pr->u.p.st; - int status; - dispatch_private_info_template< T > * pr; - kmp_info_t * th = __kmp_threads[ gtid ]; - kmp_team_t * team = th -> th.th_team; + if ((last = (limit >= trip)) != 0) { + limit = trip; +#if KMP_OS_WINDOWS + pr->u.p.last_upper = pr->u.p.ub; +#endif /* KMP_OS_WINDOWS */ + } + if (p_last != NULL) + *p_last = last; + if (p_st != NULL) + *p_st = incr; + if (incr == 1) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } - KMP_DEBUG_ASSERT( p_lb && p_ub && p_st ); // AC: these cannot be NULL - #ifdef KMP_DEBUG + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if + } // if + } else { + pr->u.p.tc = 0; + *p_lb = pr->u.p.lb; + *p_ub = pr->u.p.ub; +#if KMP_OS_WINDOWS + pr->u.p.last_upper = *p_ub; +#endif /* KMP_OS_WINDOWS */ + if (p_last != NULL) + *p_last = TRUE; + if (p_st != NULL) + *p_st = pr->u.p.st; + } // if +#ifdef KMP_DEBUG { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d called p_lb:%%%s p_ub:%%%s p_st:%%%s p_last: %%p\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(1000, ( buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last ) ); - __kmp_str_free( &buff ); + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " + "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", + traits_t::spec, traits_t::spec, traits_t::spec); + KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status)); + __kmp_str_free(&buff); } - #endif - - if ( team -> t.t_serialized ) { - /* NOTE: serialize this dispatch becase we are not at the active level */ - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th -> th.th_dispatch -> th_disp_buffer ); /* top of the stack */ - KMP_DEBUG_ASSERT( pr ); - - if ( (status = (pr->u.p.tc != 0)) == 0 ) { - *p_lb = 0; - *p_ub = 0; -// if ( p_last != NULL ) -// *p_last = 0; - if ( p_st != NULL ) - *p_st = 0; - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } +#endif +#if INCLUDE_SSC_MARKS + SSC_MARK_DISPATCH_NEXT(); +#endif + OMPT_LOOP_END; + return status; + } else { + kmp_int32 last = 0; + dispatch_shared_info_template *sh; + T start; + ST incr; + UT limit, trip, init; + + KMP_DEBUG_ASSERT(th->th.th_dispatch == + &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid]); + + pr = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_pr_current); + KMP_DEBUG_ASSERT(pr); + sh = reinterpret_cast *>( + th->th.th_dispatch->th_dispatch_sh_current); + KMP_DEBUG_ASSERT(sh); + + if (pr->u.p.tc == 0) { + // zero trip count + status = 0; + } else { + switch (pr->schedule) { +#if (KMP_STATIC_STEAL_ENABLED) + case kmp_sch_static_steal: { + T chunk = pr->u.p.parm1; + int nproc = th->th.th_team_nproc; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", + gtid)); + + trip = pr->u.p.tc - 1; + + if (traits_t::type_size > 4) { + // use lock for 8-byte and CAS for 4-byte induction + // variable. TODO (optional): check and use 16-byte CAS + kmp_lock_t *lck = th->th.th_dispatch->th_steal_lock; + KMP_DEBUG_ASSERT(lck != NULL); + if (pr->u.p.count < (UT)pr->u.p.ub) { + __kmp_acquire_lock(lck, gtid); + // try to get own chunk of iterations + init = (pr->u.p.count)++; + status = (init < (UT)pr->u.p.ub); + __kmp_release_lock(lck, gtid); + } else { + status = 0; // no own chunks + } + if (!status) { // try to steal + kmp_info_t **other_threads = team->t.t_threads; + int while_limit = nproc; // nproc attempts to find a victim + int while_index = 0; + // TODO: algorithm of searching for a victim + // should be cleaned up and measured + while ((!status) && (while_limit != ++while_index)) { + T remaining; + T victimIdx = pr->u.p.parm4; + T oldVictimIdx = victimIdx ? 
victimIdx - 1 : nproc - 1; + dispatch_private_info_template *victim = + reinterpret_cast *>( + other_threads[victimIdx] + ->th.th_dispatch->th_dispatch_pr_current); + while ((victim == NULL || victim == pr || + (*(volatile T *)&victim->u.p.static_steal_counter != + *(volatile T *)&pr->u.p.static_steal_counter)) && + oldVictimIdx != victimIdx) { + victimIdx = (victimIdx + 1) % nproc; + victim = reinterpret_cast *>( + other_threads[victimIdx] + ->th.th_dispatch->th_dispatch_pr_current); + }; + if (!victim || + (*(volatile T *)&victim->u.p.static_steal_counter != + *(volatile T *)&pr->u.p.static_steal_counter)) { + continue; // try once more (nproc attempts in total) + // no victim is ready yet to participate in stealing + // because all victims are still in kmp_init_dispatch + } + if (victim->u.p.count + 2 > (UT)victim->u.p.ub) { + pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid + continue; // not enough chunks to steal, goto next victim + } + + lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock; + KMP_ASSERT(lck != NULL); + __kmp_acquire_lock(lck, gtid); + limit = victim->u.p.ub; // keep initial ub + if (victim->u.p.count >= limit || + (remaining = limit - victim->u.p.count) < 2) { + __kmp_release_lock(lck, gtid); + pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim + continue; // not enough chunks to steal + } + // stealing succeded, reduce victim's ub by 1/4 of undone chunks + // or by 1 + if (remaining > 3) { + KMP_COUNT_VALUE(FOR_static_steal_stolen, remaining >> 2); + init = (victim->u.p.ub -= + (remaining >> 2)); // steal 1/4 of remaining + } else { + KMP_COUNT_VALUE(FOR_static_steal_stolen, 1); + init = + (victim->u.p.ub -= 1); // steal 1 chunk of 2 or 3 remaining + } + __kmp_release_lock(lck, gtid); + + KMP_DEBUG_ASSERT(init + 1 <= limit); + pr->u.p.parm4 = victimIdx; // remember victim to steal from + status = 1; + while_index = 0; + // now update own count and ub with stolen range but init chunk + __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid); + pr->u.p.count = init + 1; + pr->u.p.ub = limit; + __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid); + } // while (search for victim) + } // if (try to find victim and steal) + } else { + // 4-byte induction variable, use 8-byte CAS for pair (count, ub) + typedef union { + struct { + UT count; + T ub; + } p; + kmp_int64 b; + } union_i4; + // All operations on 'count' or 'ub' must be combined atomically + // together. 
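For 4-byte induction variables the stealing path below packs the pair (count, ub) into a single 64-bit word and updates it with KMP_COMPARE_AND_STORE_ACQ64, so claiming a chunk and shrinking a victim's range are each one atomic operation and no thread can observe a torn pair. A minimal sketch of that packing idea using std::atomic; the names PackedRange and claim_next are illustrative, not part of the runtime, which uses its own union_i4 type:

// Both fields live in one 64-bit word, so taking the next chunk is a single
// compare-and-swap. std::atomic stands in for KMP_COMPARE_AND_STORE_ACQ64.
#include <atomic>
#include <cstdint>
#include <cstdio>

struct PackedRange {
  std::atomic<uint64_t> word{0};

  static uint64_t pack(uint32_t count, uint32_t ub) {
    return (uint64_t)ub << 32 | count;
  }
  static uint32_t count_of(uint64_t w) { return (uint32_t)w; }
  static uint32_t ub_of(uint64_t w) { return (uint32_t)(w >> 32); }

  // Try to claim chunk index 'count'; fails once count reaches ub.
  bool claim_next(uint32_t &claimed) {
    uint64_t old = word.load(std::memory_order_relaxed);
    for (;;) {
      if (count_of(old) >= ub_of(old))
        return false; // no chunks left in this range
      uint64_t desired = pack(count_of(old) + 1, ub_of(old));
      if (word.compare_exchange_weak(old, desired, std::memory_order_acquire,
                                     std::memory_order_relaxed)) {
        claimed = count_of(old);
        return true;
      }
      // CAS failed: 'old' now holds the current value, retry
      // (mirrors the KMP_CPU_PAUSE retry loop below)
    }
  }
};

int main() {
  PackedRange r;
  r.word = PackedRange::pack(/*count=*/0, /*ub=*/4);
  uint32_t c;
  while (r.claim_next(c))
    std::printf("claimed chunk %u\n", c); // prints 0..3
}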
+ { + union_i4 vold, vnew; + vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); + vnew = vold; + vnew.p.count++; + while (!KMP_COMPARE_AND_STORE_ACQ64( + (volatile kmp_int64 *)&pr->u.p.count, + *VOLATILE_CAST(kmp_int64 *) & vold.b, + *VOLATILE_CAST(kmp_int64 *) & vnew.b)) { + KMP_CPU_PAUSE(); + vold.b = *(volatile kmp_int64 *)(&pr->u.p.count); + vnew = vold; + vnew.p.count++; } - } else if ( pr->nomerge ) { - kmp_int32 last; - T start; - UT limit, trip, init; - ST incr; - T chunk = pr->u.p.parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid ) ); - - init = chunk * pr->u.p.count++; - trip = pr->u.p.tc - 1; - - if ( (status = (init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; -// if ( p_last != NULL ) -// *p_last = 0; - if ( p_st != NULL ) - *p_st = 0; - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } + vnew = vold; + init = vnew.p.count; + status = (init < (UT)vnew.p.ub); + } + + if (!status) { + kmp_info_t **other_threads = team->t.t_threads; + int while_limit = nproc; // nproc attempts to find a victim + int while_index = 0; + + // TODO: algorithm of searching for a victim + // should be cleaned up and measured + while ((!status) && (while_limit != ++while_index)) { + union_i4 vold, vnew; + kmp_int32 remaining; + T victimIdx = pr->u.p.parm4; + T oldVictimIdx = victimIdx ? victimIdx - 1 : nproc - 1; + dispatch_private_info_template *victim = + reinterpret_cast *>( + other_threads[victimIdx] + ->th.th_dispatch->th_dispatch_pr_current); + while ((victim == NULL || victim == pr || + (*(volatile T *)&victim->u.p.static_steal_counter != + *(volatile T *)&pr->u.p.static_steal_counter)) && + oldVictimIdx != victimIdx) { + victimIdx = (victimIdx + 1) % nproc; + victim = reinterpret_cast *>( + other_threads[victimIdx] + ->th.th_dispatch->th_dispatch_pr_current); + }; + if (!victim || + (*(volatile T *)&victim->u.p.static_steal_counter != + *(volatile T *)&pr->u.p.static_steal_counter)) { + continue; // try once more (nproc attempts in total) + // no victim is ready yet to participate in stealing + // because all victims are still in kmp_init_dispatch + } + pr->u.p.parm4 = victimIdx; // new victim found + while (1) { // CAS loop if victim has enough chunks to steal + vold.b = *(volatile kmp_int64 *)(&victim->u.p.count); + vnew = vold; + + KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip); + if (vnew.p.count >= (UT)vnew.p.ub || + (remaining = vnew.p.ub - vnew.p.count) < 2) { + pr->u.p.parm4 = + (victimIdx + 1) % nproc; // shift start victim id + break; // not enough chunks to steal, goto next victim } - } else { - start = pr->u.p.lb; - limit = chunk + init - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) { - limit = trip; - #if KMP_OS_WINDOWS - pr->u.p.last_upper = pr->u.p.ub; - #endif /* KMP_OS_WINDOWS */ - } - if ( p_last != NULL ) - *p_last = last; - if ( p_st != NULL ) - *p_st = incr; - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; + if (remaining > 3) { + vnew.p.ub -= (remaining >> 2); // try to steal 1/4 remaining } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; + vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s 
ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if + KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip); + // TODO: Should this be acquire or release? + if (KMP_COMPARE_AND_STORE_ACQ64( + (volatile kmp_int64 *)&victim->u.p.count, + *VOLATILE_CAST(kmp_int64 *) & vold.b, + *VOLATILE_CAST(kmp_int64 *) & vnew.b)) { + // stealing succeeded + KMP_COUNT_VALUE(FOR_static_steal_stolen, + vold.p.ub - vnew.p.ub); + status = 1; + while_index = 0; + // now update own count and ub + init = vnew.p.ub; + vold.p.count = init + 1; +#if KMP_ARCH_X86 + KMP_XCHG_FIXED64((volatile kmp_int64 *)(&pr->u.p.count), + vold.b); +#else + *(volatile kmp_int64 *)(&pr->u.p.count) = vold.b; +#endif + break; + } // if (check CAS result) + KMP_CPU_PAUSE(); // CAS failed, repeat attempt + } // while (try to steal from particular victim) + } // while (search for victim) + } // if (try to find victim and steal) + } // if (4-byte induction variable) + if (!status) { + *p_lb = 0; + *p_ub = 0; + if (p_st != NULL) + *p_st = 0; } else { - pr->u.p.tc = 0; - *p_lb = pr->u.p.lb; - *p_ub = pr->u.p.ub; - #if KMP_OS_WINDOWS - pr->u.p.last_upper = *p_ub; - #endif /* KMP_OS_WINDOWS */ - if ( p_last != NULL ) - *p_last = TRUE; - if ( p_st != NULL ) - *p_st = pr->u.p.st; + start = pr->u.p.parm2; + init *= chunk; + limit = chunk + init - 1; + incr = pr->u.p.st; + KMP_COUNT_VALUE(FOR_static_steal_chunks, 1); + + KMP_DEBUG_ASSERT(init <= trip); + if ((last = (limit >= trip)) != 0) + limit = trip; + if (p_st != NULL) + *p_st = incr; + + if (incr == 1) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if } // if - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d serialized case: p_lb:%%%s " \ - "p_ub:%%%s p_st:%%%s p_last:%%p %%d returning:%%d\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, *p_st, p_last, *p_last, status) ); - __kmp_str_free( &buff ); + break; + } // case +#endif // ( KMP_STATIC_STEAL_ENABLED ) + case kmp_sch_static_balanced: { + KD_TRACE( + 100, + ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid)); + if ((status = !pr->u.p.count) != + 0) { /* check if thread has any iteration to do */ + pr->u.p.count = 1; + *p_lb = pr->u.p.lb; + *p_ub = pr->u.p.ub; + last = pr->u.p.parm1; + if (p_st != NULL) + *p_st = pr->u.p.st; + } else { /* no iterations to do */ + pr->u.p.lb = pr->u.p.ub + pr->u.p.st; } - #endif -#if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); + if (pr->ordered) { +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + 
pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } #endif - OMPT_LOOP_END; - return status; - } else { - kmp_int32 last = 0; - dispatch_shared_info_template< UT > *sh; - T start; - ST incr; - UT limit, trip, init; - - KMP_DEBUG_ASSERT( th->th.th_dispatch == - &th->th.th_team->t.t_dispatch[th->th.th_info.ds.ds_tid] ); - - pr = reinterpret_cast< dispatch_private_info_template< T >* > - ( th->th.th_dispatch->th_dispatch_pr_current ); - KMP_DEBUG_ASSERT( pr ); - sh = reinterpret_cast< dispatch_shared_info_template< UT >* > - ( th->th.th_dispatch->th_dispatch_sh_current ); - KMP_DEBUG_ASSERT( sh ); - - if ( pr->u.p.tc == 0 ) { - // zero trip count + } // if + } // case + break; + case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was + merged here */ + case kmp_sch_static_chunked: { + T parm1; + + KD_TRACE(100, ("__kmp_dispatch_next: T#%d " + "kmp_sch_static_[affinity|chunked] case\n", + gtid)); + parm1 = pr->u.p.parm1; + + trip = pr->u.p.tc - 1; + init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid)); + + if ((status = (init <= trip)) != 0) { + start = pr->u.p.lb; + incr = pr->u.p.st; + limit = parm1 + init - 1; + + if ((last = (limit >= trip)) != 0) + limit = trip; + + if (p_st != NULL) + *p_st = incr; + + pr->u.p.count += th->th.th_team_nproc; + + if (incr == 1) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if + } // if + } // case + break; + + case kmp_sch_dynamic_chunked: { + T chunk = pr->u.p.parm1; + + KD_TRACE( + 100, + ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", gtid)); + + init = chunk * test_then_inc_acq((volatile ST *)&sh->u.s.iteration); + trip = pr->u.p.tc - 1; + + if ((status = (init <= trip)) == 0) { + *p_lb = 0; + *p_ub = 0; + if (p_st != NULL) + *p_st = 0; + } else { + start = pr->u.p.lb; + limit = chunk + init - 1; + incr = pr->u.p.st; + + if ((last = (limit >= trip)) != 0) + limit = trip; + + if (p_st != NULL) + *p_st = incr; + + if (incr == 1) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if + } // if + } // case + break; + + case kmp_sch_guided_iterative_chunked: { + T chunkspec = pr->u.p.parm1; + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked " + "iterative case\n", + gtid)); + trip = pr->u.p.tc; + // Start atomic part of calculations + while (1) { + ST remaining; // signed, because can be < 0 + init = sh->u.s.iteration; // shared value + remaining = trip - init; + if (remaining <= 0) { // AC: need to compare with 0 first + // nothing to do, don't try atomic op status = 0; + 
break; + } + if ((T)remaining < + pr->u.p.parm2) { // compare with K*nproc*(chunk+1), K=2 by default + // use dynamic-style shcedule + // atomically inrement iterations, get old value + init = test_then_add((ST *)&sh->u.s.iteration, (ST)chunkspec); + remaining = trip - init; + if (remaining <= 0) { + status = 0; // all iterations got by other threads + } else { // got some iterations to work on + status = 1; + if ((T)remaining > chunkspec) { + limit = init + chunkspec - 1; + } else { + last = 1; // the last chunk + limit = init + remaining - 1; + } // if + } // if + break; + } // if + limit = init + (UT)(remaining * + *(double *)&pr->u.p.parm3); // divide by K*nproc + if (compare_and_swap((ST *)&sh->u.s.iteration, (ST)init, + (ST)limit)) { + // CAS was successful, chunk obtained + status = 1; + --limit; + break; + } // if + } // while + if (status != 0) { + start = pr->u.p.lb; + incr = pr->u.p.st; + if (p_st != NULL) + *p_st = incr; + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if } else { - switch (pr->schedule) { - #if ( KMP_STATIC_STEAL_ENABLED ) - case kmp_sch_static_steal: - { - T chunk = pr->u.p.parm1; - int nproc = th->th.th_team_nproc; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_steal case\n", gtid) ); - - trip = pr->u.p.tc - 1; - - if ( traits_t::type_size > 4 ) { - // use lock for 8-byte and CAS for 4-byte induction - // variable. TODO (optional): check and use 16-byte CAS - kmp_lock_t * lck = th->th.th_dispatch->th_steal_lock; - KMP_DEBUG_ASSERT(lck != NULL); - if( pr->u.p.count < (UT)pr->u.p.ub ) { - __kmp_acquire_lock(lck, gtid); - // try to get own chunk of iterations - init = ( pr->u.p.count )++; - status = ( init < (UT)pr->u.p.ub ); - __kmp_release_lock(lck, gtid); - } else { - status = 0; // no own chunks - } - if( !status ) { // try to steal - kmp_info_t **other_threads = team->t.t_threads; - int while_limit = nproc; // nproc attempts to find a victim - int while_index = 0; - // TODO: algorithm of searching for a victim - // should be cleaned up and measured - while ( ( !status ) && ( while_limit != ++while_index ) ) { - T remaining; - T victimIdx = pr->u.p.parm4; - T oldVictimIdx = victimIdx ? 
victimIdx - 1 : nproc - 1; - dispatch_private_info_template< T > * victim = - reinterpret_cast< dispatch_private_info_template< T >* > - (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current); - while( ( victim == NULL || victim == pr || - ( *(volatile T*)&victim->u.p.static_steal_counter != - *(volatile T*)&pr->u.p.static_steal_counter ) ) && - oldVictimIdx != victimIdx ) - { - victimIdx = (victimIdx + 1) % nproc; - victim = reinterpret_cast< dispatch_private_info_template< T >* > - (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current); - }; - if( !victim || - ( *(volatile T *)&victim->u.p.static_steal_counter != - *(volatile T *)&pr->u.p.static_steal_counter ) ) - { - continue; // try once more (nproc attempts in total) - // no victim is ready yet to participate in stealing - // because all victims are still in kmp_init_dispatch - } - if( victim->u.p.count + 2 > (UT)victim->u.p.ub ) { - pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start tid - continue; // not enough chunks to steal, goto next victim - } - - lck = other_threads[victimIdx]->th.th_dispatch->th_steal_lock; - KMP_ASSERT(lck != NULL); - __kmp_acquire_lock(lck, gtid); - limit = victim->u.p.ub; // keep initial ub - if( victim->u.p.count >= limit || - (remaining = limit - victim->u.p.count) < 2 ) - { - __kmp_release_lock(lck, gtid); - pr->u.p.parm4 = (victimIdx + 1) % nproc; // next victim - continue; // not enough chunks to steal - } - // stealing succeded, reduce victim's ub by 1/4 of undone chunks or by 1 - if( remaining > 3 ) { - KMP_COUNT_VALUE(FOR_static_steal_stolen, remaining>>2); - init = ( victim->u.p.ub -= (remaining>>2) ); // steal 1/4 of remaining - } else { - KMP_COUNT_VALUE(FOR_static_steal_stolen, 1); - init = ( victim->u.p.ub -= 1 ); // steal 1 chunk of 2 or 3 remaining - } - __kmp_release_lock(lck, gtid); - - KMP_DEBUG_ASSERT(init + 1 <= limit); - pr->u.p.parm4 = victimIdx; // remember victim to steal from - status = 1; - while_index = 0; - // now update own count and ub with stolen range but init chunk - __kmp_acquire_lock(th->th.th_dispatch->th_steal_lock, gtid); - pr->u.p.count = init + 1; - pr->u.p.ub = limit; - __kmp_release_lock(th->th.th_dispatch->th_steal_lock, gtid); - } // while (search for victim) - } // if (try to find victim and steal) - } else { - // 4-byte induction variable, use 8-byte CAS for pair (count, ub) - typedef union { - struct { - UT count; - T ub; - } p; - kmp_int64 b; - } union_i4; - // All operations on 'count' or 'ub' must be combined atomically together. - { - union_i4 vold, vnew; - vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); - vnew = vold; - vnew.p.count++; - while( ! KMP_COMPARE_AND_STORE_ACQ64( - ( volatile kmp_int64* )&pr->u.p.count, - *VOLATILE_CAST(kmp_int64 *)&vold.b, - *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { - KMP_CPU_PAUSE(); - vold.b = *( volatile kmp_int64 * )(&pr->u.p.count); - vnew = vold; - vnew.p.count++; - } - vnew = vold; - init = vnew.p.count; - status = ( init < (UT)vnew.p.ub ) ; - } - - if( !status ) { - kmp_info_t **other_threads = team->t.t_threads; - int while_limit = nproc; // nproc attempts to find a victim - int while_index = 0; - - // TODO: algorithm of searching for a victim - // should be cleaned up and measured - while ( ( !status ) && ( while_limit != ++while_index ) ) { - union_i4 vold, vnew; - kmp_int32 remaining; - T victimIdx = pr->u.p.parm4; - T oldVictimIdx = victimIdx ? 
victimIdx - 1 : nproc - 1; - dispatch_private_info_template< T > * victim = - reinterpret_cast< dispatch_private_info_template< T >* > - (other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current); - while( (victim == NULL || victim == pr || - (*(volatile T*)&victim->u.p.static_steal_counter != - *(volatile T*)&pr->u.p.static_steal_counter)) && - oldVictimIdx != victimIdx ) - { - victimIdx = (victimIdx + 1) % nproc; - victim = reinterpret_cast< dispatch_private_info_template< T >* > - ( other_threads[victimIdx]->th.th_dispatch->th_dispatch_pr_current ); - }; - if( !victim || - ( *(volatile T *)&victim->u.p.static_steal_counter != - *(volatile T *)&pr->u.p.static_steal_counter ) ) - { - continue; // try once more (nproc attempts in total) - // no victim is ready yet to participate in stealing - // because all victims are still in kmp_init_dispatch - } - pr->u.p.parm4 = victimIdx; // new victim found - while( 1 ) { // CAS loop if victim has enough chunks to steal - vold.b = *( volatile kmp_int64 * )( &victim->u.p.count ); - vnew = vold; - - KMP_DEBUG_ASSERT( (vnew.p.ub - 1) * (UT)chunk <= trip ); - if ( vnew.p.count >= (UT)vnew.p.ub || - (remaining = vnew.p.ub - vnew.p.count) < 2 ) - { - pr->u.p.parm4 = (victimIdx + 1) % nproc; // shift start victim id - break; // not enough chunks to steal, goto next victim - } - if( remaining > 3 ) { - vnew.p.ub -= (remaining>>2); // try to steal 1/4 of remaining - } else { - vnew.p.ub -= 1; // steal 1 chunk of 2 or 3 remaining - } - KMP_DEBUG_ASSERT((vnew.p.ub - 1) * (UT)chunk <= trip); - // TODO: Should this be acquire or release? - if ( KMP_COMPARE_AND_STORE_ACQ64( - ( volatile kmp_int64 * )&victim->u.p.count, - *VOLATILE_CAST(kmp_int64 *)&vold.b, - *VOLATILE_CAST(kmp_int64 *)&vnew.b ) ) { - // stealing succedded - KMP_COUNT_VALUE(FOR_static_steal_stolen, vold.p.ub-vnew.p.ub); - status = 1; - while_index = 0; - // now update own count and ub - init = vnew.p.ub; - vold.p.count = init + 1; - #if KMP_ARCH_X86 - KMP_XCHG_FIXED64(( volatile kmp_int64 * )(&pr->u.p.count), vold.b); - #else - *( volatile kmp_int64 * )(&pr->u.p.count) = vold.b; - #endif - break; - } // if (check CAS result) - KMP_CPU_PAUSE(); // CAS failed, repeate attempt - } // while (try to steal from particular victim) - } // while (search for victim) - } // if (try to find victim and steal) - } // if (4-byte induction variable) - if ( !status ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.parm2; - init *= chunk; - limit = chunk + init - 1; - incr = pr->u.p.st; - KMP_COUNT_VALUE(FOR_static_steal_chunks, 1); - - KMP_DEBUG_ASSERT(init <= trip); - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - break; - } // case - #endif // ( KMP_STATIC_STEAL_ENABLED ) - case kmp_sch_static_balanced: - { - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_balanced case\n", gtid) ); - if ( (status 
= !pr->u.p.count) != 0 ) { /* check if thread has any iteration to do */ - pr->u.p.count = 1; - *p_lb = pr->u.p.lb; - *p_ub = pr->u.p.ub; - last = pr->u.p.parm1; - if ( p_st != NULL ) - *p_st = pr->u.p.st; - } else { /* no iterations to do */ - pr->u.p.lb = pr->u.p.ub + pr->u.p.st; - } - if ( pr->ordered ) { - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // case - break; - case kmp_sch_static_greedy: /* original code for kmp_sch_static_greedy was merged here */ - case kmp_sch_static_chunked: - { - T parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_static_[affinity|chunked] case\n", - gtid ) ); - parm1 = pr->u.p.parm1; - - trip = pr->u.p.tc - 1; - init = parm1 * (pr->u.p.count + __kmp_tid_from_gtid(gtid)); - - if ( (status = (init <= trip)) != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - limit = parm1 + init - 1; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - pr->u.p.count += th->th.th_team_nproc; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } - else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - - case kmp_sch_dynamic_chunked: - { - T chunk = pr->u.p.parm1; - - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_dynamic_chunked case\n", - gtid ) ); - - init = chunk * test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); - trip = pr->u.p.tc - 1; - - if ( (status = (init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.lb; - limit = chunk + init - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - - case kmp_sch_guided_iterative_chunked: - { - T chunkspec = pr->u.p.parm1; - KD_TRACE(100, - ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked iterative case\n",gtid)); - trip = pr->u.p.tc; - // Start atomic part of calculations - while(1) { - ST remaining; // signed, because can be < 0 - init = sh->u.s.iteration; // shared value - remaining = trip - init; - if ( remaining <= 0 ) { // AC: need 
to compare with 0 first - // nothing to do, don't try atomic op - status = 0; - break; - } - if ( (T)remaining < pr->u.p.parm2 ) { // compare with K*nproc*(chunk+1), K=2 by default - // use dynamic-style shcedule - // atomically inrement iterations, get old value - init = test_then_add( (ST*)&sh->u.s.iteration, (ST)chunkspec ); - remaining = trip - init; - if (remaining <= 0) { - status = 0; // all iterations got by other threads - } else { - // got some iterations to work on - status = 1; - if ( (T)remaining > chunkspec ) { - limit = init + chunkspec - 1; - } else { - last = 1; // the last chunk - limit = init + remaining - 1; - } // if - } // if - break; - } // if - limit = init + (UT)( remaining * *(double*)&pr->u.p.parm3 ); // divide by K*nproc - if ( compare_and_swap( (ST*)&sh->u.s.iteration, (ST)init, (ST)limit ) ) { - // CAS was successful, chunk obtained - status = 1; - --limit; - break; - } // if - } // while - if ( status != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if ( p_st != NULL ) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } else { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) - *p_st = 0; - } // if - } // case - break; - - case kmp_sch_guided_analytical_chunked: - { - T chunkspec = pr->u.p.parm1; - UT chunkIdx; - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* for storing original FPCW value for Windows* OS on - IA-32 architecture 8-byte version */ - unsigned int oldFpcw; - unsigned int fpcwSet = 0; - #endif - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked analytical case\n", - gtid ) ); - - trip = pr->u.p.tc; - - KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1); - KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc < trip); - - while(1) { /* this while loop is a safeguard against unexpected zero chunk sizes */ - chunkIdx = test_then_inc_acq< ST >((volatile ST *) & sh->u.s.iteration ); - if ( chunkIdx >= (UT)pr->u.p.parm2 ) { - --trip; - /* use dynamic-style scheduling */ - init = chunkIdx * chunkspec + pr->u.p.count; - /* need to verify init > 0 in case of overflow in the above calculation */ - if ( (status = (init > 0 && init <= trip)) != 0 ) { - limit = init + chunkspec -1; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - } - break; - } else { - /* use exponential-style scheduling */ - /* The following check is to workaround the lack of long double precision on Windows* OS. - This check works around the possible effect that init != 0 for chunkIdx == 0. 
- */ - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* If we haven't already done so, save original - FPCW and set precision to 64-bit, as Windows* OS - on IA-32 architecture defaults to 53-bit */ - if ( !fpcwSet ) { - oldFpcw = _control87(0,0); - _control87(_PC_64,_MCW_PC); - fpcwSet = 0x30000; - } - #endif - if ( chunkIdx ) { - init = __kmp_dispatch_guided_remaining< T >( - trip, *( DBL * )&pr->u.p.parm3, chunkIdx ); - KMP_DEBUG_ASSERT(init); - init = trip - init; - } else - init = 0; - limit = trip - __kmp_dispatch_guided_remaining< T >( - trip, *( DBL * )&pr->u.p.parm3, chunkIdx + 1 ); - KMP_ASSERT(init <= limit); - if ( init < limit ) { - KMP_DEBUG_ASSERT(limit <= trip); - --limit; - status = 1; - break; - } // if - } // if - } // while (1) - #if KMP_OS_WINDOWS && KMP_ARCH_X86 - /* restore FPCW if necessary - AC: check fpcwSet flag first because oldFpcw can be uninitialized here - */ - if ( fpcwSet && ( oldFpcw & fpcwSet ) ) - _control87(oldFpcw,_MCW_PC); - #endif - if ( status != 0 ) { - start = pr->u.p.lb; - incr = pr->u.p.st; - if ( p_st != NULL ) - *p_st = incr; - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } - } else { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) - *p_st = 0; - } - } // case - break; - - case kmp_sch_trapezoidal: - { - UT index; - T parm2 = pr->u.p.parm2; - T parm3 = pr->u.p.parm3; - T parm4 = pr->u.p.parm4; - KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n", - gtid ) ); - - index = test_then_inc< ST >( (volatile ST *) & sh->u.s.iteration ); - - init = ( index * ( (2*parm2) - (index-1)*parm4 ) ) / 2; - trip = pr->u.p.tc - 1; - - if ( (status = ((T)index < parm3 && init <= trip)) == 0 ) { - *p_lb = 0; - *p_ub = 0; - if ( p_st != NULL ) *p_st = 0; - } else { - start = pr->u.p.lb; - limit = ( (index+1) * ( 2*parm2 - index*parm4 ) ) / 2 - 1; - incr = pr->u.p.st; - - if ( (last = (limit >= trip)) != 0 ) - limit = trip; - - if ( p_st != NULL ) *p_st = incr; - - if ( incr == 1 ) { - *p_lb = start + init; - *p_ub = start + limit; - } else { - *p_lb = start + init * incr; - *p_ub = start + limit * incr; - } - - if ( pr->ordered ) { - pr->u.p.ordered_lower = init; - pr->u.p.ordered_upper = limit; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d ordered_lower:%%%s ordered_upper:%%%s\n", - traits_t< UT >::spec, traits_t< UT >::spec ); - KD_TRACE(1000, ( buff, gtid, pr->u.p.ordered_lower, pr->u.p.ordered_upper ) ); - __kmp_str_free( &buff ); - } - #endif - } // if - } // if - } // case - break; - default: - { - status = 0; // to avoid complaints on uninitialized variable use - __kmp_msg( - kmp_ms_fatal, // Severity - KMP_MSG( UnknownSchedTypeDetected ), // Primary message - KMP_HNT( GetNewerLibrary ), // Hint - __kmp_msg_null // Variadic argument list terminator - ); - } - break; - } // switch - } // if tc == 0; - - if ( status == 0 ) { - UT num_done; + *p_lb = 0; + *p_ub = 0; + if (p_st != NULL) + *p_st = 0; + } // if + } // case + break; - num_done = test_then_inc< ST >( (volatile ST *) & 
sh->u.s.num_done ); - #ifdef KMP_DEBUG + case kmp_sch_guided_analytical_chunked: { + T chunkspec = pr->u.p.parm1; + UT chunkIdx; +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* for storing original FPCW value for Windows* OS on + IA-32 architecture 8-byte version */ + unsigned int oldFpcw; + unsigned int fpcwSet = 0; +#endif + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_guided_chunked " + "analytical case\n", + gtid)); + + trip = pr->u.p.tc; + + KMP_DEBUG_ASSERT(th->th.th_team_nproc > 1); + KMP_DEBUG_ASSERT((2UL * chunkspec + 1) * (UT)th->th.th_team_nproc < + trip); + + while (1) { /* this while loop is a safeguard against unexpected zero + chunk sizes */ + chunkIdx = test_then_inc_acq((volatile ST *)&sh->u.s.iteration); + if (chunkIdx >= (UT)pr->u.p.parm2) { + --trip; + /* use dynamic-style scheduling */ + init = chunkIdx * chunkspec + pr->u.p.count; + /* need to verify init > 0 in case of overflow in the above + * calculation */ + if ((status = (init > 0 && init <= trip)) != 0) { + limit = init + chunkspec - 1; + + if ((last = (limit >= trip)) != 0) + limit = trip; + } + break; + } else { +/* use exponential-style scheduling */ +/* The following check is to workaround the lack of long double precision on + Windows* OS. + This check works around the possible effect that init != 0 for chunkIdx == 0. + */ +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* If we haven't already done so, save original FPCW and set + precision to 64-bit, as Windows* OS on IA-32 architecture + defaults to 53-bit */ + if (!fpcwSet) { + oldFpcw = _control87(0, 0); + _control87(_PC_64, _MCW_PC); + fpcwSet = 0x30000; + } +#endif + if (chunkIdx) { + init = __kmp_dispatch_guided_remaining( + trip, *(DBL *)&pr->u.p.parm3, chunkIdx); + KMP_DEBUG_ASSERT(init); + init = trip - init; + } else + init = 0; + limit = trip - __kmp_dispatch_guided_remaining( + trip, *(DBL *)&pr->u.p.parm3, chunkIdx + 1); + KMP_ASSERT(init <= limit); + if (init < limit) { + KMP_DEBUG_ASSERT(limit <= trip); + --limit; + status = 1; + break; + } // if + } // if + } // while (1) +#if KMP_OS_WINDOWS && KMP_ARCH_X86 + /* restore FPCW if necessary + AC: check fpcwSet flag first because oldFpcw can be uninitialized + here */ + if (fpcwSet && (oldFpcw & fpcwSet)) + _control87(oldFpcw, _MCW_PC); +#endif + if (status != 0) { + start = pr->u.p.lb; + incr = pr->u.p.st; + if (p_st != NULL) + *p_st = incr; + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", - traits_t< UT >::spec ); - KD_TRACE(100, ( buff, gtid, sh->u.s.num_done ) ); - __kmp_str_free( &buff ); + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); } - #endif - - if ( (ST)num_done == th->th.th_team_nproc - 1 ) { - #if ( KMP_STATIC_STEAL_ENABLED ) - if( pr->schedule == kmp_sch_static_steal && traits_t::type_size > 4 ) { - int i; - kmp_info_t **other_threads = team->t.t_threads; - // loop complete, safe to destroy locks used for stealing - for( i = 0; i < th->th.th_team_nproc; ++i ) { - kmp_lock_t * lck = other_threads[i]->th.th_dispatch->th_steal_lock; - KMP_ASSERT(lck != 
NULL); - __kmp_destroy_lock( lck ); - __kmp_free( lck ); - other_threads[i]->th.th_dispatch->th_steal_lock = NULL; - } - } - #endif - /* NOTE: release this buffer to be reused */ +#endif + } + } else { + *p_lb = 0; + *p_ub = 0; + if (p_st != NULL) + *p_st = 0; + } + } // case + break; + + case kmp_sch_trapezoidal: { + UT index; + T parm2 = pr->u.p.parm2; + T parm3 = pr->u.p.parm3; + T parm4 = pr->u.p.parm4; + KD_TRACE(100, ("__kmp_dispatch_next: T#%d kmp_sch_trapezoidal case\n", + gtid)); + + index = test_then_inc((volatile ST *)&sh->u.s.iteration); + + init = (index * ((2 * parm2) - (index - 1) * parm4)) / 2; + trip = pr->u.p.tc - 1; + + if ((status = ((T)index < parm3 && init <= trip)) == 0) { + *p_lb = 0; + *p_ub = 0; + if (p_st != NULL) + *p_st = 0; + } else { + start = pr->u.p.lb; + limit = ((index + 1) * (2 * parm2 - index * parm4)) / 2 - 1; + incr = pr->u.p.st; + + if ((last = (limit >= trip)) != 0) + limit = trip; + + if (p_st != NULL) + *p_st = incr; + + if (incr == 1) { + *p_lb = start + init; + *p_ub = start + limit; + } else { + *p_lb = start + init * incr; + *p_ub = start + limit * incr; + } + + if (pr->ordered) { + pr->u.p.ordered_lower = init; + pr->u.p.ordered_upper = limit; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_dispatch_next: T#%%d " + "ordered_lower:%%%s ordered_upper:%%%s\n", + traits_t::spec, traits_t::spec); + KD_TRACE(1000, (buff, gtid, pr->u.p.ordered_lower, + pr->u.p.ordered_upper)); + __kmp_str_free(&buff); + } +#endif + } // if + } // if + } // case + break; + default: { + status = 0; // to avoid complaints on uninitialized variable use + __kmp_msg(kmp_ms_fatal, // Severity + KMP_MSG(UnknownSchedTypeDetected), // Primary message + KMP_HNT(GetNewerLibrary), // Hint + __kmp_msg_null // Variadic argument list terminator + ); + } break; + } // switch + } // if tc == 0; + + if (status == 0) { + UT num_done; + + num_done = test_then_inc((volatile ST *)&sh->u.s.num_done); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d increment num_done:%%%s\n", + traits_t::spec); + KD_TRACE(100, (buff, gtid, sh->u.s.num_done)); + __kmp_str_free(&buff); + } +#endif - KMP_MB(); /* Flush all pending memory write invalidates. */ + if ((ST)num_done == th->th.th_team_nproc - 1) { +#if (KMP_STATIC_STEAL_ENABLED) + if (pr->schedule == kmp_sch_static_steal && + traits_t::type_size > 4) { + int i; + kmp_info_t **other_threads = team->t.t_threads; + // loop complete, safe to destroy locks used for stealing + for (i = 0; i < th->th.th_team_nproc; ++i) { + kmp_lock_t *lck = other_threads[i]->th.th_dispatch->th_steal_lock; + KMP_ASSERT(lck != NULL); + __kmp_destroy_lock(lck); + __kmp_free(lck); + other_threads[i]->th.th_dispatch->th_steal_lock = NULL; + } + } +#endif + /* NOTE: release this buffer to be reused */ - sh->u.s.num_done = 0; - sh->u.s.iteration = 0; + KMP_MB(); /* Flush all pending memory write invalidates. */ - /* TODO replace with general release procedure? */ - if ( pr->ordered ) { - sh->u.s.ordered_iteration = 0; - } + sh->u.s.num_done = 0; + sh->u.s.iteration = 0; - KMP_MB(); /* Flush all pending memory write invalidates. */ + /* TODO replace with general release procedure? 
*/ + if (pr->ordered) { + sh->u.s.ordered_iteration = 0; + } - sh -> buffer_index += __kmp_dispatch_num_buffers; - KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", - gtid, sh->buffer_index) ); + KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_MB(); /* Flush all pending memory write invalidates. */ + sh->buffer_index += __kmp_dispatch_num_buffers; + KD_TRACE(100, ("__kmp_dispatch_next: T#%d change buffer_index:%d\n", + gtid, sh->buffer_index)); - } // if - if ( __kmp_env_consistency_check ) { - if ( pr->pushed_ws != ct_none ) { - pr->pushed_ws = __kmp_pop_workshare( gtid, pr->pushed_ws, loc ); - } - } + KMP_MB(); /* Flush all pending memory write invalidates. */ - th -> th.th_dispatch -> th_deo_fcn = NULL; - th -> th.th_dispatch -> th_dxo_fcn = NULL; - th -> th.th_dispatch -> th_dispatch_sh_current = NULL; - th -> th.th_dispatch -> th_dispatch_pr_current = NULL; - } // if (status == 0) -#if KMP_OS_WINDOWS - else if ( last ) { - pr->u.p.last_upper = pr->u.p.ub; + } // if + if (__kmp_env_consistency_check) { + if (pr->pushed_ws != ct_none) { + pr->pushed_ws = __kmp_pop_workshare(gtid, pr->pushed_ws, loc); } -#endif /* KMP_OS_WINDOWS */ - if ( p_last != NULL && status != 0 ) - *p_last = last; - } // if + } - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmp_dispatch_next: T#%%d normal case: " \ - "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(10, ( buff, gtid, *p_lb, *p_ub, p_st ? *p_st : 0, p_last, status ) ); - __kmp_str_free( &buff ); + th->th.th_dispatch->th_deo_fcn = NULL; + th->th.th_dispatch->th_dxo_fcn = NULL; + th->th.th_dispatch->th_dispatch_sh_current = NULL; + th->th.th_dispatch->th_dispatch_pr_current = NULL; + } // if (status == 0) +#if KMP_OS_WINDOWS + else if (last) { + pr->u.p.last_upper = pr->u.p.ub; } - #endif +#endif /* KMP_OS_WINDOWS */ + if (p_last != NULL && status != 0) + *p_last = last; + } // if + +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmp_dispatch_next: T#%%d normal case: " + "p_lb:%%%s p_ub:%%%s p_st:%%%s p_last:%%p returning:%%d\n", + traits_t::spec, traits_t::spec, traits_t::spec); + KD_TRACE(10, (buff, gtid, *p_lb, *p_ub, p_st ? 
*p_st : 0, p_last, status)); + __kmp_str_free(&buff); + } +#endif #if INCLUDE_SSC_MARKS - SSC_MARK_DISPATCH_NEXT(); + SSC_MARK_DISPATCH_NEXT(); #endif - OMPT_LOOP_END; - return status; + OMPT_LOOP_END; + return status; } -template< typename T > -static void -__kmp_dist_get_bounds( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 *plastiter, - T *plower, - T *pupper, - typename traits_t< T >::signed_t incr -) { - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - register kmp_uint32 team_id; - register kmp_uint32 nteams; - register UT trip_count; - register kmp_team_t *team; - kmp_info_t * th; - - KMP_DEBUG_ASSERT( plastiter && plower && pupper ); - KE_TRACE( 10, ("__kmpc_dist_get_bounds called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmpc_dist_get_bounds: T#%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, *plastiter, *plower, *pupper, incr ) ); - __kmp_str_free( &buff ); - } - #endif +template +static void __kmp_dist_get_bounds(ident_t *loc, kmp_int32 gtid, + kmp_int32 *plastiter, T *plower, T *pupper, + typename traits_t::signed_t incr) { + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + register kmp_uint32 team_id; + register kmp_uint32 nteams; + register UT trip_count; + register kmp_team_t *team; + kmp_info_t *th; + + KMP_DEBUG_ASSERT(plastiter && plower && pupper); + KE_TRACE(10, ("__kmpc_dist_get_bounds called (%d)\n", gtid)); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmpc_dist_get_bounds: T#%%d liter=%%d " + "iter=(%%%s, %%%s, %%%s) signed?<%s>\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, (buff, gtid, *plastiter, *plower, *pupper, incr)); + __kmp_str_free(&buff); + } +#endif - if( __kmp_env_consistency_check ) { - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - // The loop is illegal. - // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); - } + if (__kmp_env_consistency_check) { + if (incr == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, + loc); + } + if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { + // The loop is illegal. 
+ // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); } - th = __kmp_threads[gtid]; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute global trip count - if( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupper + 1; - } else if ( incr > 0 ) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; + } + th = __kmp_threads[gtid]; + team = th->th.th_team; +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; +#endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute global trip count + if (incr == 1) { + trip_count = *pupper - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupper + 1; + } else if (incr > 0) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(*pupper - *plower) / incr + 1; + } else { + trip_count = (UT)(*plower - *pupper) / (-incr) + 1; + } + + if (trip_count <= nteams) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || + __kmp_static == + kmp_sch_static_balanced); // Unknown static scheduling type. + // only some teams get single iteration, others get nothing + if (team_id < trip_count) { + *pupper = *plower = *plower + team_id * incr; } else { - trip_count = (UT)(*plower - *pupper) / ( -incr ) + 1; + *plower = *pupper + incr; // zero-trip loop } - - if( trip_count <= nteams ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - // only some teams get single iteration, others get nothing - if( team_id < trip_count ) { - *pupper = *plower = *plower + team_id * incr; - } else { - *plower = *pupper + incr; // zero-trip loop - } - if( plastiter != NULL ) - *plastiter = ( team_id == trip_count - 1 ); + if (plastiter != NULL) + *plastiter = (team_id == trip_count - 1); + } else { + if (__kmp_static == kmp_sch_static_balanced) { + register UT chunk = trip_count / nteams; + register UT extras = trip_count % nteams; + *plower += + incr * (team_id * chunk + (team_id < extras ? team_id : extras)); + *pupper = *plower + chunk * incr - (team_id < extras ? 0 : incr); + if (plastiter != NULL) + *plastiter = (team_id == nteams - 1); } else { - if( __kmp_static == kmp_sch_static_balanced ) { - register UT chunk = trip_count / nteams; - register UT extras = trip_count % nteams; - *plower += incr * ( team_id * chunk + ( team_id < extras ? team_id : extras ) ); - *pupper = *plower + chunk * incr - ( team_id < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( team_id == nteams - 1 ); - } else { - register T chunk_inc_count = - ( trip_count / nteams + ( ( trip_count % nteams ) ? 
1 : 0) ) * incr; - register T upper = *pupper; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. - *plower += team_id * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if( incr > 0 ) { - if( *pupper < *plower ) - *pupper = traits_t::max_value; - if( plastiter != NULL ) - *plastiter = *plower <= upper && *pupper > upper - incr; - if( *pupper > upper ) - *pupper = upper; // tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = traits_t::min_value; - if( plastiter != NULL ) - *plastiter = *plower >= upper && *pupper < upper - incr; - if( *pupper < upper ) - *pupper = upper; // tracker C73258 - } - } + register T chunk_inc_count = + (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; + register T upper = *pupper; + KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); + // Unknown static scheduling type. + *plower += team_id * chunk_inc_count; + *pupper = *plower + chunk_inc_count - incr; + // Check/correct bounds if needed + if (incr > 0) { + if (*pupper < *plower) + *pupper = traits_t::max_value; + if (plastiter != NULL) + *plastiter = *plower <= upper && *pupper > upper - incr; + if (*pupper > upper) + *pupper = upper; // tracker C73258 + } else { + if (*pupper > *plower) + *pupper = traits_t::min_value; + if (plastiter != NULL) + *plastiter = *plower >= upper && *pupper < upper - incr; + if (*pupper < upper) + *pupper = upper; // tracker C73258 + } } + } } -//----------------------------------------------------------------------------------------- +//----------------------------------------------------------------------------- // Dispatch routines // Transfer call to template< type T > // __kmp_dispatch_init( ident_t *loc, int gtid, enum sched_type schedule, @@ -2408,50 +2392,45 @@ extern "C" { @param st Step (or increment if you prefer) @param chunk The chunk size to block with -This function prepares the runtime to start a dynamically scheduled for loop, saving the loop arguments. +This function prepares the runtime to start a dynamically scheduled for loop, +saving the loop arguments. These functions are all identical apart from the types of the arguments. */ -void -__kmpc_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dispatch_init_4(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, + kmp_int32 ub, kmp_int32 st, kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! See @ref __kmpc_dispatch_init_4 */ -void -__kmpc_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, + kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! 
See @ref __kmpc_dispatch_init_4 */ -void -__kmpc_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int64 lb, kmp_int64 ub, - kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dispatch_init_8(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, + kmp_int64 ub, kmp_int64 st, kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! See @ref __kmpc_dispatch_init_4 */ -void -__kmpc_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint64 lb, kmp_uint64 ub, - kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, + kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! @@ -2463,46 +2442,47 @@ regular iterations dispatching we need to calc per-team iteration space. These functions are all identical apart from the types of the arguments. */ -void -__kmpc_dist_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_int32 lb, kmp_int32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_int32 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dist_dispatch_init_4(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 *p_last, + kmp_int32 lb, kmp_int32 ub, kmp_int32 st, + kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } -void -__kmpc_dist_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_uint32 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dist_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 *p_last, + kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, + kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } -void -__kmpc_dist_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_int64 lb, kmp_int64 ub, kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_int64 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dist_dispatch_init_8(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 *p_last, + kmp_int64 lb, kmp_int64 ub, kmp_int64 st, + kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } -void -__kmpc_dist_dispatch_init_8u( ident_t *loc, 
kmp_int32 gtid, enum sched_type schedule, - kmp_int32 *p_last, kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_dist_get_bounds< kmp_uint64 >( loc, gtid, p_last, &lb, &ub, st ); - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, true ); +void __kmpc_dist_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 *p_last, + kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, + kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_dist_get_bounds(loc, gtid, p_last, &lb, &ub, st); + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, true); } /*! @param loc Source code location @param gtid Global thread id -@param p_last Pointer to a flag set to one if this is the last chunk or zero otherwise +@param p_last Pointer to a flag set to one if this is the last chunk or zero +otherwise @param p_lb Pointer to the lower bound for the next chunk of work @param p_ub Pointer to the upper bound for the next chunk of work @param p_st Pointer to the stride for the next chunk of work @@ -2511,41 +2491,35 @@ __kmpc_dist_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type sche Get the next dynamically allocated chunk of work for this thread. If there is no more work, then the lb,ub and stride need not be modified. */ -int -__kmpc_dispatch_next_4( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st ) -{ - return __kmp_dispatch_next< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +int __kmpc_dispatch_next_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st) { + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); } /*! See @ref __kmpc_dispatch_next_4 */ -int -__kmpc_dispatch_next_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st ) -{ - return __kmp_dispatch_next< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +int __kmpc_dispatch_next_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint32 *p_lb, kmp_uint32 *p_ub, + kmp_int32 *p_st) { + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); } /*! See @ref __kmpc_dispatch_next_4 */ -int -__kmpc_dispatch_next_8( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st ) -{ - return __kmp_dispatch_next< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +int __kmpc_dispatch_next_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st) { + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); } /*! See @ref __kmpc_dispatch_next_4 */ -int -__kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st ) -{ - return __kmp_dispatch_next< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st ); +int __kmpc_dispatch_next_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint64 *p_lb, kmp_uint64 *p_ub, + kmp_int64 *p_st) { + return __kmp_dispatch_next(loc, gtid, p_last, p_lb, p_ub, p_st); } /*! @@ -2554,188 +2528,161 @@ __kmpc_dispatch_next_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, Mark the end of a dynamic loop. */ -void -__kmpc_dispatch_fini_4( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); +void __kmpc_dispatch_fini_4(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish(gtid, loc); } /*! 
See @ref __kmpc_dispatch_fini_4 */ -void -__kmpc_dispatch_fini_8( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); +void __kmpc_dispatch_fini_8(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish(gtid, loc); } /*! See @ref __kmpc_dispatch_fini_4 */ -void -__kmpc_dispatch_fini_4u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint32 >( gtid, loc ); +void __kmpc_dispatch_fini_4u(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish(gtid, loc); } /*! See @ref __kmpc_dispatch_fini_4 */ -void -__kmpc_dispatch_fini_8u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish< kmp_uint64 >( gtid, loc ); +void __kmpc_dispatch_fini_8u(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish(gtid, loc); } /*! @} */ -//----------------------------------------------------------------------------------------- -//Non-template routines from kmp_dispatch.cpp used in other sources +//----------------------------------------------------------------------------- +// Non-template routines from kmp_dispatch.cpp used in other sources -kmp_uint32 __kmp_eq_4( kmp_uint32 value, kmp_uint32 checker) { - return value == checker; +kmp_uint32 __kmp_eq_4(kmp_uint32 value, kmp_uint32 checker) { + return value == checker; } -kmp_uint32 __kmp_neq_4( kmp_uint32 value, kmp_uint32 checker) { - return value != checker; +kmp_uint32 __kmp_neq_4(kmp_uint32 value, kmp_uint32 checker) { + return value != checker; } -kmp_uint32 __kmp_lt_4( kmp_uint32 value, kmp_uint32 checker) { - return value < checker; +kmp_uint32 __kmp_lt_4(kmp_uint32 value, kmp_uint32 checker) { + return value < checker; } -kmp_uint32 __kmp_ge_4( kmp_uint32 value, kmp_uint32 checker) { - return value >= checker; +kmp_uint32 __kmp_ge_4(kmp_uint32 value, kmp_uint32 checker) { + return value >= checker; } -kmp_uint32 __kmp_le_4( kmp_uint32 value, kmp_uint32 checker) { - return value <= checker; +kmp_uint32 __kmp_le_4(kmp_uint32 value, kmp_uint32 checker) { + return value <= checker; } kmp_uint32 -__kmp_wait_yield_4(volatile kmp_uint32 * spinner, - kmp_uint32 checker, - kmp_uint32 (* pred)( kmp_uint32, kmp_uint32 ) - , void * obj // Higher-level synchronization object, or NULL. - ) -{ - // note: we may not belong to a team at this point - register volatile kmp_uint32 * spin = spinner; - register kmp_uint32 check = checker; - register kmp_uint32 spins; - register kmp_uint32 (*f) ( kmp_uint32, kmp_uint32 ) = pred; - register kmp_uint32 r; - - KMP_FSYNC_SPIN_INIT( obj, (void*) spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while(!f(r = TCR_4(*spin), check)) { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* GEH - remove this since it was accidentally introduced when kmp_wait was split. - It causes problems with infinite recursion because of exit lock */ - /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) - __kmp_abort_thread(); */ - - /* if we have waited a bit, or are oversubscribed, yield */ - /* pause is in the following code */ - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); - return r; +__kmp_wait_yield_4(volatile kmp_uint32 *spinner, kmp_uint32 checker, + kmp_uint32 (*pred)(kmp_uint32, kmp_uint32), + void *obj // Higher-level synchronization object, or NULL. 
+ ) { + // note: we may not belong to a team at this point + register volatile kmp_uint32 *spin = spinner; + register kmp_uint32 check = checker; + register kmp_uint32 spins; + register kmp_uint32 (*f)(kmp_uint32, kmp_uint32) = pred; + register kmp_uint32 r; + + KMP_FSYNC_SPIN_INIT(obj, (void *)spin); + KMP_INIT_YIELD(spins); + // main wait spin loop + while (!f(r = TCR_4(*spin), check)) { + KMP_FSYNC_SPIN_PREPARE(obj); + /* GEH - remove this since it was accidentally introduced when kmp_wait was + split. It causes problems with infinite recursion because of exit lock */ + /* if ( TCR_4(__kmp_global.g.g_done) && __kmp_global.g.g_abort) + __kmp_abort_thread(); */ + + /* if we have waited a bit, or are oversubscribed, yield */ + /* pause is in the following code */ + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_YIELD_SPIN(spins); + } + KMP_FSYNC_SPIN_ACQUIRED(obj); + return r; } -void -__kmp_wait_yield_4_ptr(void *spinner, - kmp_uint32 checker, - kmp_uint32 (*pred)( void *, kmp_uint32 ), - void *obj // Higher-level synchronization object, or NULL. - ) -{ - // note: we may not belong to a team at this point - register void *spin = spinner; - register kmp_uint32 check = checker; - register kmp_uint32 spins; - register kmp_uint32 (*f) ( void *, kmp_uint32 ) = pred; - - KMP_FSYNC_SPIN_INIT( obj, spin ); - KMP_INIT_YIELD( spins ); - // main wait spin loop - while ( !f( spin, check ) ) { - KMP_FSYNC_SPIN_PREPARE( obj ); - /* if we have waited a bit, or are oversubscribed, yield */ - /* pause is in the following code */ - KMP_YIELD( TCR_4( __kmp_nth ) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } - KMP_FSYNC_SPIN_ACQUIRED( obj ); +void __kmp_wait_yield_4_ptr( + void *spinner, kmp_uint32 checker, kmp_uint32 (*pred)(void *, kmp_uint32), + void *obj // Higher-level synchronization object, or NULL. 
+ ) { + // note: we may not belong to a team at this point + register void *spin = spinner; + register kmp_uint32 check = checker; + register kmp_uint32 spins; + register kmp_uint32 (*f)(void *, kmp_uint32) = pred; + + KMP_FSYNC_SPIN_INIT(obj, spin); + KMP_INIT_YIELD(spins); + // main wait spin loop + while (!f(spin, check)) { + KMP_FSYNC_SPIN_PREPARE(obj); + /* if we have waited a bit, or are oversubscribed, yield */ + /* pause is in the following code */ + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_YIELD_SPIN(spins); + } + KMP_FSYNC_SPIN_ACQUIRED(obj); } } // extern "C" #ifdef KMP_GOMP_COMPAT -void -__kmp_aux_dispatch_init_4( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int32 lb, kmp_int32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_int32 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); +void __kmp_aux_dispatch_init_4(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int32 lb, + kmp_int32 ub, kmp_int32 st, kmp_int32 chunk, + int push_ws) { + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, + push_ws); } -void -__kmp_aux_dispatch_init_4u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint32 lb, kmp_uint32 ub, kmp_int32 st, - kmp_int32 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_uint32 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); +void __kmp_aux_dispatch_init_4u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint32 lb, + kmp_uint32 ub, kmp_int32 st, kmp_int32 chunk, + int push_ws) { + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, + push_ws); } -void -__kmp_aux_dispatch_init_8( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_int64 lb, kmp_int64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_int64 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); +void __kmp_aux_dispatch_init_8(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_int64 lb, + kmp_int64 ub, kmp_int64 st, kmp_int64 chunk, + int push_ws) { + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, + push_ws); } -void -__kmp_aux_dispatch_init_8u( ident_t *loc, kmp_int32 gtid, enum sched_type schedule, - kmp_uint64 lb, kmp_uint64 ub, kmp_int64 st, - kmp_int64 chunk, int push_ws ) -{ - __kmp_dispatch_init< kmp_uint64 >( loc, gtid, schedule, lb, ub, st, chunk, - push_ws ); +void __kmp_aux_dispatch_init_8u(ident_t *loc, kmp_int32 gtid, + enum sched_type schedule, kmp_uint64 lb, + kmp_uint64 ub, kmp_int64 st, kmp_int64 chunk, + int push_ws) { + __kmp_dispatch_init(loc, gtid, schedule, lb, ub, st, chunk, + push_ws); } -void -__kmp_aux_dispatch_fini_chunk_4( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); +void __kmp_aux_dispatch_fini_chunk_4(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish_chunk(gtid, loc); } -void -__kmp_aux_dispatch_fini_chunk_8( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); +void __kmp_aux_dispatch_fini_chunk_8(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish_chunk(gtid, loc); } -void -__kmp_aux_dispatch_fini_chunk_4u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint32 >( gtid, loc ); +void __kmp_aux_dispatch_fini_chunk_4u(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish_chunk(gtid, loc); } -void -__kmp_aux_dispatch_fini_chunk_8u( ident_t *loc, kmp_int32 gtid ) -{ - __kmp_dispatch_finish_chunk< kmp_uint64 >( gtid, loc ); +void 
__kmp_aux_dispatch_fini_chunk_8u(ident_t *loc, kmp_int32 gtid) { + __kmp_dispatch_finish_chunk(gtid, loc); } #endif /* KMP_GOMP_COMPAT */ /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - diff --git a/openmp/runtime/src/kmp_environment.cpp b/openmp/runtime/src/kmp_environment.cpp index d4d95df..2cbd88b 100644 --- a/openmp/runtime/src/kmp_environment.cpp +++ b/openmp/runtime/src/kmp_environment.cpp @@ -13,583 +13,499 @@ //===----------------------------------------------------------------------===// -/* - ------------------------------------------------------------------------------------------------ - We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of - loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) - unavailable. getenv() apparently gets a clean copy of the env variables as they existed - at the start of the run. - JH 12/23/2002 - ------------------------------------------------------------------------------------------------ - On Windows* OS, there are two environments (at least, see below): +/* We use GetEnvironmentVariable for Windows* OS instead of getenv because the + act of loading a DLL on Windows* OS makes any user-set environment variables + (i.e. with putenv()) unavailable. getenv() apparently gets a clean copy of + the env variables as they existed at the start of the run. JH 12/23/2002 + + On Windows* OS, there are two environments (at least, see below): - 1. Environment maintained by Windows* OS on IA-32 architecture. - Accessible through GetEnvironmentVariable(), - SetEnvironmentVariable(), and GetEnvironmentStrings(). + 1. Environment maintained by Windows* OS on IA-32 architecture. Accessible + through GetEnvironmentVariable(), SetEnvironmentVariable(), and + GetEnvironmentStrings(). - 2. Environment maintained by C RTL. Accessible through getenv(), putenv(). + 2. Environment maintained by C RTL. Accessible through getenv(), putenv(). - putenv() function updates both C and Windows* OS on IA-32 architecture. getenv() function - search for variables in C RTL environment only. Windows* OS on IA-32 architecture functions work *only* - with Windows* OS on IA-32 architecture. + putenv() function updates both C and Windows* OS on IA-32 architecture. + getenv() function search for variables in C RTL environment only. + Windows* OS on IA-32 architecture functions work *only* with Windows* OS on + IA-32 architecture. - Windows* OS on IA-32 architecture maintained by OS, so there is always only one Windows* OS on - IA-32 architecture per process. Changes in Windows* OS on IA-32 architecture are process-visible. + Windows* OS on IA-32 architecture maintained by OS, so there is always only + one Windows* OS on IA-32 architecture per process. Changes in Windows* OS on + IA-32 architecture are process-visible. - C environment maintained by C RTL. Multiple copies of C RTL may be present in the process, and - each C RTL maintains its own environment. :-( + C environment maintained by C RTL. Multiple copies of C RTL may be present + in the process, and each C RTL maintains its own environment. :-( - Thus, proper way to work with environment on Windows* OS is: + Thus, proper way to work with environment on Windows* OS is: - 1. Set variables with putenv() function -- both C and Windows* OS on - IA-32 architecture are being updated. 
Windows* OS on - IA-32 architecture may be considered as primary target, - while updating C RTL environment is a free bonus. + 1. Set variables with putenv() function -- both C and Windows* OS on IA-32 + architecture are being updated. Windows* OS on IA-32 architecture may be + considered primary target, while updating C RTL environment is free bonus. - 2. Get variables with GetEnvironmentVariable() -- getenv() does not - search Windows* OS on IA-32 architecture, and can not see variables - set with SetEnvironmentVariable(). + 2. Get variables with GetEnvironmentVariable() -- getenv() does not + search Windows* OS on IA-32 architecture, and can not see variables + set with SetEnvironmentVariable(). - 2007-04-05 -- lev - ------------------------------------------------------------------------------------------------ + 2007-04-05 -- lev */ #include "kmp_environment.h" -#include "kmp_os.h" // KMP_OS_*. -#include "kmp.h" // -#include "kmp_str.h" // __kmp_str_*(). +#include "kmp.h" // #include "kmp_i18n.h" +#include "kmp_os.h" // KMP_OS_*. +#include "kmp_str.h" // __kmp_str_*(). #if KMP_OS_UNIX - #include // getenv, setenv, unsetenv. - #include // strlen, strcpy. - #if KMP_OS_DARWIN - #include - #define environ (*_NSGetEnviron()) - #else - extern char * * environ; - #endif +#include // getenv, setenv, unsetenv. +#include // strlen, strcpy. +#if KMP_OS_DARWIN +#include +#define environ (*_NSGetEnviron()) +#else +extern char **environ; +#endif #elif KMP_OS_WINDOWS - #include // GetEnvironmentVariable, SetEnvironmentVariable, GetLastError. +#include // GetEnvironmentVariable, SetEnvironmentVariable, +// GetLastError. #else - #error Unknown or unsupported OS. +#error Unknown or unsupported OS. #endif - // TODO: Eliminate direct memory allocations, use string operations instead. -static inline -void * -allocate( - size_t size -) { - void * ptr = KMP_INTERNAL_MALLOC( size ); - if ( ptr == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - return ptr; +static inline void *allocate(size_t size) { + void *ptr = KMP_INTERNAL_MALLOC(size); + if (ptr == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + return ptr; } // allocate +char *__kmp_env_get(char const *name) { -char * -__kmp_env_get( char const * name ) { - - char * result = NULL; - - #if KMP_OS_UNIX - char const * value = getenv( name ); - if ( value != NULL ) { - size_t len = KMP_STRLEN( value ) + 1; - result = (char *) KMP_INTERNAL_MALLOC( len ); - if ( result == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_STRNCPY_S( result, len, value, len ); - }; // if - #elif KMP_OS_WINDOWS - /* - We use GetEnvironmentVariable for Windows* OS instead of getenv because the act of - loading a DLL on Windows* OS makes any user-set environment variables (i.e. with putenv()) - unavailable. getenv() apparently gets a clean copy of the env variables as they existed - at the start of the run. - JH 12/23/2002 - */ - DWORD rc; - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( ! rc ) { - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - // Variable is not found, it's ok, just continue. - } else { - DWORD len = rc; - result = (char *) KMP_INTERNAL_MALLOC( len ); - if ( result == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - rc = GetEnvironmentVariable( name, result, len ); - if ( ! rc ) { - // GetEnvironmentVariable() may return 0 if variable is empty. 
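On the Unix path above, getenv() returns a pointer into the process environment, so __kmp_env_get copies the value out before handing it to the caller. A rough standalone sketch of that copy-out pattern, with plain malloc/memcpy standing in for KMP_INTERNAL_MALLOC and KMP_STRNCPY_S (illustration only; the caller frees the result):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Duplicate an environment variable's value, or return NULL if it is unset. */
static char *env_get_copy(const char *name) {
  const char *value = getenv(name);
  if (value == NULL)
    return NULL;
  size_t len = strlen(value) + 1; /* include the terminating NUL */
  char *copy = (char *)malloc(len);
  if (copy != NULL)
    memcpy(copy, value, len); /* private copy, independent of the environment */
  return copy;
}

int main(void) {
  char *home = env_get_copy("HOME");
  printf("HOME=%s\n", home ? home : "(unset)");
  free(home);
  return 0;
}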
- // In such a case GetLastError() returns ERROR_SUCCESS. - DWORD error = GetLastError(); - if ( error != ERROR_SUCCESS ) { - // Unexpected error. The variable should be in the environment, - // and buffer should be large enough. - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - KMP_INTERNAL_FREE( (void *) result ); - result = NULL; - }; // if - }; // if - }; // if - #else - #error Unknown or unsupported OS. - #endif - - return result; + char *result = NULL; -} // func __kmp_env_get +#if KMP_OS_UNIX + char const *value = getenv(name); + if (value != NULL) { + size_t len = KMP_STRLEN(value) + 1; + result = (char *)KMP_INTERNAL_MALLOC(len); + if (result == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + KMP_STRNCPY_S(result, len, value, len); + }; // if +#elif KMP_OS_WINDOWS + /* We use GetEnvironmentVariable for Windows* OS instead of getenv because the + act of loading a DLL on Windows* OS makes any user-set environment + variables (i.e. with putenv()) unavailable. getenv() apparently gets a + clean copy of the env variables as they existed at the start of the run. + JH 12/23/2002 */ + DWORD rc; + rc = GetEnvironmentVariable(name, NULL, 0); + if (!rc) { + DWORD error = GetLastError(); + if (error != ERROR_ENVVAR_NOT_FOUND) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + }; // if + // Variable is not found, it's ok, just continue. + } else { + DWORD len = rc; + result = (char *)KMP_INTERNAL_MALLOC(len); + if (result == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + rc = GetEnvironmentVariable(name, result, len); + if (!rc) { + // GetEnvironmentVariable() may return 0 if variable is empty. + // In such a case GetLastError() returns ERROR_SUCCESS. + DWORD error = GetLastError(); + if (error != ERROR_SUCCESS) { + // Unexpected error. The variable should be in the environment, + // and buffer should be large enough. + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + KMP_INTERNAL_FREE((void *)result); + result = NULL; + }; // if + }; // if + }; // if +#else +#error Unknown or unsupported OS. +#endif + return result; + +} // func __kmp_env_get // TODO: Find and replace all regular free() with __kmp_env_free(). -void -__kmp_env_free( char const * * value ) { +void __kmp_env_free(char const **value) { - KMP_DEBUG_ASSERT( value != NULL ); - KMP_INTERNAL_FREE( (void *) * value ); - * value = NULL; + KMP_DEBUG_ASSERT(value != NULL); + KMP_INTERNAL_FREE((void *)*value); + *value = NULL; } // func __kmp_env_free +int __kmp_env_exists(char const *name) { - -int -__kmp_env_exists( char const * name ) { - - #if KMP_OS_UNIX - char const * value = getenv( name ); - return ( ( value == NULL ) ? ( 0 ) : ( 1 ) ); - #elif KMP_OS_WINDOWS - DWORD rc; - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( rc == 0 ) { - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - return 0; - }; // if - return 1; - #else - #error Unknown or unsupported OS. - #endif +#if KMP_OS_UNIX + char const *value = getenv(name); + return ((value == NULL) ? 
(0) : (1)); +#elif KMP_OS_WINDOWS + DWORD rc; + rc = GetEnvironmentVariable(name, NULL, 0); + if (rc == 0) { + DWORD error = GetLastError(); + if (error != ERROR_ENVVAR_NOT_FOUND) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + }; // if + return 0; + }; // if + return 1; +#else +#error Unknown or unsupported OS. +#endif } // func __kmp_env_exists +void __kmp_env_set(char const *name, char const *value, int overwrite) { - -void -__kmp_env_set( char const * name, char const * value, int overwrite ) { - - #if KMP_OS_UNIX - int rc = setenv( name, value, overwrite ); - if ( rc != 0 ) { - // Dead code. I tried to put too many variables into Linux* OS - // environment on IA-32 architecture. When application consumes - // more than ~2.5 GB of memory, entire system feels bad. Sometimes - // application is killed (by OS?), sometimes system stops - // responding... But this error message never appears. --ln - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_HNT( NotEnoughMemory ), - __kmp_msg_null - ); - }; // if - #elif KMP_OS_WINDOWS - BOOL rc; - if ( ! overwrite ) { - rc = GetEnvironmentVariable( name, NULL, 0 ); - if ( rc ) { - // Variable exists, do not overwrite. - return; - }; // if - DWORD error = GetLastError(); - if ( error != ERROR_ENVVAR_NOT_FOUND ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - }; // if - rc = SetEnvironmentVariable( name, value ); - if ( ! rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - #else - #error Unknown or unsupported OS. - #endif +#if KMP_OS_UNIX + int rc = setenv(name, value, overwrite); + if (rc != 0) { + // Dead code. I tried to put too many variables into Linux* OS + // environment on IA-32 architecture. When application consumes + // more than ~2.5 GB of memory, entire system feels bad. Sometimes + // application is killed (by OS?), sometimes system stops + // responding... But this error message never appears. --ln + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetEnvVar, name), + KMP_HNT(NotEnoughMemory), __kmp_msg_null); + }; // if +#elif KMP_OS_WINDOWS + BOOL rc; + if (!overwrite) { + rc = GetEnvironmentVariable(name, NULL, 0); + if (rc) { + // Variable exists, do not overwrite. + return; + }; // if + DWORD error = GetLastError(); + if (error != ERROR_ENVVAR_NOT_FOUND) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantGetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + }; // if + }; // if + rc = SetEnvironmentVariable(name, value); + if (!rc) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + }; // if +#else +#error Unknown or unsupported OS. +#endif } // func __kmp_env_set +void __kmp_env_unset(char const *name) { - -void -__kmp_env_unset( char const * name ) { - - #if KMP_OS_UNIX - unsetenv( name ); - #elif KMP_OS_WINDOWS - BOOL rc = SetEnvironmentVariable( name, NULL ); - if ( ! rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEnvVar, name ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - #else - #error Unknown or unsupported OS. 
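The overwrite parameter handled by __kmp_env_set above maps directly onto the third argument of POSIX setenv(): when it is zero, an existing value is left untouched. A small POSIX-only sketch of that behaviour (DEMO_VAR is an arbitrary example name, not one the runtime uses):

#include <stdio.h>
#include <stdlib.h>

int main(void) {
  setenv("DEMO_VAR", "first", 1);  /* create or replace the variable */
  setenv("DEMO_VAR", "second", 0); /* overwrite == 0: existing value is kept */
  printf("DEMO_VAR=%s\n", getenv("DEMO_VAR")); /* prints DEMO_VAR=first */
  return 0;
}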
- #endif +#if KMP_OS_UNIX + unsetenv(name); +#elif KMP_OS_WINDOWS + BOOL rc = SetEnvironmentVariable(name, NULL); + if (!rc) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetEnvVar, name), KMP_ERR(error), + __kmp_msg_null); + }; // if +#else +#error Unknown or unsupported OS. +#endif } // func __kmp_env_unset -// ------------------------------------------------------------------------------------------------- - -/* - Intel OpenMP RTL string representation of environment: just a string of characters, variables - are separated with vertical bars, e. g.: +/* Intel OpenMP RTL string representation of environment: just a string of + characters, variables are separated with vertical bars, e. g.: "KMP_WARNINGS=0|KMP_AFFINITY=compact|" Empty variables are allowed and ignored: "||KMP_WARNINGS=1||" +*/ + +static void +___kmp_env_blk_parse_string(kmp_env_blk_t *block, // M: Env block to fill. + char const *env // I: String to parse. + ) { + + char const chr_delimiter = '|'; + char const str_delimiter[] = {chr_delimiter, 0}; + + char *bulk = NULL; + kmp_env_var_t *vars = NULL; + int count = 0; // Number of used elements in vars array. + int delimiters = 0; // Number of delimiters in input string. + + // Copy original string, we will modify the copy. + bulk = __kmp_str_format("%s", env); + + // Loop thru all the vars in environment block. Count delimiters (maximum + // number of variables is number of delimiters plus one). + { + char const *ptr = bulk; + for (;;) { + ptr = strchr(ptr, chr_delimiter); + if (ptr == NULL) { + break; + }; // if + ++delimiters; + ptr += 1; + }; // forever + } + + // Allocate vars array. + vars = (kmp_env_var_t *)allocate((delimiters + 1) * sizeof(kmp_env_var_t)); + + // Loop thru all the variables. + { + char *var; // Pointer to variable (both name and value). + char *name; // Pointer to name of variable. + char *value; // Pointer to value. + char *buf; // Buffer for __kmp_str_token() function. + var = __kmp_str_token(bulk, str_delimiter, &buf); // Get the first var. + while (var != NULL) { + // Save found variable in vars array. + __kmp_str_split(var, '=', &name, &value); + KMP_DEBUG_ASSERT(count < delimiters + 1); + vars[count].name = name; + vars[count].value = value; + ++count; + // Get the next var. + var = __kmp_str_token(NULL, str_delimiter, &buf); + }; // while + } + + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; + +}; // ___kmp_env_blk_parse_string + +/* Windows* OS (actually, DOS) environment block is a piece of memory with + environment variables. Each variable is terminated with zero byte, entire + block is terminated with one extra zero byte, so we have two zero bytes at + the end of environment block, e. g.: + + "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00" + It is not clear how empty environment is represented. "\x00\x00"? */ -static -void -___kmp_env_blk_parse_string( - kmp_env_blk_t * block, // M: Env block to fill. - char const * env // I: String to parse. -) { +#if KMP_OS_WINDOWS +static void ___kmp_env_blk_parse_windows( + kmp_env_blk_t *block, // M: Env block to fill. + char const *env // I: Pointer to Windows* OS (DOS) environment block. + ) { - char const chr_delimiter = '|'; - char const str_delimiter[] = { chr_delimiter, 0 }; + char *bulk = NULL; + kmp_env_var_t *vars = NULL; + int count = 0; // Number of used elements in vars array. + int size = 0; // Size of bulk. - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; // Number of used elements in vars array. 
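The '|'-separated block format described above ("KMP_WARNINGS=0|KMP_AFFINITY=compact|", with empty segments allowed and ignored) needs nothing more than a scan for delimiters and a split at the first '='. A self-contained sketch using std::string in place of __kmp_str_token()/__kmp_str_split() (illustration only, not the runtime's parser):

#include <cstdio>
#include <string>
#include <utility>
#include <vector>

// Split "NAME=value|NAME=value|" into (name, value) pairs, skipping "||" gaps.
static std::vector<std::pair<std::string, std::string>>
parse_env_block(const std::string &env) {
  std::vector<std::pair<std::string, std::string>> vars;
  size_t start = 0;
  while (start <= env.size()) {
    size_t bar = env.find('|', start);
    if (bar == std::string::npos)
      bar = env.size();
    std::string item = env.substr(start, bar - start);
    if (!item.empty()) { // empty segments ("||") are allowed and ignored
      size_t eq = item.find('=');
      if (eq == std::string::npos)
        vars.emplace_back(item, ""); // name with no value
      else
        vars.emplace_back(item.substr(0, eq), item.substr(eq + 1));
    }
    start = bar + 1;
  }
  return vars;
}

int main() {
  for (auto &v : parse_env_block("KMP_WARNINGS=0|KMP_AFFINITY=compact|"))
    std::printf("%s -> %s\n", v.first.c_str(), v.second.c_str());
  return 0;
}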
- int delimiters = 0; // Number of delimiters in input string. + char *name; // Pointer to name of variable. + char *value; // Pointer to value. - // Copy original string, we will modify the copy. - bulk = __kmp_str_format( "%s", env ); + if (env != NULL) { - // Loop thru all the vars in environment block. Count delimiters (maximum number of variables - // is number of delimiters plus one). + // Loop thru all the vars in environment block. Count variables, find size + // of block. { - char const * ptr = bulk; - for ( ; ; ) { - ptr = strchr( ptr, chr_delimiter ); - if ( ptr == NULL ) { - break; - }; // if - ++ delimiters; - ptr += 1; - }; // forever + char const *var; // Pointer to beginning of var. + int len; // Length of variable. + count = 0; + var = + env; // The first variable starts and beginning of environment block. + len = KMP_STRLEN(var); + while (len != 0) { + ++count; + size = size + len + 1; + var = var + len + + 1; // Move pointer to the beginning of the next variable. + len = KMP_STRLEN(var); + }; // while + size = + size + 1; // Total size of env block, including terminating zero byte. } + // Copy original block to bulk, we will modify bulk, not original block. + bulk = (char *)allocate(size); + KMP_MEMCPY_S(bulk, size, env, size); // Allocate vars array. - vars = (kmp_env_var_t *) allocate( ( delimiters + 1 ) * sizeof( kmp_env_var_t ) ); + vars = (kmp_env_var_t *)allocate(count * sizeof(kmp_env_var_t)); - // Loop thru all the variables. + // Loop thru all the vars, now in bulk. { - char * var; // Pointer to variable (both name and value). - char * name; // Pointer to name of variable. - char * value; // Pointer to value. - char * buf; // Buffer for __kmp_str_token() function. - var = __kmp_str_token( bulk, str_delimiter, & buf ); // Get the first var. - while ( var != NULL ) { - // Save found variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - KMP_DEBUG_ASSERT( count < delimiters + 1 ); - vars[ count ].name = name; - vars[ count ].value = value; - ++ count; - // Get the next var. - var = __kmp_str_token( NULL, str_delimiter, & buf ); - }; // while + char *var; // Pointer to beginning of var. + int len; // Length of variable. + count = 0; + var = bulk; + len = KMP_STRLEN(var); + while (len != 0) { + // Save variable in vars array. + __kmp_str_split(var, '=', &name, &value); + vars[count].name = name; + vars[count].value = value; + ++count; + // Get the next var. + var = var + len + 1; + len = KMP_STRLEN(var); + }; // while } - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; - -}; // ___kmp_env_blk_parse_string - - + }; // if -/* - Windows* OS (actually, DOS) environment block is a piece of memory with environment variables. Each - variable is terminated with zero byte, entire block is terminated with one extra zero byte, so - we have two zero bytes at the end of environment block, e. g.: - - "HOME=C:\\users\\lev\x00OS=Windows_NT\x00\x00" - - It is not clear how empty environment is represented. "\x00\x00"? -*/ - -#if KMP_OS_WINDOWS -static -void -___kmp_env_blk_parse_windows( - kmp_env_blk_t * block, // M: Env block to fill. - char const * env // I: Pointer to Windows* OS (DOS) environment block. -) { - - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; // Number of used elements in vars array. - int size = 0; // Size of bulk. - - char * name; // Pointer to name of variable. - char * value; // Pointer to value. - - if ( env != NULL ) { - - // Loop thru all the vars in environment block. 
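The double-zero-terminated layout described above can be walked without any OS calls: every entry ends at a '\0', and an empty entry marks the end of the whole block. A standalone sketch over a locally built block (so it runs on any OS), mirroring the counting pass ___kmp_env_blk_parse_windows performs on the real GetEnvironmentStrings() result:

#include <stdio.h>
#include <string.h>

int main(void) {
  /* the string literal's implicit terminator supplies the second, final '\0' */
  const char block[] = "HOME=C:\\users\\lev\0OS=Windows_NT\0";
  const char *p = block;
  while (*p != '\0') {  /* an empty entry terminates the block */
    printf("entry: %s\n", p);
    p += strlen(p) + 1; /* skip the entry and its terminating zero byte */
  }
  return 0;
}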
Count variables, find size of block. - { - char const * var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = env; // The first variable starts and beginning of environment block. - len = KMP_STRLEN( var ); - while ( len != 0 ) { - ++ count; - size = size + len + 1; - var = var + len + 1; // Move pointer to the beginning of the next variable. - len = KMP_STRLEN( var ); - }; // while - size = size + 1; // Total size of env block, including terminating zero byte. - } - - // Copy original block to bulk, we will modify bulk, not original block. - bulk = (char *) allocate( size ); - KMP_MEMCPY_S( bulk, size, env, size ); - // Allocate vars array. - vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); - - // Loop thru all the vars, now in bulk. - { - char * var; // Pointer to beginning of var. - int len; // Length of variable. - count = 0; - var = bulk; - len = KMP_STRLEN( var ); - while ( len != 0 ) { - // Save variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - vars[ count ].name = name; - vars[ count ].value = value; - ++ count; - // Get the next var. - var = var + len + 1; - len = KMP_STRLEN( var ); - }; // while - } - - }; // if - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; }; // ___kmp_env_blk_parse_windows #endif - -/* - Unix environment block is a array of pointers to variables, last pointer in array is NULL: +/* Unix environment block is a array of pointers to variables, last pointer in + array is NULL: { "HOME=/home/lev", "TERM=xterm", NULL } */ -static -void -___kmp_env_blk_parse_unix( - kmp_env_blk_t * block, // M: Env block to fill. - char * * env // I: Unix environment to parse. -) { +static void +___kmp_env_blk_parse_unix(kmp_env_blk_t *block, // M: Env block to fill. + char **env // I: Unix environment to parse. + ) { + + char *bulk = NULL; + kmp_env_var_t *vars = NULL; + int count = 0; + int size = 0; // Size of bulk. + + // Count number of variables and length of required bulk. + { + count = 0; + size = 0; + while (env[count] != NULL) { + size += KMP_STRLEN(env[count]) + 1; + ++count; + }; // while + } + + // Allocate memory. + bulk = (char *)allocate(size); + vars = (kmp_env_var_t *)allocate(count * sizeof(kmp_env_var_t)); + + // Loop thru all the vars. + { + char *var; // Pointer to beginning of var. + char *name; // Pointer to name of variable. + char *value; // Pointer to value. + int len; // Length of variable. + int i; + var = bulk; + for (i = 0; i < count; ++i) { + // Copy variable to bulk. + len = KMP_STRLEN(env[i]); + KMP_MEMCPY_S(var, size, env[i], len + 1); + // Save found variable in vars array. + __kmp_str_split(var, '=', &name, &value); + vars[i].name = name; + vars[i].value = value; + // Move pointer. + var += len + 1; + }; // for + } - char * bulk = NULL; - kmp_env_var_t * vars = NULL; - int count = 0; - int size = 0; // Size of bulk. + // Fill out result. + block->bulk = bulk; + block->vars = vars; + block->count = count; - // Count number of variables and length of required bulk. - { - count = 0; - size = 0; - while ( env[ count ] != NULL ) { - size += KMP_STRLEN( env[ count ] ) + 1; - ++ count; - }; // while - } +}; // ___kmp_env_blk_parse_unix - // Allocate memory. - bulk = (char *) allocate( size ); - vars = (kmp_env_var_t *) allocate( count * sizeof( kmp_env_var_t ) ); +void __kmp_env_blk_init(kmp_env_blk_t *block, // M: Block to initialize. 
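For the Unix representation described above, a NULL-terminated array of "NAME=value" strings, sizing the block is a single walk over environ. A POSIX-only sketch (on Darwin the runtime reaches the same array through _NSGetEnviron(), as the earlier #define in this file shows):

#include <stdio.h>

extern char **environ; /* NULL-terminated array of "NAME=value" strings */

int main(void) {
  int count = 0;
  for (char **p = environ; *p != NULL; ++p) /* stop at the NULL sentinel */
    ++count;
  printf("%d environment variables\n", count);
  return 0;
}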
+ char const *bulk // I: Initialization string, or NULL. + ) { - // Loop thru all the vars. + if (bulk != NULL) { + ___kmp_env_blk_parse_string(block, bulk); + } else { +#if KMP_OS_UNIX + ___kmp_env_blk_parse_unix(block, environ); +#elif KMP_OS_WINDOWS { - char * var; // Pointer to beginning of var. - char * name; // Pointer to name of variable. - char * value; // Pointer to value. - int len; // Length of variable. - int i; - var = bulk; - for ( i = 0; i < count; ++ i ) { - // Copy variable to bulk. - len = KMP_STRLEN( env[ i ] ); - KMP_MEMCPY_S( var, size, env[ i ], len + 1 ); - // Save found variable in vars array. - __kmp_str_split( var, '=', & name, & value ); - vars[ i ].name = name; - vars[ i ].value = value; - // Move pointer. - var += len + 1; - }; // for + char *mem = GetEnvironmentStrings(); + if (mem == NULL) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantGetEnvironment), KMP_ERR(error), + __kmp_msg_null); + }; // if + ___kmp_env_blk_parse_windows(block, mem); + FreeEnvironmentStrings(mem); } - - // Fill out result. - block->bulk = bulk; - block->vars = vars; - block->count = count; - -}; // ___kmp_env_blk_parse_unix - - - -void -__kmp_env_blk_init( - kmp_env_blk_t * block, // M: Block to initialize. - char const * bulk // I: Initialization string, or NULL. -) { - - if ( bulk != NULL ) { - ___kmp_env_blk_parse_string( block, bulk ); - } else { - #if KMP_OS_UNIX - ___kmp_env_blk_parse_unix( block, environ ); - #elif KMP_OS_WINDOWS - { - char * mem = GetEnvironmentStrings(); - if ( mem == NULL ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantGetEnvironment ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - ___kmp_env_blk_parse_windows( block, mem ); - FreeEnvironmentStrings( mem ); - } - #else - #error Unknown or unsupported OS. - #endif - }; // if +#else +#error Unknown or unsupported OS. +#endif + }; // if } // __kmp_env_blk_init - - -static -int -___kmp_env_var_cmp( // Comparison function for qsort(). - kmp_env_var_t const * lhs, - kmp_env_var_t const * rhs -) { - return strcmp( lhs->name, rhs->name ); +static int ___kmp_env_var_cmp( // Comparison function for qsort(). + kmp_env_var_t const *lhs, kmp_env_var_t const *rhs) { + return strcmp(lhs->name, rhs->name); } -void -__kmp_env_blk_sort( - kmp_env_blk_t * block // M: Block of environment variables to sort. -) { +void __kmp_env_blk_sort( + kmp_env_blk_t *block // M: Block of environment variables to sort. + ) { - qsort( - (void *) block->vars, - block->count, - sizeof( kmp_env_var_t ), - ( int ( * )( void const *, void const * ) ) & ___kmp_env_var_cmp - ); + qsort((void *)block->vars, block->count, sizeof(kmp_env_var_t), + (int (*)(void const *, void const *)) & ___kmp_env_var_cmp); } // __kmp_env_block_sort +void __kmp_env_blk_free( + kmp_env_blk_t *block // M: Block of environment variables to free. + ) { + KMP_INTERNAL_FREE((void *)block->vars); + __kmp_str_free(&(block->bulk)); -void -__kmp_env_blk_free( - kmp_env_blk_t * block // M: Block of environment variables to free. -) { - - KMP_INTERNAL_FREE( (void *) block->vars ); - __kmp_str_free(&(block->bulk)); - - block->count = 0; - block->vars = NULL; + block->count = 0; + block->vars = NULL; } // __kmp_env_blk_free +char const * // R: Value of variable or NULL if variable does not exist. + __kmp_env_blk_var( + kmp_env_blk_t *block, // I: Block of environment variables. + char const *name // I: Name of variable to find. + ) { - -char const * // R: Value of variable or NULL if variable does not exist. 
-__kmp_env_blk_var( - kmp_env_blk_t * block, // I: Block of environment variables. - char const * name // I: Name of variable to find. -) { - - int i; - for ( i = 0; i < block->count; ++ i ) { - if ( strcmp( block->vars[ i ].name, name ) == 0 ) { - return block->vars[ i ].value; - }; // if - }; // for - return NULL; + int i; + for (i = 0; i < block->count; ++i) { + if (strcmp(block->vars[i].name, name) == 0) { + return block->vars[i].value; + }; // if + }; // for + return NULL; } // __kmp_env_block_var - // end of file // diff --git a/openmp/runtime/src/kmp_environment.h b/openmp/runtime/src/kmp_environment.h index 243b547..f59f3e5 100644 --- a/openmp/runtime/src/kmp_environment.h +++ b/openmp/runtime/src/kmp_environment.h @@ -20,56 +20,56 @@ extern "C" { #endif -// Return a copy of the value of environment variable or NULL if the variable does not exist. +// Return a copy of the value of environment variable or NULL if the variable +// does not exist. // *Note*: Returned pointed *must* be freed after use with __kmp_env_free(). -char * __kmp_env_get( char const * name ); -void __kmp_env_free( char const * * value ); +char *__kmp_env_get(char const *name); +void __kmp_env_free(char const **value); // Return 1 if the environment variable exists or 0 if does not exist. -int __kmp_env_exists( char const * name ); +int __kmp_env_exists(char const *name); // Set the environment variable. -void __kmp_env_set( char const * name, char const * value, int overwrite ); +void __kmp_env_set(char const *name, char const *value, int overwrite); // Unset (remove) environment variable. -void __kmp_env_unset( char const * name ); +void __kmp_env_unset(char const *name); - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Working with environment blocks. -// ------------------------------------------------------------------------------------------------- -/* - kmp_env_blk_t is read-only collection of environment variables (or environment-like). Usage: - - kmp_env_blk_t block; - __kmp_env_blk_init( & block, NULL ); // Initialize block from process environment. - // or - __kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string. - __kmp_env_blk_sort( & block ); // Optionally, sort list. - for ( i = 0; i < block.count; ++ i ) { - // Process block.vars[ i ].name and block.vars[ i ].value... - }; // for i - __kmp_env_block_free( & block ); +/* kmp_env_blk_t is read-only collection of environment variables (or + environment-like). Usage: + +kmp_env_blk_t block; +__kmp_env_blk_init( & block, NULL ); // Initialize block from process + // environment. +// or +__kmp_env_blk_init( & block, "KMP_WARNING=1|KMP_AFFINITY=none" ); // from string +__kmp_env_blk_sort( & block ); // Optionally, sort list. +for ( i = 0; i < block.count; ++ i ) { + // Process block.vars[ i ].name and block.vars[ i ].value... 
+}; // for i +__kmp_env_block_free( & block ); */ struct __kmp_env_var { - char const * name; - char const * value; + char const *name; + char const *value; }; typedef struct __kmp_env_var kmp_env_var_t; struct __kmp_env_blk { - char const * bulk; - kmp_env_var_t const * vars; - int count; + char const *bulk; + kmp_env_var_t const *vars; + int count; }; typedef struct __kmp_env_blk kmp_env_blk_t; -void __kmp_env_blk_init( kmp_env_blk_t * block, char const * bulk ); -void __kmp_env_blk_free( kmp_env_blk_t * block ); -void __kmp_env_blk_sort( kmp_env_blk_t * block ); -char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name ); +void __kmp_env_blk_init(kmp_env_blk_t *block, char const *bulk); +void __kmp_env_blk_free(kmp_env_blk_t *block); +void __kmp_env_blk_sort(kmp_env_blk_t *block); +char const *__kmp_env_blk_var(kmp_env_blk_t *block, char const *name); #ifdef __cplusplus } @@ -78,4 +78,3 @@ char const * __kmp_env_blk_var( kmp_env_blk_t * block, char const * name ); #endif // KMP_ENVIRONMENT_H // end of file // - diff --git a/openmp/runtime/src/kmp_error.cpp b/openmp/runtime/src/kmp_error.cpp index 2d84066..529dc48 100644 --- a/openmp/runtime/src/kmp_error.cpp +++ b/openmp/runtime/src/kmp_error.cpp @@ -14,259 +14,237 @@ #include "kmp.h" +#include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_str.h" -#include "kmp_error.h" /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -#define MIN_STACK 100 +#define MIN_STACK 100 - -static char const * cons_text_c[] = { - "(none)", - "\"parallel\"", - "work-sharing", /* this is not called "for" because of lowering of "sections" pragmas */ - "\"ordered\" work-sharing", /* this is not called "for ordered" because of lowering of "sections" pragmas */ +static char const *cons_text_c[] = { + "(none)", "\"parallel\"", "work-sharing", /* this is not called "for" + because of lowering of + "sections" pragmas */ + "\"ordered\" work-sharing", /* this is not called "for ordered" because of + lowering of "sections" pragmas */ "\"sections\"", - "work-sharing", /* this is not called "single" because of lowering of "sections" pragmas */ - "\"taskq\"", - "\"taskq\"", - "\"taskq ordered\"", - "\"critical\"", - "\"ordered\"", /* in PARALLEL */ - "\"ordered\"", /* in PDO */ - "\"ordered\"", /* in TASKQ */ - "\"master\"", - "\"reduce\"", - "\"barrier\"" -}; - -#define get_src( ident ) ( (ident) == NULL ? NULL : (ident)->psource ) - -#define PUSH_MSG( ct, ident ) \ - "\tpushing on stack: %s (%s)\n", cons_text_c[ (ct) ], get_src( (ident) ) -#define POP_MSG( p ) \ - "\tpopping off stack: %s (%s)\n", \ - cons_text_c[ (p)->stack_data[ tos ].type ], \ - get_src( (p)->stack_data[ tos ].ident ) - -static int const cons_text_c_num = sizeof( cons_text_c ) / sizeof( char const * ); + "work-sharing", /* this is not called "single" because of lowering of + "sections" pragmas */ + "\"taskq\"", "\"taskq\"", "\"taskq ordered\"", "\"critical\"", + "\"ordered\"", /* in PARALLEL */ + "\"ordered\"", /* in PDO */ + "\"ordered\"", /* in TASKQ */ + "\"master\"", "\"reduce\"", "\"barrier\""}; + +#define get_src(ident) ((ident) == NULL ? 
NULL : (ident)->psource) + +#define PUSH_MSG(ct, ident) \ + "\tpushing on stack: %s (%s)\n", cons_text_c[(ct)], get_src((ident)) +#define POP_MSG(p) \ + "\tpopping off stack: %s (%s)\n", cons_text_c[(p)->stack_data[tos].type], \ + get_src((p)->stack_data[tos].ident) + +static int const cons_text_c_num = sizeof(cons_text_c) / sizeof(char const *); -/* ------------------------------------------------------------------------ */ /* --------------- START OF STATIC LOCAL ROUTINES ------------------------- */ -/* ------------------------------------------------------------------------ */ -static void -__kmp_check_null_func( void ) -{ - /* nothing to do */ +static void __kmp_check_null_func(void) { /* nothing to do */ } -static void -__kmp_expand_cons_stack( int gtid, struct cons_header *p ) -{ - int i; - struct cons_data *d; +static void __kmp_expand_cons_stack(int gtid, struct cons_header *p) { + int i; + struct cons_data *d; - /* TODO for monitor perhaps? */ - if (gtid < 0) - __kmp_check_null_func(); + /* TODO for monitor perhaps? */ + if (gtid < 0) + __kmp_check_null_func(); - KE_TRACE( 10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid() ) ); + KE_TRACE(10, ("expand cons_stack (%d %d)\n", gtid, __kmp_get_gtid())); - d = p->stack_data; + d = p->stack_data; - p->stack_size = (p->stack_size * 2) + 100; + p->stack_size = (p->stack_size * 2) + 100; - /* TODO free the old data */ - p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (p->stack_size+1) ); + /* TODO free the old data */ + p->stack_data = (struct cons_data *)__kmp_allocate(sizeof(struct cons_data) * + (p->stack_size + 1)); - for (i = p->stack_top; i >= 0; --i) - p->stack_data[i] = d[i]; + for (i = p->stack_top; i >= 0; --i) + p->stack_data[i] = d[i]; - /* NOTE: we do not free the old stack_data */ + /* NOTE: we do not free the old stack_data */ } // NOTE: Function returns allocated memory, caller must free it! -static char const * -__kmp_pragma( - int ct, - ident_t const * ident -) { - char const * cons = NULL; // Construct name. - char * file = NULL; // File name. - char * func = NULL; // Function (routine) name. - char * line = NULL; // Line number. - kmp_str_buf_t buffer; - kmp_msg_t prgm; - __kmp_str_buf_init( & buffer ); - if ( 0 < ct && ct < cons_text_c_num ) { - cons = cons_text_c[ ct ]; - } else { - KMP_DEBUG_ASSERT( 0 ); - }; - if ( ident != NULL && ident->psource != NULL ) { - char * tail = NULL; - __kmp_str_buf_print( & buffer, "%s", ident->psource ); // Copy source to buffer. - // Split string in buffer to file, func, and line. - tail = buffer.str; - __kmp_str_split( tail, ';', NULL, & tail ); - __kmp_str_split( tail, ';', & file, & tail ); - __kmp_str_split( tail, ';', & func, & tail ); - __kmp_str_split( tail, ';', & line, & tail ); - }; // if - prgm = __kmp_msg_format( kmp_i18n_fmt_Pragma, cons, file, func, line ); - __kmp_str_buf_free( & buffer ); - return prgm.str; +static char const *__kmp_pragma(int ct, ident_t const *ident) { + char const *cons = NULL; // Construct name. + char *file = NULL; // File name. + char *func = NULL; // Function (routine) name. + char *line = NULL; // Line number. + kmp_str_buf_t buffer; + kmp_msg_t prgm; + __kmp_str_buf_init(&buffer); + if (0 < ct && ct < cons_text_c_num) { + cons = cons_text_c[ct]; + } else { + KMP_DEBUG_ASSERT(0); + }; + if (ident != NULL && ident->psource != NULL) { + char *tail = NULL; + __kmp_str_buf_print(&buffer, "%s", + ident->psource); // Copy source to buffer. + // Split string in buffer to file, func, and line. 
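__kmp_pragma above carves file, function and line out of ident->psource by repeatedly splitting on ';'. A standalone sketch of that split using std::string; the ";file;func;line;..." layout of the sample string is an assumption made for illustration, not something this patch defines:

#include <cstdio>
#include <string>
#include <vector>

// Split a string on a separator, keeping empty fields (sketch only).
static std::vector<std::string> split(const std::string &s, char sep) {
  std::vector<std::string> out;
  size_t start = 0;
  for (;;) {
    size_t pos = s.find(sep, start);
    if (pos == std::string::npos) {
      out.push_back(s.substr(start));
      return out;
    }
    out.push_back(s.substr(start, pos - start));
    start = pos + 1;
  }
}

int main() {
  // Hypothetical psource-style string: leading field, then file, func, line.
  std::vector<std::string> f = split(";demo.c;main;42;0;;", ';');
  if (f.size() >= 4)
    std::printf("file=%s func=%s line=%s\n", f[1].c_str(), f[2].c_str(),
                f[3].c_str());
  return 0;
}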
+ tail = buffer.str; + __kmp_str_split(tail, ';', NULL, &tail); + __kmp_str_split(tail, ';', &file, &tail); + __kmp_str_split(tail, ';', &func, &tail); + __kmp_str_split(tail, ';', &line, &tail); + }; // if + prgm = __kmp_msg_format(kmp_i18n_fmt_Pragma, cons, file, func, line); + __kmp_str_buf_free(&buffer); + return prgm.str; } // __kmp_pragma -/* ------------------------------------------------------------------------ */ /* ----------------- END OF STATIC LOCAL ROUTINES ------------------------- */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_error_construct( - kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // Construct type. - ident_t const * ident // Construct ident. -) { - char const * construct = __kmp_pragma( ct, ident ); - __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct ), __kmp_msg_null ); - KMP_INTERNAL_FREE( (void *) construct ); +void __kmp_error_construct(kmp_i18n_id_t id, // Message identifier. + enum cons_type ct, // Construct type. + ident_t const *ident // Construct ident. + ) { + char const *construct = __kmp_pragma(ct, ident); + __kmp_msg(kmp_ms_fatal, __kmp_msg_format(id, construct), __kmp_msg_null); + KMP_INTERNAL_FREE((void *)construct); } -void -__kmp_error_construct2( - kmp_i18n_id_t id, // Message identifier. - enum cons_type ct, // First construct type. - ident_t const * ident, // First construct ident. - struct cons_data const * cons // Second construct. -) { - char const * construct1 = __kmp_pragma( ct, ident ); - char const * construct2 = __kmp_pragma( cons->type, cons->ident ); - __kmp_msg( kmp_ms_fatal, __kmp_msg_format( id, construct1, construct2 ), __kmp_msg_null ); - KMP_INTERNAL_FREE( (void *) construct1 ); - KMP_INTERNAL_FREE( (void *) construct2 ); +void __kmp_error_construct2(kmp_i18n_id_t id, // Message identifier. + enum cons_type ct, // First construct type. + ident_t const *ident, // First construct ident. + struct cons_data const *cons // Second construct. + ) { + char const *construct1 = __kmp_pragma(ct, ident); + char const *construct2 = __kmp_pragma(cons->type, cons->ident); + __kmp_msg(kmp_ms_fatal, __kmp_msg_format(id, construct1, construct2), + __kmp_msg_null); + KMP_INTERNAL_FREE((void *)construct1); + KMP_INTERNAL_FREE((void *)construct2); } - -struct cons_header * -__kmp_allocate_cons_stack( int gtid ) -{ - struct cons_header *p; - - /* TODO for monitor perhaps? */ - if ( gtid < 0 ) { - __kmp_check_null_func(); - }; // if - KE_TRACE( 10, ("allocate cons_stack (%d)\n", gtid ) ); - p = (struct cons_header *) __kmp_allocate( sizeof( struct cons_header ) ); - p->p_top = p->w_top = p->s_top = 0; - p->stack_data = (struct cons_data *) __kmp_allocate( sizeof( struct cons_data ) * (MIN_STACK+1) ); - p->stack_size = MIN_STACK; - p->stack_top = 0; - p->stack_data[ 0 ].type = ct_none; - p->stack_data[ 0 ].prev = 0; - p->stack_data[ 0 ].ident = NULL; - return p; +struct cons_header *__kmp_allocate_cons_stack(int gtid) { + struct cons_header *p; + + /* TODO for monitor perhaps? 
*/ + if (gtid < 0) { + __kmp_check_null_func(); + }; // if + KE_TRACE(10, ("allocate cons_stack (%d)\n", gtid)); + p = (struct cons_header *)__kmp_allocate(sizeof(struct cons_header)); + p->p_top = p->w_top = p->s_top = 0; + p->stack_data = (struct cons_data *)__kmp_allocate(sizeof(struct cons_data) * + (MIN_STACK + 1)); + p->stack_size = MIN_STACK; + p->stack_top = 0; + p->stack_data[0].type = ct_none; + p->stack_data[0].prev = 0; + p->stack_data[0].ident = NULL; + return p; } -void -__kmp_free_cons_stack( void * ptr ) { - struct cons_header * p = (struct cons_header *) ptr; - if ( p != NULL ) { - if ( p->stack_data != NULL ) { - __kmp_free( p->stack_data ); - p->stack_data = NULL; - }; // if - __kmp_free( p ); +void __kmp_free_cons_stack(void *ptr) { + struct cons_header *p = (struct cons_header *)ptr; + if (p != NULL) { + if (p->stack_data != NULL) { + __kmp_free(p->stack_data); + p->stack_data = NULL; }; // if + __kmp_free(p); + }; // if } - #if KMP_DEBUG -static void -dump_cons_stack( int gtid, struct cons_header * p ) { - int i; - int tos = p->stack_top; - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); - __kmp_str_buf_print( & buffer, "Begin construct stack with %d items for thread %d\n", tos, gtid ); - __kmp_str_buf_print( & buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, p->p_top, p->w_top, p->s_top ); - for ( i = tos; i > 0; i-- ) { - struct cons_data * c = & ( p->stack_data[ i ] ); - __kmp_str_buf_print( & buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, cons_text_c[ c->type ], get_src( c->ident ), c->prev, c->name ); - }; // for i - __kmp_str_buf_print( & buffer, "End construct stack for thread %d\n", gtid ); - __kmp_str_buf_print( & buffer, "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n" ); - __kmp_debug_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); +static void dump_cons_stack(int gtid, struct cons_header *p) { + int i; + int tos = p->stack_top; + kmp_str_buf_t buffer; + __kmp_str_buf_init(&buffer); + __kmp_str_buf_print( + &buffer, + "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n"); + __kmp_str_buf_print(&buffer, + "Begin construct stack with %d items for thread %d\n", + tos, gtid); + __kmp_str_buf_print(&buffer, " stack_top=%d { P=%d, W=%d, S=%d }\n", tos, + p->p_top, p->w_top, p->s_top); + for (i = tos; i > 0; i--) { + struct cons_data *c = &(p->stack_data[i]); + __kmp_str_buf_print( + &buffer, " stack_data[%2d] = { %s (%s) %d %p }\n", i, + cons_text_c[c->type], get_src(c->ident), c->prev, c->name); + }; // for i + __kmp_str_buf_print(&buffer, "End construct stack for thread %d\n", gtid); + __kmp_str_buf_print( + &buffer, + "+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-\n"); + __kmp_debug_printf("%s", buffer.str); + __kmp_str_buf_free(&buffer); } #endif -void -__kmp_push_parallel( int gtid, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); - KE_TRACE( 10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); - KE_TRACE( 100, ( PUSH_MSG( ct_parallel, ident ) ) ); - if ( p->stack_top >= p->stack_size ) { - __kmp_expand_cons_stack( gtid, p ); - }; // if - tos = ++p->stack_top; - p->stack_data[ tos ].type = ct_parallel; - p->stack_data[ tos ].prev = p->p_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = NULL; - p->p_top = tos; - KE_DUMP( 1000, 
dump_cons_stack( gtid, p ) ); +void __kmp_push_parallel(int gtid, ident_t const *ident) { + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + + KMP_DEBUG_ASSERT(__kmp_threads[gtid]->th.th_cons); + KE_TRACE(10, ("__kmp_push_parallel (%d %d)\n", gtid, __kmp_get_gtid())); + KE_TRACE(100, (PUSH_MSG(ct_parallel, ident))); + if (p->stack_top >= p->stack_size) { + __kmp_expand_cons_stack(gtid, p); + }; // if + tos = ++p->stack_top; + p->stack_data[tos].type = ct_parallel; + p->stack_data[tos].prev = p->p_top; + p->stack_data[tos].ident = ident; + p->stack_data[tos].name = NULL; + p->p_top = tos; + KE_DUMP(1000, dump_cons_stack(gtid, p)); } -void -__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ) -{ - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ]-> th.th_cons ); - KE_TRACE( 10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - - - if ( p->stack_top >= p->stack_size ) { - __kmp_expand_cons_stack( gtid, p ); - }; // if - if ( p->w_top > p->p_top && - !(IS_CONS_TYPE_TASKQ(p->stack_data[ p->w_top ].type) && IS_CONS_TYPE_TASKQ(ct))) { - // We are already in a WORKSHARE construct for this PARALLEL region. - __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->w_top ] ); - }; // if - if ( p->s_top > p->p_top ) { - // We are already in a SYNC construct for this PARALLEL region. - __kmp_error_construct2( kmp_i18n_msg_CnsInvalidNesting, ct, ident, & p->stack_data[ p->s_top ] ); - }; // if +void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident) { + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + + KMP_DEBUG_ASSERT(__kmp_threads[gtid]->th.th_cons); + KE_TRACE(10, ("__kmp_check_workshare (%d %d)\n", gtid, __kmp_get_gtid())); + + if (p->stack_top >= p->stack_size) { + __kmp_expand_cons_stack(gtid, p); + }; // if + if (p->w_top > p->p_top && + !(IS_CONS_TYPE_TASKQ(p->stack_data[p->w_top].type) && + IS_CONS_TYPE_TASKQ(ct))) { + // We are already in a WORKSHARE construct for this PARALLEL region. + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->w_top]); + }; // if + if (p->s_top > p->p_top) { + // We are already in a SYNC construct for this PARALLEL region. 
+ __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->s_top]); + }; // if } -void -__kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - KE_TRACE( 10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - __kmp_check_workshare( gtid, ct, ident ); - KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); - tos = ++p->stack_top; - p->stack_data[ tos ].type = ct; - p->stack_data[ tos ].prev = p->w_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = NULL; - p->w_top = tos; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident) { + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + KE_TRACE(10, ("__kmp_push_workshare (%d %d)\n", gtid, __kmp_get_gtid())); + __kmp_check_workshare(gtid, ct, ident); + KE_TRACE(100, (PUSH_MSG(ct, ident))); + tos = ++p->stack_top; + p->stack_data[tos].type = ct; + p->stack_data[tos].prev = p->w_top; + p->stack_data[tos].ident = ident; + p->stack_data[tos].name = NULL; + p->w_top = tos; + KE_DUMP(1000, dump_cons_stack(gtid, p)); } void @@ -276,98 +254,91 @@ __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_l __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) #endif { - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - KE_TRACE( 10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid() ) ); - - if (p->stack_top >= p->stack_size) - __kmp_expand_cons_stack( gtid, p ); - - if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || ct == ct_ordered_in_taskq ) { - if (p->w_top <= p->p_top) { - /* we are not in a worksharing construct */ - #ifdef BUILD_PARALLEL_ORDERED - /* do not report error messages for PARALLEL ORDERED */ - KMP_ASSERT( ct == ct_ordered_in_parallel ); - #else - __kmp_error_construct( kmp_i18n_msg_CnsBoundToWorksharing, ct, ident ); - #endif /* BUILD_PARALLEL_ORDERED */ + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + + KE_TRACE(10, ("__kmp_check_sync (gtid=%d)\n", __kmp_get_gtid())); + + if (p->stack_top >= p->stack_size) + __kmp_expand_cons_stack(gtid, p); + + if (ct == ct_ordered_in_parallel || ct == ct_ordered_in_pdo || + ct == ct_ordered_in_taskq) { + if (p->w_top <= p->p_top) { +/* we are not in a worksharing construct */ +#ifdef BUILD_PARALLEL_ORDERED + /* do not report error messages for PARALLEL ORDERED */ + KMP_ASSERT(ct == ct_ordered_in_parallel); +#else + __kmp_error_construct(kmp_i18n_msg_CnsBoundToWorksharing, ct, ident); +#endif /* BUILD_PARALLEL_ORDERED */ + } else { + /* inside a WORKSHARING construct for this PARALLEL region */ + if (!IS_CONS_TYPE_ORDERED(p->stack_data[p->w_top].type)) { + if (p->stack_data[p->w_top].type == ct_taskq) { + __kmp_error_construct2(kmp_i18n_msg_CnsNotInTaskConstruct, ct, ident, + &p->stack_data[p->w_top]); } else { - /* inside a WORKSHARING construct for this PARALLEL region */ - if (!IS_CONS_TYPE_ORDERED(p->stack_data[ p->w_top ].type)) { - if (p->stack_data[ p->w_top ].type == ct_taskq) { - __kmp_error_construct2( - kmp_i18n_msg_CnsNotInTaskConstruct, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } else { - __kmp_error_construct2( - kmp_i18n_msg_CnsNoOrderedClause, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - } - } - if (p->s_top > p->p_top && p->s_top > p->w_top) { - /* inside a sync construct which is inside a worksharing construct */ - int index = p->s_top; - 
enum cons_type stack_type; - - stack_type = p->stack_data[ index ].type; - - if (stack_type == ct_critical || - ( ( stack_type == ct_ordered_in_parallel || - stack_type == ct_ordered_in_pdo || - stack_type == ct_ordered_in_taskq ) && /* C doesn't allow named ordered; ordered in ordered gets error */ - p->stack_data[ index ].ident != NULL && - (p->stack_data[ index ].ident->flags & KMP_IDENT_KMPC ))) { - /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ index ] - ); - } + __kmp_error_construct2(kmp_i18n_msg_CnsNoOrderedClause, ct, ident, + &p->stack_data[p->w_top]); } - } else if ( ct == ct_critical ) { + } + } + if (p->s_top > p->p_top && p->s_top > p->w_top) { + /* inside a sync construct which is inside a worksharing construct */ + int index = p->s_top; + enum cons_type stack_type; + + stack_type = p->stack_data[index].type; + + if (stack_type == ct_critical || + ((stack_type == ct_ordered_in_parallel || + stack_type == ct_ordered_in_pdo || + stack_type == + ct_ordered_in_taskq) && /* C doesn't allow named ordered; + ordered in ordered gets error */ + p->stack_data[index].ident != NULL && + (p->stack_data[index].ident->flags & KMP_IDENT_KMPC))) { + /* we are in ORDERED which is inside an ORDERED or CRITICAL construct */ + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[index]); + } + } + } else if (ct == ct_critical) { #if KMP_USE_DYNAMIC_LOCK - if ( lck != NULL && __kmp_get_user_lock_owner( lck, seq ) == gtid ) { /* this same thread already has lock for this critical section */ + if (lck != NULL && + __kmp_get_user_lock_owner(lck, seq) == + gtid) { /* this thread already has lock for this critical section */ #else - if ( lck != NULL && __kmp_get_user_lock_owner( lck ) == gtid ) { /* this same thread already has lock for this critical section */ + if (lck != NULL && + __kmp_get_user_lock_owner(lck) == + gtid) { /* this thread already has lock for this critical section */ #endif - int index = p->s_top; - struct cons_data cons = { NULL, ct_critical, 0, NULL }; - /* walk up construct stack and try to find critical with matching name */ - while ( index != 0 && p->stack_data[ index ].name != lck ) { - index = p->stack_data[ index ].prev; - } - if ( index != 0 ) { - /* found match on the stack (may not always because of interleaved critical for Fortran) */ - cons = p->stack_data[ index ]; - } - /* we are in CRITICAL which is inside a CRITICAL construct of the same name */ - __kmp_error_construct2( kmp_i18n_msg_CnsNestingSameName, ct, ident, & cons ); - } - } else if ( ct == ct_master || ct == ct_reduce ) { - if (p->w_top > p->p_top) { - /* inside a WORKSHARING construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - if (ct == ct_reduce && p->s_top > p->p_top) { - /* inside a another SYNC construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->s_top ] - ); - }; // if + int index = p->s_top; + struct cons_data cons = {NULL, ct_critical, 0, NULL}; + /* walk up construct stack and try to find critical with matching name */ + while (index != 0 && p->stack_data[index].name != lck) { + index = p->stack_data[index].prev; + } + if (index != 0) { + /* found match on the stack (may not always because of interleaved + * critical for Fortran) */ + cons = p->stack_data[index]; + } + /* we 
are in CRITICAL which is inside a CRITICAL construct of same name */ + __kmp_error_construct2(kmp_i18n_msg_CnsNestingSameName, ct, ident, &cons); + } + } else if (ct == ct_master || ct == ct_reduce) { + if (p->w_top > p->p_top) { + /* inside a WORKSHARING construct for this PARALLEL region */ + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->w_top]); + } + if (ct == ct_reduce && p->s_top > p->p_top) { + /* inside a another SYNC construct for this PARALLEL region */ + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->s_top]); }; // if + }; // if } void @@ -377,147 +348,118 @@ __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lo __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p lck ) #endif { - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; - KMP_ASSERT( gtid == __kmp_get_gtid() ); - KE_TRACE( 10, ("__kmp_push_sync (gtid=%d)\n", gtid ) ); + KMP_ASSERT(gtid == __kmp_get_gtid()); + KE_TRACE(10, ("__kmp_push_sync (gtid=%d)\n", gtid)); #if KMP_USE_DYNAMIC_LOCK - __kmp_check_sync( gtid, ct, ident, lck, seq ); + __kmp_check_sync(gtid, ct, ident, lck, seq); #else - __kmp_check_sync( gtid, ct, ident, lck ); + __kmp_check_sync(gtid, ct, ident, lck); #endif - KE_TRACE( 100, ( PUSH_MSG( ct, ident ) ) ); - tos = ++ p->stack_top; - p->stack_data[ tos ].type = ct; - p->stack_data[ tos ].prev = p->s_top; - p->stack_data[ tos ].ident = ident; - p->stack_data[ tos ].name = lck; - p->s_top = tos; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); + KE_TRACE(100, (PUSH_MSG(ct, ident))); + tos = ++p->stack_top; + p->stack_data[tos].type = ct; + p->stack_data[tos].prev = p->s_top; + p->stack_data[tos].ident = ident; + p->stack_data[tos].name = lck; + p->s_top = tos; + KE_DUMP(1000, dump_cons_stack(gtid, p)); } /* ------------------------------------------------------------------------ */ -void -__kmp_pop_parallel( int gtid, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->p_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident ); - } - if ( tos != p->p_top || p->stack_data[ tos ].type != ct_parallel ) { - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct_parallel, ident, - & p->stack_data[ tos ] - ); - } - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->p_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos ].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +void __kmp_pop_parallel(int gtid, ident_t const *ident) { + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + tos = p->stack_top; + KE_TRACE(10, ("__kmp_pop_parallel (%d %d)\n", gtid, __kmp_get_gtid())); + if (tos == 0 || p->p_top == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct_parallel, ident); + } + if (tos != p->p_top || p->stack_data[tos].type != ct_parallel) { + __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct_parallel, ident, + &p->stack_data[tos]); + } + KE_TRACE(100, (POP_MSG(p))); + p->p_top = p->stack_data[tos].prev; + p->stack_data[tos].type = ct_none; + p->stack_data[tos].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP(1000, dump_cons_stack(gtid, p)); } -enum cons_type -__kmp_pop_workshare( int gtid, 
enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->w_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); - } - - if ( tos != p->w_top || - ( p->stack_data[ tos ].type != ct && - /* below are two exceptions to the rule that construct types must match */ - ! ( p->stack_data[ tos ].type == ct_pdo_ordered && ct == ct_pdo ) && - ! ( p->stack_data[ tos ].type == ct_task_ordered && ct == ct_task ) - ) - ) { - __kmp_check_null_func(); - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct, ident, - & p->stack_data[ tos ] - ); - } - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->w_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos ].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); - return p->stack_data[ p->w_top ].type; +enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, + ident_t const *ident) { + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + + tos = p->stack_top; + KE_TRACE(10, ("__kmp_pop_workshare (%d %d)\n", gtid, __kmp_get_gtid())); + if (tos == 0 || p->w_top == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct, ident); + } + + if (tos != p->w_top || + (p->stack_data[tos].type != ct && + // below are two exceptions to the rule that construct types must match + !(p->stack_data[tos].type == ct_pdo_ordered && ct == ct_pdo) && + !(p->stack_data[tos].type == ct_task_ordered && ct == ct_task))) { + __kmp_check_null_func(); + __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct, ident, + &p->stack_data[tos]); + } + KE_TRACE(100, (POP_MSG(p))); + p->w_top = p->stack_data[tos].prev; + p->stack_data[tos].type = ct_none; + p->stack_data[tos].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP(1000, dump_cons_stack(gtid, p)); + return p->stack_data[p->w_top].type; } -void -__kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ) -{ - int tos; - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - tos = p->stack_top; - KE_TRACE( 10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid() ) ); - if ( tos == 0 || p->s_top == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsDetectedEnd, ct, ident ); - }; - if ( tos != p->s_top || p->stack_data[ tos ].type != ct ) { - __kmp_check_null_func(); - __kmp_error_construct2( - kmp_i18n_msg_CnsExpectedEnd, - ct, ident, - & p->stack_data[ tos ] - ); - }; - if ( gtid < 0 ) { - __kmp_check_null_func(); - }; - KE_TRACE( 100, ( POP_MSG( p ) ) ); - p->s_top = p->stack_data[ tos ].prev; - p->stack_data[ tos ].type = ct_none; - p->stack_data[ tos ].ident = NULL; - p->stack_top = tos - 1; - KE_DUMP( 1000, dump_cons_stack( gtid, p ) ); +void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident) { + int tos; + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + tos = p->stack_top; + KE_TRACE(10, ("__kmp_pop_sync (%d %d)\n", gtid, __kmp_get_gtid())); + if (tos == 0 || p->s_top == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsDetectedEnd, ct, ident); + }; + if (tos != p->s_top || p->stack_data[tos].type != ct) { + __kmp_check_null_func(); + __kmp_error_construct2(kmp_i18n_msg_CnsExpectedEnd, ct, ident, + &p->stack_data[tos]); + }; + if (gtid < 0) { + __kmp_check_null_func(); + }; + KE_TRACE(100, (POP_MSG(p))); + p->s_top = p->stack_data[tos].prev; + p->stack_data[tos].type = ct_none; + 
p->stack_data[tos].ident = NULL; + p->stack_top = tos - 1; + KE_DUMP(1000, dump_cons_stack(gtid, p)); } /* ------------------------------------------------------------------------ */ -void -__kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ) -{ - struct cons_header *p = __kmp_threads[ gtid ]->th.th_cons; - KE_TRACE( 10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, __kmp_get_gtid() ) ); - if ( ident != 0 ) { - __kmp_check_null_func(); - } - if ( p->w_top > p->p_top ) { - /* we are already in a WORKSHARING construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->w_top ] - ); - } - if (p->s_top > p->p_top) { - /* we are already in a SYNC construct for this PARALLEL region */ - __kmp_error_construct2( - kmp_i18n_msg_CnsInvalidNesting, - ct, ident, - & p->stack_data[ p->s_top ] - ); - } +void __kmp_check_barrier(int gtid, enum cons_type ct, ident_t const *ident) { + struct cons_header *p = __kmp_threads[gtid]->th.th_cons; + KE_TRACE(10, ("__kmp_check_barrier (loc: %p, gtid: %d %d)\n", ident, gtid, + __kmp_get_gtid())); + if (ident != 0) { + __kmp_check_null_func(); + } + if (p->w_top > p->p_top) { + /* we are already in a WORKSHARING construct for this PARALLEL region */ + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->w_top]); + } + if (p->s_top > p->p_top) { + /* we are already in a SYNC construct for this PARALLEL region */ + __kmp_error_construct2(kmp_i18n_msg_CnsInvalidNesting, ct, ident, + &p->stack_data[p->s_top]); + } } - -/* ------------------------------------------------------------------------ */ - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_error.h b/openmp/runtime/src/kmp_error.h index 9dfe111..ef14122 100644 --- a/openmp/runtime/src/kmp_error.h +++ b/openmp/runtime/src/kmp_error.h @@ -20,38 +20,44 @@ /* ------------------------------------------------------------------------ */ #ifdef __cplusplus - extern "C" { +extern "C" { #endif -void __kmp_error_construct( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident ); -void __kmp_error_construct2( kmp_i18n_id_t id, enum cons_type ct, ident_t const * ident, struct cons_data const * cons ); +void __kmp_error_construct(kmp_i18n_id_t id, enum cons_type ct, + ident_t const *ident); +void __kmp_error_construct2(kmp_i18n_id_t id, enum cons_type ct, + ident_t const *ident, struct cons_data const *cons); -struct cons_header * __kmp_allocate_cons_stack( int gtid ); -void __kmp_free_cons_stack( void * ptr ); +struct cons_header *__kmp_allocate_cons_stack(int gtid); +void __kmp_free_cons_stack(void *ptr); -void __kmp_push_parallel( int gtid, ident_t const * ident ); -void __kmp_push_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +void __kmp_push_parallel(int gtid, ident_t const *ident); +void __kmp_push_workshare(int gtid, enum cons_type ct, ident_t const *ident); #if KMP_USE_DYNAMIC_LOCK -void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, + kmp_user_lock_p name, kmp_uint32); #else -void __kmp_push_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +void __kmp_push_sync(int gtid, enum cons_type ct, ident_t const *ident, + kmp_user_lock_p name); #endif -void 
__kmp_check_workshare( int gtid, enum cons_type ct, ident_t const * ident ); +void __kmp_check_workshare(int gtid, enum cons_type ct, ident_t const *ident); #if KMP_USE_DYNAMIC_LOCK -void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name, kmp_uint32 ); +void __kmp_check_sync(int gtid, enum cons_type ct, ident_t const *ident, + kmp_user_lock_p name, kmp_uint32); #else -void __kmp_check_sync( int gtid, enum cons_type ct, ident_t const * ident, kmp_user_lock_p name ); +void __kmp_check_sync(int gtid, enum cons_type ct, ident_t const *ident, + kmp_user_lock_p name); #endif -void __kmp_pop_parallel( int gtid, ident_t const * ident ); -enum cons_type __kmp_pop_workshare( int gtid, enum cons_type ct, ident_t const * ident ); -void __kmp_pop_sync( int gtid, enum cons_type ct, ident_t const * ident ); -void __kmp_check_barrier( int gtid, enum cons_type ct, ident_t const * ident ); +void __kmp_pop_parallel(int gtid, ident_t const *ident); +enum cons_type __kmp_pop_workshare(int gtid, enum cons_type ct, + ident_t const *ident); +void __kmp_pop_sync(int gtid, enum cons_type ct, ident_t const *ident); +void __kmp_check_barrier(int gtid, enum cons_type ct, ident_t const *ident); #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif #endif // KMP_ERROR_H - diff --git a/openmp/runtime/src/kmp_ftn_cdecl.cpp b/openmp/runtime/src/kmp_ftn_cdecl.cpp index a3c3779..887d8b9 100644 --- a/openmp/runtime/src/kmp_ftn_cdecl.cpp +++ b/openmp/runtime/src/kmp_ftn_cdecl.cpp @@ -17,20 +17,21 @@ #include "kmp_affinity.h" #if KMP_OS_WINDOWS -# if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB -# define KMP_FTN_ENTRIES KMP_FTN_UPPER -# endif +#if defined KMP_WIN_CDECL || !defined KMP_DYNAMIC_LIB +#define KMP_FTN_ENTRIES KMP_FTN_UPPER +#endif #elif KMP_OS_UNIX -# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#define KMP_FTN_ENTRIES KMP_FTN_PLAIN #endif // Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftncdecl[] = KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " +char const __kmp_version_ftncdecl[] = + KMP_VERSION_PREFIX "Fortran __cdecl OMP support: " #ifdef KMP_FTN_ENTRIES - "yes"; -# define FTN_STDCALL /* no stdcall */ -# include "kmp_ftn_os.h" -# include "kmp_ftn_entry.h" + "yes"; +#define FTN_STDCALL /* no stdcall */ +#include "kmp_ftn_os.h" +#include "kmp_ftn_entry.h" #else - "no"; + "no"; #endif /* KMP_FTN_ENTRIES */ diff --git a/openmp/runtime/src/kmp_ftn_entry.h b/openmp/runtime/src/kmp_ftn_entry.h index 13501e6..bc3863d 100644 --- a/openmp/runtime/src/kmp_ftn_entry.h +++ b/openmp/runtime/src/kmp_ftn_entry.h @@ -14,947 +14,815 @@ #ifndef FTN_STDCALL -# error The support file kmp_ftn_entry.h should not be compiled by itself. +#error The support file kmp_ftn_entry.h should not be compiled by itself. #endif #ifdef KMP_STUB - #include "kmp_stub.h" +#include "kmp_stub.h" #endif #include "kmp_i18n.h" #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -/* - * For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(), +/* For compatibility with the Gnu/MS Open MP codegen, omp_set_num_threads(), * omp_set_nested(), and omp_set_dynamic() [in lowercase on MS, and w/o * a trailing underscore on Linux* OS] take call by value integer arguments. * + omp_set_max_active_levels() * + omp_set_schedule() * * For backward compatibility with 9.1 and previous Intel compiler, these - * entry points take call by reference integer arguments. - */ + * entry points take call by reference integer arguments. 
*/ #ifdef KMP_GOMP_COMPAT -# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER) -# define PASS_ARGS_BY_VALUE 1 -# endif +#if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_UPPER) +#define PASS_ARGS_BY_VALUE 1 +#endif #endif #if KMP_OS_WINDOWS -# if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND) -# define PASS_ARGS_BY_VALUE 1 -# endif +#if (KMP_FTN_ENTRIES == KMP_FTN_PLAIN) || (KMP_FTN_ENTRIES == KMP_FTN_APPEND) +#define PASS_ARGS_BY_VALUE 1 +#endif #endif // This macro helps to reduce code duplication. #ifdef PASS_ARGS_BY_VALUE - #define KMP_DEREF -#else - #define KMP_DEREF * -#endif - -void FTN_STDCALL -FTN_SET_STACKSIZE( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_stacksize( KMP_DEREF arg ); - #else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( (size_t) KMP_DEREF arg ); - #endif -} - -void FTN_STDCALL -FTN_SET_STACKSIZE_S( size_t KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_stacksize( KMP_DEREF arg ); - #else - // __kmp_aux_set_stacksize initializes the library if needed - __kmp_aux_set_stacksize( KMP_DEREF arg ); - #endif -} - -int FTN_STDCALL -FTN_GET_STACKSIZE( void ) -{ - #ifdef KMP_STUB - return __kmps_get_stacksize(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - return (int)__kmp_stksize; - #endif -} - -size_t FTN_STDCALL -FTN_GET_STACKSIZE_S( void ) -{ - #ifdef KMP_STUB - return __kmps_get_stacksize(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - return __kmp_stksize; - #endif -} - -void FTN_STDCALL -FTN_SET_BLOCKTIME( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_blocktime( KMP_DEREF arg ); - #else - int gtid, tid; - kmp_info_t *thread; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - - __kmp_aux_set_blocktime( KMP_DEREF arg, thread, tid ); - #endif -} - -int FTN_STDCALL -FTN_GET_BLOCKTIME( void ) -{ - #ifdef KMP_STUB - return __kmps_get_blocktime(); - #else - int gtid, tid; - kmp_info_t *thread; - kmp_team_p *team; - - gtid = __kmp_entry_gtid(); - tid = __kmp_tid_from_gtid(gtid); - thread = __kmp_thread_from_gtid(gtid); - team = __kmp_threads[ gtid ] -> th.th_team; - - /* These must match the settings used in __kmp_wait_sleep() */ - if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, KMP_MAX_BLOCKTIME) ); - return KMP_MAX_BLOCKTIME; - } +#define KMP_DEREF +#else +#define KMP_DEREF * +#endif + +void FTN_STDCALL FTN_SET_STACKSIZE(int KMP_DEREF arg) { +#ifdef KMP_STUB + __kmps_set_stacksize(KMP_DEREF arg); +#else + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize((size_t)KMP_DEREF arg); +#endif +} + +void FTN_STDCALL FTN_SET_STACKSIZE_S(size_t KMP_DEREF arg) { +#ifdef KMP_STUB + __kmps_set_stacksize(KMP_DEREF arg); +#else + // __kmp_aux_set_stacksize initializes the library if needed + __kmp_aux_set_stacksize(KMP_DEREF arg); +#endif +} + +int FTN_STDCALL FTN_GET_STACKSIZE(void) { +#ifdef KMP_STUB + return __kmps_get_stacksize(); +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; + return (int)__kmp_stksize; +#endif +} + +size_t FTN_STDCALL FTN_GET_STACKSIZE_S(void) { +#ifdef KMP_STUB + return __kmps_get_stacksize(); +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; + return __kmp_stksize; +#endif +} + +void FTN_STDCALL FTN_SET_BLOCKTIME(int KMP_DEREF arg) { 
+#ifdef KMP_STUB + __kmps_set_blocktime(KMP_DEREF arg); +#else + int gtid, tid; + kmp_info_t *thread; + + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); + + __kmp_aux_set_blocktime(KMP_DEREF arg, thread, tid); +#endif +} + +int FTN_STDCALL FTN_GET_BLOCKTIME(void) { +#ifdef KMP_STUB + return __kmps_get_blocktime(); +#else + int gtid, tid; + kmp_info_t *thread; + kmp_team_p *team; + + gtid = __kmp_entry_gtid(); + tid = __kmp_tid_from_gtid(gtid); + thread = __kmp_thread_from_gtid(gtid); + team = __kmp_threads[gtid]->th.th_team; + + /* These must match the settings used in __kmp_wait_sleep() */ + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, + team->t.t_id, tid, KMP_MAX_BLOCKTIME)); + return KMP_MAX_BLOCKTIME; + } #ifdef KMP_ADJUST_BLOCKTIME - else if ( __kmp_zero_bt && !get__bt_set( team, tid ) ) { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, 0) ); - return 0; - } + else if (__kmp_zero_bt && !get__bt_set(team, tid)) { + KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, + team->t.t_id, tid, 0)); + return 0; + } #endif /* KMP_ADJUST_BLOCKTIME */ - else { - KF_TRACE(10, ( "kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", - gtid, team->t.t_id, tid, get__blocktime( team, tid ) ) ); - return get__blocktime( team, tid ); - }; - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_SERIAL( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_serial ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_serial ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_TURNAROUND( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_turnaround ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_turnaround ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY_THROUGHPUT( void ) -{ - #ifdef KMP_STUB - __kmps_set_library( library_throughput ); - #else - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( library_throughput ); - #endif -} - -void FTN_STDCALL -FTN_SET_LIBRARY( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - __kmps_set_library( KMP_DEREF arg ); - #else - enum library_type lib; - lib = (enum library_type) KMP_DEREF arg; - // __kmp_user_set_library initializes the library if needed - __kmp_user_set_library( lib ); - #endif -} - -int FTN_STDCALL -FTN_GET_LIBRARY (void) -{ - #ifdef KMP_STUB - return __kmps_get_library(); - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - } - return ((int) __kmp_library); - #endif -} - -void FTN_STDCALL -FTN_SET_DISP_NUM_BUFFERS( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - ; // empty routine - #else - // ignore after initialization because some teams have already - // allocated dispatch buffers - if( __kmp_init_serial == 0 && (KMP_DEREF arg) > 0 ) - __kmp_dispatch_num_buffers = KMP_DEREF arg; - #endif -} - -int FTN_STDCALL -FTN_SET_AFFINITY( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity( mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity( mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY_MAX_PROC( void ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; - #else - // - // We really only NEED serial initialization here. - // - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_max_proc(); - #endif -} - -void FTN_STDCALL -FTN_CREATE_AFFINITY_MASK( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - *mask = NULL; - #else - // - // We really only NEED serial initialization here. - // - kmp_affin_mask_t* mask_internals; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - mask_internals = __kmp_affinity_dispatch->allocate_mask(); - KMP_CPU_ZERO( mask_internals ); - *mask = mask_internals; - #endif -} - -void FTN_STDCALL -FTN_DESTROY_AFFINITY_MASK( void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - // Nothing - #else - // - // We really only NEED serial initialization here. - // - kmp_affin_mask_t* mask_internals; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if ( __kmp_env_consistency_check ) { - if ( *mask == NULL ) { - KMP_FATAL( AffinityInvalidMask, "kmp_destroy_affinity_mask" ); - } - } - mask_internals = (kmp_affin_mask_t*)(*mask); - __kmp_affinity_dispatch->deallocate_mask(mask_internals); - *mask = NULL; - #endif -} - -int FTN_STDCALL -FTN_SET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_set_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif -} - -int FTN_STDCALL -FTN_UNSET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_unset_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif -} - -int FTN_STDCALL -FTN_GET_AFFINITY_MASK_PROC( int KMP_DEREF proc, void **mask ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_aux_get_affinity_mask_proc( KMP_DEREF proc, mask ); - #endif + else { + KF_TRACE(10, ("kmp_get_blocktime: T#%d(%d:%d), blocktime=%d\n", gtid, + team->t.t_id, tid, get__blocktime(team, tid))); + return get__blocktime(team, tid); + }; +#endif } +void FTN_STDCALL FTN_SET_LIBRARY_SERIAL(void) { +#ifdef KMP_STUB + __kmps_set_library(library_serial); +#else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library(library_serial); +#endif +} + +void FTN_STDCALL FTN_SET_LIBRARY_TURNAROUND(void) { +#ifdef KMP_STUB + __kmps_set_library(library_turnaround); +#else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library(library_turnaround); +#endif +} + +void FTN_STDCALL FTN_SET_LIBRARY_THROUGHPUT(void) { +#ifdef KMP_STUB + __kmps_set_library(library_throughput); +#else + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library(library_throughput); +#endif +} + +void FTN_STDCALL FTN_SET_LIBRARY(int KMP_DEREF arg) { +#ifdef KMP_STUB + __kmps_set_library(KMP_DEREF arg); +#else + enum library_type lib; + lib = (enum library_type)KMP_DEREF arg; + // __kmp_user_set_library initializes the library if needed + __kmp_user_set_library(lib); +#endif +} + +int FTN_STDCALL FTN_GET_LIBRARY(void) { +#ifdef KMP_STUB + return __kmps_get_library(); +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + return ((int)__kmp_library); +#endif +} + +void FTN_STDCALL FTN_SET_DISP_NUM_BUFFERS(int KMP_DEREF arg) { +#ifdef KMP_STUB + ; // empty routine +#else + // ignore after initialization because some teams have already + // allocated dispatch buffers + if (__kmp_init_serial == 0 && (KMP_DEREF arg) > 0) + __kmp_dispatch_num_buffers = KMP_DEREF arg; +#endif +} + +int FTN_STDCALL FTN_SET_AFFINITY(void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity(mask); +#endif +} + +int FTN_STDCALL FTN_GET_AFFINITY(void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity(mask); +#endif +} + +int FTN_STDCALL FTN_GET_AFFINITY_MAX_PROC(void) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return 0; +#else + // We really only NEED serial initialization here. + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity_max_proc(); +#endif +} + +void FTN_STDCALL FTN_CREATE_AFFINITY_MASK(void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + *mask = NULL; +#else + // We really only NEED serial initialization here. + kmp_affin_mask_t *mask_internals; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + mask_internals = __kmp_affinity_dispatch->allocate_mask(); + KMP_CPU_ZERO(mask_internals); + *mask = mask_internals; +#endif +} + +void FTN_STDCALL FTN_DESTROY_AFFINITY_MASK(void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED +// Nothing +#else + // We really only NEED serial initialization here. 
+ kmp_affin_mask_t *mask_internals; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (__kmp_env_consistency_check) { + if (*mask == NULL) { + KMP_FATAL(AffinityInvalidMask, "kmp_destroy_affinity_mask"); + } + } + mask_internals = (kmp_affin_mask_t *)(*mask); + __kmp_affinity_dispatch->deallocate_mask(mask_internals); + *mask = NULL; +#endif +} + +int FTN_STDCALL FTN_SET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_set_affinity_mask_proc(KMP_DEREF proc, mask); +#endif +} + +int FTN_STDCALL FTN_UNSET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_unset_affinity_mask_proc(KMP_DEREF proc, mask); +#endif +} + +int FTN_STDCALL FTN_GET_AFFINITY_MASK_PROC(int KMP_DEREF proc, void **mask) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_aux_get_affinity_mask_proc(KMP_DEREF proc, mask); +#endif +} /* ------------------------------------------------------------------------ */ /* sets the requested number of threads for the next parallel region */ +void FTN_STDCALL xexpand(FTN_SET_NUM_THREADS)(int KMP_DEREF arg) { +#ifdef KMP_STUB +// Nothing. +#else + __kmp_set_num_threads(KMP_DEREF arg, __kmp_entry_gtid()); +#endif +} -void FTN_STDCALL -xexpand(FTN_SET_NUM_THREADS)( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - // Nothing. - #else - __kmp_set_num_threads( KMP_DEREF arg, __kmp_entry_gtid() ); - #endif +/* returns the number of threads in current team */ +int FTN_STDCALL xexpand(FTN_GET_NUM_THREADS)(void) { +#ifdef KMP_STUB + return 1; +#else + // __kmpc_bound_num_threads initializes the library if needed + return __kmpc_bound_num_threads(NULL); +#endif } +int FTN_STDCALL xexpand(FTN_GET_MAX_THREADS)(void) { +#ifdef KMP_STUB + return 1; +#else + int gtid; + kmp_info_t *thread; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_entry_gtid(); + thread = __kmp_threads[gtid]; + // return thread -> th.th_team -> t.t_current_task[ + // thread->th.th_info.ds.ds_tid ] -> icvs.nproc; + return thread->th.th_current_task->td_icvs.nproc; +#endif +} -/* returns the number of threads in current team */ -int FTN_STDCALL -xexpand(FTN_GET_NUM_THREADS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - // __kmpc_bound_num_threads initializes the library if needed - return __kmpc_bound_num_threads(NULL); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_MAX_THREADS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - int gtid; - kmp_info_t *thread; - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[ gtid ]; - //return thread -> th.th_team -> t.t_current_task[ thread->th.th_info.ds.ds_tid ] -> icvs.nproc; - return thread -> th.th_current_task -> td_icvs.nproc; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_THREAD_NUM)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - int gtid; - - #if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD - gtid = __kmp_entry_gtid(); - #elif KMP_OS_WINDOWS - if (!__kmp_init_parallel || - (gtid = (int)((kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ))) == 0) { - // Either library isn't initialized or thread is not registered - // 0 is the correct TID in this case - return 0; - } - --gtid; // We keep (gtid+1) in TLS - #elif KMP_OS_LINUX - #ifdef KMP_TDATA_GTID - if ( __kmp_gtid_mode >= 3 ) { - if ((gtid = __kmp_gtid) == KMP_GTID_DNE) { - return 0; - } - } else { - #endif - if (!__kmp_init_parallel || - (gtid = (kmp_intptr_t)(pthread_getspecific( __kmp_gtid_threadprivate_key ))) == 0) { - return 0; - } - --gtid; - #ifdef KMP_TDATA_GTID - } - #endif - #else - #error Unknown or unsupported OS - #endif - - return __kmp_tid_from_gtid( gtid ); - #endif -} - -int FTN_STDCALL -FTN_GET_NUM_KNOWN_THREADS( void ) -{ - #ifdef KMP_STUB - return 1; - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - } - /* NOTE: this is not syncronized, so it can change at any moment */ - /* NOTE: this number also includes threads preallocated in hot-teams */ - return TCR_4(__kmp_nth); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_NUM_PROCS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - return __kmp_avail_proc; - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_NESTED)( int KMP_DEREF flag ) -{ - #ifdef KMP_STUB - __kmps_set_nested( KMP_DEREF flag ); - #else - kmp_info_t *thread; - /* For the thread-private internal controls implementation */ - thread = __kmp_entry_thread(); - __kmp_save_internal_controls( thread ); - set__nested( thread, ( (KMP_DEREF flag) ? TRUE : FALSE ) ); - #endif -} - - -int FTN_STDCALL -xexpand(FTN_GET_NESTED)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_nested(); - #else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__nested( thread ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_DYNAMIC)( int KMP_DEREF flag ) -{ - #ifdef KMP_STUB - __kmps_set_dynamic( KMP_DEREF flag ? TRUE : FALSE ); - #else - kmp_info_t *thread; - /* For the thread-private implementation of the internal controls */ - thread = __kmp_entry_thread(); - // !!! What if foreign thread calls it? - __kmp_save_internal_controls( thread ); - set__dynamic( thread, KMP_DEREF flag ? 
TRUE : FALSE ); - #endif -} - - -int FTN_STDCALL -xexpand(FTN_GET_DYNAMIC)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_dynamic(); - #else - kmp_info_t *thread; - thread = __kmp_entry_thread(); - return get__dynamic( thread ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_IN_PARALLEL)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - kmp_info_t *th = __kmp_entry_thread(); +int FTN_STDCALL xexpand(FTN_GET_THREAD_NUM)(void) { +#ifdef KMP_STUB + return 0; +#else + int gtid; + +#if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD + gtid = __kmp_entry_gtid(); +#elif KMP_OS_WINDOWS + if (!__kmp_init_parallel || + (gtid = (int)((kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key))) == + 0) { + // Either library isn't initialized or thread is not registered + // 0 is the correct TID in this case + return 0; + } + --gtid; // We keep (gtid+1) in TLS +#elif KMP_OS_LINUX +#ifdef KMP_TDATA_GTID + if (__kmp_gtid_mode >= 3) { + if ((gtid = __kmp_gtid) == KMP_GTID_DNE) { + return 0; + } + } else { +#endif + if (!__kmp_init_parallel || + (gtid = (kmp_intptr_t)( + pthread_getspecific(__kmp_gtid_threadprivate_key))) == 0) { + return 0; + } + --gtid; +#ifdef KMP_TDATA_GTID + } +#endif +#else +#error Unknown or unsupported OS +#endif + + return __kmp_tid_from_gtid(gtid); +#endif +} + +int FTN_STDCALL FTN_GET_NUM_KNOWN_THREADS(void) { +#ifdef KMP_STUB + return 1; +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + /* NOTE: this is not syncronized, so it can change at any moment */ + /* NOTE: this number also includes threads preallocated in hot-teams */ + return TCR_4(__kmp_nth); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_NUM_PROCS)(void) { +#ifdef KMP_STUB + return 1; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + return __kmp_avail_proc; +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_NESTED)(int KMP_DEREF flag) { +#ifdef KMP_STUB + __kmps_set_nested(KMP_DEREF flag); +#else + kmp_info_t *thread; + /* For the thread-private internal controls implementation */ + thread = __kmp_entry_thread(); + __kmp_save_internal_controls(thread); + set__nested(thread, ((KMP_DEREF flag) ? TRUE : FALSE)); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_NESTED)(void) { +#ifdef KMP_STUB + return __kmps_get_nested(); +#else + kmp_info_t *thread; + thread = __kmp_entry_thread(); + return get__nested(thread); +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_DYNAMIC)(int KMP_DEREF flag) { +#ifdef KMP_STUB + __kmps_set_dynamic(KMP_DEREF flag ? TRUE : FALSE); +#else + kmp_info_t *thread; + /* For the thread-private implementation of the internal controls */ + thread = __kmp_entry_thread(); + // !!! What if foreign thread calls it? + __kmp_save_internal_controls(thread); + set__dynamic(thread, KMP_DEREF flag ? TRUE : FALSE); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_DYNAMIC)(void) { +#ifdef KMP_STUB + return __kmps_get_dynamic(); +#else + kmp_info_t *thread; + thread = __kmp_entry_thread(); + return get__dynamic(thread); +#endif +} + +int FTN_STDCALL xexpand(FTN_IN_PARALLEL)(void) { +#ifdef KMP_STUB + return 0; +#else + kmp_info_t *th = __kmp_entry_thread(); #if OMP_40_ENABLED - if ( th->th.th_teams_microtask ) { - // AC: r_in_parallel does not work inside teams construct - // where real parallel is inactive, but all threads have same root, - // so setting it in one team affects other teams. - // The solution is to use per-team nesting level - return ( th->th.th_team->t.t_active_level ? 
1 : 0 ); - } - else + if (th->th.th_teams_microtask) { + // AC: r_in_parallel does not work inside teams construct where real + // parallel is inactive, but all threads have same root, so setting it in + // one team affects other teams. + // The solution is to use per-team nesting level + return (th->th.th_team->t.t_active_level ? 1 : 0); + } else #endif /* OMP_40_ENABLED */ - return ( th->th.th_root->r.r_in_parallel ? FTN_TRUE : FTN_FALSE ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_SCHEDULE)( kmp_sched_t KMP_DEREF kind, int KMP_DEREF modifier ) -{ - #ifdef KMP_STUB - __kmps_set_schedule( KMP_DEREF kind, KMP_DEREF modifier ); - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - __kmp_set_schedule( __kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_GET_SCHEDULE)( kmp_sched_t * kind, int * modifier ) -{ - #ifdef KMP_STUB - __kmps_get_schedule( kind, modifier ); - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - __kmp_get_schedule( __kmp_entry_gtid(), kind, modifier ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_MAX_ACTIVE_LEVELS)( int KMP_DEREF arg ) -{ - #ifdef KMP_STUB - // Nothing. - #else - /* TO DO */ - /* We want per-task implementation of this internal control */ - __kmp_set_max_active_levels( __kmp_entry_gtid(), KMP_DEREF arg ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_MAX_ACTIVE_LEVELS)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - /* TO DO */ - /* We want per-task implementation of this internal control */ - return __kmp_get_max_active_levels( __kmp_entry_gtid() ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_ACTIVE_LEVEL)( void ) -{ - #ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - return __kmp_entry_thread() -> th.th_team -> t.t_active_level; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_LEVEL)( void ) -{ - #ifdef KMP_STUB - return 0; // returns 0 if it is called from the sequential part of the program - #else - /* TO DO */ - /* For the per-task implementation of the internal controls */ - return __kmp_entry_thread() -> th.th_team -> t.t_level; - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_ANCESTOR_THREAD_NUM)( int KMP_DEREF level ) -{ - #ifdef KMP_STUB - return ( KMP_DEREF level ) ? ( -1 ) : ( 0 ); - #else - return __kmp_get_ancestor_thread_num( __kmp_entry_gtid(), KMP_DEREF level ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_TEAM_SIZE)( int KMP_DEREF level ) -{ - #ifdef KMP_STUB - return ( KMP_DEREF level ) ? ( -1 ) : ( 1 ); - #else - return __kmp_get_team_size( __kmp_entry_gtid(), KMP_DEREF level ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_THREAD_LIMIT)( void ) -{ - #ifdef KMP_STUB - return 1; // TO DO: clarify whether it returns 1 or 0? - #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - /* global ICV */ - return __kmp_max_nth; - #endif -} - -int FTN_STDCALL -xexpand(FTN_IN_FINAL)( void ) -{ - #ifdef KMP_STUB - return 0; // TO DO: clarify whether it returns 1 or 0? - #else - if ( ! TCR_4(__kmp_init_parallel) ) { - return 0; - } - return __kmp_entry_thread() -> th.th_current_task -> td_flags.final; - #endif + return (th->th.th_root->r.r_in_parallel ? 
FTN_TRUE : FTN_FALSE); +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_SCHEDULE)(kmp_sched_t KMP_DEREF kind, + int KMP_DEREF modifier) { +#ifdef KMP_STUB + __kmps_set_schedule(KMP_DEREF kind, KMP_DEREF modifier); +#else + /* TO DO: For the per-task implementation of the internal controls */ + __kmp_set_schedule(__kmp_entry_gtid(), KMP_DEREF kind, KMP_DEREF modifier); +#endif +} + +void FTN_STDCALL xexpand(FTN_GET_SCHEDULE)(kmp_sched_t *kind, int *modifier) { +#ifdef KMP_STUB + __kmps_get_schedule(kind, modifier); +#else + /* TO DO: For the per-task implementation of the internal controls */ + __kmp_get_schedule(__kmp_entry_gtid(), kind, modifier); +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_MAX_ACTIVE_LEVELS)(int KMP_DEREF arg) { +#ifdef KMP_STUB +// Nothing. +#else + /* TO DO: We want per-task implementation of this internal control */ + __kmp_set_max_active_levels(__kmp_entry_gtid(), KMP_DEREF arg); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_MAX_ACTIVE_LEVELS)(void) { +#ifdef KMP_STUB + return 0; +#else + /* TO DO: We want per-task implementation of this internal control */ + return __kmp_get_max_active_levels(__kmp_entry_gtid()); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_ACTIVE_LEVEL)(void) { +#ifdef KMP_STUB + return 0; // returns 0 if it is called from the sequential part of the program +#else + /* TO DO: For the per-task implementation of the internal controls */ + return __kmp_entry_thread()->th.th_team->t.t_active_level; +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_LEVEL)(void) { +#ifdef KMP_STUB + return 0; // returns 0 if it is called from the sequential part of the program +#else + /* TO DO: For the per-task implementation of the internal controls */ + return __kmp_entry_thread()->th.th_team->t.t_level; +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_ANCESTOR_THREAD_NUM)(int KMP_DEREF level) { +#ifdef KMP_STUB + return (KMP_DEREF level) ? (-1) : (0); +#else + return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), KMP_DEREF level); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_TEAM_SIZE)(int KMP_DEREF level) { +#ifdef KMP_STUB + return (KMP_DEREF level) ? (-1) : (1); +#else + return __kmp_get_team_size(__kmp_entry_gtid(), KMP_DEREF level); +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_THREAD_LIMIT)(void) { +#ifdef KMP_STUB + return 1; // TO DO: clarify whether it returns 1 or 0? +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; + /* global ICV */ + return __kmp_max_nth; +#endif +} + +int FTN_STDCALL xexpand(FTN_IN_FINAL)(void) { +#ifdef KMP_STUB + return 0; // TO DO: clarify whether it returns 1 or 0? +#else + if (!TCR_4(__kmp_init_parallel)) { + return 0; + } + return __kmp_entry_thread()->th.th_current_task->td_flags.final; +#endif } #if OMP_40_ENABLED -kmp_proc_bind_t FTN_STDCALL -xexpand(FTN_GET_PROC_BIND)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_proc_bind(); - #else - return get__proc_bind( __kmp_entry_thread() ); - #endif +kmp_proc_bind_t FTN_STDCALL xexpand(FTN_GET_PROC_BIND)(void) { +#ifdef KMP_STUB + return __kmps_get_proc_bind(); +#else + return get__proc_bind(__kmp_entry_thread()); +#endif } #if OMP_45_ENABLED -int FTN_STDCALL -FTN_GET_NUM_PLACES( void ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; - #else - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - return __kmp_affinity_num_masks; - #endif -} - -int FTN_STDCALL -FTN_GET_PLACE_NUM_PROCS( int place_num ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; - #else - int i; - int retval = 0; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - if ( place_num < 0 || place_num >= (int)__kmp_affinity_num_masks ) - return 0; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); - KMP_CPU_SET_ITERATE(i, mask) { - if ((! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(i, mask))) { - continue; - } - ++retval; - } - return retval; - #endif -} - -void FTN_STDCALL -FTN_GET_PLACE_PROC_IDS( int place_num, int *ids ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - // Nothing. - #else - int i,j; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return; - if ( place_num < 0 || place_num >= (int)__kmp_affinity_num_masks ) - return; - kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); - j = 0; - KMP_CPU_SET_ITERATE(i, mask) { - if ((! KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || - (!KMP_CPU_ISSET(i, mask))) { - continue; - } - ids[j++] = i; - } - #endif -} - -int FTN_STDCALL -FTN_GET_PLACE_NUM( void ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return -1; - #else - int gtid; - kmp_info_t *thread; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return -1; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - if ( thread->th.th_current_place < 0 ) - return -1; - return thread->th.th_current_place; - #endif -} - -int FTN_STDCALL -FTN_GET_PARTITION_NUM_PLACES( void ) -{ - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - return 0; - #else - int gtid, num_places, first_place, last_place; - kmp_info_t *thread; - if ( ! TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return 0; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - first_place = thread->th.th_first_place; - last_place = thread->th.th_last_place; - if ( first_place < 0 || last_place < 0 ) - return 0; - if ( first_place <= last_place ) - num_places = last_place - first_place + 1; - else - num_places = __kmp_affinity_num_masks - first_place + last_place + 1; - return num_places; - #endif -} - -void FTN_STDCALL -FTN_GET_PARTITION_PLACE_NUMS( int *place_nums ) { - #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED - // Nothing. - #else - int i, gtid, place_num, first_place, last_place, start, end; - kmp_info_t *thread; - if ( ! 
TCR_4(__kmp_init_middle) ) { - __kmp_middle_initialize(); - } - if (!KMP_AFFINITY_CAPABLE()) - return; - gtid = __kmp_entry_gtid(); - thread = __kmp_thread_from_gtid(gtid); - first_place = thread->th.th_first_place; - last_place = thread->th.th_last_place; - if ( first_place < 0 || last_place < 0 ) - return; - if ( first_place <= last_place ) { - start = first_place; - end = last_place; - } else { - start = last_place; - end = first_place; - } - for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { - place_nums[i] = place_num; - } - #endif -} -#endif - -int FTN_STDCALL -xexpand(FTN_GET_NUM_TEAMS)( void ) -{ - #ifdef KMP_STUB - return 1; - #else - kmp_info_t *thr = __kmp_entry_thread(); - if ( thr->th.th_teams_microtask ) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; - int ii = team->t.t_level; // the level of the teams construct - int dd = team -> t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT( ii >= tlevel ); - while( ii > level ) - { - for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team -> t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - if ( dd > 1 ) { - return 1; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_parent->t.t_nproc; - } - } else { - return 1; - } - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_TEAM_NUM)( void ) -{ - #ifdef KMP_STUB - return 0; - #else - kmp_info_t *thr = __kmp_entry_thread(); - if ( thr->th.th_teams_microtask ) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; // the level of the teams construct - int ii = team->t.t_level; - int dd = team -> t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT( ii >= tlevel ); - while( ii > level ) - { - for( dd = team -> t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team -> t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - if ( dd > 1 ) { - return 0; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_master_tid; - } - } else { - return 0; - } - #endif -} - -int FTN_STDCALL -xexpand(FTN_GET_DEFAULT_DEVICE)( void ) -{ - #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - return 0; - #else - return __kmp_entry_thread() -> th.th_current_task -> td_icvs.default_device; - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_DEFAULT_DEVICE)( int KMP_DEREF arg ) -{ - #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - // Nothing. 
- #else - __kmp_entry_thread() -> th.th_current_task -> td_icvs.default_device = KMP_DEREF arg; - #endif +int FTN_STDCALL FTN_GET_NUM_PLACES(void) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return 0; +#else + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return 0; + return __kmp_affinity_num_masks; +#endif } -#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - -int FTN_STDCALL -FTN_GET_NUM_DEVICES( void ) -{ +int FTN_STDCALL FTN_GET_PLACE_NUM_PROCS(int place_num) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return 0; +#else + int i; + int retval = 0; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return 0; + if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) return 0; + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + KMP_CPU_SET_ITERATE(i, mask) { + if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || + (!KMP_CPU_ISSET(i, mask))) { + continue; + } + ++retval; + } + return retval; +#endif } -#endif // KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) +void FTN_STDCALL FTN_GET_PLACE_PROC_IDS(int place_num, int *ids) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED +// Nothing. +#else + int i, j; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return; + if (place_num < 0 || place_num >= (int)__kmp_affinity_num_masks) + return; + kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks, place_num); + j = 0; + KMP_CPU_SET_ITERATE(i, mask) { + if ((!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) || + (!KMP_CPU_ISSET(i, mask))) { + continue; + } + ids[j++] = i; + } +#endif +} + +int FTN_STDCALL FTN_GET_PLACE_NUM(void) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return -1; +#else + int gtid; + kmp_info_t *thread; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return -1; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + if (thread->th.th_current_place < 0) + return -1; + return thread->th.th_current_place; +#endif +} -#if ! KMP_OS_LINUX +int FTN_STDCALL FTN_GET_PARTITION_NUM_PLACES(void) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED + return 0; +#else + int gtid, num_places, first_place, last_place; + kmp_info_t *thread; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return 0; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + first_place = thread->th.th_first_place; + last_place = thread->th.th_last_place; + if (first_place < 0 || last_place < 0) + return 0; + if (first_place <= last_place) + num_places = last_place - first_place + 1; + else + num_places = __kmp_affinity_num_masks - first_place + last_place + 1; + return num_places; +#endif +} -int FTN_STDCALL -xexpand(FTN_IS_INITIAL_DEVICE)( void ) -{ +void FTN_STDCALL FTN_GET_PARTITION_PLACE_NUMS(int *place_nums) { +#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED +// Nothing. 
+#else + int i, gtid, place_num, first_place, last_place, start, end; + kmp_info_t *thread; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + if (!KMP_AFFINITY_CAPABLE()) + return; + gtid = __kmp_entry_gtid(); + thread = __kmp_thread_from_gtid(gtid); + first_place = thread->th.th_first_place; + last_place = thread->th.th_last_place; + if (first_place < 0 || last_place < 0) + return; + if (first_place <= last_place) { + start = first_place; + end = last_place; + } else { + start = last_place; + end = first_place; + } + for (i = 0, place_num = start; place_num <= end; ++place_num, ++i) { + place_nums[i] = place_num; + } +#endif +} +#endif + +int FTN_STDCALL xexpand(FTN_GET_NUM_TEAMS)(void) { +#ifdef KMP_STUB + return 1; +#else + kmp_info_t *thr = __kmp_entry_thread(); + if (thr->th.th_teams_microtask) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; + int ii = team->t.t_level; // the level of the teams construct + int dd = team->t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT(ii >= tlevel); + while (ii > level) { + for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { + } + if (team->t.t_serialized && (!dd)) { + team = team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + ii--; + } + } + if (dd > 1) { + return 1; // teams region is serialized ( 1 team of 1 thread ). + } else { + return team->t.t_parent->t.t_nproc; + } + } else { return 1; + } +#endif } +int FTN_STDCALL xexpand(FTN_GET_TEAM_NUM)(void) { +#ifdef KMP_STUB + return 0; +#else + kmp_info_t *thr = __kmp_entry_thread(); + if (thr->th.th_teams_microtask) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; // the level of the teams construct + int ii = team->t.t_level; + int dd = team->t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT(ii >= tlevel); + while (ii > level) { + for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { + } + if (team->t.t_serialized && (!dd)) { + team = team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + ii--; + } + } + if (dd > 1) { + return 0; // teams region is serialized ( 1 team of 1 thread ). + } else { + return team->t.t_master_tid; + } + } else { + return 0; + } +#endif +} + +int FTN_STDCALL xexpand(FTN_GET_DEFAULT_DEVICE)(void) { +#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) + return 0; +#else + return __kmp_entry_thread()->th.th_current_task->td_icvs.default_device; +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_DEFAULT_DEVICE)(int KMP_DEREF arg) { +#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) +// Nothing. +#else + __kmp_entry_thread()->th.th_current_task->td_icvs.default_device = + KMP_DEREF arg; +#endif +} + +#if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) + +int FTN_STDCALL FTN_GET_NUM_DEVICES(void) { return 0; } + +#endif // KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) + +#if !KMP_OS_LINUX + +int FTN_STDCALL xexpand(FTN_IS_INITIAL_DEVICE)(void) { return 1; } + #else // This internal function is used when the entry from the offload library // is not found. 
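The hunk below leans on a weak declaration: when the offload library is linked it supplies _Offload_get_device_number, and omp_is_initial_device reports whether that call returns -1 (the host); when nothing defines the symbol, its address compares equal to null and libomp simply answers 1. A minimal sketch of the same probe pattern, with made-up names (backend_query, probe_backend) and assuming a GCC/Clang toolchain on an ELF target; it is an illustration of the mechanism, not part of the patch:

#include <stdio.h>

// Weak declaration: the address stays null unless some linked object defines it.
int backend_query(void) __attribute__((weak));

static int probe_backend(void) {
  if (backend_query)        // non-null only if a real definition was linked in
    return backend_query(); // defer to that implementation
  return -1;                // fallback when the symbol was never defined
}

int main(void) {
  printf("backend: %d\n", probe_backend());
  return 0;
}
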
-int _Offload_get_device_number( void ) __attribute__((weak)); +int _Offload_get_device_number(void) __attribute__((weak)); -int FTN_STDCALL -xexpand(FTN_IS_INITIAL_DEVICE)( void ) -{ - if( _Offload_get_device_number ) { - return _Offload_get_device_number() == -1; - } else { - return 1; - } +int FTN_STDCALL xexpand(FTN_IS_INITIAL_DEVICE)(void) { + if (_Offload_get_device_number) { + return _Offload_get_device_number() == -1; + } else { + return 1; + } } #endif // ! KMP_OS_LINUX @@ -964,57 +832,37 @@ xexpand(FTN_IS_INITIAL_DEVICE)( void ) #if OMP_45_ENABLED && defined(KMP_STUB) // OpenMP 4.5 entries for stubs library -int FTN_STDCALL -FTN_GET_INITIAL_DEVICE(void) -{ - return -1; -} +int FTN_STDCALL FTN_GET_INITIAL_DEVICE(void) { return -1; } // As all *target* functions are C-only parameters always passed by value -void * FTN_STDCALL -FTN_TARGET_ALLOC(size_t size, int device_num) -{ - return 0; -} +void *FTN_STDCALL FTN_TARGET_ALLOC(size_t size, int device_num) { return 0; } -void FTN_STDCALL -FTN_TARGET_FREE(void * device_ptr, int device_num) -{ -} +void FTN_STDCALL FTN_TARGET_FREE(void *device_ptr, int device_num) {} -int FTN_STDCALL -FTN_TARGET_IS_PRESENT(void * ptr, int device_num) -{ - return 0; -} +int FTN_STDCALL FTN_TARGET_IS_PRESENT(void *ptr, int device_num) { return 0; } -int FTN_STDCALL -FTN_TARGET_MEMCPY(void *dst, void *src, size_t length, size_t dst_offset, - size_t src_offset, int dst_device, int src_device) -{ - return -1; +int FTN_STDCALL FTN_TARGET_MEMCPY(void *dst, void *src, size_t length, + size_t dst_offset, size_t src_offset, + int dst_device, int src_device) { + return -1; } -int FTN_STDCALL -FTN_TARGET_MEMCPY_RECT(void *dst, void *src, size_t element_size, int num_dims, - const size_t *volume, const size_t *dst_offsets, - const size_t *src_offsets, const size_t *dst_dimensions, - const size_t *src_dimensions, int dst_device, int src_device) -{ - return -1; +int FTN_STDCALL FTN_TARGET_MEMCPY_RECT( + void *dst, void *src, size_t element_size, int num_dims, + const size_t *volume, const size_t *dst_offsets, const size_t *src_offsets, + const size_t *dst_dimensions, const size_t *src_dimensions, int dst_device, + int src_device) { + return -1; } -int FTN_STDCALL -FTN_TARGET_ASSOCIATE_PTR(void *host_ptr, void *device_ptr, size_t size, - size_t device_offset, int device_num) -{ - return -1; +int FTN_STDCALL FTN_TARGET_ASSOCIATE_PTR(void *host_ptr, void *device_ptr, + size_t size, size_t device_offset, + int device_num) { + return -1; } -int FTN_STDCALL -FTN_TARGET_DISASSOCIATE_PTR(void *host_ptr, int device_num) -{ - return -1; +int FTN_STDCALL FTN_TARGET_DISASSOCIATE_PTR(void *host_ptr, int device_num) { + return -1; } #endif // OMP_45_ENABLED && defined(KMP_STUB) @@ -1023,260 +871,222 @@ typedef enum { UNINIT = -1, UNLOCKED, LOCKED } kmp_stub_lock_t; #endif /* KMP_STUB */ #if KMP_USE_DYNAMIC_LOCK -void FTN_STDCALL -FTN_INIT_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); - #endif +void FTN_STDCALL FTN_INIT_LOCK_WITH_HINT(void **user_lock, + uintptr_t KMP_DEREF hint) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; +#else + __kmpc_init_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, + KMP_DEREF hint); +#endif } -void FTN_STDCALL -FTN_INIT_NEST_LOCK_WITH_HINT( void **user_lock, uintptr_t KMP_DEREF hint ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - 
__kmpc_init_nest_lock_with_hint( NULL, __kmp_entry_gtid(), user_lock, KMP_DEREF hint ); - #endif +void FTN_STDCALL FTN_INIT_NEST_LOCK_WITH_HINT(void **user_lock, + uintptr_t KMP_DEREF hint) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; +#else + __kmpc_init_nest_lock_with_hint(NULL, __kmp_entry_gtid(), user_lock, + KMP_DEREF hint); +#endif } #endif /* initialize the lock */ -void FTN_STDCALL -xexpand(FTN_INIT_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif +void FTN_STDCALL xexpand(FTN_INIT_LOCK)(void **user_lock) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; +#else + __kmpc_init_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif } /* initialize the lock */ -void FTN_STDCALL -xexpand(FTN_INIT_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_init_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_DESTROY_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; - #else - __kmpc_destroy_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_DESTROY_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - *((kmp_stub_lock_t *)user_lock) = UNINIT; - #else - __kmpc_destroy_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) != UNLOCKED ) { - // TODO: Issue an error. - }; // if - *((kmp_stub_lock_t *)user_lock) = LOCKED; - #else - __kmpc_set_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_SET_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - (*((int *)user_lock))++; - #else - __kmpc_set_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_UNSET_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { - // TODO: Issue an error. - }; // if - *((kmp_stub_lock_t *)user_lock) = UNLOCKED; - #else - __kmpc_unset_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -void FTN_STDCALL -xexpand(FTN_UNSET_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == UNLOCKED ) { - // TODO: Issue an error. - }; // if - (*((int *)user_lock))--; - #else - __kmpc_unset_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_TEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. - }; // if - if ( *((kmp_stub_lock_t *)user_lock) == LOCKED ) { - return 0; - }; // if - *((kmp_stub_lock_t *)user_lock) = LOCKED; - return 1; - #else - return __kmpc_test_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -int FTN_STDCALL -xexpand(FTN_TEST_NEST_LOCK)( void **user_lock ) -{ - #ifdef KMP_STUB - if ( *((kmp_stub_lock_t *)user_lock) == UNINIT ) { - // TODO: Issue an error. 
- }; // if - return ++(*((int *)user_lock)); - #else - return __kmpc_test_nest_lock( NULL, __kmp_entry_gtid(), user_lock ); - #endif -} - -double FTN_STDCALL -xexpand(FTN_GET_WTIME)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_wtime(); - #else - double data; - #if ! KMP_OS_LINUX - // We don't need library initialization to get the time on Linux* OS. - // The routine can be used to measure library initialization time on Linux* OS now. - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - #endif - __kmp_elapsed( & data ); - return data; - #endif -} - -double FTN_STDCALL -xexpand(FTN_GET_WTICK)( void ) -{ - #ifdef KMP_STUB - return __kmps_get_wtick(); - #else - double data; - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - __kmp_elapsed_tick( & data ); - return data; - #endif +void FTN_STDCALL xexpand(FTN_INIT_NEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; +#else + __kmpc_init_nest_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_DESTROY_LOCK)(void **user_lock) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNINIT; +#else + __kmpc_destroy_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_DESTROY_NEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + *((kmp_stub_lock_t *)user_lock) = UNINIT; +#else + __kmpc_destroy_nest_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. + }; // if + if (*((kmp_stub_lock_t *)user_lock) != UNLOCKED) { + // TODO: Issue an error. + }; // if + *((kmp_stub_lock_t *)user_lock) = LOCKED; +#else + __kmpc_set_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_SET_NEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. + }; // if + (*((int *)user_lock))++; +#else + __kmpc_set_nest_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_UNSET_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. + }; // if + if (*((kmp_stub_lock_t *)user_lock) == UNLOCKED) { + // TODO: Issue an error. + }; // if + *((kmp_stub_lock_t *)user_lock) = UNLOCKED; +#else + __kmpc_unset_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +void FTN_STDCALL xexpand(FTN_UNSET_NEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. + }; // if + if (*((kmp_stub_lock_t *)user_lock) == UNLOCKED) { + // TODO: Issue an error. + }; // if + (*((int *)user_lock))--; +#else + __kmpc_unset_nest_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +int FTN_STDCALL xexpand(FTN_TEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. + }; // if + if (*((kmp_stub_lock_t *)user_lock) == LOCKED) { + return 0; + }; // if + *((kmp_stub_lock_t *)user_lock) = LOCKED; + return 1; +#else + return __kmpc_test_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +int FTN_STDCALL xexpand(FTN_TEST_NEST_LOCK)(void **user_lock) { +#ifdef KMP_STUB + if (*((kmp_stub_lock_t *)user_lock) == UNINIT) { + // TODO: Issue an error. 
+ }; // if + return ++(*((int *)user_lock)); +#else + return __kmpc_test_nest_lock(NULL, __kmp_entry_gtid(), user_lock); +#endif +} + +double FTN_STDCALL xexpand(FTN_GET_WTIME)(void) { +#ifdef KMP_STUB + return __kmps_get_wtime(); +#else + double data; +#if !KMP_OS_LINUX + // We don't need library initialization to get the time on Linux* OS. The + // routine can be used to measure library initialization time on Linux* OS now + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; +#endif + __kmp_elapsed(&data); + return data; +#endif +} + +double FTN_STDCALL xexpand(FTN_GET_WTICK)(void) { +#ifdef KMP_STUB + return __kmps_get_wtick(); +#else + double data; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; + __kmp_elapsed_tick(&data); + return data; +#endif } /* ------------------------------------------------------------------------ */ -void * FTN_STDCALL -FTN_MALLOC( size_t KMP_DEREF size ) -{ - // kmpc_malloc initializes the library if needed - return kmpc_malloc( KMP_DEREF size ); +void *FTN_STDCALL FTN_MALLOC(size_t KMP_DEREF size) { + // kmpc_malloc initializes the library if needed + return kmpc_malloc(KMP_DEREF size); } -void * FTN_STDCALL -FTN_ALIGNED_MALLOC( size_t KMP_DEREF size, size_t KMP_DEREF alignment ) -{ - // kmpc_aligned_malloc initializes the library if needed - return kmpc_aligned_malloc( KMP_DEREF size, KMP_DEREF alignment ); +void *FTN_STDCALL FTN_ALIGNED_MALLOC(size_t KMP_DEREF size, + size_t KMP_DEREF alignment) { + // kmpc_aligned_malloc initializes the library if needed + return kmpc_aligned_malloc(KMP_DEREF size, KMP_DEREF alignment); } -void * FTN_STDCALL -FTN_CALLOC( size_t KMP_DEREF nelem, size_t KMP_DEREF elsize ) -{ - // kmpc_calloc initializes the library if needed - return kmpc_calloc( KMP_DEREF nelem, KMP_DEREF elsize ); +void *FTN_STDCALL FTN_CALLOC(size_t KMP_DEREF nelem, size_t KMP_DEREF elsize) { + // kmpc_calloc initializes the library if needed + return kmpc_calloc(KMP_DEREF nelem, KMP_DEREF elsize); } -void * FTN_STDCALL -FTN_REALLOC( void * KMP_DEREF ptr, size_t KMP_DEREF size ) -{ - // kmpc_realloc initializes the library if needed - return kmpc_realloc( KMP_DEREF ptr, KMP_DEREF size ); +void *FTN_STDCALL FTN_REALLOC(void *KMP_DEREF ptr, size_t KMP_DEREF size) { + // kmpc_realloc initializes the library if needed + return kmpc_realloc(KMP_DEREF ptr, KMP_DEREF size); } -void FTN_STDCALL -FTN_FREE( void * KMP_DEREF ptr ) -{ - // does nothing if the library is not initialized - kmpc_free( KMP_DEREF ptr ); +void FTN_STDCALL FTN_FREE(void *KMP_DEREF ptr) { + // does nothing if the library is not initialized + kmpc_free(KMP_DEREF ptr); } -void FTN_STDCALL -FTN_SET_WARNINGS_ON( void ) -{ - #ifndef KMP_STUB - __kmp_generate_warnings = kmp_warnings_explicit; - #endif +void FTN_STDCALL FTN_SET_WARNINGS_ON(void) { +#ifndef KMP_STUB + __kmp_generate_warnings = kmp_warnings_explicit; +#endif } -void FTN_STDCALL -FTN_SET_WARNINGS_OFF( void ) -{ - #ifndef KMP_STUB - __kmp_generate_warnings = FALSE; - #endif +void FTN_STDCALL FTN_SET_WARNINGS_OFF(void) { +#ifndef KMP_STUB + __kmp_generate_warnings = FALSE; +#endif } -void FTN_STDCALL -FTN_SET_DEFAULTS( char const * str - #ifndef PASS_ARGS_BY_VALUE - , int len - #endif -) -{ - #ifndef KMP_STUB - #ifdef PASS_ARGS_BY_VALUE - int len = (int)KMP_STRLEN( str ); - #endif - __kmp_aux_set_defaults( str, len ); - #endif +void FTN_STDCALL FTN_SET_DEFAULTS(char const *str +#ifndef PASS_ARGS_BY_VALUE + , + int len +#endif + ) { +#ifndef KMP_STUB +#ifdef PASS_ARGS_BY_VALUE + int len = (int)KMP_STRLEN(str); 
+#endif + __kmp_aux_set_defaults(str, len); +#endif } /* ------------------------------------------------------------------------ */ @@ -1284,25 +1094,23 @@ FTN_SET_DEFAULTS( char const * str #if OMP_40_ENABLED /* returns the status of cancellation */ -int FTN_STDCALL -xexpand(FTN_GET_CANCELLATION)(void) { +int FTN_STDCALL xexpand(FTN_GET_CANCELLATION)(void) { #ifdef KMP_STUB - return 0 /* false */; + return 0 /* false */; #else - // initialize the library if needed - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - } - return __kmp_omp_cancellation; + // initialize the library if needed + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + return __kmp_omp_cancellation; #endif } -int FTN_STDCALL -FTN_GET_CANCELLATION_STATUS(int cancel_kind) { +int FTN_STDCALL FTN_GET_CANCELLATION_STATUS(int cancel_kind) { #ifdef KMP_STUB - return 0 /* false */; + return 0 /* false */; #else - return __kmp_get_cancellation_status(cancel_kind); + return __kmp_get_cancellation_status(cancel_kind); #endif } @@ -1310,16 +1118,14 @@ FTN_GET_CANCELLATION_STATUS(int cancel_kind) { #if OMP_45_ENABLED /* returns the maximum allowed task priority */ -int FTN_STDCALL -FTN_GET_MAX_TASK_PRIORITY( void ) -{ +int FTN_STDCALL FTN_GET_MAX_TASK_PRIORITY(void) { #ifdef KMP_STUB - return 0; + return 0; #else - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - } - return __kmp_max_task_priority; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + return __kmp_max_task_priority; #endif } #endif @@ -1327,68 +1133,70 @@ FTN_GET_MAX_TASK_PRIORITY( void ) // GCC compatibility (versioned symbols) #ifdef KMP_USE_VERSION_SYMBOLS -/* - These following sections create function aliases (dummy symbols) for the omp_* routines. - These aliases will then be versioned according to how libgomp ``versions'' its - symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also retaining the - default version which libomp uses: VERSION (defined in exports_so.txt) - If you want to see the versioned symbols for libgomp.so.1 then just type: - - objdump -T /path/to/libgomp.so.1 | grep omp_ - - Example: - Step 1) Create __kmp_api_omp_set_num_threads_10_alias - which is alias of __kmp_api_omp_set_num_threads - Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: omp_set_num_threads@OMP_1.0 - Step 2B) Set __kmp_api_omp_set_num_threads to default version : omp_set_num_threads@@VERSION +/* These following sections create function aliases (dummy symbols) for the + omp_* routines. These aliases will then be versioned according to how + libgomp ``versions'' its symbols (OMP_1.0, OMP_2.0, OMP_3.0, ...) while also + retaining the default version which libomp uses: VERSION (defined in + exports_so.txt). 
If you want to see the versioned symbols for libgomp.so.1 + then just type: + + objdump -T /path/to/libgomp.so.1 | grep omp_ + + Example: + Step 1) Create __kmp_api_omp_set_num_threads_10_alias which is alias of + __kmp_api_omp_set_num_threads + Step 2) Set __kmp_api_omp_set_num_threads_10_alias to version: + omp_set_num_threads@OMP_1.0 + Step 2B) Set __kmp_api_omp_set_num_threads to default version: + omp_set_num_threads@@VERSION */ // OMP_1.0 aliases -xaliasify(FTN_SET_NUM_THREADS, 10); -xaliasify(FTN_GET_NUM_THREADS, 10); -xaliasify(FTN_GET_MAX_THREADS, 10); -xaliasify(FTN_GET_THREAD_NUM, 10); -xaliasify(FTN_GET_NUM_PROCS, 10); -xaliasify(FTN_IN_PARALLEL, 10); -xaliasify(FTN_SET_DYNAMIC, 10); -xaliasify(FTN_GET_DYNAMIC, 10); -xaliasify(FTN_SET_NESTED, 10); -xaliasify(FTN_GET_NESTED, 10); -xaliasify(FTN_INIT_LOCK, 10); -xaliasify(FTN_INIT_NEST_LOCK, 10); -xaliasify(FTN_DESTROY_LOCK, 10); +xaliasify(FTN_SET_NUM_THREADS, 10); +xaliasify(FTN_GET_NUM_THREADS, 10); +xaliasify(FTN_GET_MAX_THREADS, 10); +xaliasify(FTN_GET_THREAD_NUM, 10); +xaliasify(FTN_GET_NUM_PROCS, 10); +xaliasify(FTN_IN_PARALLEL, 10); +xaliasify(FTN_SET_DYNAMIC, 10); +xaliasify(FTN_GET_DYNAMIC, 10); +xaliasify(FTN_SET_NESTED, 10); +xaliasify(FTN_GET_NESTED, 10); +xaliasify(FTN_INIT_LOCK, 10); +xaliasify(FTN_INIT_NEST_LOCK, 10); +xaliasify(FTN_DESTROY_LOCK, 10); xaliasify(FTN_DESTROY_NEST_LOCK, 10); -xaliasify(FTN_SET_LOCK, 10); -xaliasify(FTN_SET_NEST_LOCK, 10); -xaliasify(FTN_UNSET_LOCK, 10); -xaliasify(FTN_UNSET_NEST_LOCK, 10); -xaliasify(FTN_TEST_LOCK, 10); -xaliasify(FTN_TEST_NEST_LOCK, 10); +xaliasify(FTN_SET_LOCK, 10); +xaliasify(FTN_SET_NEST_LOCK, 10); +xaliasify(FTN_UNSET_LOCK, 10); +xaliasify(FTN_UNSET_NEST_LOCK, 10); +xaliasify(FTN_TEST_LOCK, 10); +xaliasify(FTN_TEST_NEST_LOCK, 10); // OMP_2.0 aliases xaliasify(FTN_GET_WTICK, 20); xaliasify(FTN_GET_WTIME, 20); // OMP_3.0 aliases -xaliasify(FTN_SET_SCHEDULE, 30); -xaliasify(FTN_GET_SCHEDULE, 30); -xaliasify(FTN_GET_THREAD_LIMIT, 30); -xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30); -xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30); -xaliasify(FTN_GET_LEVEL, 30); +xaliasify(FTN_SET_SCHEDULE, 30); +xaliasify(FTN_GET_SCHEDULE, 30); +xaliasify(FTN_GET_THREAD_LIMIT, 30); +xaliasify(FTN_SET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_MAX_ACTIVE_LEVELS, 30); +xaliasify(FTN_GET_LEVEL, 30); xaliasify(FTN_GET_ANCESTOR_THREAD_NUM, 30); -xaliasify(FTN_GET_TEAM_SIZE, 30); -xaliasify(FTN_GET_ACTIVE_LEVEL, 30); -xaliasify(FTN_INIT_LOCK, 30); -xaliasify(FTN_INIT_NEST_LOCK, 30); -xaliasify(FTN_DESTROY_LOCK, 30); -xaliasify(FTN_DESTROY_NEST_LOCK, 30); -xaliasify(FTN_SET_LOCK, 30); -xaliasify(FTN_SET_NEST_LOCK, 30); -xaliasify(FTN_UNSET_LOCK, 30); -xaliasify(FTN_UNSET_NEST_LOCK, 30); -xaliasify(FTN_TEST_LOCK, 30); -xaliasify(FTN_TEST_NEST_LOCK, 30); +xaliasify(FTN_GET_TEAM_SIZE, 30); +xaliasify(FTN_GET_ACTIVE_LEVEL, 30); +xaliasify(FTN_INIT_LOCK, 30); +xaliasify(FTN_INIT_NEST_LOCK, 30); +xaliasify(FTN_DESTROY_LOCK, 30); +xaliasify(FTN_DESTROY_NEST_LOCK, 30); +xaliasify(FTN_SET_LOCK, 30); +xaliasify(FTN_SET_NEST_LOCK, 30); +xaliasify(FTN_UNSET_LOCK, 30); +xaliasify(FTN_UNSET_NEST_LOCK, 30); +xaliasify(FTN_TEST_LOCK, 30); +xaliasify(FTN_TEST_NEST_LOCK, 30); // OMP_3.1 aliases xaliasify(FTN_IN_FINAL, 31); @@ -1413,66 +1221,66 @@ xaliasify(FTN_IS_INITIAL_DEVICE, 40); #endif // OMP_1.0 versioned symbols -xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); -xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); 
-xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); -xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0"); -xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0"); -xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0"); -xversionify(FTN_SET_NESTED, 10, "OMP_1.0"); -xversionify(FTN_GET_NESTED, 10, "OMP_1.0"); -xversionify(FTN_INIT_LOCK, 10, "OMP_1.0"); -xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_MAX_THREADS, 10, "OMP_1.0"); +xversionify(FTN_GET_THREAD_NUM, 10, "OMP_1.0"); +xversionify(FTN_GET_NUM_PROCS, 10, "OMP_1.0"); +xversionify(FTN_IN_PARALLEL, 10, "OMP_1.0"); +xversionify(FTN_SET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_GET_DYNAMIC, 10, "OMP_1.0"); +xversionify(FTN_SET_NESTED, 10, "OMP_1.0"); +xversionify(FTN_GET_NESTED, 10, "OMP_1.0"); +xversionify(FTN_INIT_LOCK, 10, "OMP_1.0"); +xversionify(FTN_INIT_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_DESTROY_LOCK, 10, "OMP_1.0"); xversionify(FTN_DESTROY_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_SET_LOCK, 10, "OMP_1.0"); -xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0"); -xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_TEST_LOCK, 10, "OMP_1.0"); -xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_SET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_LOCK, 10, "OMP_1.0"); +xversionify(FTN_UNSET_NEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_TEST_LOCK, 10, "OMP_1.0"); +xversionify(FTN_TEST_NEST_LOCK, 10, "OMP_1.0"); // OMP_2.0 versioned symbols -xversionify(FTN_GET_WTICK, 20, "OMP_2.0"); -xversionify(FTN_GET_WTIME, 20, "OMP_2.0"); +xversionify(FTN_GET_WTICK, 20, "OMP_2.0"); +xversionify(FTN_GET_WTIME, 20, "OMP_2.0"); // OMP_3.0 versioned symbols -xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0"); -xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0"); -xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); -xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); -xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_SET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_SCHEDULE, 30, "OMP_3.0"); +xversionify(FTN_GET_THREAD_LIMIT, 30, "OMP_3.0"); +xversionify(FTN_SET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); +xversionify(FTN_GET_MAX_ACTIVE_LEVELS, 30, "OMP_3.0"); xversionify(FTN_GET_ANCESTOR_THREAD_NUM, 30, "OMP_3.0"); -xversionify(FTN_GET_LEVEL, 30, "OMP_3.0"); -xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); -xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); +xversionify(FTN_GET_LEVEL, 30, "OMP_3.0"); +xversionify(FTN_GET_TEAM_SIZE, 30, "OMP_3.0"); +xversionify(FTN_GET_ACTIVE_LEVEL, 30, "OMP_3.0"); // the lock routines have a 1.0 and 3.0 version -xversionify(FTN_INIT_LOCK, 30, "OMP_3.0"); -xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0"); +xversionify(FTN_INIT_LOCK, 30, "OMP_3.0"); +xversionify(FTN_INIT_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_DESTROY_LOCK, 30, "OMP_3.0"); xversionify(FTN_DESTROY_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_SET_LOCK, 30, "OMP_3.0"); -xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0"); -xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_TEST_LOCK, 30, "OMP_3.0"); -xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_LOCK, 30, "OMP_3.0"); +xversionify(FTN_SET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_UNSET_LOCK, 30, "OMP_3.0"); 
+xversionify(FTN_UNSET_NEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_LOCK, 30, "OMP_3.0"); +xversionify(FTN_TEST_NEST_LOCK, 30, "OMP_3.0"); // OMP_3.1 versioned symbol -xversionify(FTN_IN_FINAL, 31, "OMP_3.1"); +xversionify(FTN_IN_FINAL, 31, "OMP_3.1"); #if OMP_40_ENABLED // OMP_4.0 versioned symbols -xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0"); -xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); -xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); -xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0"); +xversionify(FTN_GET_PROC_BIND, 40, "OMP_4.0"); +xversionify(FTN_GET_NUM_TEAMS, 40, "OMP_4.0"); +xversionify(FTN_GET_TEAM_NUM, 40, "OMP_4.0"); +xversionify(FTN_GET_CANCELLATION, 40, "OMP_4.0"); xversionify(FTN_GET_DEFAULT_DEVICE, 40, "OMP_4.0"); xversionify(FTN_SET_DEFAULT_DEVICE, 40, "OMP_4.0"); -xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); +xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); #endif /* OMP_40_ENABLED */ #if OMP_45_ENABLED @@ -1486,7 +1294,7 @@ xversionify(FTN_IS_INITIAL_DEVICE, 40, "OMP_4.0"); #endif // KMP_USE_VERSION_SYMBOLS #ifdef __cplusplus - } //extern "C" +} // extern "C" #endif // __cplusplus // end of file // diff --git a/openmp/runtime/src/kmp_ftn_extra.cpp b/openmp/runtime/src/kmp_ftn_extra.cpp index 8acd373..a3f9875 100644 --- a/openmp/runtime/src/kmp_ftn_extra.cpp +++ b/openmp/runtime/src/kmp_ftn_extra.cpp @@ -17,18 +17,19 @@ #include "kmp_affinity.h" #if KMP_OS_WINDOWS -# define KMP_FTN_ENTRIES KMP_FTN_PLAIN +#define KMP_FTN_ENTRIES KMP_FTN_PLAIN #elif KMP_OS_UNIX -# define KMP_FTN_ENTRIES KMP_FTN_APPEND +#define KMP_FTN_ENTRIES KMP_FTN_APPEND #endif // Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftnextra[] = KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " +char const __kmp_version_ftnextra[] = + KMP_VERSION_PREFIX "Fortran \"extra\" OMP support: " #ifdef KMP_FTN_ENTRIES - "yes"; -# define FTN_STDCALL /* nothing to do */ -# include "kmp_ftn_os.h" -# include "kmp_ftn_entry.h" + "yes"; +#define FTN_STDCALL /* nothing to do */ +#include "kmp_ftn_os.h" +#include "kmp_ftn_entry.h" #else - "no"; + "no"; #endif /* KMP_FTN_ENTRIES */ diff --git a/openmp/runtime/src/kmp_ftn_os.h b/openmp/runtime/src/kmp_ftn_os.h index 2698a35..a204b2f 100644 --- a/openmp/runtime/src/kmp_ftn_os.h +++ b/openmp/runtime/src/kmp_ftn_os.h @@ -16,123 +16,123 @@ #ifndef KMP_FTN_OS_H #define KMP_FTN_OS_H -// KMP_FNT_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, KMP_FTN_UAPPEND. - +// KMP_FNT_ENTRIES may be one of: KMP_FTN_PLAIN, KMP_FTN_UPPER, KMP_FTN_APPEND, +// KMP_FTN_UAPPEND. 
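For orientation, the four schemes differ only in letter case and a trailing underscore; the concrete mappings are the #define tables that follow, e.g. FTN_GET_THREAD_NUM resolves to omp_get_thread_num (KMP_FTN_PLAIN), omp_get_thread_num_ (KMP_FTN_APPEND), OMP_GET_THREAD_NUM (KMP_FTN_UPPER), or OMP_GET_THREAD_NUM_ (KMP_FTN_UAPPEND). A small sketch of the underlying idea using hypothetical names (SCHEME, MANGLE, omp_style_entry); the runtime itself does not use these macros but instead sets KMP_FTN_ENTRIES per platform, as kmp_ftn_extra.cpp above does, and pulls in the tables below:

#include <stdio.h>

#define SCHEME_PLAIN 1
#define SCHEME_APPEND 2
#define SCHEME SCHEME_APPEND /* pretend we are on Unix, as in kmp_ftn_extra.cpp */

#if SCHEME == SCHEME_PLAIN
#define MANGLE(name) name     /* plain lowercase, e.g. omp_get_thread_num  */
#elif SCHEME == SCHEME_APPEND
#define MANGLE(name) name##_  /* lowercase + underscore, e.g. omp_get_thread_num_ */
#endif

/* With SCHEME_APPEND this defines omp_style_entry_(). */
int MANGLE(omp_style_entry)(void) { return 42; }

int main(void) {
  printf("%d\n", MANGLE(omp_style_entry)());
  return 0;
}
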
/* -------------------------- External definitions ------------------------ */ #if KMP_FTN_ENTRIES == KMP_FTN_PLAIN - #define FTN_SET_STACKSIZE kmp_set_stacksize - #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s - #define FTN_GET_STACKSIZE kmp_get_stacksize - #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s - #define FTN_SET_BLOCKTIME kmp_set_blocktime - #define FTN_GET_BLOCKTIME kmp_get_blocktime - #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial - #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround - #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput - #define FTN_SET_LIBRARY kmp_set_library - #define FTN_GET_LIBRARY kmp_get_library - #define FTN_SET_DEFAULTS kmp_set_defaults - #define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers - #define FTN_SET_AFFINITY kmp_set_affinity - #define FTN_GET_AFFINITY kmp_get_affinity - #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc - #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask - #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask - #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc - #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc - #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc - - #define FTN_MALLOC kmp_malloc - #define FTN_ALIGNED_MALLOC kmp_aligned_malloc - #define FTN_CALLOC kmp_calloc - #define FTN_REALLOC kmp_realloc - #define FTN_FREE kmp_free - - #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads - - #define FTN_SET_NUM_THREADS omp_set_num_threads - #define FTN_GET_NUM_THREADS omp_get_num_threads - #define FTN_GET_MAX_THREADS omp_get_max_threads - #define FTN_GET_THREAD_NUM omp_get_thread_num - #define FTN_GET_NUM_PROCS omp_get_num_procs - #define FTN_SET_DYNAMIC omp_set_dynamic - #define FTN_GET_DYNAMIC omp_get_dynamic - #define FTN_SET_NESTED omp_set_nested - #define FTN_GET_NESTED omp_get_nested - #define FTN_IN_PARALLEL omp_in_parallel - #define FTN_GET_THREAD_LIMIT omp_get_thread_limit - #define FTN_SET_SCHEDULE omp_set_schedule - #define FTN_GET_SCHEDULE omp_get_schedule - #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels - #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels - #define FTN_GET_ACTIVE_LEVEL omp_get_active_level - #define FTN_GET_LEVEL omp_get_level - #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num - #define FTN_GET_TEAM_SIZE omp_get_team_size - #define FTN_IN_FINAL omp_in_final +#define FTN_SET_STACKSIZE kmp_set_stacksize +#define FTN_SET_STACKSIZE_S kmp_set_stacksize_s +#define FTN_GET_STACKSIZE kmp_get_stacksize +#define FTN_GET_STACKSIZE_S kmp_get_stacksize_s +#define FTN_SET_BLOCKTIME kmp_set_blocktime +#define FTN_GET_BLOCKTIME kmp_get_blocktime +#define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial +#define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround +#define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput +#define FTN_SET_LIBRARY kmp_set_library +#define FTN_GET_LIBRARY kmp_get_library +#define FTN_SET_DEFAULTS kmp_set_defaults +#define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers +#define FTN_SET_AFFINITY kmp_set_affinity +#define FTN_GET_AFFINITY kmp_get_affinity +#define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc +#define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask +#define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask +#define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc +#define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc +#define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc + +#define 
FTN_MALLOC kmp_malloc +#define FTN_ALIGNED_MALLOC kmp_aligned_malloc +#define FTN_CALLOC kmp_calloc +#define FTN_REALLOC kmp_realloc +#define FTN_FREE kmp_free + +#define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads + +#define FTN_SET_NUM_THREADS omp_set_num_threads +#define FTN_GET_NUM_THREADS omp_get_num_threads +#define FTN_GET_MAX_THREADS omp_get_max_threads +#define FTN_GET_THREAD_NUM omp_get_thread_num +#define FTN_GET_NUM_PROCS omp_get_num_procs +#define FTN_SET_DYNAMIC omp_set_dynamic +#define FTN_GET_DYNAMIC omp_get_dynamic +#define FTN_SET_NESTED omp_set_nested +#define FTN_GET_NESTED omp_get_nested +#define FTN_IN_PARALLEL omp_in_parallel +#define FTN_GET_THREAD_LIMIT omp_get_thread_limit +#define FTN_SET_SCHEDULE omp_set_schedule +#define FTN_GET_SCHEDULE omp_get_schedule +#define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels +#define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels +#define FTN_GET_ACTIVE_LEVEL omp_get_active_level +#define FTN_GET_LEVEL omp_get_level +#define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num +#define FTN_GET_TEAM_SIZE omp_get_team_size +#define FTN_IN_FINAL omp_in_final // #define FTN_SET_PROC_BIND omp_set_proc_bind - #define FTN_GET_PROC_BIND omp_get_proc_bind +#define FTN_GET_PROC_BIND omp_get_proc_bind // #define FTN_CURR_PROC_BIND omp_curr_proc_bind #if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS omp_get_num_teams - #define FTN_GET_TEAM_NUM omp_get_team_num +#define FTN_GET_NUM_TEAMS omp_get_num_teams +#define FTN_GET_TEAM_NUM omp_get_team_num #endif - #define FTN_INIT_LOCK omp_init_lock +#define FTN_INIT_LOCK omp_init_lock #if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint - #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint +#define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint +#define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint #endif - #define FTN_DESTROY_LOCK omp_destroy_lock - #define FTN_SET_LOCK omp_set_lock - #define FTN_UNSET_LOCK omp_unset_lock - #define FTN_TEST_LOCK omp_test_lock - #define FTN_INIT_NEST_LOCK omp_init_nest_lock - #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock - #define FTN_SET_NEST_LOCK omp_set_nest_lock - #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock - #define FTN_TEST_NEST_LOCK omp_test_nest_lock - - #define FTN_SET_WARNINGS_ON kmp_set_warnings_on - #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off - - #define FTN_GET_WTIME omp_get_wtime - #define FTN_GET_WTICK omp_get_wtick +#define FTN_DESTROY_LOCK omp_destroy_lock +#define FTN_SET_LOCK omp_set_lock +#define FTN_UNSET_LOCK omp_unset_lock +#define FTN_TEST_LOCK omp_test_lock +#define FTN_INIT_NEST_LOCK omp_init_nest_lock +#define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock +#define FTN_SET_NEST_LOCK omp_set_nest_lock +#define FTN_UNSET_NEST_LOCK omp_unset_nest_lock +#define FTN_TEST_NEST_LOCK omp_test_nest_lock + +#define FTN_SET_WARNINGS_ON kmp_set_warnings_on +#define FTN_SET_WARNINGS_OFF kmp_set_warnings_off + +#define FTN_GET_WTIME omp_get_wtime +#define FTN_GET_WTICK omp_get_wtick #if OMP_40_ENABLED #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - #define FTN_GET_NUM_DEVICES omp_get_num_devices +#define FTN_GET_NUM_DEVICES omp_get_num_devices #endif - #define FTN_GET_DEFAULT_DEVICE omp_get_default_device - #define FTN_SET_DEFAULT_DEVICE omp_set_default_device - #define FTN_IS_INITIAL_DEVICE omp_is_initial_device +#define FTN_GET_DEFAULT_DEVICE omp_get_default_device +#define FTN_SET_DEFAULT_DEVICE omp_set_default_device +#define FTN_IS_INITIAL_DEVICE 
omp_is_initial_device #endif #if OMP_40_ENABLED - #define FTN_GET_CANCELLATION omp_get_cancellation - #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status +#define FTN_GET_CANCELLATION omp_get_cancellation +#define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status #endif #if OMP_45_ENABLED - #define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority - #define FTN_GET_NUM_PLACES omp_get_num_places - #define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs - #define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids - #define FTN_GET_PLACE_NUM omp_get_place_num - #define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places - #define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums -# ifdef KMP_STUB - #define FTN_GET_INITIAL_DEVICE omp_get_initial_device - #define FTN_TARGET_ALLOC omp_target_alloc - #define FTN_TARGET_FREE omp_target_free - #define FTN_TARGET_IS_PRESENT omp_target_is_present - #define FTN_TARGET_MEMCPY omp_target_memcpy - #define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect - #define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr - #define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr -# endif +#define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority +#define FTN_GET_NUM_PLACES omp_get_num_places +#define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs +#define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids +#define FTN_GET_PLACE_NUM omp_get_place_num +#define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places +#define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums +#ifdef KMP_STUB +#define FTN_GET_INITIAL_DEVICE omp_get_initial_device +#define FTN_TARGET_ALLOC omp_target_alloc +#define FTN_TARGET_FREE omp_target_free +#define FTN_TARGET_IS_PRESENT omp_target_is_present +#define FTN_TARGET_MEMCPY omp_target_memcpy +#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect +#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr +#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr +#endif #endif #endif /* KMP_FTN_PLAIN */ @@ -141,117 +141,117 @@ #if KMP_FTN_ENTRIES == KMP_FTN_APPEND - #define FTN_SET_STACKSIZE kmp_set_stacksize_ - #define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_ - #define FTN_GET_STACKSIZE kmp_get_stacksize_ - #define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_ - #define FTN_SET_BLOCKTIME kmp_set_blocktime_ - #define FTN_GET_BLOCKTIME kmp_get_blocktime_ - #define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_ - #define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_ - #define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_ - #define FTN_SET_LIBRARY kmp_set_library_ - #define FTN_GET_LIBRARY kmp_get_library_ - #define FTN_SET_DEFAULTS kmp_set_defaults_ - #define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers_ - #define FTN_SET_AFFINITY kmp_set_affinity_ - #define FTN_GET_AFFINITY kmp_get_affinity_ - #define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_ - #define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_ - #define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_ - #define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_ - #define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_ - #define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_ - - #define FTN_MALLOC kmp_malloc_ - #define FTN_ALIGNED_MALLOC kmp_aligned_malloc_ - #define FTN_CALLOC kmp_calloc_ - #define FTN_REALLOC kmp_realloc_ - #define FTN_FREE kmp_free_ - - #define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ - - #define FTN_SET_NUM_THREADS omp_set_num_threads_ - #define 
FTN_GET_NUM_THREADS omp_get_num_threads_ - #define FTN_GET_MAX_THREADS omp_get_max_threads_ - #define FTN_GET_THREAD_NUM omp_get_thread_num_ - #define FTN_GET_NUM_PROCS omp_get_num_procs_ - #define FTN_SET_DYNAMIC omp_set_dynamic_ - #define FTN_GET_DYNAMIC omp_get_dynamic_ - #define FTN_SET_NESTED omp_set_nested_ - #define FTN_GET_NESTED omp_get_nested_ - #define FTN_IN_PARALLEL omp_in_parallel_ - #define FTN_GET_THREAD_LIMIT omp_get_thread_limit_ - #define FTN_SET_SCHEDULE omp_set_schedule_ - #define FTN_GET_SCHEDULE omp_get_schedule_ - #define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_ - #define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels_ - #define FTN_GET_ACTIVE_LEVEL omp_get_active_level_ - #define FTN_GET_LEVEL omp_get_level_ - #define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_ - #define FTN_GET_TEAM_SIZE omp_get_team_size_ - #define FTN_IN_FINAL omp_in_final_ +#define FTN_SET_STACKSIZE kmp_set_stacksize_ +#define FTN_SET_STACKSIZE_S kmp_set_stacksize_s_ +#define FTN_GET_STACKSIZE kmp_get_stacksize_ +#define FTN_GET_STACKSIZE_S kmp_get_stacksize_s_ +#define FTN_SET_BLOCKTIME kmp_set_blocktime_ +#define FTN_GET_BLOCKTIME kmp_get_blocktime_ +#define FTN_SET_LIBRARY_SERIAL kmp_set_library_serial_ +#define FTN_SET_LIBRARY_TURNAROUND kmp_set_library_turnaround_ +#define FTN_SET_LIBRARY_THROUGHPUT kmp_set_library_throughput_ +#define FTN_SET_LIBRARY kmp_set_library_ +#define FTN_GET_LIBRARY kmp_get_library_ +#define FTN_SET_DEFAULTS kmp_set_defaults_ +#define FTN_SET_DISP_NUM_BUFFERS kmp_set_disp_num_buffers_ +#define FTN_SET_AFFINITY kmp_set_affinity_ +#define FTN_GET_AFFINITY kmp_get_affinity_ +#define FTN_GET_AFFINITY_MAX_PROC kmp_get_affinity_max_proc_ +#define FTN_CREATE_AFFINITY_MASK kmp_create_affinity_mask_ +#define FTN_DESTROY_AFFINITY_MASK kmp_destroy_affinity_mask_ +#define FTN_SET_AFFINITY_MASK_PROC kmp_set_affinity_mask_proc_ +#define FTN_UNSET_AFFINITY_MASK_PROC kmp_unset_affinity_mask_proc_ +#define FTN_GET_AFFINITY_MASK_PROC kmp_get_affinity_mask_proc_ + +#define FTN_MALLOC kmp_malloc_ +#define FTN_ALIGNED_MALLOC kmp_aligned_malloc_ +#define FTN_CALLOC kmp_calloc_ +#define FTN_REALLOC kmp_realloc_ +#define FTN_FREE kmp_free_ + +#define FTN_GET_NUM_KNOWN_THREADS kmp_get_num_known_threads_ + +#define FTN_SET_NUM_THREADS omp_set_num_threads_ +#define FTN_GET_NUM_THREADS omp_get_num_threads_ +#define FTN_GET_MAX_THREADS omp_get_max_threads_ +#define FTN_GET_THREAD_NUM omp_get_thread_num_ +#define FTN_GET_NUM_PROCS omp_get_num_procs_ +#define FTN_SET_DYNAMIC omp_set_dynamic_ +#define FTN_GET_DYNAMIC omp_get_dynamic_ +#define FTN_SET_NESTED omp_set_nested_ +#define FTN_GET_NESTED omp_get_nested_ +#define FTN_IN_PARALLEL omp_in_parallel_ +#define FTN_GET_THREAD_LIMIT omp_get_thread_limit_ +#define FTN_SET_SCHEDULE omp_set_schedule_ +#define FTN_GET_SCHEDULE omp_get_schedule_ +#define FTN_SET_MAX_ACTIVE_LEVELS omp_set_max_active_levels_ +#define FTN_GET_MAX_ACTIVE_LEVELS omp_get_max_active_levels_ +#define FTN_GET_ACTIVE_LEVEL omp_get_active_level_ +#define FTN_GET_LEVEL omp_get_level_ +#define FTN_GET_ANCESTOR_THREAD_NUM omp_get_ancestor_thread_num_ +#define FTN_GET_TEAM_SIZE omp_get_team_size_ +#define FTN_IN_FINAL omp_in_final_ // #define FTN_SET_PROC_BIND omp_set_proc_bind_ - #define FTN_GET_PROC_BIND omp_get_proc_bind_ +#define FTN_GET_PROC_BIND omp_get_proc_bind_ // #define FTN_CURR_PROC_BIND omp_curr_proc_bind_ #if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS omp_get_num_teams_ - #define FTN_GET_TEAM_NUM omp_get_team_num_ +#define 
FTN_GET_NUM_TEAMS omp_get_num_teams_ +#define FTN_GET_TEAM_NUM omp_get_team_num_ #endif - #define FTN_INIT_LOCK omp_init_lock_ +#define FTN_INIT_LOCK omp_init_lock_ #if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_ - #define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_ +#define FTN_INIT_LOCK_WITH_HINT omp_init_lock_with_hint_ +#define FTN_INIT_NEST_LOCK_WITH_HINT omp_init_nest_lock_with_hint_ #endif - #define FTN_DESTROY_LOCK omp_destroy_lock_ - #define FTN_SET_LOCK omp_set_lock_ - #define FTN_UNSET_LOCK omp_unset_lock_ - #define FTN_TEST_LOCK omp_test_lock_ - #define FTN_INIT_NEST_LOCK omp_init_nest_lock_ - #define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_ - #define FTN_SET_NEST_LOCK omp_set_nest_lock_ - #define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_ - #define FTN_TEST_NEST_LOCK omp_test_nest_lock_ - - #define FTN_SET_WARNINGS_ON kmp_set_warnings_on_ - #define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_ - - #define FTN_GET_WTIME omp_get_wtime_ - #define FTN_GET_WTICK omp_get_wtick_ +#define FTN_DESTROY_LOCK omp_destroy_lock_ +#define FTN_SET_LOCK omp_set_lock_ +#define FTN_UNSET_LOCK omp_unset_lock_ +#define FTN_TEST_LOCK omp_test_lock_ +#define FTN_INIT_NEST_LOCK omp_init_nest_lock_ +#define FTN_DESTROY_NEST_LOCK omp_destroy_nest_lock_ +#define FTN_SET_NEST_LOCK omp_set_nest_lock_ +#define FTN_UNSET_NEST_LOCK omp_unset_nest_lock_ +#define FTN_TEST_NEST_LOCK omp_test_nest_lock_ + +#define FTN_SET_WARNINGS_ON kmp_set_warnings_on_ +#define FTN_SET_WARNINGS_OFF kmp_set_warnings_off_ + +#define FTN_GET_WTIME omp_get_wtime_ +#define FTN_GET_WTICK omp_get_wtick_ #if OMP_40_ENABLED #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - #define FTN_GET_NUM_DEVICES omp_get_num_devices_ +#define FTN_GET_NUM_DEVICES omp_get_num_devices_ #endif - #define FTN_GET_DEFAULT_DEVICE omp_get_default_device_ - #define FTN_SET_DEFAULT_DEVICE omp_set_default_device_ - #define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ +#define FTN_GET_DEFAULT_DEVICE omp_get_default_device_ +#define FTN_SET_DEFAULT_DEVICE omp_set_default_device_ +#define FTN_IS_INITIAL_DEVICE omp_is_initial_device_ #endif #if OMP_40_ENABLED - #define FTN_GET_CANCELLATION omp_get_cancellation_ - #define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ +#define FTN_GET_CANCELLATION omp_get_cancellation_ +#define FTN_GET_CANCELLATION_STATUS kmp_get_cancellation_status_ #endif #if OMP_45_ENABLED - #define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority_ - #define FTN_GET_NUM_PLACES omp_get_num_places_ - #define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs_ - #define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids_ - #define FTN_GET_PLACE_NUM omp_get_place_num_ - #define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places_ - #define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums_ -# ifdef KMP_STUB - #define FTN_GET_INITIAL_DEVICE omp_get_initial_device_ - #define FTN_TARGET_ALLOC omp_target_alloc_ - #define FTN_TARGET_FREE omp_target_free_ - #define FTN_TARGET_IS_PRESENT omp_target_is_present_ - #define FTN_TARGET_MEMCPY omp_target_memcpy_ - #define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect_ - #define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr_ - #define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr_ -# endif +#define FTN_GET_MAX_TASK_PRIORITY omp_get_max_task_priority_ +#define FTN_GET_NUM_PLACES omp_get_num_places_ +#define FTN_GET_PLACE_NUM_PROCS omp_get_place_num_procs_ +#define FTN_GET_PLACE_PROC_IDS omp_get_place_proc_ids_ +#define 
FTN_GET_PLACE_NUM omp_get_place_num_ +#define FTN_GET_PARTITION_NUM_PLACES omp_get_partition_num_places_ +#define FTN_GET_PARTITION_PLACE_NUMS omp_get_partition_place_nums_ +#ifdef KMP_STUB +#define FTN_GET_INITIAL_DEVICE omp_get_initial_device_ +#define FTN_TARGET_ALLOC omp_target_alloc_ +#define FTN_TARGET_FREE omp_target_free_ +#define FTN_TARGET_IS_PRESENT omp_target_is_present_ +#define FTN_TARGET_MEMCPY omp_target_memcpy_ +#define FTN_TARGET_MEMCPY_RECT omp_target_memcpy_rect_ +#define FTN_TARGET_ASSOCIATE_PTR omp_target_associate_ptr_ +#define FTN_TARGET_DISASSOCIATE_PTR omp_target_disassociate_ptr_ +#endif #endif #endif /* KMP_FTN_APPEND */ @@ -260,117 +260,117 @@ #if KMP_FTN_ENTRIES == KMP_FTN_UPPER - #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE - #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S - #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE - #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S - #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME - #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME - #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL - #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND - #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT - #define FTN_SET_LIBRARY KMP_SET_LIBRARY - #define FTN_GET_LIBRARY KMP_GET_LIBRARY - #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS - #define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS - #define FTN_SET_AFFINITY KMP_SET_AFFINITY - #define FTN_GET_AFFINITY KMP_GET_AFFINITY - #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC - #define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK - #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK - #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC - #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC - #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC - - #define FTN_MALLOC KMP_MALLOC - #define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC - #define FTN_CALLOC KMP_CALLOC - #define FTN_REALLOC KMP_REALLOC - #define FTN_FREE KMP_FREE - - #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS - - #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS - #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS - #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS - #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM - #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS - #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC - #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC - #define FTN_SET_NESTED OMP_SET_NESTED - #define FTN_GET_NESTED OMP_GET_NESTED - #define FTN_IN_PARALLEL OMP_IN_PARALLEL - #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT - #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE - #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE - #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS - #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS - #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL - #define FTN_GET_LEVEL OMP_GET_LEVEL - #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM - #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE - #define FTN_IN_FINAL OMP_IN_FINAL +#define FTN_SET_STACKSIZE KMP_SET_STACKSIZE +#define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S +#define FTN_GET_STACKSIZE KMP_GET_STACKSIZE +#define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S +#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME +#define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME +#define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL +#define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND +#define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT +#define FTN_SET_LIBRARY KMP_SET_LIBRARY +#define 
FTN_GET_LIBRARY KMP_GET_LIBRARY +#define FTN_SET_DEFAULTS KMP_SET_DEFAULTS +#define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS +#define FTN_SET_AFFINITY KMP_SET_AFFINITY +#define FTN_GET_AFFINITY KMP_GET_AFFINITY +#define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC +#define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK +#define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK +#define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC +#define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC +#define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC + +#define FTN_MALLOC KMP_MALLOC +#define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC +#define FTN_CALLOC KMP_CALLOC +#define FTN_REALLOC KMP_REALLOC +#define FTN_FREE KMP_FREE + +#define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS + +#define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS +#define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS +#define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS +#define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM +#define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS +#define FTN_SET_DYNAMIC OMP_SET_DYNAMIC +#define FTN_GET_DYNAMIC OMP_GET_DYNAMIC +#define FTN_SET_NESTED OMP_SET_NESTED +#define FTN_GET_NESTED OMP_GET_NESTED +#define FTN_IN_PARALLEL OMP_IN_PARALLEL +#define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT +#define FTN_SET_SCHEDULE OMP_SET_SCHEDULE +#define FTN_GET_SCHEDULE OMP_GET_SCHEDULE +#define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS +#define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS +#define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL +#define FTN_GET_LEVEL OMP_GET_LEVEL +#define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM +#define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE +#define FTN_IN_FINAL OMP_IN_FINAL // #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND - #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND +#define FTN_GET_PROC_BIND OMP_GET_PROC_BIND // #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND #if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS - #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM +#define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS +#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM #endif - #define FTN_INIT_LOCK OMP_INIT_LOCK +#define FTN_INIT_LOCK OMP_INIT_LOCK #if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT - #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT +#define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT +#define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT #endif - #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK - #define FTN_SET_LOCK OMP_SET_LOCK - #define FTN_UNSET_LOCK OMP_UNSET_LOCK - #define FTN_TEST_LOCK OMP_TEST_LOCK - #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK - #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK - #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK - #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK - #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK - - #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON - #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF - - #define FTN_GET_WTIME OMP_GET_WTIME - #define FTN_GET_WTICK OMP_GET_WTICK +#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK +#define FTN_SET_LOCK OMP_SET_LOCK +#define FTN_UNSET_LOCK OMP_UNSET_LOCK +#define FTN_TEST_LOCK OMP_TEST_LOCK +#define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK +#define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK +#define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK +#define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK +#define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK + +#define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON +#define 
FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF + +#define FTN_GET_WTIME OMP_GET_WTIME +#define FTN_GET_WTICK OMP_GET_WTICK #if OMP_40_ENABLED #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES +#define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES #endif - #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE - #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE - #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE +#define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE +#define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE +#define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE #endif #if OMP_40_ENABLED - #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION - #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS +#define FTN_GET_CANCELLATION OMP_GET_CANCELLATION +#define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS #endif #if OMP_45_ENABLED - #define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY - #define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES - #define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS - #define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS - #define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM - #define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES - #define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS -# ifdef KMP_STUB - #define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE - #define FTN_TARGET_ALLOC OMP_TARGET_ALLOC - #define FTN_TARGET_FREE OMP_TARGET_FREE - #define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT - #define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY - #define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT - #define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR - #define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR -# endif +#define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY +#define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES +#define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS +#define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS +#define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM +#define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES +#define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS +#ifdef KMP_STUB +#define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE +#define FTN_TARGET_ALLOC OMP_TARGET_ALLOC +#define FTN_TARGET_FREE OMP_TARGET_FREE +#define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT +#define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY +#define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT +#define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR +#define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR +#endif #endif #endif /* KMP_FTN_UPPER */ @@ -379,242 +379,266 @@ #if KMP_FTN_ENTRIES == KMP_FTN_UAPPEND - #define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_ - #define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_ - #define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_ - #define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_ - #define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_ - #define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_ - #define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_ - #define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_ - #define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_ - #define FTN_SET_LIBRARY KMP_SET_LIBRARY_ - #define FTN_GET_LIBRARY KMP_GET_LIBRARY_ - #define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_ - #define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS_ - #define FTN_SET_AFFINITY KMP_SET_AFFINITY_ - #define FTN_GET_AFFINITY KMP_GET_AFFINITY_ - #define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_ - #define 
FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_ - #define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_ - #define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_ - #define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_ - #define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_ - - #define FTN_MALLOC KMP_MALLOC_ - #define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC_ - #define FTN_CALLOC KMP_CALLOC_ - #define FTN_REALLOC KMP_REALLOC_ - #define FTN_FREE KMP_FREE_ - - #define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ - - #define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ - #define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ - #define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ - #define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_ - #define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_ - #define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_ - #define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_ - #define FTN_SET_NESTED OMP_SET_NESTED_ - #define FTN_GET_NESTED OMP_GET_NESTED_ - #define FTN_IN_PARALLEL OMP_IN_PARALLEL_ - #define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_ - #define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_ - #define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_ - #define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_ - #define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_ - #define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_ - #define FTN_GET_LEVEL OMP_GET_LEVEL_ - #define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_ - #define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_ - #define FTN_IN_FINAL OMP_IN_FINAL_ +#define FTN_SET_STACKSIZE KMP_SET_STACKSIZE_ +#define FTN_SET_STACKSIZE_S KMP_SET_STACKSIZE_S_ +#define FTN_GET_STACKSIZE KMP_GET_STACKSIZE_ +#define FTN_GET_STACKSIZE_S KMP_GET_STACKSIZE_S_ +#define FTN_SET_BLOCKTIME KMP_SET_BLOCKTIME_ +#define FTN_GET_BLOCKTIME KMP_GET_BLOCKTIME_ +#define FTN_SET_LIBRARY_SERIAL KMP_SET_LIBRARY_SERIAL_ +#define FTN_SET_LIBRARY_TURNAROUND KMP_SET_LIBRARY_TURNAROUND_ +#define FTN_SET_LIBRARY_THROUGHPUT KMP_SET_LIBRARY_THROUGHPUT_ +#define FTN_SET_LIBRARY KMP_SET_LIBRARY_ +#define FTN_GET_LIBRARY KMP_GET_LIBRARY_ +#define FTN_SET_DEFAULTS KMP_SET_DEFAULTS_ +#define FTN_SET_DISP_NUM_BUFFERS KMP_SET_DISP_NUM_BUFFERS_ +#define FTN_SET_AFFINITY KMP_SET_AFFINITY_ +#define FTN_GET_AFFINITY KMP_GET_AFFINITY_ +#define FTN_GET_AFFINITY_MAX_PROC KMP_GET_AFFINITY_MAX_PROC_ +#define FTN_CREATE_AFFINITY_MASK KMP_CREATE_AFFINITY_MASK_ +#define FTN_DESTROY_AFFINITY_MASK KMP_DESTROY_AFFINITY_MASK_ +#define FTN_SET_AFFINITY_MASK_PROC KMP_SET_AFFINITY_MASK_PROC_ +#define FTN_UNSET_AFFINITY_MASK_PROC KMP_UNSET_AFFINITY_MASK_PROC_ +#define FTN_GET_AFFINITY_MASK_PROC KMP_GET_AFFINITY_MASK_PROC_ + +#define FTN_MALLOC KMP_MALLOC_ +#define FTN_ALIGNED_MALLOC KMP_ALIGNED_MALLOC_ +#define FTN_CALLOC KMP_CALLOC_ +#define FTN_REALLOC KMP_REALLOC_ +#define FTN_FREE KMP_FREE_ + +#define FTN_GET_NUM_KNOWN_THREADS KMP_GET_NUM_KNOWN_THREADS_ + +#define FTN_SET_NUM_THREADS OMP_SET_NUM_THREADS_ +#define FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_ +#define FTN_GET_MAX_THREADS OMP_GET_MAX_THREADS_ +#define FTN_GET_THREAD_NUM OMP_GET_THREAD_NUM_ +#define FTN_GET_NUM_PROCS OMP_GET_NUM_PROCS_ +#define FTN_SET_DYNAMIC OMP_SET_DYNAMIC_ +#define FTN_GET_DYNAMIC OMP_GET_DYNAMIC_ +#define FTN_SET_NESTED OMP_SET_NESTED_ +#define FTN_GET_NESTED OMP_GET_NESTED_ +#define FTN_IN_PARALLEL OMP_IN_PARALLEL_ +#define FTN_GET_THREAD_LIMIT OMP_GET_THREAD_LIMIT_ +#define FTN_SET_SCHEDULE OMP_SET_SCHEDULE_ +#define FTN_GET_SCHEDULE OMP_GET_SCHEDULE_ +#define FTN_SET_MAX_ACTIVE_LEVELS OMP_SET_MAX_ACTIVE_LEVELS_ 
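All of these KMP_FTN_ENTRIES tables do the same job: they pick the exact external spelling a given Fortran compiler expects for an OpenMP entry point (lower- or upper-case, with or without a trailing underscore), so the entry-point bodies can be written once against the generic FTN_* names. As a rough, self-contained sketch of the idea only (the MY_FTN_* macro and the stub body are invented for illustration, not taken from the runtime):

#include <cstdio>

// Mirrors the KMP_FTN_UAPPEND row above: upper-case name plus a trailing '_'.
#define MY_FTN_GET_NUM_THREADS OMP_GET_NUM_THREADS_

// The body is written once against the generic macro ...
extern "C" int MY_FTN_GET_NUM_THREADS(void) {
  return 1; // placeholder value; the real entry point queries the runtime
}

int main() {
  // ... but it is exported, and callable, under the mangled Fortran spelling.
  std::printf("%d\n", OMP_GET_NUM_THREADS_());
  return 0;
}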
+#define FTN_GET_MAX_ACTIVE_LEVELS OMP_GET_MAX_ACTIVE_LEVELS_ +#define FTN_GET_ACTIVE_LEVEL OMP_GET_ACTIVE_LEVEL_ +#define FTN_GET_LEVEL OMP_GET_LEVEL_ +#define FTN_GET_ANCESTOR_THREAD_NUM OMP_GET_ANCESTOR_THREAD_NUM_ +#define FTN_GET_TEAM_SIZE OMP_GET_TEAM_SIZE_ +#define FTN_IN_FINAL OMP_IN_FINAL_ // #define FTN_SET_PROC_BIND OMP_SET_PROC_BIND_ - #define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_ +#define FTN_GET_PROC_BIND OMP_GET_PROC_BIND_ // #define FTN_CURR_PROC_BIND OMP_CURR_PROC_BIND_ #if OMP_40_ENABLED - #define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_ - #define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ +#define FTN_GET_NUM_TEAMS OMP_GET_NUM_TEAMS_ +#define FTN_GET_TEAM_NUM OMP_GET_TEAM_NUM_ #endif - #define FTN_INIT_LOCK OMP_INIT_LOCK_ +#define FTN_INIT_LOCK OMP_INIT_LOCK_ #if KMP_USE_DYNAMIC_LOCK - #define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_ - #define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_ +#define FTN_INIT_LOCK_WITH_HINT OMP_INIT_LOCK_WITH_HINT_ +#define FTN_INIT_NEST_LOCK_WITH_HINT OMP_INIT_NEST_LOCK_WITH_HINT_ #endif - #define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ - #define FTN_SET_LOCK OMP_SET_LOCK_ - #define FTN_UNSET_LOCK OMP_UNSET_LOCK_ - #define FTN_TEST_LOCK OMP_TEST_LOCK_ - #define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_ - #define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_ - #define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_ - #define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_ - #define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_ - - #define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_ - #define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_ - - #define FTN_GET_WTIME OMP_GET_WTIME_ - #define FTN_GET_WTICK OMP_GET_WTICK_ +#define FTN_DESTROY_LOCK OMP_DESTROY_LOCK_ +#define FTN_SET_LOCK OMP_SET_LOCK_ +#define FTN_UNSET_LOCK OMP_UNSET_LOCK_ +#define FTN_TEST_LOCK OMP_TEST_LOCK_ +#define FTN_INIT_NEST_LOCK OMP_INIT_NEST_LOCK_ +#define FTN_DESTROY_NEST_LOCK OMP_DESTROY_NEST_LOCK_ +#define FTN_SET_NEST_LOCK OMP_SET_NEST_LOCK_ +#define FTN_UNSET_NEST_LOCK OMP_UNSET_NEST_LOCK_ +#define FTN_TEST_NEST_LOCK OMP_TEST_NEST_LOCK_ + +#define FTN_SET_WARNINGS_ON KMP_SET_WARNINGS_ON_ +#define FTN_SET_WARNINGS_OFF KMP_SET_WARNINGS_OFF_ + +#define FTN_GET_WTIME OMP_GET_WTIME_ +#define FTN_GET_WTICK OMP_GET_WTICK_ #if OMP_40_ENABLED #if KMP_MIC || KMP_OS_DARWIN || defined(KMP_STUB) - #define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_ +#define FTN_GET_NUM_DEVICES OMP_GET_NUM_DEVICES_ #endif - #define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_ - #define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_ - #define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ +#define FTN_GET_DEFAULT_DEVICE OMP_GET_DEFAULT_DEVICE_ +#define FTN_SET_DEFAULT_DEVICE OMP_SET_DEFAULT_DEVICE_ +#define FTN_IS_INITIAL_DEVICE OMP_IS_INITIAL_DEVICE_ #endif #if OMP_40_ENABLED - #define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ - #define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ +#define FTN_GET_CANCELLATION OMP_GET_CANCELLATION_ +#define FTN_GET_CANCELLATION_STATUS KMP_GET_CANCELLATION_STATUS_ #endif #if OMP_45_ENABLED - #define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY_ - #define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES_ - #define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS_ - #define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS_ - #define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM_ - #define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES_ - #define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS_ -# ifdef KMP_STUB - #define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE_ - #define 
FTN_TARGET_ALLOC OMP_TARGET_ALLOC_ - #define FTN_TARGET_FREE OMP_TARGET_FREE_ - #define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT_ - #define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY_ - #define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT_ - #define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR_ - #define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR_ -# endif +#define FTN_GET_MAX_TASK_PRIORITY OMP_GET_MAX_TASK_PRIORITY_ +#define FTN_GET_NUM_PLACES OMP_GET_NUM_PLACES_ +#define FTN_GET_PLACE_NUM_PROCS OMP_GET_PLACE_NUM_PROCS_ +#define FTN_GET_PLACE_PROC_IDS OMP_GET_PLACE_PROC_IDS_ +#define FTN_GET_PLACE_NUM OMP_GET_PLACE_NUM_ +#define FTN_GET_PARTITION_NUM_PLACES OMP_GET_PARTITION_NUM_PLACES_ +#define FTN_GET_PARTITION_PLACE_NUMS OMP_GET_PARTITION_PLACE_NUMS_ +#ifdef KMP_STUB +#define FTN_GET_INITIAL_DEVICE OMP_GET_INITIAL_DEVICE_ +#define FTN_TARGET_ALLOC OMP_TARGET_ALLOC_ +#define FTN_TARGET_FREE OMP_TARGET_FREE_ +#define FTN_TARGET_IS_PRESENT OMP_TARGET_IS_PRESENT_ +#define FTN_TARGET_MEMCPY OMP_TARGET_MEMCPY_ +#define FTN_TARGET_MEMCPY_RECT OMP_TARGET_MEMCPY_RECT_ +#define FTN_TARGET_ASSOCIATE_PTR OMP_TARGET_ASSOCIATE_PTR_ +#define FTN_TARGET_DISASSOCIATE_PTR OMP_TARGET_DISASSOCIATE_PTR_ +#endif #endif #endif /* KMP_FTN_UAPPEND */ -/* ------------------------------------------------------------------ */ /* -------------------------- GOMP API NAMES ------------------------ */ // All GOMP_1.0 symbols -#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end -#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start -#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier -#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end -#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start -#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end -#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait -#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next -#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT GOMP_loop_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START GOMP_loop_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START GOMP_loop_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT GOMP_loop_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START GOMP_loop_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START GOMP_loop_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next -#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start -#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next -#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start -#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end -#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start -#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START GOMP_parallel_loop_dynamic_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START GOMP_parallel_loop_guided_start -#define 
KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START GOMP_parallel_loop_runtime_start -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START GOMP_parallel_loop_static_start -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start -#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start -#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end -#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait -#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next -#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start -#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end -#define KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start -#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start +#define KMP_API_NAME_GOMP_ATOMIC_END GOMP_atomic_end +#define KMP_API_NAME_GOMP_ATOMIC_START GOMP_atomic_start +#define KMP_API_NAME_GOMP_BARRIER GOMP_barrier +#define KMP_API_NAME_GOMP_CRITICAL_END GOMP_critical_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_END GOMP_critical_name_end +#define KMP_API_NAME_GOMP_CRITICAL_NAME_START GOMP_critical_name_start +#define KMP_API_NAME_GOMP_CRITICAL_START GOMP_critical_start +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT GOMP_loop_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_DYNAMIC_START GOMP_loop_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_END GOMP_loop_end +#define KMP_API_NAME_GOMP_LOOP_END_NOWAIT GOMP_loop_end_nowait +#define KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT GOMP_loop_guided_next +#define KMP_API_NAME_GOMP_LOOP_GUIDED_START GOMP_loop_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT \ + GOMP_loop_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START \ + GOMP_loop_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT GOMP_loop_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START \ + GOMP_loop_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT \ + GOMP_loop_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START \ + GOMP_loop_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT GOMP_loop_ordered_static_next +#define KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START \ + GOMP_loop_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT GOMP_loop_runtime_next +#define KMP_API_NAME_GOMP_LOOP_RUNTIME_START GOMP_loop_runtime_start +#define KMP_API_NAME_GOMP_LOOP_STATIC_NEXT GOMP_loop_static_next +#define KMP_API_NAME_GOMP_LOOP_STATIC_START GOMP_loop_static_start +#define KMP_API_NAME_GOMP_ORDERED_END GOMP_ordered_end +#define KMP_API_NAME_GOMP_ORDERED_START GOMP_ordered_start +#define KMP_API_NAME_GOMP_PARALLEL_END GOMP_parallel_end +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START \ + GOMP_parallel_loop_dynamic_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START \ + GOMP_parallel_loop_guided_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START \ + GOMP_parallel_loop_runtime_start +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START \ + GOMP_parallel_loop_static_start +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START GOMP_parallel_sections_start +#define KMP_API_NAME_GOMP_PARALLEL_START GOMP_parallel_start +#define KMP_API_NAME_GOMP_SECTIONS_END GOMP_sections_end +#define KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT GOMP_sections_end_nowait +#define KMP_API_NAME_GOMP_SECTIONS_NEXT GOMP_sections_next +#define KMP_API_NAME_GOMP_SECTIONS_START GOMP_sections_start +#define KMP_API_NAME_GOMP_SINGLE_COPY_END GOMP_single_copy_end +#define 
KMP_API_NAME_GOMP_SINGLE_COPY_START GOMP_single_copy_start +#define KMP_API_NAME_GOMP_SINGLE_START GOMP_single_start // All GOMP_2.0 symbols -#define KMP_API_NAME_GOMP_TASK GOMP_task -#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next -#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT GOMP_loop_ull_ordered_dynamic_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START GOMP_loop_ull_ordered_dynamic_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT GOMP_loop_ull_ordered_guided_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START GOMP_loop_ull_ordered_guided_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT GOMP_loop_ull_ordered_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START GOMP_loop_ull_ordered_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT GOMP_loop_ull_ordered_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START GOMP_loop_ull_ordered_static_start -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next -#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start -#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next -#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start +#define KMP_API_NAME_GOMP_TASK GOMP_task +#define KMP_API_NAME_GOMP_TASKWAIT GOMP_taskwait +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT GOMP_loop_ull_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START GOMP_loop_ull_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT GOMP_loop_ull_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START GOMP_loop_ull_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT \ + GOMP_loop_ull_ordered_dynamic_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START \ + GOMP_loop_ull_ordered_dynamic_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT \ + GOMP_loop_ull_ordered_guided_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START \ + GOMP_loop_ull_ordered_guided_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT \ + GOMP_loop_ull_ordered_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START \ + GOMP_loop_ull_ordered_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT \ + GOMP_loop_ull_ordered_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START \ + GOMP_loop_ull_ordered_static_start +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT GOMP_loop_ull_runtime_next +#define KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START GOMP_loop_ull_runtime_start +#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT GOMP_loop_ull_static_next +#define KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START GOMP_loop_ull_static_start // All GOMP_3.0 symbols -#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield +#define KMP_API_NAME_GOMP_TASKYIELD GOMP_taskyield // All GOMP_4.0 symbols -// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in libomp -#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel -#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel -#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point -#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel -#define 
KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime -#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static -#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections -#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel -#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel -#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start -#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end +// TODO: As of 2013-10-14, none of the GOMP_4.0 functions are implemented in +// libomp +#define KMP_API_NAME_GOMP_BARRIER_CANCEL GOMP_barrier_cancel +#define KMP_API_NAME_GOMP_CANCEL GOMP_cancel +#define KMP_API_NAME_GOMP_CANCELLATION_POINT GOMP_cancellation_point +#define KMP_API_NAME_GOMP_LOOP_END_CANCEL GOMP_loop_end_cancel +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC GOMP_parallel_loop_dynamic +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED GOMP_parallel_loop_guided +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME GOMP_parallel_loop_runtime +#define KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC GOMP_parallel_loop_static +#define KMP_API_NAME_GOMP_PARALLEL_SECTIONS GOMP_parallel_sections +#define KMP_API_NAME_GOMP_PARALLEL GOMP_parallel +#define KMP_API_NAME_GOMP_SECTIONS_END_CANCEL GOMP_sections_end_cancel +#define KMP_API_NAME_GOMP_TASKGROUP_START GOMP_taskgroup_start +#define KMP_API_NAME_GOMP_TASKGROUP_END GOMP_taskgroup_end /* Target functions should be taken care of by liboffload */ -#define KMP_API_NAME_GOMP_TARGET GOMP_target -#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data -#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data -#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update -#define KMP_API_NAME_GOMP_TEAMS GOMP_teams +#define KMP_API_NAME_GOMP_TARGET GOMP_target +#define KMP_API_NAME_GOMP_TARGET_DATA GOMP_target_data +#define KMP_API_NAME_GOMP_TARGET_END_DATA GOMP_target_end_data +#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update +#define KMP_API_NAME_GOMP_TEAMS GOMP_teams #ifdef KMP_USE_VERSION_SYMBOLS - #define xstr(x) str(x) - #define str(x) #x - - // If Linux, xexpand prepends __kmp_api_ to the real API name - #define xexpand(api_name) expand(api_name) - #define expand(api_name) __kmp_api_##api_name - - #define xaliasify(api_name,ver) aliasify(api_name,ver) - #define aliasify(api_name,ver) __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias __attribute__((alias(xstr(__kmp_api_##api_name)))) - - #define xversionify(api_name, version_num, version_str) versionify(api_name, version_num, version_str, "VERSION") - #define versionify(api_name, version_num, version_str, default_ver) \ - __asm__(".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr(api_name) "@" version_str "\n\t"); \ - __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr(api_name) "@@" default_ver "\n\t") +#define xstr(x) str(x) +#define str(x) #x + +// If Linux, xexpand prepends __kmp_api_ to the real API name +#define xexpand(api_name) expand(api_name) +#define expand(api_name) __kmp_api_##api_name + +#define xaliasify(api_name, ver) aliasify(api_name, ver) +#define aliasify(api_name, ver) \ + __typeof__(__kmp_api_##api_name) __kmp_api_##api_name##_##ver##_alias \ + __attribute__((alias(xstr(__kmp_api_##api_name)))) + +#define xversionify(api_name, version_num, version_str) \ + versionify(api_name, version_num, version_str, "VERSION") +#define 
versionify(api_name, version_num, version_str, default_ver) \ + __asm__( \ + ".symver " xstr(__kmp_api_##api_name##_##version_num##_alias) "," xstr( \ + api_name) "@" version_str "\n\t"); \ + __asm__(".symver " xstr(__kmp_api_##api_name) "," xstr( \ + api_name) "@@" default_ver "\n\t") #else // KMP_USE_VERSION_SYMBOLS - #define xstr(x) /* Nothing */ - #define str(x) /* Nothing */ +#define xstr(x) /* Nothing */ +#define str(x) /* Nothing */ - // if Windows or Mac, xexpand does no name transformation - #define xexpand(api_name) expand(api_name) - #define expand(api_name) api_name +// if Windows or Mac, xexpand does no name transformation +#define xexpand(api_name) expand(api_name) +#define expand(api_name) api_name - #define xaliasify(api_name,ver) /* Nothing */ - #define aliasify(api_name,ver) /* Nothing */ +#define xaliasify(api_name, ver) /* Nothing */ +#define aliasify(api_name, ver) /* Nothing */ - #define xversionify(api_name, version_num, version_str) /* Nothing */ - #define versionify(api_name, version_num, version_str, default_ver) /* Nothing */ +#define xversionify(api_name, version_num, version_str) /* Nothing */ +#define versionify(api_name, version_num, version_str, \ + default_ver) /* Nothing */ #endif // KMP_USE_VERSION_SYMBOLS #endif /* KMP_FTN_OS_H */ - diff --git a/openmp/runtime/src/kmp_ftn_stdcall.cpp b/openmp/runtime/src/kmp_ftn_stdcall.cpp index 41f04de..b7441b4 100644 --- a/openmp/runtime/src/kmp_ftn_stdcall.cpp +++ b/openmp/runtime/src/kmp_ftn_stdcall.cpp @@ -16,20 +16,20 @@ #include "kmp.h" // Note: This string is not printed when KMP_VERSION=1. -char const __kmp_version_ftnstdcall[] = KMP_VERSION_PREFIX "Fortran __stdcall OMP support: " +char const __kmp_version_ftnstdcall[] = + KMP_VERSION_PREFIX "Fortran __stdcall OMP support: " #ifdef USE_FTN_STDCALL - "yes"; + "yes"; #else - "no"; + "no"; #endif #ifdef USE_FTN_STDCALL -#define FTN_STDCALL KMP_STDCALL -#define KMP_FTN_ENTRIES USE_FTN_STDCALL +#define FTN_STDCALL KMP_STDCALL +#define KMP_FTN_ENTRIES USE_FTN_STDCALL -#include "kmp_ftn_os.h" #include "kmp_ftn_entry.h" +#include "kmp_ftn_os.h" #endif /* USE_FTN_STDCALL */ - diff --git a/openmp/runtime/src/kmp_global.cpp b/openmp/runtime/src/kmp_global.cpp index 623c439..960c4b6 100644 --- a/openmp/runtime/src/kmp_global.cpp +++ b/openmp/runtime/src/kmp_global.cpp @@ -19,7 +19,7 @@ kmp_key_t __kmp_gtid_threadprivate_key; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized +kmp_cpuinfo_t __kmp_cpuinfo = {0}; // Not initialized #endif #if KMP_STATS_ENABLED @@ -27,11 +27,12 @@ kmp_cpuinfo_t __kmp_cpuinfo = { 0 }; // Not initialized // lock for modifying the global __kmp_stats_list kmp_tas_lock_t __kmp_stats_lock; -// global list of per thread stats, the head is a sentinel node which accumulates all stats produced before __kmp_create_worker is called. -kmp_stats_list* __kmp_stats_list; +// global list of per thread stats, the head is a sentinel node which +// accumulates all stats produced before __kmp_create_worker is called. 
+kmp_stats_list *__kmp_stats_list; // thread local pointer to stats node within list -__thread kmp_stats_list* __kmp_stats_thread_ptr = NULL; +__thread kmp_stats_list *__kmp_stats_thread_ptr = NULL; // gives reference tick for all events (considered the 0 tick) tsc_tick_count __kmp_stats_start_time; @@ -40,176 +41,196 @@ tsc_tick_count __kmp_stats_start_time; /* ----------------------------------------------------- */ /* INITIALIZATION VARIABLES */ /* they are syncronized to write during init, but read anytime */ -volatile int __kmp_init_serial = FALSE; -volatile int __kmp_init_gtid = FALSE; -volatile int __kmp_init_common = FALSE; -volatile int __kmp_init_middle = FALSE; -volatile int __kmp_init_parallel = FALSE; +volatile int __kmp_init_serial = FALSE; +volatile int __kmp_init_gtid = FALSE; +volatile int __kmp_init_common = FALSE; +volatile int __kmp_init_middle = FALSE; +volatile int __kmp_init_parallel = FALSE; #if KMP_USE_MONITOR -volatile int __kmp_init_monitor = 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ +volatile int __kmp_init_monitor = + 0; /* 1 - launched, 2 - actually started (Windows* OS only) */ #endif -volatile int __kmp_init_user_locks = FALSE; +volatile int __kmp_init_user_locks = FALSE; /* list of address of allocated caches for commons */ -kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; +kmp_cached_addr_t *__kmp_threadpriv_cache_list = NULL; -int __kmp_init_counter = 0; -int __kmp_root_counter = 0; -int __kmp_version = 0; +int __kmp_init_counter = 0; +int __kmp_root_counter = 0; +int __kmp_version = 0; -volatile kmp_uint32 __kmp_team_counter = 0; -volatile kmp_uint32 __kmp_task_counter = 0; +volatile kmp_uint32 __kmp_team_counter = 0; +volatile kmp_uint32 __kmp_task_counter = 0; -unsigned int __kmp_init_wait = KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */ -unsigned int __kmp_next_wait = KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */ +unsigned int __kmp_init_wait = + KMP_DEFAULT_INIT_WAIT; /* initial number of spin-tests */ +unsigned int __kmp_next_wait = + KMP_DEFAULT_NEXT_WAIT; /* susequent number of spin-tests */ -size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; +size_t __kmp_stksize = KMP_DEFAULT_STKSIZE; #if KMP_USE_MONITOR -size_t __kmp_monitor_stksize = 0; // auto adjust +size_t __kmp_monitor_stksize = 0; // auto adjust #endif -size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; -int __kmp_stkpadding = KMP_MIN_STKPADDING; +size_t __kmp_stkoffset = KMP_DEFAULT_STKOFFSET; +int __kmp_stkpadding = KMP_MIN_STKPADDING; -size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; +size_t __kmp_malloc_pool_incr = KMP_DEFAULT_MALLOC_POOL_INCR; -/* Barrier method defaults, settings, and strings */ -/* branch factor = 2^branch_bits (only relevant for tree and hyper barrier types) */ +// Barrier method defaults, settings, and strings. 
+// branch factor = 2^branch_bits (only relevant for tree & hyper barrier types) #if KMP_ARCH_X86_64 -kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ -kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* hyper2: C78980 */ +kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; +/* branch_factor = 4 */ /* hyper2: C78980 */ +kmp_uint32 __kmp_barrier_release_bb_dflt = 2; +/* branch_factor = 4 */ /* hyper2: C78980 */ #else -kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ -kmp_uint32 __kmp_barrier_release_bb_dflt = 2; /* branch_factor = 4 */ /* communication in core for MIC */ +kmp_uint32 __kmp_barrier_gather_bb_dflt = 2; +/* branch_factor = 4 */ /* communication in core for MIC */ +kmp_uint32 __kmp_barrier_release_bb_dflt = 2; +/* branch_factor = 4 */ /* communication in core for MIC */ #endif // KMP_ARCH_X86_64 #if KMP_ARCH_X86_64 -kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ -kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ +kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_hyper_bar; /* hyper2: C78980 */ +kmp_bar_pat_e __kmp_barrier_release_pat_dflt = + bp_hyper_bar; /* hyper2: C78980 */ #else -kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar; +kmp_bar_pat_e __kmp_barrier_gather_pat_dflt = bp_linear_bar; kmp_bar_pat_e __kmp_barrier_release_pat_dflt = bp_linear_bar; #endif -kmp_uint32 __kmp_barrier_gather_branch_bits [ bs_last_barrier ] = { 0 }; -kmp_uint32 __kmp_barrier_release_branch_bits [ bs_last_barrier ] = { 0 }; -kmp_bar_pat_e __kmp_barrier_gather_pattern [ bs_last_barrier ] = { bp_linear_bar }; -kmp_bar_pat_e __kmp_barrier_release_pattern [ bs_last_barrier ] = { bp_linear_bar }; -char const *__kmp_barrier_branch_bit_env_name [ bs_last_barrier ] = - { "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER" - #if KMP_FAST_REDUCTION_BARRIER - , "KMP_REDUCTION_BARRIER" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const *__kmp_barrier_pattern_env_name [ bs_last_barrier ] = - { "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN" - #if KMP_FAST_REDUCTION_BARRIER - , "KMP_REDUCTION_BARRIER_PATTERN" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const *__kmp_barrier_type_name [ bs_last_barrier ] = - { "plain", "forkjoin" - #if KMP_FAST_REDUCTION_BARRIER - , "reduction" - #endif // KMP_FAST_REDUCTION_BARRIER - }; -char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear","tree","hyper","hierarchical"}; - -int __kmp_allThreadsSpecified = 0; -size_t __kmp_align_alloc = CACHE_LINE; - - -int __kmp_generate_warnings = kmp_warnings_low; -int __kmp_reserve_warn = 0; -int __kmp_xproc = 0; -int __kmp_avail_proc = 0; -size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; -int __kmp_sys_max_nth = KMP_MAX_NTH; -int __kmp_max_nth = 0; -int __kmp_threads_capacity = 0; -int __kmp_dflt_team_nth = 0; -int __kmp_dflt_team_nth_ub = 0; -int __kmp_tp_capacity = 0; -int __kmp_tp_cached = 0; -int __kmp_dflt_nested = FALSE; -int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; -int __kmp_dflt_max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ +kmp_uint32 __kmp_barrier_gather_branch_bits[bs_last_barrier] = {0}; +kmp_uint32 __kmp_barrier_release_branch_bits[bs_last_barrier] = {0}; +kmp_bar_pat_e __kmp_barrier_gather_pattern[bs_last_barrier] = {bp_linear_bar}; +kmp_bar_pat_e __kmp_barrier_release_pattern[bs_last_barrier] = {bp_linear_bar}; +char const 
*__kmp_barrier_branch_bit_env_name[bs_last_barrier] = { + "KMP_PLAIN_BARRIER", "KMP_FORKJOIN_BARRIER" +#if KMP_FAST_REDUCTION_BARRIER + , + "KMP_REDUCTION_BARRIER" +#endif // KMP_FAST_REDUCTION_BARRIER +}; +char const *__kmp_barrier_pattern_env_name[bs_last_barrier] = { + "KMP_PLAIN_BARRIER_PATTERN", "KMP_FORKJOIN_BARRIER_PATTERN" +#if KMP_FAST_REDUCTION_BARRIER + , + "KMP_REDUCTION_BARRIER_PATTERN" +#endif // KMP_FAST_REDUCTION_BARRIER +}; +char const *__kmp_barrier_type_name[bs_last_barrier] = {"plain", "forkjoin" +#if KMP_FAST_REDUCTION_BARRIER + , + "reduction" +#endif // KMP_FAST_REDUCTION_BARRIER +}; +char const *__kmp_barrier_pattern_name[bp_last_bar] = {"linear", "tree", + "hyper", "hierarchical"}; + +int __kmp_allThreadsSpecified = 0; +size_t __kmp_align_alloc = CACHE_LINE; + + +int __kmp_generate_warnings = kmp_warnings_low; +int __kmp_reserve_warn = 0; +int __kmp_xproc = 0; +int __kmp_avail_proc = 0; +size_t __kmp_sys_min_stksize = KMP_MIN_STKSIZE; +int __kmp_sys_max_nth = KMP_MAX_NTH; +int __kmp_max_nth = 0; +int __kmp_threads_capacity = 0; +int __kmp_dflt_team_nth = 0; +int __kmp_dflt_team_nth_ub = 0; +int __kmp_tp_capacity = 0; +int __kmp_tp_cached = 0; +int __kmp_dflt_nested = FALSE; +int __kmp_dispatch_num_buffers = KMP_DFLT_DISP_NUM_BUFF; +int __kmp_dflt_max_active_levels = + KMP_MAX_ACTIVE_LEVELS_LIMIT; /* max_active_levels limit */ #if KMP_NESTED_HOT_TEAMS -int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ - /* 1 - keep extra threads when reduced */ -int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ +int __kmp_hot_teams_mode = 0; /* 0 - free extra threads when reduced */ +/* 1 - keep extra threads when reduced */ +int __kmp_hot_teams_max_level = 1; /* nesting level of hot teams */ #endif enum library_type __kmp_library = library_none; -enum sched_type __kmp_sched = kmp_sch_default; /* scheduling method for runtime scheduling */ -enum sched_type __kmp_static = kmp_sch_static_greedy; /* default static scheduling method */ -enum sched_type __kmp_guided = kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ -enum sched_type __kmp_auto = kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ -int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; +enum sched_type __kmp_sched = + kmp_sch_default; /* scheduling method for runtime scheduling */ +enum sched_type __kmp_static = + kmp_sch_static_greedy; /* default static scheduling method */ +enum sched_type __kmp_guided = + kmp_sch_guided_iterative_chunked; /* default guided scheduling method */ +enum sched_type __kmp_auto = + kmp_sch_guided_analytical_chunked; /* default auto scheduling method */ +int __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; #if KMP_USE_MONITOR -int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; -int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( KMP_DEFAULT_BLOCKTIME, KMP_MIN_MONITOR_WAKEUPS ); +int __kmp_monitor_wakeups = KMP_MIN_MONITOR_WAKEUPS; +int __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(KMP_DEFAULT_BLOCKTIME, + KMP_MIN_MONITOR_WAKEUPS); #endif #ifdef KMP_ADJUST_BLOCKTIME -int __kmp_zero_bt = FALSE; +int __kmp_zero_bt = FALSE; #endif /* KMP_ADJUST_BLOCKTIME */ #ifdef KMP_DFLT_NTH_CORES -int __kmp_ncores = 0; +int __kmp_ncores = 0; #endif -int __kmp_chunk = 0; -int __kmp_abort_delay = 0; +int __kmp_chunk = 0; +int __kmp_abort_delay = 0; #if KMP_OS_LINUX && defined(KMP_TDATA_GTID) -int __kmp_gtid_mode = 3; /* use __declspec(thread) TLS to store gtid */ -int __kmp_adjust_gtid_mode = FALSE; +int __kmp_gtid_mode = 3; /* 
use __declspec(thread) TLS to store gtid */ +int __kmp_adjust_gtid_mode = FALSE; #elif KMP_OS_WINDOWS -int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */ -int __kmp_adjust_gtid_mode = FALSE; +int __kmp_gtid_mode = 2; /* use TLS functions to store gtid */ +int __kmp_adjust_gtid_mode = FALSE; #else -int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */ -int __kmp_adjust_gtid_mode = TRUE; +int __kmp_gtid_mode = 0; /* select method to get gtid based on #threads */ +int __kmp_adjust_gtid_mode = TRUE; #endif /* KMP_OS_LINUX && defined(KMP_TDATA_GTID) */ #ifdef KMP_TDATA_GTID #if KMP_OS_WINDOWS __declspec(thread) int __kmp_gtid = KMP_GTID_DNE; #else __thread int __kmp_gtid = KMP_GTID_DNE; -#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core compiler 20110316 doesn't accept __declspec */ +#endif /* KMP_OS_WINDOWS - workaround because Intel(R) Many Integrated Core \ + compiler 20110316 doesn't accept __declspec */ #endif /* KMP_TDATA_GTID */ -int __kmp_tls_gtid_min = INT_MAX; -int __kmp_foreign_tp = TRUE; +int __kmp_tls_gtid_min = INT_MAX; +int __kmp_foreign_tp = TRUE; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -int __kmp_inherit_fp_control = TRUE; -kmp_int16 __kmp_init_x87_fpu_control_word = 0; -kmp_uint32 __kmp_init_mxcsr = 0; +int __kmp_inherit_fp_control = TRUE; +kmp_int16 __kmp_init_x87_fpu_control_word = 0; +kmp_uint32 __kmp_init_mxcsr = 0; #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #ifdef USE_LOAD_BALANCE -double __kmp_load_balance_interval = 1.0; +double __kmp_load_balance_interval = 1.0; #endif /* USE_LOAD_BALANCE */ -kmp_nested_nthreads_t __kmp_nested_nth = { NULL, 0, 0 }; +kmp_nested_nthreads_t __kmp_nested_nth = {NULL, 0, 0}; #if KMP_USE_ADAPTIVE_LOCKS -kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { 1, 1024 }; // TODO: tune it! +kmp_adaptive_backoff_params_t __kmp_adaptive_backoff_params = { + 1, 1024}; // TODO: tune it! 
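The KMP_TDATA_GTID block above is the usual portability split for declaring a thread-local variable: __declspec(thread) with Microsoft-style compilers, the __thread keyword with GCC-compatible ones, with the TLS-function mode and the "derive it from the thread count" mode as the alternatives __kmp_gtid_mode selects between. A minimal, compilable sketch of just that declaration split (the variable name and values here are placeholders, not the runtime's internals):

#include <cstdio>

#if defined(_MSC_VER)
__declspec(thread) int my_gtid = -1; // Microsoft toolchains
#else
__thread int my_gtid = -1; // GCC/Clang and most Unix toolchains
#endif

int main() {
  my_gtid = 0; // every thread sees and updates its own copy
  std::printf("gtid kept in TLS: %d\n", my_gtid);
  return 0;
}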
#if KMP_DEBUG_ADAPTIVE_LOCKS -char * __kmp_speculative_statsfile = "-"; +char *__kmp_speculative_statsfile = "-"; #endif #endif // KMP_USE_ADAPTIVE_LOCKS #if OMP_40_ENABLED -int __kmp_display_env = FALSE; -int __kmp_display_env_verbose = FALSE; -int __kmp_omp_cancellation = FALSE; +int __kmp_display_env = FALSE; +int __kmp_display_env_verbose = FALSE; +int __kmp_omp_cancellation = FALSE; #endif /* map OMP 3.0 schedule types with our internal schedule types */ -enum sched_type __kmp_sch_map[ kmp_sched_upper - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ] = { - kmp_sch_static_chunked, // ==> kmp_sched_static = 1 - kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 - kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 - kmp_sch_auto, // ==> kmp_sched_auto = 4 - kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 - // will likely not used, introduced here just to debug the code - // of public intel extension schedules +enum sched_type __kmp_sch_map[kmp_sched_upper - kmp_sched_lower_ext + + kmp_sched_upper_std - kmp_sched_lower - 2] = { + kmp_sch_static_chunked, // ==> kmp_sched_static = 1 + kmp_sch_dynamic_chunked, // ==> kmp_sched_dynamic = 2 + kmp_sch_guided_chunked, // ==> kmp_sched_guided = 3 + kmp_sch_auto, // ==> kmp_sched_auto = 4 + kmp_sch_trapezoidal // ==> kmp_sched_trapezoidal = 101 + // will likely not be used, introduced here just to debug the code + // of public intel extension schedules }; #if KMP_OS_LINUX @@ -223,44 +244,45 @@ enum mic_type __kmp_mic_type = non_mic; #if KMP_AFFINITY_SUPPORTED -KMPAffinity* __kmp_affinity_dispatch = NULL; +KMPAffinity *__kmp_affinity_dispatch = NULL; -# if KMP_USE_HWLOC +#if KMP_USE_HWLOC int __kmp_hwloc_error = FALSE; hwloc_topology_t __kmp_hwloc_topology = NULL; -# endif +#endif -# if KMP_OS_WINDOWS -# if KMP_GROUP_AFFINITY +#if KMP_OS_WINDOWS +#if KMP_GROUP_AFFINITY int __kmp_num_proc_groups = 1; -# endif /* KMP_GROUP_AFFINITY */ +#endif /* KMP_GROUP_AFFINITY */ kmp_GetActiveProcessorCount_t __kmp_GetActiveProcessorCount = NULL; kmp_GetActiveProcessorGroupCount_t __kmp_GetActiveProcessorGroupCount = NULL; kmp_GetThreadGroupAffinity_t __kmp_GetThreadGroupAffinity = NULL; kmp_SetThreadGroupAffinity_t __kmp_SetThreadGroupAffinity = NULL; -# endif /* KMP_OS_WINDOWS */ +#endif /* KMP_OS_WINDOWS */ -size_t __kmp_affin_mask_size = 0; +size_t __kmp_affin_mask_size = 0; enum affinity_type __kmp_affinity_type = affinity_default; enum affinity_gran __kmp_affinity_gran = affinity_gran_default; -int __kmp_affinity_gran_levels = -1; +int __kmp_affinity_gran_levels = -1; int __kmp_affinity_dups = TRUE; -enum affinity_top_method __kmp_affinity_top_method = affinity_top_method_default; -int __kmp_affinity_compact = 0; -int __kmp_affinity_offset = 0; -int __kmp_affinity_verbose = FALSE; -int __kmp_affinity_warnings = TRUE; -int __kmp_affinity_respect_mask = affinity_respect_mask_default; -char * __kmp_affinity_proclist = NULL; +enum affinity_top_method __kmp_affinity_top_method = + affinity_top_method_default; +int __kmp_affinity_compact = 0; +int __kmp_affinity_offset = 0; +int __kmp_affinity_verbose = FALSE; +int __kmp_affinity_warnings = TRUE; +int __kmp_affinity_respect_mask = affinity_respect_mask_default; +char *__kmp_affinity_proclist = NULL; kmp_affin_mask_t *__kmp_affinity_masks = NULL; -unsigned __kmp_affinity_num_masks = 0; +unsigned __kmp_affinity_num_masks = 0; -char const * __kmp_cpuinfo_file = NULL; +char const *__kmp_cpuinfo_file = NULL; #endif /* KMP_AFFINITY_SUPPORTED */ #if OMP_40_ENABLED -kmp_nested_proc_bind_t 
__kmp_nested_proc_bind = { NULL, 0, 0 }; +kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0}; int __kmp_affinity_num_places = 0; #endif @@ -281,75 +303,87 @@ kmp_tasking_mode_t __kmp_tasking_mode = tskm_task_teams; kmp_int32 __kmp_max_task_priority = 0; #endif -/* This check ensures that the compiler is passing the correct data type - * for the flags formal parameter of the function kmpc_omp_task_alloc(). - * If the type is not a 4-byte type, then give an error message about - * a non-positive length array pointing here. If that happens, the - * kmp_tasking_flags_t structure must be redefined to have exactly 32 bits. - */ -KMP_BUILD_ASSERT( sizeof(kmp_tasking_flags_t) == 4 ); +/* This check ensures that the compiler is passing the correct data type for the + flags formal parameter of the function kmpc_omp_task_alloc(). If the type is + not a 4-byte type, then give an error message about a non-positive length + array pointing here. If that happens, the kmp_tasking_flags_t structure must + be redefined to have exactly 32 bits. */ +KMP_BUILD_ASSERT(sizeof(kmp_tasking_flags_t) == 4); -kmp_int32 __kmp_task_stealing_constraint = 1; /* Constrain task stealing by default */ +kmp_int32 __kmp_task_stealing_constraint = + 1; /* Constrain task stealing by default */ #ifdef DEBUG_SUSPEND -int __kmp_suspend_count = 0; +int __kmp_suspend_count = 0; #endif -int __kmp_settings = FALSE; -int __kmp_duplicate_library_ok = 0; +int __kmp_settings = FALSE; +int __kmp_duplicate_library_ok = 0; #if USE_ITT_BUILD -int __kmp_forkjoin_frames = 1; -int __kmp_forkjoin_frames_mode = 3; +int __kmp_forkjoin_frames = 1; +int __kmp_forkjoin_frames_mode = 3; #endif -PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = reduction_method_not_defined; -int __kmp_determ_red = FALSE; +PACKED_REDUCTION_METHOD_T __kmp_force_reduction_method = + reduction_method_not_defined; +int __kmp_determ_red = FALSE; #ifdef KMP_DEBUG -int kmp_a_debug = 0; -int kmp_b_debug = 0; -int kmp_c_debug = 0; -int kmp_d_debug = 0; -int kmp_e_debug = 0; -int kmp_f_debug = 0; -int kmp_diag = 0; +int kmp_a_debug = 0; +int kmp_b_debug = 0; +int kmp_c_debug = 0; +int kmp_d_debug = 0; +int kmp_e_debug = 0; +int kmp_f_debug = 0; +int kmp_diag = 0; #endif /* For debug information logging using rotating buffer */ -int __kmp_debug_buf = FALSE; /* TRUE means use buffer, FALSE means print to stderr */ -int __kmp_debug_buf_lines = KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ -int __kmp_debug_buf_chars = KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ -int __kmp_debug_buf_atomic = FALSE; /* TRUE means use atomic update of buffer entry pointer */ - -char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ -int __kmp_debug_count = 0; /* Counter for number of lines printed in buffer so far */ -int __kmp_debug_buf_warn_chars = 0; /* Keep track of char increase recommended in warnings */ +int __kmp_debug_buf = + FALSE; /* TRUE means use buffer, FALSE means print to stderr */ +int __kmp_debug_buf_lines = + KMP_DEBUG_BUF_LINES_INIT; /* Lines of debug stored in buffer */ +int __kmp_debug_buf_chars = + KMP_DEBUG_BUF_CHARS_INIT; /* Characters allowed per line in buffer */ +int __kmp_debug_buf_atomic = + FALSE; /* TRUE means use atomic update of buffer entry pointer */ + +char *__kmp_debug_buffer = NULL; /* Debug buffer itself */ +int __kmp_debug_count = + 0; /* Counter for number of lines printed in buffer so far */ +int __kmp_debug_buf_warn_chars = + 0; /* Keep track of char increase recommended in warnings */ /* end 
rotating debug buffer */ #ifdef KMP_DEBUG -int __kmp_par_range; /* +1 => only go par for constructs in range */ - /* -1 => only go par for constructs outside range */ -char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = { '\0' }; -char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = { '\0' }; -int __kmp_par_range_lb = 0; -int __kmp_par_range_ub = INT_MAX; +int __kmp_par_range; /* +1 => only go par for constructs in range */ +/* -1 => only go par for constructs outside range */ +char __kmp_par_range_routine[KMP_PAR_RANGE_ROUTINE_LEN] = {'\0'}; +char __kmp_par_range_filename[KMP_PAR_RANGE_FILENAME_LEN] = {'\0'}; +int __kmp_par_range_lb = 0; +int __kmp_par_range_ub = INT_MAX; #endif /* KMP_DEBUG */ /* For printing out dynamic storage map for threads and teams */ -int __kmp_storage_map = FALSE; /* True means print storage map for threads and teams */ -int __kmp_storage_map_verbose = FALSE; /* True means storage map includes placement info */ -int __kmp_storage_map_verbose_specified = FALSE; -/* Initialize the library data structures when we fork a child process, defaults to TRUE */ -int __kmp_need_register_atfork = TRUE; /* At initialization, call pthread_atfork to install fork handler */ -int __kmp_need_register_atfork_specified = TRUE; - -int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */ -int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ -int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */ -int __kmp_env_all_threads = FALSE;/* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ -int __kmp_env_omp_all_threads = FALSE;/* OMP_THREAD_LIMIT specified? */ -int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ -int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ -int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */ +int __kmp_storage_map = + FALSE; /* True means print storage map for threads and teams */ +int __kmp_storage_map_verbose = + FALSE; /* True means storage map includes placement info */ +int __kmp_storage_map_verbose_specified = FALSE; +/* Initialize the library data structures when we fork a child process, defaults + * to TRUE */ +int __kmp_need_register_atfork = + TRUE; /* At initialization, call pthread_atfork to install fork handler */ +int __kmp_need_register_atfork_specified = TRUE; + +int __kmp_env_chunk = FALSE; /* KMP_CHUNK specified? */ +int __kmp_env_stksize = FALSE; /* KMP_STACKSIZE specified? */ +int __kmp_env_omp_stksize = FALSE; /* OMP_STACKSIZE specified? */ +int __kmp_env_all_threads = + FALSE; /* KMP_ALL_THREADS or KMP_MAX_THREADS specified? */ +int __kmp_env_omp_all_threads = FALSE; /* OMP_THREAD_LIMIT specified? */ +int __kmp_env_blocktime = FALSE; /* KMP_BLOCKTIME specified? */ +int __kmp_env_checks = FALSE; /* KMP_CHECKS specified? */ +int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK specified? */ kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT; kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT; @@ -360,42 +394,38 @@ kmp_uint32 __kmp_yielding_on = 1; #if KMP_OS_CNK kmp_uint32 __kmp_yield_cycle = 0; #else -kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ +kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */ #endif -kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */ -kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. 
*/ -/* ----------------------------------------------------- */ - +kmp_int32 __kmp_yield_on_count = + 10; /* By default, yielding is on for 10 monitor periods. */ +kmp_int32 __kmp_yield_off_count = + 1; /* By default, yielding is off for 1 monitor periods. */ /* ------------------------------------------------------ */ /* STATE mostly syncronized with global lock */ /* data written to rarely by masters, read often by workers */ -/* - * SHALL WE EDIT THE COMMENT BELOW IN SOME WAY? - * TODO: None of this global padding stuff works consistently because - * the order of declaration is not necessarily correlated to storage order. - * To fix this, all the important globals must be put in a big structure - * instead. - */ +/* TODO: None of this global padding stuff works consistently because the order + of declaration is not necessarily correlated to storage order. To fix this, + all the important globals must be put in a big structure instead. */ KMP_ALIGN_CACHE - kmp_info_t **__kmp_threads = NULL; - kmp_root_t **__kmp_root = NULL; +kmp_info_t **__kmp_threads = NULL; +kmp_root_t **__kmp_root = NULL; /* data read/written to often by masters */ KMP_ALIGN_CACHE -volatile int __kmp_nth = 0; -volatile int __kmp_all_nth = 0; -int __kmp_thread_pool_nth = 0; -volatile kmp_info_t *__kmp_thread_pool = NULL; -volatile kmp_team_t *__kmp_team_pool = NULL; +volatile int __kmp_nth = 0; +volatile int __kmp_all_nth = 0; +int __kmp_thread_pool_nth = 0; +volatile kmp_info_t *__kmp_thread_pool = NULL; +volatile kmp_team_t *__kmp_team_pool = NULL; KMP_ALIGN_CACHE -volatile int __kmp_thread_pool_active_nth = 0; +volatile int __kmp_thread_pool_active_nth = 0; /* ------------------------------------------------- * GLOBAL/ROOT STATE */ KMP_ALIGN_CACHE -kmp_global_t __kmp_global = {{ 0 }}; +kmp_global_t __kmp_global = {{0}}; /* ----------------------------------------------- */ /* GLOBAL SYNCHRONIZATION LOCKS */ @@ -406,66 +436,72 @@ kmp_global_t __kmp_global = {{ 0 }}; * false sharing if the alignment is not large enough for these locks */ KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */ +kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( + __kmp_initz_lock); /* Control initializations */ KMP_ALIGN_CACHE_INTERNODE kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ #if KMP_USE_MONITOR KMP_ALIGN_CACHE_INTERNODE kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ #endif +/* used for the hack to allow threadprivate cache and __kmp_threads expansion + to co-exist */ KMP_ALIGN_CACHE_INTERNODE -kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ +kmp_bootstrap_lock_t __kmp_tp_cached_lock; KMP_ALIGN_CACHE_INTERNODE -kmp_lock_t __kmp_global_lock; /* Control OS/global access */ +kmp_lock_t __kmp_global_lock; /* Control OS/global access */ KMP_ALIGN_CACHE_INTERNODE -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ +kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ KMP_ALIGN_CACHE_INTERNODE -kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ +kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ #else KMP_ALIGN_CACHE 
-kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_initz_lock ); /* Control initializations */ +kmp_bootstrap_lock_t __kmp_initz_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( + __kmp_initz_lock); /* Control initializations */ kmp_bootstrap_lock_t __kmp_forkjoin_lock; /* control fork/join access */ -kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ +kmp_bootstrap_lock_t __kmp_exit_lock; /* exit() is not always thread-safe */ #if KMP_USE_MONITOR kmp_bootstrap_lock_t __kmp_monitor_lock; /* control monitor thread creation */ #endif -kmp_bootstrap_lock_t __kmp_tp_cached_lock; /* used for the hack to allow threadprivate cache and __kmp_threads expansion to co-exist */ +/* used for the hack to allow threadprivate cache and __kmp_threads expansion + to co-exist */ +kmp_bootstrap_lock_t __kmp_tp_cached_lock; KMP_ALIGN(128) -kmp_lock_t __kmp_global_lock; /* Control OS/global access */ +kmp_lock_t __kmp_global_lock; /* Control OS/global access */ KMP_ALIGN(128) -kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ +kmp_queuing_lock_t __kmp_dispatch_lock; /* Control dispatch access */ KMP_ALIGN(128) -kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ +kmp_lock_t __kmp_debug_lock; /* Control I/O access for KMP_DEBUG */ #endif /* ----------------------------------------------- */ #if KMP_HANDLE_SIGNALS - /* - Signal handling is disabled by default, because it confuses users: In case of sigsegv - (or other trouble) in user code signal handler catches the signal, which then "appears" in - the monitor thread (when the monitor executes raise() function). Users see signal in the - monitor thread and blame OpenMP RTL. - - Grant said signal handling required on some older OSes (Irix?) supported by KAI, because - bad applications hung but not aborted. Currently it is not a problem for Linux* OS, OS X* and - Windows* OS. - - Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm putting - the default back for now to see if that fixes hangs on those machines. - - 2010-04013 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of stack backtrace - when program is aborting, but the code is not signal-safe. When multiple signals raised at - the same time (which occurs in dynamic negative tests because all the worker threads detects - the same error), Fortran RTL may hang. The bug finally fixed in Fortran RTL library provided - by Steve R., and will be available soon. - */ - int __kmp_handle_signals = FALSE; +/* Signal handling is disabled by default, because it confuses users: In case of + sigsegv (or other trouble) in user code signal handler catches the signal, + which then "appears" in the monitor thread (when the monitor executes raise() + function). Users see signal in the monitor thread and blame OpenMP RTL. + + Grant said signal handling required on some older OSes (Irix?) supported by + KAI, because bad applications hung but not aborted. Currently it is not a + problem for Linux* OS, OS X* and Windows* OS. + + Grant: Found new hangs for EL4, EL5, and a Fedora Core machine. So I'm + putting the default back for now to see if that fixes hangs on those + machines. + + 2010-04013 Lev: It was a bug in Fortran RTL. Fortran RTL prints a kind of + stack backtrace when program is aborting, but the code is not signal-safe. + When multiple signals raised at the same time (which occurs in dynamic + negative tests because all the worker threads detects the same error), + Fortran RTL may hang. 
The bug finally fixed in Fortran RTL library provided + by Steve R., and will be available soon. */ +int __kmp_handle_signals = FALSE; #endif /* ----------------------------------------------- */ @@ -474,26 +510,21 @@ kmp_key_t __kmp_tv_key = 0; #endif /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ #ifdef DEBUG_SUSPEND -int -get_suspend_count_( void ) { - int count = __kmp_suspend_count; - __kmp_suspend_count = 0; - return count; -} -void -set_suspend_count_( int * value ) { - __kmp_suspend_count = *value; +int get_suspend_count_(void) { + int count = __kmp_suspend_count; + __kmp_suspend_count = 0; + return count; } +void set_suspend_count_(int *value) { __kmp_suspend_count = *value; } #endif // Symbols for MS mutual detection. int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; -#if KMP_OS_WINDOWS && ( KMP_VERSION_MAJOR > 4 ) - int _You_must_link_with_Microsoft_OpenMP_library = 1; +int _You_must_link_with_Intel_OpenMP_library = 1; +#if KMP_OS_WINDOWS && (KMP_VERSION_MAJOR > 4) +int _You_must_link_with_Microsoft_OpenMP_library = 1; #endif // end of file // diff --git a/openmp/runtime/src/kmp_gsupport.cpp b/openmp/runtime/src/kmp_gsupport.cpp index ada7ce9..2eca497 100644 --- a/openmp/runtime/src/kmp_gsupport.cpp +++ b/openmp/runtime/src/kmp_gsupport.cpp @@ -21,551 +21,473 @@ #endif #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -#define MKLOC(loc,routine) \ - static ident_t (loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;" }; +#define MKLOC(loc, routine) \ + static ident_t(loc) = {0, KMP_IDENT_KMPC, 0, 0, ";unknown;unknown;0;0;;"}; #include "kmp_ftn_os.h" -void -xexpand(KMP_API_NAME_GOMP_BARRIER)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_barrier"); - KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); +void xexpand(KMP_API_NAME_GOMP_BARRIER)(void) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_barrier"); + KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); #if OMPT_SUPPORT && OMPT_TRACE - ompt_frame_t * ompt_frame; - if (ompt_enabled ) { - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); - } + ompt_frame_t *ompt_frame; + if (ompt_enabled) { + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->reenter_runtime_frame = __builtin_frame_address(1); + } #endif - __kmpc_barrier(&loc, gtid); + __kmpc_barrier(&loc, gtid); } - -// // Mutual exclusion -// -// -// The symbol that icc/ifort generates for unnamed for unnamed critical -// sections - .gomp_critical_user_ - is defined using .comm in any objects -// reference it. We can't reference it directly here in C code, as the -// symbol contains a ".". +// The symbol that icc/ifort generates for unnamed for unnamed critical sections +// - .gomp_critical_user_ - is defined using .comm in any objects reference it. +// We can't reference it directly here in C code, as the symbol contains a ".". // // The RTL contains an assembly language definition of .gomp_critical_user_ // with another symbol __kmp_unnamed_critical_addr initialized with it's // address. 
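For orientation, a rough sketch of how these wrappers are reached: GCC lowers an unnamed "#pragma omp critical" region into paired calls to the GOMP_critical_start/GOMP_critical_end entry points defined below, which forward to __kmpc_critical/__kmpc_end_critical with __kmp_unnamed_critical_addr as the lock name. The caller and shared_counter below are hypothetical, for illustration only.

/* Hypothetical caller, shown only to illustrate the pairing. */
extern "C" void GOMP_critical_start(void);
extern "C" void GOMP_critical_end(void);

static int shared_counter = 0; // hypothetical shared state

void bump_counter(void) {
  GOMP_critical_start(); // -> __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr)
  ++shared_counter;      // the guarded update
  GOMP_critical_end();   // -> __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr)
}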
-// extern kmp_critical_name *__kmp_unnamed_critical_addr; - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_start"); - KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); - __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); +void xexpand(KMP_API_NAME_GOMP_CRITICAL_START)(void) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_critical_start"); + KA_TRACE(20, ("GOMP_critical_start: T#%d\n", gtid)); + __kmpc_critical(&loc, gtid, __kmp_unnamed_critical_addr); } - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_end"); - KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); - __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); +void xexpand(KMP_API_NAME_GOMP_CRITICAL_END)(void) { + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_critical_end"); + KA_TRACE(20, ("GOMP_critical_end: T#%d\n", gtid)); + __kmpc_end_critical(&loc, gtid, __kmp_unnamed_critical_addr); } - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_critical_name_start"); - KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); - __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); +void xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_START)(void **pptr) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_critical_name_start"); + KA_TRACE(20, ("GOMP_critical_name_start: T#%d\n", gtid)); + __kmpc_critical(&loc, gtid, (kmp_critical_name *)pptr); } - -void -xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_critical_name_end"); - KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); - __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); +void xexpand(KMP_API_NAME_GOMP_CRITICAL_NAME_END)(void **pptr) { + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_critical_name_end"); + KA_TRACE(20, ("GOMP_critical_name_end: T#%d\n", gtid)); + __kmpc_end_critical(&loc, gtid, (kmp_critical_name *)pptr); } - -// // The Gnu codegen tries to use locked operations to perform atomic updates // inline. If it can't, then it calls GOMP_atomic_start() before performing // the update and GOMP_atomic_end() afterward, regardless of the data type. -// - -void -xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void) -{ - int gtid = __kmp_entry_gtid(); - KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); +void xexpand(KMP_API_NAME_GOMP_ATOMIC_START)(void) { + int gtid = __kmp_entry_gtid(); + KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); #if OMPT_SUPPORT - __ompt_thread_assign_wait_id(0); + __ompt_thread_assign_wait_id(0); #endif - __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); + __kmp_acquire_atomic_lock(&__kmp_atomic_lock, gtid); } - -void -xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); - __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); +void xexpand(KMP_API_NAME_GOMP_ATOMIC_END)(void) { + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_atomic_start: T#%d\n", gtid)); + __kmp_release_atomic_lock(&__kmp_atomic_lock, gtid); } +int xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_single_start"); + KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); -int -xexpand(KMP_API_NAME_GOMP_SINGLE_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_start"); - KA_TRACE(20, ("GOMP_single_start: T#%d\n", gtid)); - - if (! 
TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - // - // 3rd parameter == FALSE prevents kmp_enter_single from pushing a - // workshare when USE_CHECKS is defined. We need to avoid the push, - // as there is no corresponding GOMP_single_end() call. - // - return __kmp_enter_single(gtid, &loc, FALSE); + // 3rd parameter == FALSE prevents kmp_enter_single from pushing a + // workshare when USE_CHECKS is defined. We need to avoid the push, + // as there is no corresponding GOMP_single_end() call. + return __kmp_enter_single(gtid, &loc, FALSE); } - -void * -xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) -{ - void *retval; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_single_copy_start"); - KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); - - if (! TCR_4(__kmp_init_parallel)) - __kmp_parallel_initialize(); - - // - // If this is the first thread to enter, return NULL. The generated - // code will then call GOMP_single_copy_end() for this thread only, - // with the copyprivate data pointer as an argument. - // - if (__kmp_enter_single(gtid, &loc, FALSE)) - return NULL; - - // - // Wait for the first thread to set the copyprivate data pointer, - // and for all other threads to reach this point. - // - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - // - // Retrieve the value of the copyprivate data point, and wait for all - // threads to do likewise, then return. - // - retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - return retval; +void *xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { + void *retval; + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_single_copy_start"); + KA_TRACE(20, ("GOMP_single_copy_start: T#%d\n", gtid)); + + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + + // If this is the first thread to enter, return NULL. The generated code will + // then call GOMP_single_copy_end() for this thread only, with the + // copyprivate data pointer as an argument. + if (__kmp_enter_single(gtid, &loc, FALSE)) + return NULL; + + // Wait for the first thread to set the copyprivate data pointer, + // and for all other threads to reach this point. + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + + // Retrieve the value of the copyprivate data point, and wait for all + // threads to do likewise, then return. + retval = __kmp_team_from_gtid(gtid)->t.t_copypriv_data; + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + return retval; } - -void -xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); - - // - // Set the copyprivate data pointer fo the team, then hit the barrier - // so that the other threads will continue on and read it. Hit another - // barrier before continuing, so that the know that the copyprivate - // data pointer has been propagated to all threads before trying to - // reuse the t_copypriv_data field. 
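The comments above describe the copyprivate handshake from the runtime's side; a minimal sketch of the caller's side, roughly as GCC-generated code would drive it, looks like this. The broadcast_value helper and its int payload are hypothetical; only GOMP_single_copy_start/GOMP_single_copy_end correspond to the entry points defined here.

extern "C" void *GOMP_single_copy_start(void);
extern "C" void GOMP_single_copy_end(void *);

void broadcast_value(int *my_copy) {
  static int payload;               // hypothetical copyprivate variable
  void *p = GOMP_single_copy_start();
  if (p == NULL) {                  // this thread won the single region
    payload = 42;                   // produce the value once
    *my_copy = payload;
    GOMP_single_copy_end(&payload); // publish; the barriers propagate the pointer
  } else {                          // every other thread receives the published pointer
    *my_copy = *(int *)p;
  }
}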
- // - __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); +void xexpand(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_single_copy_end: T#%d\n", gtid)); + + // Set the copyprivate data pointer fo the team, then hit the barrier so that + // the other threads will continue on and read it. Hit another barrier before + // continuing, so that the know that the copyprivate data pointer has been + // propagated to all threads before trying to reuse the t_copypriv_data field. + __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); } - -void -xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_ordered_start"); - KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); - __kmpc_ordered(&loc, gtid); +void xexpand(KMP_API_NAME_GOMP_ORDERED_START)(void) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_ordered_start"); + KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); + __kmpc_ordered(&loc, gtid); } - -void -xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_ordered_end"); - KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); - __kmpc_end_ordered(&loc, gtid); +void xexpand(KMP_API_NAME_GOMP_ORDERED_END)(void) { + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_ordered_end"); + KA_TRACE(20, ("GOMP_ordered_start: T#%d\n", gtid)); + __kmpc_end_ordered(&loc, gtid); } - -// // Dispatch macro defs // // They come in two flavors: 64-bit unsigned, and either 32-bit signed // (IA-32 architecture) or 64-bit signed (Intel(R) 64). 
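Tying these pieces together: the GOMP_ordered_start/GOMP_ordered_end wrappers above are called from inside a loop obtained through the ordered loop entry points that the LOOP_START/LOOP_NEXT macros further down generate. A rough, hypothetical caller is sketched below; the 0..100 bounds, stride 1, and chunk size 4 are arbitrary, and the GOMP side of the interface uses exclusive upper bounds.

extern "C" int GOMP_loop_ordered_dynamic_start(long, long, long, long, long *, long *);
extern "C" int GOMP_loop_ordered_dynamic_next(long *, long *);
extern "C" void GOMP_ordered_start(void);
extern "C" void GOMP_ordered_end(void);
extern "C" void GOMP_loop_end(void);

void ordered_loop(void) {
  long lb, ub; // chunk returned as [lb, ub)
  if (GOMP_loop_ordered_dynamic_start(0, 100, 1, 4, &lb, &ub)) {
    do {
      for (long i = lb; i < ub; ++i) {
        GOMP_ordered_start(); // ordered regions execute in iteration order
        /* ordered body for iteration i */
        GOMP_ordered_end();
      }
    } while (GOMP_loop_ordered_dynamic_next(&lb, &ub));
  }
  GOMP_loop_end(); // implicit barrier at loop exit
}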
-// #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS -# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 -# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 -# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 +#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_4 +#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_4 +#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_4 #else -# define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 -# define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 -# define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 +#define KMP_DISPATCH_INIT __kmp_aux_dispatch_init_8 +#define KMP_DISPATCH_FINI_CHUNK __kmp_aux_dispatch_fini_chunk_8 +#define KMP_DISPATCH_NEXT __kmpc_dispatch_next_8 #endif /* KMP_ARCH_X86 */ -# define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u -# define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u -# define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u - +#define KMP_DISPATCH_INIT_ULL __kmp_aux_dispatch_init_8u +#define KMP_DISPATCH_FINI_CHUNK_ULL __kmp_aux_dispatch_fini_chunk_8u +#define KMP_DISPATCH_NEXT_ULL __kmpc_dispatch_next_8u -// // The parallel contruct -// #ifndef KMP_DEBUG static #endif /* KMP_DEBUG */ -void -__kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), - void *data) -{ + void + __kmp_GOMP_microtask_wrapper(int *gtid, int *npr, void (*task)(void *), + void *data) { #if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled) { - // get pointer to thread data structure - thr = __kmp_threads[*gtid]; - - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); - } + kmp_info_t *thr; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; + + if (ompt_enabled) { + // get pointer to thread data structure + thr = __kmp_threads[*gtid]; + + // save enclosing task state; set current state for task + enclosing_state = thr->th.ompt_thread_info.state; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + + // set task frame + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + } #endif - task(data); + task(data); #if OMPT_SUPPORT - if (ompt_enabled) { - // clear task frame - ompt_frame->exit_runtime_frame = NULL; + if (ompt_enabled) { + // clear task frame + ompt_frame->exit_runtime_frame = NULL; - // restore enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } + // restore enclosing state + thr->th.ompt_thread_info.state = enclosing_state; + } #endif } - #ifndef KMP_DEBUG static #endif /* KMP_DEBUG */ -void -__kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, - void (*task)(void *), void *data, unsigned num_threads, ident_t *loc, - enum sched_type schedule, long start, long end, long incr, long chunk_size) -{ - // - // Intialize the loop worksharing construct. - // - KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, - schedule != kmp_sch_static); + void + __kmp_GOMP_parallel_microtask_wrapper(int *gtid, int *npr, + void (*task)(void *), void *data, + unsigned num_threads, ident_t *loc, + enum sched_type schedule, long start, + long end, long incr, + long chunk_size) { + // Intialize the loop worksharing construct. 
+ KMP_DISPATCH_INIT(loc, *gtid, schedule, start, end, incr, chunk_size, + schedule != kmp_sch_static); #if OMPT_SUPPORT - kmp_info_t *thr; - ompt_frame_t *ompt_frame; - ompt_state_t enclosing_state; - - if (ompt_enabled) { - thr = __kmp_threads[*gtid]; - // save enclosing task state; set current state for task - enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = ompt_state_work_parallel; - - // set task frame - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = __builtin_frame_address(0); - } + kmp_info_t *thr; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; + + if (ompt_enabled) { + thr = __kmp_threads[*gtid]; + // save enclosing task state; set current state for task + enclosing_state = thr->th.ompt_thread_info.state; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + + // set task frame + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->exit_runtime_frame = __builtin_frame_address(0); + } #endif - // - // Now invoke the microtask. - // - task(data); + // Now invoke the microtask. + task(data); #if OMPT_SUPPORT - if (ompt_enabled) { - // clear task frame - ompt_frame->exit_runtime_frame = NULL; + if (ompt_enabled) { + // clear task frame + ompt_frame->exit_runtime_frame = NULL; - // reset enclosing state - thr->th.ompt_thread_info.state = enclosing_state; - } + // reset enclosing state + thr->th.ompt_thread_info.state = enclosing_state; + } #endif } - #ifndef KMP_DEBUG static #endif /* KMP_DEBUG */ -void -__kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), microtask_t wrapper, int argc,...) -{ - int rc; - kmp_info_t *thr = __kmp_threads[gtid]; - kmp_team_t *team = thr->th.th_team; - int tid = __kmp_tid_from_gtid(gtid); - - va_list ap; - va_start(ap, argc); - - rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, + void + __kmp_GOMP_fork_call(ident_t *loc, int gtid, void (*unwrapped_task)(void *), + microtask_t wrapper, int argc, ...) 
{ + int rc; + kmp_info_t *thr = __kmp_threads[gtid]; + kmp_team_t *team = thr->th.th_team; + int tid = __kmp_tid_from_gtid(gtid); + + va_list ap; + va_start(ap, argc); + + rc = __kmp_fork_call(loc, gtid, fork_context_gnu, argc, #if OMPT_SUPPORT - VOLATILE_CAST(void *) unwrapped_task, + VOLATILE_CAST(void *) unwrapped_task, #endif - wrapper, __kmp_invoke_task_func, + wrapper, __kmp_invoke_task_func, #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - &ap + &ap #else - ap + ap #endif - ); + ); - va_end(ap); + va_end(ap); - if (rc) { - __kmp_run_before_invoked_task(gtid, tid, thr, team); - } + if (rc) { + __kmp_run_before_invoked_task(gtid, tid, thr, team); + } #if OMPT_SUPPORT - if (ompt_enabled) { + if (ompt_enabled) { #if OMPT_TRACE - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - team_info->parallel_id, task_info->task_id); - } -#endif - thr->th.ompt_thread_info.state = ompt_state_work_parallel; + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + + // implicit task callback + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + team_info->parallel_id, task_info->task_id); } #endif + thr->th.ompt_thread_info.state = ompt_state_work_parallel; + } +#endif } -static void -__kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, void (*task)(void *)) -{ +static void __kmp_GOMP_serialized_parallel(ident_t *loc, kmp_int32 gtid, + void (*task)(void *)) { #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id; - if (ompt_enabled) { - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - - ompt_parallel_id = __ompt_parallel_id_new(gtid); - - // parallel region callback - if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = 1; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - task_info->task_id, &task_info->frame, ompt_parallel_id, - team_size, (void *) task, - OMPT_INVOKER(fork_context_gnu)); - } + ompt_parallel_id_t ompt_parallel_id; + if (ompt_enabled) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + + ompt_parallel_id = __ompt_parallel_id_new(gtid); + + // parallel region callback + if (ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { + int team_size = 1; + ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( + task_info->task_id, &task_info->frame, ompt_parallel_id, team_size, + (void *)task, OMPT_INVOKER(fork_context_gnu)); } + } #endif - __kmp_serialized_parallel(loc, gtid); + __kmp_serialized_parallel(loc, gtid); #if OMPT_SUPPORT - if (ompt_enabled) { - kmp_info_t *thr = __kmp_threads[gtid]; + if (ompt_enabled) { + kmp_info_t *thr = __kmp_threads[gtid]; - ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); + ompt_task_id_t my_ompt_task_id = __ompt_task_id_new(gtid); - // set up lightweight task - ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) - __kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, thr, gtid, (void *) task, ompt_parallel_id); - lwt->ompt_task_info.task_id = my_ompt_task_id; - __ompt_lw_taskteam_link(lwt, thr); + // set up lightweight task + ompt_lw_taskteam_t *lwt = + (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); + __ompt_lw_taskteam_init(lwt, thr, gtid, 
(void *)task, ompt_parallel_id); + lwt->ompt_task_info.task_id = my_ompt_task_id; + __ompt_lw_taskteam_link(lwt, thr); #if OMPT_TRACE - // implicit task callback - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_ompt_task_id); - } - thr->th.ompt_thread_info.state = ompt_state_work_parallel; -#endif + // implicit task callback + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + ompt_parallel_id, my_ompt_task_id); } + thr->th.ompt_thread_info.state = ompt_state_work_parallel; +#endif + } #endif } - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, unsigned num_threads) -{ - int gtid = __kmp_entry_gtid(); +void xexpand(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), void *data, + unsigned num_threads) { + int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - ompt_frame_t *parent_frame, *frame; + ompt_frame_t *parent_frame, *frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); - } + if (ompt_enabled) { + parent_frame = __ompt_get_task_frame_internal(0); + parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + } #endif - MKLOC(loc, "GOMP_parallel_start"); - KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); + MKLOC(loc, "GOMP_parallel_start"); + KA_TRACE(20, ("GOMP_parallel_start: T#%d\n", gtid)); - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, + data); + } else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } #if OMPT_SUPPORT - if (ompt_enabled) { - frame = __ompt_get_task_frame_internal(0); - frame->exit_runtime_frame = __builtin_frame_address(1); - } + if (ompt_enabled) { + frame = __ompt_get_task_frame_internal(0); + frame->exit_runtime_frame = __builtin_frame_address(1); + } #endif } +void xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) { + int gtid = __kmp_get_gtid(); + kmp_info_t *thr; -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) -{ - int gtid = __kmp_get_gtid(); - kmp_info_t *thr; - - thr = __kmp_threads[gtid]; - - MKLOC(loc, "GOMP_parallel_end"); - KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); + thr = __kmp_threads[gtid]; + MKLOC(loc, "GOMP_parallel_end"); + KA_TRACE(20, ("GOMP_parallel_end: T#%d\n", gtid)); #if OMPT_SUPPORT - ompt_parallel_id_t parallel_id; - ompt_task_id_t serialized_task_id; - ompt_frame_t *ompt_frame = NULL; - - if (ompt_enabled) { - ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); - parallel_id = team_info->parallel_id; - - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - serialized_task_id = task_info->task_id; - - // unlink if necessary. no-op if there is not a lightweight task. 
- ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); - // GOMP allocates/frees lwt since it can't be kept on the stack - if (lwt) { - __kmp_free(lwt); - - } + ompt_parallel_id_t parallel_id; + ompt_task_id_t serialized_task_id; + ompt_frame_t *ompt_frame = NULL; + + if (ompt_enabled) { + ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL); + parallel_id = team_info->parallel_id; + + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + serialized_task_id = task_info->task_id; + + // unlink if necessary. no-op if there is not a lightweight task. + ompt_lw_taskteam_t *lwt = __ompt_lw_taskteam_unlink(thr); + // GOMP allocates/frees lwt since it can't be kept on the stack + if (lwt) { + __kmp_free(lwt); } + } #endif - if (! thr->th.th_team->t.t_serialized) { - __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, - thr->th.th_team); + if (!thr->th.th_team->t.t_serialized) { + __kmp_run_after_invoked_task(gtid, __kmp_tid_from_gtid(gtid), thr, + thr->th.th_team); #if OMPT_SUPPORT - if (ompt_enabled) { - // Implicit task is finished here, in the barrier we might schedule deferred tasks, - // these don't see the implicit task on the stack - ompt_frame = __ompt_get_task_frame_internal(0); - ompt_frame->exit_runtime_frame = NULL; - } + if (ompt_enabled) { + // Implicit task is finished here, in the barrier we might schedule + // deferred tasks, + // these don't see the implicit task on the stack + ompt_frame = __ompt_get_task_frame_internal(0); + ompt_frame->exit_runtime_frame = NULL; + } #endif - __kmp_join_call(&loc, gtid + __kmp_join_call(&loc, gtid #if OMPT_SUPPORT - , fork_context_gnu + , + fork_context_gnu #endif - ); - } - else { + ); + } else { #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, serialized_task_id); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + parallel_id, serialized_task_id); + } #endif - __kmpc_end_serialized_parallel(&loc, gtid); + __kmpc_end_serialized_parallel(&loc, gtid); #if OMPT_SUPPORT - if (ompt_enabled) { - // Record that we re-entered the runtime system in the frame that - // created the parallel region. - ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0); - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, parent_task_info->task_id, - OMPT_INVOKER(fork_context_gnu)); - } - - parent_task_info->frame.reenter_runtime_frame = NULL; - - thr->th.ompt_thread_info.state = - (((thr->th.th_team)->t.t_serialized) ? - ompt_state_work_serial : ompt_state_work_parallel); - } -#endif + if (ompt_enabled) { + // Record that we re-entered the runtime system in the frame that + // created the parallel region. + ompt_task_info_t *parent_task_info = __ompt_get_taskinfo(0); + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + parallel_id, parent_task_info->task_id, + OMPT_INVOKER(fork_context_gnu)); + } + + parent_task_info->frame.reenter_runtime_frame = NULL; + + thr->th.ompt_thread_info.state = + (((thr->th.th_team)->t.t_serialized) ? 
ompt_state_work_serial + : ompt_state_work_parallel); } +#endif + } } - -// // Loop worksharing constructs -// -// // The Gnu codegen passes in an exclusive upper bound for the overall range, // but the libguide dispatch code expects an inclusive upper bound, hence the // "end - incr" 5th argument to KMP_DISPATCH_INIT (and the " ub - str" 11th @@ -587,311 +509,308 @@ xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(void) // next iteration. Instead, it emits inline code to call omp_get_thread_num() // num and calculate the iteration space using the result. It doesn't do this // with ordered static loop, so they can be checked. -// - -#define LOOP_START(func,schedule) \ - int func (long lb, long ub, long str, long chunk_sz, long *p_lb, \ - long *p_ub) \ - { \ - int status; \ - long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_RUNTIME_START(func,schedule) \ - int func (long lb, long ub, long str, long *p_lb, long *p_ub) \ - { \ - int status; \ - long stride; \ - long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_NEXT(func,fini_code) \ - int func(long *p_lb, long *p_ub) \ - { \ - int status; \ - long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ - \ - fini_code \ - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ - (kmp_int *)p_ub, (kmp_int *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 1 : -1; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ - "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } +#define LOOP_START(func, schedule) \ + int func(long lb, long ub, long str, long chunk_sz, long *p_lb, \ + long *p_ub) { \ + int status; \ + long stride; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, \ + (#func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz)); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + +#define LOOP_RUNTIME_START(func, schedule) \ + int func(long lb, long ub, long str, long *p_lb, long *p_ub) { \ + int status; \ + long stride; \ + long chunk_sz = 0; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, \ + (#func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz %d\n", \ + gtid, lb, ub, str, chunk_sz)); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, TRUE); \ + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + +#define LOOP_NEXT(func, fini_code) \ + int func(long *p_lb, long *p_ub) { \ + int status; \ + long stride; \ + int gtid = __kmp_get_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, (#func ": T#%d\n", gtid)); \ + \ + fini_code status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, (kmp_int *)p_lb, \ + (kmp_int *)p_ub, (kmp_int *)&stride); \ + if (status) { \ + *p_ub += (stride > 0) ? 1 : -1; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%lx, *p_ub 0x%lx, stride 0x%lx, " \ + "returning %d\n", \ + gtid, *p_lb, *p_ub, stride, status)); \ + return status; \ + } LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_START), kmp_sch_static) LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_STATIC_NEXT), {}) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_START), + kmp_sch_dynamic_chunked) LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_DYNAMIC_NEXT), {}) LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_START), kmp_sch_guided_chunked) LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_GUIDED_NEXT), {}) -LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), kmp_sch_runtime) +LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_START), + kmp_sch_runtime) LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_RUNTIME_NEXT), {}) LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_START), kmp_ord_static) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), kmp_ord_guided_chunked) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) -LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), kmp_ord_runtime) -LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) - - -void -xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, 
("GOMP_loop_end: T#%d\n", gtid)) - - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - - KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_STATIC_NEXT), + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_START), + kmp_ord_dynamic_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_DYNAMIC_NEXT), + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) +LOOP_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_START), + kmp_ord_guided_chunked) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_GUIDED_NEXT), + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) +LOOP_RUNTIME_START(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_START), + kmp_ord_runtime) +LOOP_NEXT(xexpand(KMP_API_NAME_GOMP_LOOP_ORDERED_RUNTIME_NEXT), + { KMP_DISPATCH_FINI_CHUNK(&loc, gtid); }) + +void xexpand(KMP_API_NAME_GOMP_LOOP_END)(void) { + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) + + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + + KA_TRACE(20, ("GOMP_loop_end exit: T#%d\n", gtid)) } - -void -xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) -{ - KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) +void xexpand(KMP_API_NAME_GOMP_LOOP_END_NOWAIT)(void) { + KA_TRACE(20, ("GOMP_loop_end_nowait: T#%d\n", __kmp_get_gtid())) } - -// // Unsigned long long loop worksharing constructs // // These are new with gcc 4.4 -// - -#define LOOP_START_ULL(func,schedule) \ - int func (int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long chunk_sz, \ - unsigned long long *p_lb, unsigned long long *p_ub) \ - { \ - int status; \ - long long str2 = up ? ((long long)str) : -((long long)str); \ - long long stride; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - \ - KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ - (schedule) != kmp_sch_static); \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ - (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT(stride == str2); \ - *p_ub += (str > 0) ? 1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_RUNTIME_START_ULL(func,schedule) \ - int func (int up, unsigned long long lb, unsigned long long ub, \ - unsigned long long str, unsigned long long *p_lb, \ - unsigned long long *p_ub) \ - { \ - int status; \ - long long str2 = up ? ((long long)str) : -((long long)str); \ - unsigned long long stride; \ - unsigned long long chunk_sz = 0; \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - \ - KA_TRACE(20, ( #func ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ - gtid, up, lb, ub, str, chunk_sz )); \ - \ - if ((str > 0) ? (lb < ub) : (lb > ub)) { \ - KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ - (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, TRUE); \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, \ - (kmp_uint64 *)p_lb, (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - KMP_DEBUG_ASSERT((long long)stride == str2); \ - *p_ub += (str > 0) ? 
1 : -1; \ - } \ - } \ - else { \ - status = 0; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ - gtid, *p_lb, *p_ub, status)); \ - return status; \ - } - - -#define LOOP_NEXT_ULL(func,fini_code) \ - int func(unsigned long long *p_lb, unsigned long long *p_ub) \ - { \ - int status; \ - long long stride; \ - int gtid = __kmp_get_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d\n", gtid)); \ - \ - fini_code \ - status = KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ - (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ - if (status) { \ - *p_ub += (stride > 0) ? 1 : -1; \ - } \ - \ - KA_TRACE(20, ( #func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ - "returning %d\n", gtid, *p_lb, *p_ub, stride, status)); \ - return status; \ - } +#define LOOP_START_ULL(func, schedule) \ + int func(int up, unsigned long long lb, unsigned long long ub, \ + unsigned long long str, unsigned long long chunk_sz, \ + unsigned long long *p_lb, unsigned long long *p_ub) { \ + int status; \ + long long str2 = up ? ((long long)str) : -((long long)str); \ + long long stride; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + \ + KA_TRACE( \ + 20, \ + (#func \ + ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ + gtid, up, lb, ub, str, chunk_sz)); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ + (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ + (schedule) != kmp_sch_static); \ + status = \ + KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ + (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT(stride == str2); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + +#define LOOP_RUNTIME_START_ULL(func, schedule) \ + int func(int up, unsigned long long lb, unsigned long long ub, \ + unsigned long long str, unsigned long long *p_lb, \ + unsigned long long *p_ub) { \ + int status; \ + long long str2 = up ? ((long long)str) : -((long long)str); \ + unsigned long long stride; \ + unsigned long long chunk_sz = 0; \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + \ + KA_TRACE( \ + 20, \ + (#func \ + ": T#%d, up %d, lb 0x%llx, ub 0x%llx, str 0x%llx, chunk_sz 0x%llx\n", \ + gtid, up, lb, ub, str, chunk_sz)); \ + \ + if ((str > 0) ? (lb < ub) : (lb > ub)) { \ + KMP_DISPATCH_INIT_ULL(&loc, gtid, (schedule), lb, \ + (str2 > 0) ? (ub - 1) : (ub + 1), str2, chunk_sz, \ + TRUE); \ + status = \ + KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ + (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + KMP_DEBUG_ASSERT((long long)stride == str2); \ + *p_ub += (str > 0) ? 1 : -1; \ + } \ + } else { \ + status = 0; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, returning %d\n", \ + gtid, *p_lb, *p_ub, status)); \ + return status; \ + } + +#define LOOP_NEXT_ULL(func, fini_code) \ + int func(unsigned long long *p_lb, unsigned long long *p_ub) { \ + int status; \ + long long stride; \ + int gtid = __kmp_get_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, (#func ": T#%d\n", gtid)); \ + \ + fini_code status = \ + KMP_DISPATCH_NEXT_ULL(&loc, gtid, NULL, (kmp_uint64 *)p_lb, \ + (kmp_uint64 *)p_ub, (kmp_int64 *)&stride); \ + if (status) { \ + *p_ub += (stride > 0) ? 
1 : -1; \ + } \ + \ + KA_TRACE(20, \ + (#func " exit: T#%d, *p_lb 0x%llx, *p_ub 0x%llx, stride 0x%llx, " \ + "returning %d\n", \ + gtid, *p_lb, *p_ub, stride, status)); \ + return status; \ + } LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_START), kmp_sch_static) LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_STATIC_NEXT), {}) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), kmp_sch_dynamic_chunked) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_START), + kmp_sch_dynamic_chunked) LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_DYNAMIC_NEXT), {}) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), kmp_sch_guided_chunked) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_START), + kmp_sch_guided_chunked) LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_GUIDED_NEXT), {}) -LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), kmp_sch_runtime) +LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_START), + kmp_sch_runtime) LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_RUNTIME_NEXT), {}) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), kmp_ord_static) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), kmp_ord_dynamic_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), kmp_ord_guided_chunked) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) -LOOP_RUNTIME_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime) -LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \ - { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_START), + kmp_ord_static) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_STATIC_NEXT), + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_START), + kmp_ord_dynamic_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_DYNAMIC_NEXT), + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_START_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_START), + kmp_ord_guided_chunked) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_GUIDED_NEXT), + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) +LOOP_RUNTIME_START_ULL( + xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_START), kmp_ord_runtime) +LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), + { KMP_DISPATCH_FINI_CHUNK_ULL(&loc, gtid); }) - -// // Combined parallel / loop worksharing constructs // // There are no ull versions (yet). 
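As a usage sketch for these combined entry points: for "#pragma omp parallel for schedule(dynamic)" built against the older GOMP_parallel_*_start ABI, the compiler emits roughly the sequence below. The encountering thread starts the team (which also initializes the workshare, as the macro does), runs the outlined body itself, and then joins. The outlined_body/run_parallel_for names and the 0..100 bounds are hypothetical.

extern "C" void GOMP_parallel_loop_dynamic_start(void (*)(void *), void *,
                                                 unsigned, long, long, long, long);
extern "C" int GOMP_loop_dynamic_next(long *, long *);
extern "C" void GOMP_loop_end_nowait(void);
extern "C" void GOMP_parallel_end(void);

static void outlined_body(void *data) { // runs in every thread of the team
  (void)data;
  long lb, ub;
  while (GOMP_loop_dynamic_next(&lb, &ub)) { // grab the next chunk, if any
    for (long i = lb; i < ub; ++i) {
      /* loop body for iteration i */
    }
  }
  GOMP_loop_end_nowait(); // no barrier here; the join barrier suffices
}

void run_parallel_for(void) {
  // num_threads == 0 keeps the runtime default, as checked in the macro above.
  GOMP_parallel_loop_dynamic_start(outlined_body, NULL, 0, 0, 100, 1, 4);
  outlined_body(NULL); // the encountering thread participates too
  GOMP_parallel_end(); // join the team
}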
-// - -#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ - void func (void (*task) (void *), void *data, unsigned num_threads, \ - long lb, long ub, long str, long chunk_sz) \ - { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - ompt_pre(); \ - \ - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ - task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - } \ - else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ - \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - \ - ompt_post(); \ - \ - KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ - } - +#define PARALLEL_LOOP_START(func, schedule, ompt_pre, ompt_post) \ + void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ + long ub, long str, long chunk_sz) { \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, \ + (#func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz)); \ + \ + ompt_pre(); \ + \ + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ + if (num_threads != 0) { \ + __kmp_push_num_threads(&loc, gtid, num_threads); \ + } \ + __kmp_GOMP_fork_call(&loc, gtid, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ + 9, task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ + } else { \ + __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ + } \ + \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + \ + ompt_post(); \ + \ + KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \ + } #if OMPT_SUPPORT -#define OMPT_LOOP_PRE() \ - ompt_frame_t *parent_frame; \ - if (ompt_enabled) { \ - parent_frame = __ompt_get_task_frame_internal(0); \ - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \ - } - +#define OMPT_LOOP_PRE() \ + ompt_frame_t *parent_frame; \ + if (ompt_enabled) { \ + parent_frame = __ompt_get_task_frame_internal(0); \ + parent_frame->reenter_runtime_frame = __builtin_frame_address(1); \ + } -#define OMPT_LOOP_POST() \ - if (ompt_enabled) { \ - parent_frame->reenter_runtime_frame = NULL; \ - } +#define OMPT_LOOP_POST() \ + if (ompt_enabled) { \ + parent_frame->reenter_runtime_frame = NULL; \ + } #else @@ -901,7 +820,6 @@ LOOP_NEXT_ULL(xexpand(KMP_API_NAME_GOMP_LOOP_ULL_ORDERED_RUNTIME_NEXT), \ #endif - PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC_START), kmp_sch_static, OMPT_LOOP_PRE, OMPT_LOOP_POST) PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC_START), @@ -911,548 +829,497 @@ PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED_START), PARALLEL_LOOP_START(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME_START), kmp_sch_runtime, OMPT_LOOP_PRE, OMPT_LOOP_POST) - -// // Tasking constructs -// -void -xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, void (*copy_func)(void *, void *), - long arg_size, long arg_align, bool if_cond, unsigned gomp_flags +void xexpand(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, + void (*copy_func)(void *, void *), + long arg_size, long arg_align, + bool if_cond, unsigned gomp_flags #if OMP_40_ENABLED - , void **depend + , + void **depend #endif -) -{ - MKLOC(loc, "GOMP_task"); - int gtid = __kmp_entry_gtid(); - kmp_int32 flags = 0; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; - - KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); - - // The low-order bit is the "tied" flag - if (gomp_flags & 1) { - input_flags->tiedness = 1; - } - // The second low-order bit is the "final" flag - if (gomp_flags & 2) { - input_flags->final = 1; + ) { + MKLOC(loc, "GOMP_task"); + int gtid = __kmp_entry_gtid(); + kmp_int32 flags = 0; + kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; + + KA_TRACE(20, ("GOMP_task: T#%d\n", gtid)); + + // The low-order bit is the "tied" flag + if (gomp_flags & 1) { + input_flags->tiedness = 1; + } + // The second low-order bit is the "final" flag + if (gomp_flags & 2) { + input_flags->final = 1; + } + input_flags->native = 1; + // __kmp_task_alloc() sets up all other flags + + if (!if_cond) { + arg_size = 0; + } + + kmp_task_t *task = __kmp_task_alloc( + &loc, gtid, input_flags, sizeof(kmp_task_t), + arg_size ? arg_size + arg_align - 1 : 0, (kmp_routine_entry_t)func); + + if (arg_size > 0) { + if (arg_align > 0) { + task->shareds = (void *)((((size_t)task->shareds) + arg_align - 1) / + arg_align * arg_align); } - input_flags->native = 1; - // __kmp_task_alloc() sets up all other flags + // else error?? - if (! if_cond) { - arg_size = 0; - } - - kmp_task_t *task = __kmp_task_alloc(&loc, gtid, input_flags, - sizeof(kmp_task_t), arg_size ? arg_size + arg_align - 1 : 0, - (kmp_routine_entry_t)func); - - if (arg_size > 0) { - if (arg_align > 0) { - task->shareds = (void *)((((size_t)task->shareds) - + arg_align - 1) / arg_align * arg_align); - } - //else error?? 
- - if (copy_func) { - (*copy_func)(task->shareds, data); - } - else { - KMP_MEMCPY(task->shareds, data, arg_size); - } + if (copy_func) { + (*copy_func)(task->shareds, data); + } else { + KMP_MEMCPY(task->shareds, data, arg_size); } + } - if (if_cond) { + if (if_cond) { #if OMP_40_ENABLED - if (gomp_flags & 8) { - KMP_ASSERT(depend); - const size_t ndeps = (kmp_intptr_t)depend[0]; - const size_t nout = (kmp_intptr_t)depend[1]; - kmp_depend_info_t dep_list[ndeps]; - - for (size_t i = 0U; i < ndeps; i++) { - dep_list[i].base_addr = (kmp_intptr_t)depend[2U + i]; - dep_list[i].len = 0U; - dep_list[i].flags.in = 1; - dep_list[i].flags.out = (i < nout); - } - __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); - } - else + if (gomp_flags & 8) { + KMP_ASSERT(depend); + const size_t ndeps = (kmp_intptr_t)depend[0]; + const size_t nout = (kmp_intptr_t)depend[1]; + kmp_depend_info_t dep_list[ndeps]; + + for (size_t i = 0U; i < ndeps; i++) { + dep_list[i].base_addr = (kmp_intptr_t)depend[2U + i]; + dep_list[i].len = 0U; + dep_list[i].flags.in = 1; + dep_list[i].flags.out = (i < nout); + } + __kmpc_omp_task_with_deps(&loc, gtid, task, ndeps, dep_list, 0, NULL); + } else #endif - __kmpc_omp_task(&loc, gtid, task); - } - else { + __kmpc_omp_task(&loc, gtid, task); + } else { #if OMPT_SUPPORT - ompt_thread_info_t oldInfo; - kmp_info_t *thread; - kmp_taskdata_t *taskdata; - if (ompt_enabled) { - // Store the threads states and restore them after the task - thread = __kmp_threads[ gtid ]; - taskdata = KMP_TASK_TO_TASKDATA(task); - oldInfo = thread->th.ompt_thread_info; - thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_runtime_frame = - __builtin_frame_address(0); - } + ompt_thread_info_t oldInfo; + kmp_info_t *thread; + kmp_taskdata_t *taskdata; + if (ompt_enabled) { + // Store the threads states and restore them after the task + thread = __kmp_threads[gtid]; + taskdata = KMP_TASK_TO_TASKDATA(task); + oldInfo = thread->th.ompt_thread_info; + thread->th.ompt_thread_info.wait_id = 0; + thread->th.ompt_thread_info.state = ompt_state_work_parallel; + taskdata->ompt_task_info.frame.exit_runtime_frame = + __builtin_frame_address(0); + } #endif - __kmpc_omp_task_begin_if0(&loc, gtid, task); - func(data); - __kmpc_omp_task_complete_if0(&loc, gtid, task); + __kmpc_omp_task_begin_if0(&loc, gtid, task); + func(data); + __kmpc_omp_task_complete_if0(&loc, gtid, task); #if OMPT_SUPPORT - if (ompt_enabled) { - thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; - } -#endif + if (ompt_enabled) { + thread->th.ompt_thread_info = oldInfo; + taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; } +#endif + } - KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); + KA_TRACE(20, ("GOMP_task exit: T#%d\n", gtid)); } +void xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) { + MKLOC(loc, "GOMP_taskwait"); + int gtid = __kmp_entry_gtid(); -void -xexpand(KMP_API_NAME_GOMP_TASKWAIT)(void) -{ - MKLOC(loc, "GOMP_taskwait"); - int gtid = __kmp_entry_gtid(); - - KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); + KA_TRACE(20, ("GOMP_taskwait: T#%d\n", gtid)); - __kmpc_omp_taskwait(&loc, gtid); + __kmpc_omp_taskwait(&loc, gtid); - KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); + KA_TRACE(20, ("GOMP_taskwait exit: T#%d\n", gtid)); } - -// // Sections worksharing constructs // - -// // For the sections construct, we initialize a dynamically scheduled loop // worksharing construct with lb 
1 and stride 1, and use the iteration #'s // that its returns as sections ids. // // There are no special entry points for ordered sections, so we always use // the dynamically scheduled workshare, even if the sections aren't ordered. -// -unsigned -xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) -{ - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_sections_start"); - KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } - else { - lb = 0; - } - - KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, - (unsigned)lb)); - return (unsigned)lb; +unsigned xexpand(KMP_API_NAME_GOMP_SECTIONS_START)(unsigned count) { + int status; + kmp_int lb, ub, stride; + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_sections_start"); + KA_TRACE(20, ("GOMP_sections_start: T#%d\n", gtid)); + + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); + if (status) { + KMP_DEBUG_ASSERT(stride == 1); + KMP_DEBUG_ASSERT(lb > 0); + KMP_ASSERT(lb == ub); + } else { + lb = 0; + } + + KA_TRACE(20, ("GOMP_sections_start exit: T#%d returning %u\n", gtid, + (unsigned)lb)); + return (unsigned)lb; } - -unsigned -xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) -{ - int status; - kmp_int lb, ub, stride; - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_next"); - KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); - - status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); - if (status) { - KMP_DEBUG_ASSERT(stride == 1); - KMP_DEBUG_ASSERT(lb > 0); - KMP_ASSERT(lb == ub); - } - else { - lb = 0; - } - - KA_TRACE(20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, - (unsigned)lb)); - return (unsigned)lb; +unsigned xexpand(KMP_API_NAME_GOMP_SECTIONS_NEXT)(void) { + int status; + kmp_int lb, ub, stride; + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_sections_next"); + KA_TRACE(20, ("GOMP_sections_next: T#%d\n", gtid)); + + status = KMP_DISPATCH_NEXT(&loc, gtid, NULL, &lb, &ub, &stride); + if (status) { + KMP_DEBUG_ASSERT(stride == 1); + KMP_DEBUG_ASSERT(lb > 0); + KMP_ASSERT(lb == ub); + } else { + lb = 0; + } + + KA_TRACE( + 20, ("GOMP_sections_next exit: T#%d returning %u\n", gtid, (unsigned)lb)); + return (unsigned)lb; } - -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task) (void *), void *data, - unsigned num_threads, unsigned count) -{ - int gtid = __kmp_entry_gtid(); +void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(void (*task)(void *), + void *data, + unsigned num_threads, + unsigned count) { + int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - ompt_frame_t *parent_frame; + ompt_frame_t *parent_frame; - if (ompt_enabled) { - parent_frame = __ompt_get_task_frame_internal(0); - parent_frame->reenter_runtime_frame = __builtin_frame_address(1); - } + if (ompt_enabled) { + parent_frame = __ompt_get_task_frame_internal(0); + parent_frame->reenter_runtime_frame = __builtin_frame_address(1); + } #endif - MKLOC(loc, "GOMP_parallel_sections_start"); - KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - __kmp_GOMP_fork_call(&loc, 
gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, - num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, - (kmp_int)count, (kmp_int)1, (kmp_int)1); - } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); + MKLOC(loc, "GOMP_parallel_sections_start"); + KA_TRACE(20, ("GOMP_parallel_sections_start: T#%d\n", gtid)); + + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, + task, data, num_threads, &loc, kmp_nm_dynamic_chunked, + (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); + } else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } #if OMPT_SUPPORT - if (ompt_enabled) { - parent_frame->reenter_runtime_frame = NULL; - } + if (ompt_enabled) { + parent_frame->reenter_runtime_frame = NULL; + } #endif - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); + KA_TRACE(20, ("GOMP_parallel_sections_start exit: T#%d\n", gtid)); } +void xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) { + int gtid = __kmp_get_gtid(); + KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) -void -xexpand(KMP_API_NAME_GOMP_SECTIONS_END)(void) -{ - int gtid = __kmp_get_gtid(); - KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) - - __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); + __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); - KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) + KA_TRACE(20, ("GOMP_sections_end exit: T#%d\n", gtid)) } - -void -xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) -{ - KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) +void xexpand(KMP_API_NAME_GOMP_SECTIONS_END_NOWAIT)(void) { + KA_TRACE(20, ("GOMP_sections_end_nowait: T#%d\n", __kmp_get_gtid())) } // libgomp has an empty function for GOMP_taskyield as of 2013-10-10 -void -xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void) -{ - KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) - return; +void xexpand(KMP_API_NAME_GOMP_TASKYIELD)(void) { + KA_TRACE(20, ("GOMP_taskyield: T#%d\n", __kmp_get_gtid())) + return; } #if OMP_40_ENABLED // these are new GOMP_4.0 entry points -void -xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, unsigned num_threads, unsigned int flags) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel"); - KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); +void xexpand(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), void *data, + unsigned num_threads, + unsigned int flags) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_parallel"); + KA_TRACE(20, ("GOMP_parallel: T#%d\n", gtid)); #if OMPT_SUPPORT - ompt_task_info_t *parent_task_info, *task_info; - if (ompt_enabled) { - parent_task_info = __ompt_get_taskinfo(0); - parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1); - } + ompt_task_info_t *parent_task_info, *task_info; + if (ompt_enabled) { + parent_task_info = __ompt_get_taskinfo(0); + parent_task_info->frame.reenter_runtime_frame = __builtin_frame_address(1); + } #endif - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if(flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, 
- (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, data); + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); + if (flags != 0) { + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); } + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_microtask_wrapper, 2, task, + data); + } else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } #if OMPT_SUPPORT - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - task_info->frame.exit_runtime_frame = __builtin_frame_address(0); - } + if (ompt_enabled) { + task_info = __ompt_get_taskinfo(0); + task_info->frame.exit_runtime_frame = __builtin_frame_address(0); + } #endif - task(data); - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); + task(data); + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT - if (ompt_enabled) { - task_info->frame.exit_runtime_frame = NULL; - parent_task_info->frame.reenter_runtime_frame = NULL; - } + if (ompt_enabled) { + task_info->frame.exit_runtime_frame = NULL; + parent_task_info->frame.reenter_runtime_frame = NULL; + } #endif } -void -xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task) (void *), void *data, - unsigned num_threads, unsigned count, unsigned flags) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_parallel_sections"); - KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); - - if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { - if (num_threads != 0) { - __kmp_push_num_threads(&loc, gtid, num_threads); - } - if(flags != 0) { - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); - } - __kmp_GOMP_fork_call(&loc, gtid, task, - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, task, data, - num_threads, &loc, kmp_nm_dynamic_chunked, (kmp_int)1, - (kmp_int)count, (kmp_int)1, (kmp_int)1); +void xexpand(KMP_API_NAME_GOMP_PARALLEL_SECTIONS)(void (*task)(void *), + void *data, + unsigned num_threads, + unsigned count, + unsigned flags) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_parallel_sections"); + KA_TRACE(20, ("GOMP_parallel_sections: T#%d\n", gtid)); + + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { + if (num_threads != 0) { + __kmp_push_num_threads(&loc, gtid, num_threads); } - else { - __kmp_GOMP_serialized_parallel(&loc, gtid, task); + if (flags != 0) { + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); } - - KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); - - task(data); - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); - KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); + __kmp_GOMP_fork_call(&loc, gtid, task, + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, + task, data, num_threads, &loc, kmp_nm_dynamic_chunked, + (kmp_int)1, (kmp_int)count, (kmp_int)1, (kmp_int)1); + } else { + __kmp_GOMP_serialized_parallel(&loc, gtid, task); + } + + KMP_DISPATCH_INIT(&loc, gtid, kmp_nm_dynamic_chunked, 1, count, 1, 1, TRUE); + + task(data); + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); + KA_TRACE(20, ("GOMP_parallel_sections exit: T#%d\n", gtid)); } -#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \ - void func (void (*task) (void *), void *data, unsigned num_threads, \ - long lb, long ub, long str, long chunk_sz, unsigned flags) \ - { \ - int gtid = __kmp_entry_gtid(); \ - MKLOC(loc, #func); \ - KA_TRACE(20, ( #func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ - gtid, lb, ub, str, chunk_sz )); \ - \ - ompt_pre(); \ 
- if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ - if (num_threads != 0) { \ - __kmp_push_num_threads(&loc, gtid, num_threads); \ - } \ - if (flags != 0) { \ - __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ - } \ - __kmp_GOMP_fork_call(&loc, gtid, task, \ - (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, 9, \ - task, data, num_threads, &loc, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ - } \ - else { \ - __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ - } \ - \ - KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ - (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz, \ - (schedule) != kmp_sch_static); \ - task(data); \ - xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \ - ompt_post(); \ - \ - KA_TRACE(20, ( #func " exit: T#%d\n", gtid)); \ - } - -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static, - OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), kmp_sch_dynamic_chunked, - OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), kmp_sch_guided_chunked, - OMPT_LOOP_PRE, OMPT_LOOP_POST) -PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime, - OMPT_LOOP_PRE, OMPT_LOOP_POST) - - -void -xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) -{ - int gtid = __kmp_entry_gtid(); - MKLOC(loc, "GOMP_taskgroup_start"); - KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); - - __kmpc_taskgroup(&loc, gtid); - - return; +#define PARALLEL_LOOP(func, schedule, ompt_pre, ompt_post) \ + void func(void (*task)(void *), void *data, unsigned num_threads, long lb, \ + long ub, long str, long chunk_sz, unsigned flags) { \ + int gtid = __kmp_entry_gtid(); \ + MKLOC(loc, #func); \ + KA_TRACE(20, \ + (#func ": T#%d, lb 0x%lx, ub 0x%lx, str 0x%lx, chunk_sz 0x%lx\n", \ + gtid, lb, ub, str, chunk_sz)); \ + \ + ompt_pre(); \ + if (__kmpc_ok_to_fork(&loc) && (num_threads != 1)) { \ + if (num_threads != 0) { \ + __kmp_push_num_threads(&loc, gtid, num_threads); \ + } \ + if (flags != 0) { \ + __kmp_push_proc_bind(&loc, gtid, (kmp_proc_bind_t)flags); \ + } \ + __kmp_GOMP_fork_call(&loc, gtid, task, \ + (microtask_t)__kmp_GOMP_parallel_microtask_wrapper, \ + 9, task, data, num_threads, &loc, (schedule), lb, \ + (str > 0) ? (ub - 1) : (ub + 1), str, chunk_sz); \ + } else { \ + __kmp_GOMP_serialized_parallel(&loc, gtid, task); \ + } \ + \ + KMP_DISPATCH_INIT(&loc, gtid, (schedule), lb, \ + (str > 0) ? 
(ub - 1) : (ub + 1), str, chunk_sz, \ + (schedule) != kmp_sch_static); \ + task(data); \ + xexpand(KMP_API_NAME_GOMP_PARALLEL_END)(); \ + ompt_post(); \ + \ + KA_TRACE(20, (#func " exit: T#%d\n", gtid)); \ + } + +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_STATIC), kmp_sch_static, + OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_DYNAMIC), + kmp_sch_dynamic_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_GUIDED), + kmp_sch_guided_chunked, OMPT_LOOP_PRE, OMPT_LOOP_POST) +PARALLEL_LOOP(xexpand(KMP_API_NAME_GOMP_PARALLEL_LOOP_RUNTIME), kmp_sch_runtime, + OMPT_LOOP_PRE, OMPT_LOOP_POST) + +void xexpand(KMP_API_NAME_GOMP_TASKGROUP_START)(void) { + int gtid = __kmp_entry_gtid(); + MKLOC(loc, "GOMP_taskgroup_start"); + KA_TRACE(20, ("GOMP_taskgroup_start: T#%d\n", gtid)); + + __kmpc_taskgroup(&loc, gtid); + + return; } -void -xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) -{ - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_taskgroup_end"); - KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); +void xexpand(KMP_API_NAME_GOMP_TASKGROUP_END)(void) { + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_taskgroup_end"); + KA_TRACE(20, ("GOMP_taskgroup_end: T#%d\n", gtid)); - __kmpc_end_taskgroup(&loc, gtid); + __kmpc_end_taskgroup(&loc, gtid); - return; + return; } #ifndef KMP_DEBUG static #endif /* KMP_DEBUG */ -kmp_int32 __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { - kmp_int32 cncl_kind = 0; - switch(gomp_kind) { - case 1: - cncl_kind = cancel_parallel; - break; - case 2: - cncl_kind = cancel_loop; - break; - case 4: - cncl_kind = cancel_sections; - break; - case 8: - cncl_kind = cancel_taskgroup; - break; - } - return cncl_kind; + kmp_int32 + __kmp_gomp_to_omp_cancellation_kind(int gomp_kind) { + kmp_int32 cncl_kind = 0; + switch (gomp_kind) { + case 1: + cncl_kind = cancel_parallel; + break; + case 2: + cncl_kind = cancel_loop; + break; + case 4: + cncl_kind = cancel_sections; + break; + case 8: + cncl_kind = cancel_taskgroup; + break; + } + return cncl_kind; } -bool -xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancellation_point"); - KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid)); +bool xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(int which) { + if (__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_cancellation_point"); + KA_TRACE(20, ("GOMP_cancellation_point: T#%d\n", gtid)); - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); + kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); + return __kmpc_cancellationpoint(&loc, gtid, cncl_kind); } -bool -xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } +bool xexpand(KMP_API_NAME_GOMP_BARRIER_CANCEL)(void) { + if (__kmp_omp_cancellation) { KMP_FATAL(NoGompCancellation); - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_barrier_cancel"); - KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); + } + KMP_FATAL(NoGompCancellation); + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_barrier_cancel"); + KA_TRACE(20, ("GOMP_barrier_cancel: T#%d\n", gtid)); - return __kmpc_cancel_barrier(&loc, gtid); + return __kmpc_cancel_barrier(&loc, gtid); } -bool -xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool 
do_cancel) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } else { - return FALSE; - } +bool xexpand(KMP_API_NAME_GOMP_CANCEL)(int which, bool do_cancel) { + if (__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } else { + return FALSE; + } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_cancel"); - KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid)); + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_cancel"); + KA_TRACE(20, ("GOMP_cancel: T#%d\n", gtid)); - kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); + kmp_int32 cncl_kind = __kmp_gomp_to_omp_cancellation_kind(which); - if(do_cancel == FALSE) { - return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which); - } else { - return __kmpc_cancel(&loc, gtid, cncl_kind); - } + if (do_cancel == FALSE) { + return xexpand(KMP_API_NAME_GOMP_CANCELLATION_POINT)(which); + } else { + return __kmpc_cancel(&loc, gtid, cncl_kind); + } } -bool -xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_sections_end_cancel"); - KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid)); +bool xexpand(KMP_API_NAME_GOMP_SECTIONS_END_CANCEL)(void) { + if (__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_sections_end_cancel"); + KA_TRACE(20, ("GOMP_sections_end_cancel: T#%d\n", gtid)); - return __kmpc_cancel_barrier(&loc, gtid); + return __kmpc_cancel_barrier(&loc, gtid); } -bool -xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) -{ - if(__kmp_omp_cancellation) { - KMP_FATAL(NoGompCancellation); - } - int gtid = __kmp_get_gtid(); - MKLOC(loc, "GOMP_loop_end_cancel"); - KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); +bool xexpand(KMP_API_NAME_GOMP_LOOP_END_CANCEL)(void) { + if (__kmp_omp_cancellation) { + KMP_FATAL(NoGompCancellation); + } + int gtid = __kmp_get_gtid(); + MKLOC(loc, "GOMP_loop_end_cancel"); + KA_TRACE(20, ("GOMP_loop_end_cancel: T#%d\n", gtid)); - return __kmpc_cancel_barrier(&loc, gtid); + return __kmpc_cancel_barrier(&loc, gtid); } // All target functions are empty as of 2014-05-29 -void -xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn) (void *), const void *openmp_target, - size_t mapnum, void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; +void xexpand(KMP_API_NAME_GOMP_TARGET)(int device, void (*fn)(void *), + const void *openmp_target, size_t mapnum, + void **hostaddrs, size_t *sizes, + unsigned char *kinds) { + return; } -void -xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, const void *openmp_target, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; +void xexpand(KMP_API_NAME_GOMP_TARGET_DATA)(int device, + const void *openmp_target, + size_t mapnum, void **hostaddrs, + size_t *sizes, + unsigned char *kinds) { + return; } -void -xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) -{ - return; -} +void xexpand(KMP_API_NAME_GOMP_TARGET_END_DATA)(void) { return; } -void -xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, const void *openmp_target, size_t mapnum, - void **hostaddrs, size_t *sizes, unsigned char *kinds) -{ - return; +void xexpand(KMP_API_NAME_GOMP_TARGET_UPDATE)(int device, + const void *openmp_target, + size_t mapnum, void **hostaddrs, + size_t *sizes, + unsigned char *kinds) { + return; } -void -xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned int num_teams, unsigned int thread_limit) -{ - return; +void xexpand(KMP_API_NAME_GOMP_TEAMS)(unsigned 
int num_teams, + unsigned int thread_limit) { + return; } #endif // OMP_40_ENABLED - -/* - The following sections of code create aliases for the GOMP_* functions, - then create versioned symbols using the assembler directive .symver. - This is only pertinent for ELF .so library - xaliasify and xversionify are defined in kmp_ftn_os.h -*/ +/* The following sections of code create aliases for the GOMP_* functions, then + create versioned symbols using the assembler directive .symver. This is only + pertinent for ELF .so library xaliasify and xversionify are defined in + kmp_ftn_os.h */ #ifdef KMP_USE_VERSION_SYMBOLS @@ -1636,7 +1503,5 @@ xversionify(KMP_API_NAME_GOMP_TEAMS, 40, "GOMP_4.0"); #endif // KMP_USE_VERSION_SYMBOLS #ifdef __cplusplus - } //extern "C" +} // extern "C" #endif // __cplusplus - - diff --git a/openmp/runtime/src/kmp_i18n.cpp b/openmp/runtime/src/kmp_i18n.cpp index 992d1fe..e542ea7 100644 --- a/openmp/runtime/src/kmp_i18n.cpp +++ b/openmp/runtime/src/kmp_i18n.cpp @@ -13,247 +13,208 @@ //===----------------------------------------------------------------------===// - #include "kmp_i18n.h" -#include "kmp_os.h" -#include "kmp_debug.h" #include "kmp.h" +#include "kmp_debug.h" +#include "kmp_io.h" // __kmp_printf. #include "kmp_lock.h" -#include "kmp_io.h" // __kmp_printf. +#include "kmp_os.h" -#include #include -#include #include #include +#include +#include +#include "kmp_environment.h" #include "kmp_i18n_default.inc" #include "kmp_str.h" -#include "kmp_environment.h" #undef KMP_I18N_OK -#define get_section( id ) ( (id) >> 16 ) -#define get_number( id ) ( (id) & 0xFFFF ) +#define get_section(id) ((id) >> 16) +#define get_number(id) ((id)&0xFFFF) -kmp_msg_t __kmp_msg_empty = { kmp_mt_dummy, 0, "", 0 }; -kmp_msg_t __kmp_msg_null = { kmp_mt_dummy, 0, NULL, 0 }; -static char const * no_message_available = "(No message available)"; +kmp_msg_t __kmp_msg_empty = {kmp_mt_dummy, 0, "", 0}; +kmp_msg_t __kmp_msg_null = {kmp_mt_dummy, 0, NULL, 0}; +static char const *no_message_available = "(No message available)"; enum kmp_i18n_cat_status { - KMP_I18N_CLOSED, // Not yet opened or closed. - KMP_I18N_OPENED, // Opened successfully, ready to use. - KMP_I18N_ABSENT // Opening failed, message catalog should not be used. + KMP_I18N_CLOSED, // Not yet opened or closed. + KMP_I18N_OPENED, // Opened successfully, ready to use. + KMP_I18N_ABSENT // Opening failed, message catalog should not be used. }; // enum kmp_i18n_cat_status -typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t; -static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED; +typedef enum kmp_i18n_cat_status kmp_i18n_cat_status_t; +static volatile kmp_i18n_cat_status_t status = KMP_I18N_CLOSED; -/* - Message catalog is opened at first usage, so we have to synchronize opening to avoid race and - multiple openings. +/* Message catalog is opened at first usage, so we have to synchronize opening + to avoid race and multiple openings. - Closing does not require synchronization, because catalog is closed very late at library - shutting down, when no other threads are alive. -*/ + Closing does not require synchronization, because catalog is closed very late + at library shutting down, when no other threads are alive. */ static void __kmp_i18n_do_catopen(); -static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ); - // `lock' variable may be placed into __kmp_i18n_catopen function because it is used only by - // that function. But we afraid a (buggy) compiler may treat it wrongly. 
So we put it outside of - // function just in case. - -void -__kmp_i18n_catopen( -) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_acquire_bootstrap_lock( & lock ); - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_do_catopen(); - }; // if - __kmp_release_bootstrap_lock( & lock ); +static kmp_bootstrap_lock_t lock = KMP_BOOTSTRAP_LOCK_INITIALIZER(lock); +// `lock' variable may be placed into __kmp_i18n_catopen function because it is +// used only by that function. But we afraid a (buggy) compiler may treat it +// wrongly. So we put it outside of function just in case. + +void __kmp_i18n_catopen() { + if (status == KMP_I18N_CLOSED) { + __kmp_acquire_bootstrap_lock(&lock); + if (status == KMP_I18N_CLOSED) { + __kmp_i18n_do_catopen(); }; // if + __kmp_release_bootstrap_lock(&lock); + }; // if } // func __kmp_i18n_catopen - -/* - ================================================================================================ - Linux* OS and OS X* part. - ================================================================================================ -*/ - +/* Linux* OS and OS X* part */ #if KMP_OS_UNIX #define KMP_I18N_OK #include -#define KMP_I18N_NULLCAT ((nl_catd)( -1 )) -static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat" ); +#define KMP_I18N_NULLCAT ((nl_catd)(-1)) +static nl_catd cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? +static char const *name = + (KMP_VERSION_MAJOR == 4 ? "libguide.cat" : "libomp.cat"); -/* - Useful links: - http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02 - http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html - http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html +/* Useful links: +http://www.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html#tag_08_02 +http://www.opengroup.org/onlinepubs/000095399/functions/catopen.html +http://www.opengroup.org/onlinepubs/000095399/functions/setlocale.html */ -void -__kmp_i18n_do_catopen( -) { - int english = 0; - char * lang = __kmp_env_get( "LANG" ); - // TODO: What about LC_ALL or LC_MESSAGES? - - KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); - KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); - - english = - lang == NULL || // In all these cases English language is used. - strcmp( lang, "" ) == 0 || - strcmp( lang, " " ) == 0 || - // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime resets LANG env var - // to space if it is not set". - strcmp( lang, "C" ) == 0 || - strcmp( lang, "POSIX" ) == 0; - - if ( ! english ) { // English language is not yet detected, let us continue. - // Format of LANG is: [language[_territory][.codeset][@modifier]] - // Strip all parts except language. - char * tail = NULL; - __kmp_str_split( lang, '@', & lang, & tail ); - __kmp_str_split( lang, '.', & lang, & tail ); - __kmp_str_split( lang, '_', & lang, & tail ); - english = ( strcmp( lang, "en" ) == 0 ); - }; // if - - KMP_INTERNAL_FREE( lang ); +void __kmp_i18n_do_catopen() { + int english = 0; + char *lang = __kmp_env_get("LANG"); + // TODO: What about LC_ALL or LC_MESSAGES? + + KMP_DEBUG_ASSERT(status == KMP_I18N_CLOSED); + KMP_DEBUG_ASSERT(cat == KMP_I18N_NULLCAT); + + english = lang == NULL || // In all these cases English language is used. + strcmp(lang, "") == 0 || strcmp(lang, " ") == 0 || + // Workaround for Fortran RTL bug DPD200137873 "Fortran runtime + // resets LANG env var to space if it is not set". 
+ strcmp(lang, "C") == 0 || strcmp(lang, "POSIX") == 0; + + if (!english) { // English language is not yet detected, let us continue. + // Format of LANG is: [language[_territory][.codeset][@modifier]] + // Strip all parts except language. + char *tail = NULL; + __kmp_str_split(lang, '@', &lang, &tail); + __kmp_str_split(lang, '.', &lang, &tail); + __kmp_str_split(lang, '_', &lang, &tail); + english = (strcmp(lang, "en") == 0); + }; // if + + KMP_INTERNAL_FREE(lang); + + // Do not try to open English catalog because internal messages are + // exact copy of messages in English catalog. + if (english) { + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not + // be re-opened. + return; + } + + cat = catopen(name, 0); + // TODO: Why do we pass 0 in flags? + status = (cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED); + + if (status == KMP_I18N_ABSENT) { + if (__kmp_generate_warnings > kmp_warnings_low) { + // AC: only issue warning in case explicitly asked to + int error = errno; // Save errno immediately. + char *nlspath = __kmp_env_get("NLSPATH"); + char *lang = __kmp_env_get("LANG"); + + // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so + // __kmp_i18n_catgets() will not try to open catalog, but will return + // default message. + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantOpenMessageCatalog, name), err_code, + KMP_HNT(CheckEnvVar, "NLSPATH", nlspath), + KMP_HNT(CheckEnvVar, "LANG", lang), __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } - // Do not try to open English catalog because internal messages are - // exact copy of messages in English catalog. - if ( english ) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - return; + KMP_INFORM(WillUseDefaultMessages); + KMP_INTERNAL_FREE(nlspath); + KMP_INTERNAL_FREE(lang); } - - cat = catopen( name, 0 ); - // TODO: Why do we pass 0 in flags? - status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED ); - - if ( status == KMP_I18N_ABSENT ) { - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - int error = errno; // Save errno immediately. - char * nlspath = __kmp_env_get( "NLSPATH" ); - char * lang = __kmp_env_get( "LANG" ); - - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog, but will return default message. - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantOpenMessageCatalog, name ), - err_code, - KMP_HNT( CheckEnvVar, "NLSPATH", nlspath ), - KMP_HNT( CheckEnvVar, "LANG", lang ), - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - - KMP_INFORM( WillUseDefaultMessages ); - KMP_INTERNAL_FREE( nlspath ); - KMP_INTERNAL_FREE( lang ); - } - } else { // status == KMP_I18N_OPENED - - int section = get_section( kmp_i18n_prp_Version ); - int number = get_number( kmp_i18n_prp_Version ); - char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; - // Expected version of the catalog. - kmp_str_buf_t version; // Actual version of the catalog. - __kmp_str_buf_init( & version ); - __kmp_str_buf_print( & version, "%s", catgets( cat, section, number, NULL ) ); - - // String returned by catgets is invalid after closing the catalog, so copy it. 
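The LANG handling just above keeps only the language field of [language[_territory][.codeset][@modifier]] before testing for "en". A standalone sketch of that stripping, using plain C string functions rather than the runtime's __kmp_str_split() helper and a made-up LANG value, behaves as follows:

#include <stdio.h>
#include <string.h>

int main(void) {
  char lang[] = "en_US.UTF-8@euro"; /* sample value only */
  char *p;
  /* Drop @modifier, then .codeset, then _territory; the language part remains. */
  if ((p = strchr(lang, '@')) != NULL) *p = '\0';
  if ((p = strchr(lang, '.')) != NULL) *p = '\0';
  if ((p = strchr(lang, '_')) != NULL) *p = '\0';
  printf("%s\n", lang); /* prints "en", so the English catalog is never opened */
  return 0;
}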
- if ( strcmp( version.str, expected ) != 0 ) { - __kmp_i18n_catclose(); // Close bad catalog. - status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - // And now print a warning using default messages. - char const * name = "NLSPATH"; - char const * nlspath = __kmp_env_get( name ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( WrongMessageCatalog, name, version.str, expected ), - KMP_HNT( CheckEnvVar, name, nlspath ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - KMP_INTERNAL_FREE( (void *) nlspath ); - } // __kmp_generate_warnings - }; // if - __kmp_str_buf_free( & version ); - + } else { // status == KMP_I18N_OPENED + int section = get_section(kmp_i18n_prp_Version); + int number = get_number(kmp_i18n_prp_Version); + char const *expected = __kmp_i18n_default_table.sect[section].str[number]; + // Expected version of the catalog. + kmp_str_buf_t version; // Actual version of the catalog. + __kmp_str_buf_init(&version); + __kmp_str_buf_print(&version, "%s", catgets(cat, section, number, NULL)); + + // String returned by catgets is invalid after closing catalog, so copy it. + if (strcmp(version.str, expected) != 0) { + __kmp_i18n_catclose(); // Close bad catalog. + status = KMP_I18N_ABSENT; // And mark it as absent. + if (__kmp_generate_warnings > kmp_warnings_low) { + // AC: only issue warning in case explicitly asked to + // And now print a warning using default messages. + char const *name = "NLSPATH"; + char const *nlspath = __kmp_env_get(name); + __kmp_msg(kmp_ms_warning, + KMP_MSG(WrongMessageCatalog, name, version.str, expected), + KMP_HNT(CheckEnvVar, name, nlspath), __kmp_msg_null); + KMP_INFORM(WillUseDefaultMessages); + KMP_INTERNAL_FREE((void *)nlspath); + } // __kmp_generate_warnings }; // if - + __kmp_str_buf_free(&version); + }; // if } // func __kmp_i18n_do_catopen - -void -__kmp_i18n_catclose( -) { - if ( status == KMP_I18N_OPENED ) { - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - catclose( cat ); - cat = KMP_I18N_NULLCAT; - }; // if - status = KMP_I18N_CLOSED; +void __kmp_i18n_catclose() { + if (status == KMP_I18N_OPENED) { + KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); + catclose(cat); + cat = KMP_I18N_NULLCAT; + }; // if + status = KMP_I18N_CLOSED; } // func __kmp_i18n_catclose - -char const * -__kmp_i18n_catgets( - kmp_i18n_id_t id -) { - - int section = get_section( id ); - int number = get_number( id ); - char const * message = NULL; - - if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { - if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_catopen(); - }; // if - if ( status == KMP_I18N_OPENED ) { - message = - catgets( - cat, - section, number, - __kmp_i18n_default_table.sect[ section ].str[ number ] - ); - }; // if - if ( message == NULL ) { - message = __kmp_i18n_default_table.sect[ section ].str[ number ]; - }; // if - }; // if - }; // if - if ( message == NULL ) { - message = no_message_available; +char const *__kmp_i18n_catgets(kmp_i18n_id_t id) { + + int section = get_section(id); + int number = get_number(id); + char const *message = NULL; + + if (1 <= section && section <= __kmp_i18n_default_table.size) { + if (1 <= number && number <= __kmp_i18n_default_table.sect[section].size) { + if (status == KMP_I18N_CLOSED) { + __kmp_i18n_catopen(); + }; // if + if (status == KMP_I18N_OPENED) { + message = catgets(cat, section, number, + 
__kmp_i18n_default_table.sect[section].str[number]); + }; // if + if (message == NULL) { + message = __kmp_i18n_default_table.sect[section].str[number]; + }; // if }; // if - return message; + }; // if + if (message == NULL) { + message = no_message_available; + }; // if + return message; } // func __kmp_i18n_catgets - #endif // KMP_OS_UNIX -/* - ================================================================================================ - Windows* OS part. - ================================================================================================ -*/ +/* Windows* OS part. */ #if KMP_OS_WINDOWS #define KMP_I18N_OK @@ -261,737 +222,645 @@ __kmp_i18n_catgets( #include "kmp_environment.h" #include -#define KMP_I18N_NULLCAT NULL -static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? -static char const * name = ( KMP_VERSION_MAJOR == 4 ? "libguide40ui.dll" : "libompui.dll" ); - -static kmp_i18n_table_t table = { 0, NULL }; - // Messages formatted by FormatMessage() should be freed, but catgets() interface assumes - // user will not free messages. So we cache all the retrieved messages in the table, which - // are freed at catclose(). -static UINT const default_code_page = CP_OEMCP; -static UINT code_page = default_code_page; - -static char const * ___catgets( kmp_i18n_id_t id ); -static UINT get_code_page(); -static void kmp_i18n_table_free( kmp_i18n_table_t * table ); - - -static UINT -get_code_page( -) { - - UINT cp = default_code_page; - char const * value = __kmp_env_get( "KMP_CODEPAGE" ); - if ( value != NULL ) { - if ( _stricmp( value, "ANSI" ) == 0 ) { - cp = CP_ACP; - } else if ( _stricmp( value, "OEM" ) == 0 ) { - cp = CP_OEMCP; - } else if ( _stricmp( value, "UTF-8" ) == 0 || _stricmp( value, "UTF8" ) == 0 ) { - cp = CP_UTF8; - } else if ( _stricmp( value, "UTF-7" ) == 0 || _stricmp( value, "UTF7" ) == 0 ) { - cp = CP_UTF7; - } else { - // !!! TODO: Issue a warning? - }; // if +#define KMP_I18N_NULLCAT NULL +static HMODULE cat = KMP_I18N_NULLCAT; // !!! Shall it be volatile? +static char const *name = + (KMP_VERSION_MAJOR == 4 ? "libguide40ui.dll" : "libompui.dll"); + +static kmp_i18n_table_t table = {0, NULL}; +// Messages formatted by FormatMessage() should be freed, but catgets() +// interface assumes user will not free messages. So we cache all the retrieved +// messages in the table, which are freed at catclose(). +static UINT const default_code_page = CP_OEMCP; +static UINT code_page = default_code_page; + +static char const *___catgets(kmp_i18n_id_t id); +static UINT get_code_page(); +static void kmp_i18n_table_free(kmp_i18n_table_t *table); + +static UINT get_code_page() { + + UINT cp = default_code_page; + char const *value = __kmp_env_get("KMP_CODEPAGE"); + if (value != NULL) { + if (_stricmp(value, "ANSI") == 0) { + cp = CP_ACP; + } else if (_stricmp(value, "OEM") == 0) { + cp = CP_OEMCP; + } else if (_stricmp(value, "UTF-8") == 0 || _stricmp(value, "UTF8") == 0) { + cp = CP_UTF8; + } else if (_stricmp(value, "UTF-7") == 0 || _stricmp(value, "UTF7") == 0) { + cp = CP_UTF7; + } else { + // !!! TODO: Issue a warning? }; // if - KMP_INTERNAL_FREE( (void *) value ); - return cp; + }; // if + KMP_INTERNAL_FREE((void *)value); + return cp; } // func get_code_page - -static void -kmp_i18n_table_free( - kmp_i18n_table_t * table -) { - int s; - int m; - for ( s = 0; s < table->size; ++ s ) { - for ( m = 0; m < table->sect[ s ].size; ++ m ) { - // Free message. 
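The caching policy described in the comment above (format each message once, hand out the cached pointer so callers never free it, release everything when the catalog is closed) reduces to a small lazy table. The sketch below is schematic only, with invented names (format_message(), N_MESSAGES); the runtime's real store is the two-level kmp_i18n_table_t being torn down in this hunk:

#include <stdlib.h>

#define N_MESSAGES 256
extern char *format_message(int id);   /* hypothetical stand-in for FormatMessage() */
static const char *msg_cache[N_MESSAGES];

const char *cached_get(int id) {
  if (msg_cache[id] == NULL)           /* first request: format once and remember */
    msg_cache[id] = format_message(id);
  return msg_cache[id];                /* later requests reuse the same string */
}

void cache_free(void) {                /* done once, when the catalog is closed */
  for (int i = 0; i < N_MESSAGES; ++i) {
    free((void *)msg_cache[i]);        /* free(NULL) is a no-op for untouched slots */
    msg_cache[i] = NULL;
  }
}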
- KMP_INTERNAL_FREE( (void *) table->sect[ s ].str[ m ] ); - table->sect[ s ].str[ m ] = NULL; - }; // for m - table->sect[ s ].size = 0; - // Free section itself. - KMP_INTERNAL_FREE ( (void *) table->sect[ s ].str ); - table->sect[ s ].str = NULL; - }; // for s - table->size = 0; - KMP_INTERNAL_FREE( (void *) table->sect ); - table->sect = NULL; +static void kmp_i18n_table_free(kmp_i18n_table_t *table) { + int s; + int m; + for (s = 0; s < table->size; ++s) { + for (m = 0; m < table->sect[s].size; ++m) { + // Free message. + KMP_INTERNAL_FREE((void *)table->sect[s].str[m]); + table->sect[s].str[m] = NULL; + }; // for m + table->sect[s].size = 0; + // Free section itself. + KMP_INTERNAL_FREE((void *)table->sect[s].str); + table->sect[s].str = NULL; + }; // for s + table->size = 0; + KMP_INTERNAL_FREE((void *)table->sect); + table->sect = NULL; } // kmp_i18n_table_free - -void -__kmp_i18n_do_catopen( -) { - - LCID locale_id = GetThreadLocale(); - WORD lang_id = LANGIDFROMLCID( locale_id ); - WORD primary_lang_id = PRIMARYLANGID( lang_id ); - kmp_str_buf_t path; - - KMP_DEBUG_ASSERT( status == KMP_I18N_CLOSED ); - KMP_DEBUG_ASSERT( cat == KMP_I18N_NULLCAT ); - - __kmp_str_buf_init( & path ); - - // Do not try to open English catalog because internal messages are - // exact copy of messages in English catalog. - if ( primary_lang_id == LANG_ENGLISH ) { - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - goto end; +void __kmp_i18n_do_catopen() { + + LCID locale_id = GetThreadLocale(); + WORD lang_id = LANGIDFROMLCID(locale_id); + WORD primary_lang_id = PRIMARYLANGID(lang_id); + kmp_str_buf_t path; + + KMP_DEBUG_ASSERT(status == KMP_I18N_CLOSED); + KMP_DEBUG_ASSERT(cat == KMP_I18N_NULLCAT); + + __kmp_str_buf_init(&path); + + // Do not try to open English catalog because internal messages are exact copy + // of messages in English catalog. + if (primary_lang_id == LANG_ENGLISH) { + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not + // be re-opened. + goto end; + }; // if + + // Construct resource DLL name. + /* Simple LoadLibrary( name ) is not suitable due to security issue (see + http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have + to specify full path to the message catalog. */ + { + // Get handle of our DLL first. + HMODULE handle; + BOOL brc = GetModuleHandleEx( + GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, + reinterpret_cast(&__kmp_i18n_do_catopen), &handle); + if (!brc) { // Error occurred. + status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be + // re-opened. + goto end; + // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and + // print a proper warning. }; // if - // Construct resource DLL name. - /* - Simple - LoadLibrary( name ) - is not suitable due to security issue (see - http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full - path to the message catalog. - */ - { - - // Get handle of our DLL first. - HMODULE handle; - BOOL brc = - GetModuleHandleEx( - GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT, - reinterpret_cast< LPCSTR >( & __kmp_i18n_do_catopen ), - & handle - ); - if ( ! brc ) { // Error occurred. - status = KMP_I18N_ABSENT; // mark catalog as absent so it will not be re-opened. - goto end; - // TODO: Enable multiple messages (KMP_MSG) to be passed to __kmp_msg; and print - // a proper warning. - }; // if - - // Now get path to the our DLL. 
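The Windows catalog lookup around this hunk follows the usual safe-loading recipe: find the module that contains this code, take its directory, then load the resource DLL by full path as a data file so the DLL search path cannot be hijacked. A compressed sketch of just that recipe (fixed-size buffer, no error checking, and an illustrative "1041\libompui.dll" locale subdirectory) looks like:

#include <windows.h>
#include <string.h>

HMODULE load_catalog_beside_this_module(void) {
  HMODULE self = NULL;
  char path[MAX_PATH];
  /* Which module (DLL or EXE) contains this very function? */
  GetModuleHandleExA(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS |
                         GET_MODULE_HANDLE_EX_FLAG_UNCHANGED_REFCOUNT,
                     (LPCSTR)&load_catalog_beside_this_module, &self);
  GetModuleFileNameA(self, path, (DWORD)sizeof(path)); /* full path of that module */
  *(strrchr(path, '\\') + 1) = '\0';                   /* keep only the directory  */
  strcat(path, "1041\\libompui.dll");                  /* locale subdir + catalog  */
  /* Data-file load: the catalog is only a resource container, never executed. */
  return LoadLibraryExA(path, NULL, LOAD_LIBRARY_AS_DATAFILE);
}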
- for ( ; ; ) { - DWORD drc = GetModuleFileName( handle, path.str, path.size ); - if ( drc == 0 ) { // Error occurred. - status = KMP_I18N_ABSENT; - goto end; - }; // if - if ( drc < path.size ) { - path.used = drc; - break; - }; // if - __kmp_str_buf_reserve( & path, path.size * 2 ); - }; // forever - - // Now construct the name of message catalog. - kmp_str_fname fname; - __kmp_str_fname_init( & fname, path.str ); - __kmp_str_buf_clear( & path ); - __kmp_str_buf_print( & path, "%s%lu/%s", fname.dir, (unsigned long)( locale_id ), name ); - __kmp_str_fname_free( & fname ); - - } + // Now get path to the our DLL. + for (;;) { + DWORD drc = GetModuleFileName(handle, path.str, path.size); + if (drc == 0) { // Error occurred. + status = KMP_I18N_ABSENT; + goto end; + }; // if + if (drc < path.size) { + path.used = drc; + break; + }; // if + __kmp_str_buf_reserve(&path, path.size * 2); + }; // forever - // For security reasons, use LoadLibraryEx() and load message catalog as a data file. - cat = LoadLibraryEx( path.str, NULL, LOAD_LIBRARY_AS_DATAFILE ); - status = ( cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED ); - - if ( status == KMP_I18N_ABSENT ) { - if (__kmp_generate_warnings > kmp_warnings_low) { // AC: only issue warning in case explicitly asked to - DWORD error = GetLastError(); - // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so - // __kmp_i18n_catgets() will not try to open catalog but will return default message. - /* - If message catalog for another architecture found (e.g. OpenMP RTL - for IA-32 architecture opens libompui.dll for Intel(R) 64) - Windows* OS returns error 193 (ERROR_BAD_EXE_FORMAT). However, - FormatMessage fails to return a message for this error, so user - will see: - - OMP: Warning #2: Cannot open message catalog "1041\libompui.dll": - OMP: System error #193: (No system error message available) - OMP: Info #3: Default messages will be used. - - Issue a hint in this case to let cause of trouble more understandable. - */ - kmp_msg_t err_code = KMP_SYSERRCODE(error); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantOpenMessageCatalog, path.str ), - err_code, - ( error == ERROR_BAD_EXE_FORMAT ? KMP_HNT( BadExeFormat, path.str, KMP_ARCH_STR ) : __kmp_msg_null ), - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - - KMP_INFORM( WillUseDefaultMessages ); + // Now construct the name of message catalog. + kmp_str_fname fname; + __kmp_str_fname_init(&fname, path.str); + __kmp_str_buf_clear(&path); + __kmp_str_buf_print(&path, "%s%lu/%s", fname.dir, + (unsigned long)(locale_id), name); + __kmp_str_fname_free(&fname); + } + + // For security reasons, use LoadLibraryEx() and load message catalog as a + // data file. + cat = LoadLibraryEx(path.str, NULL, LOAD_LIBRARY_AS_DATAFILE); + status = (cat == KMP_I18N_NULLCAT ? KMP_I18N_ABSENT : KMP_I18N_OPENED); + + if (status == KMP_I18N_ABSENT) { + if (__kmp_generate_warnings > kmp_warnings_low) { + // AC: only issue warning in case explicitly asked to + DWORD error = GetLastError(); + // Infinite recursion will not occur -- status is KMP_I18N_ABSENT now, so + // __kmp_i18n_catgets() will not try to open catalog but will return + // default message. + /* If message catalog for another architecture found (e.g. OpenMP RTL for + IA-32 architecture opens libompui.dll for Intel(R) 64) Windows* OS + returns error 193 (ERROR_BAD_EXE_FORMAT). 
However, FormatMessage fails + to return a message for this error, so user will see: + + OMP: Warning #2: Cannot open message catalog "1041\libompui.dll": + OMP: System error #193: (No system error message available) + OMP: Info #3: Default messages will be used. + + Issue hint in this case so cause of trouble is more understandable. */ + kmp_msg_t err_code = KMP_SYSERRCODE(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantOpenMessageCatalog, path.str), + err_code, (error == ERROR_BAD_EXE_FORMAT + ? KMP_HNT(BadExeFormat, path.str, KMP_ARCH_STR) + : __kmp_msg_null), + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); } - } else { // status == KMP_I18N_OPENED - - int section = get_section( kmp_i18n_prp_Version ); - int number = get_number( kmp_i18n_prp_Version ); - char const * expected = __kmp_i18n_default_table.sect[ section ].str[ number ]; - kmp_str_buf_t version; // Actual version of the catalog. - __kmp_str_buf_init( & version ); - __kmp_str_buf_print( & version, "%s", ___catgets( kmp_i18n_prp_Version ) ); - // String returned by catgets is invalid after closing the catalog, so copy it. - if ( strcmp( version.str, expected ) != 0 ) { - // Close bad catalog. - __kmp_i18n_catclose(); - status = KMP_I18N_ABSENT; // And mark it as absent. - if (__kmp_generate_warnings > kmp_warnings_low) { - // And now print a warning using default messages. - __kmp_msg( - kmp_ms_warning, - KMP_MSG( WrongMessageCatalog, path.str, version.str, expected ), - __kmp_msg_null - ); - KMP_INFORM( WillUseDefaultMessages ); - } // __kmp_generate_warnings - }; // if - __kmp_str_buf_free( & version ); - + KMP_INFORM(WillUseDefaultMessages); + } + } else { // status == KMP_I18N_OPENED + + int section = get_section(kmp_i18n_prp_Version); + int number = get_number(kmp_i18n_prp_Version); + char const *expected = __kmp_i18n_default_table.sect[section].str[number]; + kmp_str_buf_t version; // Actual version of the catalog. + __kmp_str_buf_init(&version); + __kmp_str_buf_print(&version, "%s", ___catgets(kmp_i18n_prp_Version)); + // String returned by catgets is invalid after closing catalog, so copy it. + if (strcmp(version.str, expected) != 0) { + // Close bad catalog. + __kmp_i18n_catclose(); + status = KMP_I18N_ABSENT; // And mark it as absent. + if (__kmp_generate_warnings > kmp_warnings_low) { + // And now print a warning using default messages. + __kmp_msg(kmp_ms_warning, + KMP_MSG(WrongMessageCatalog, path.str, version.str, expected), + __kmp_msg_null); + KMP_INFORM(WillUseDefaultMessages); + } // __kmp_generate_warnings }; // if - code_page = get_code_page(); + __kmp_str_buf_free(&version); - end: - __kmp_str_buf_free( & path ); - return; + }; // if + code_page = get_code_page(); +end: + __kmp_str_buf_free(&path); + return; } // func __kmp_i18n_do_catopen - -void -__kmp_i18n_catclose( -) { - if ( status == KMP_I18N_OPENED ) { - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - kmp_i18n_table_free( & table ); - FreeLibrary( cat ); - cat = KMP_I18N_NULLCAT; - }; // if - code_page = default_code_page; - status = KMP_I18N_CLOSED; +void __kmp_i18n_catclose() { + if (status == KMP_I18N_OPENED) { + KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); + kmp_i18n_table_free(&table); + FreeLibrary(cat); + cat = KMP_I18N_NULLCAT; + }; // if + code_page = default_code_page; + status = KMP_I18N_CLOSED; } // func __kmp_i18n_catclose -/* - We use FormatMessage() to get strings from catalog, get system error messages, etc. 
- FormatMessage() tends to return Windows* OS-style end-of-lines, "\r\n". When string is printed, - printf() also replaces all the occurrences of "\n" with "\r\n" (again!), so sequences like - "\r\r\r\n" appear in output. It is not too good. +/* We use FormatMessage() to get strings from catalog, get system error + messages, etc. FormatMessage() tends to return Windows* OS-style + end-of-lines, "\r\n". When string is printed, printf() also replaces all the + occurrences of "\n" with "\r\n" (again!), so sequences like "\r\r\r\n" + appear in output. It is not too good. - Additional mess comes from message catalog: Our catalog source en_US.mc file (generated by - message-converter.pl) contains only "\n" characters, but en_US_msg_1033.bin file (produced by - mc.exe) may contain "\r\n" or just "\n". This mess goes from en_US_msg_1033.bin file to - message catalog, libompui.dll. For example, message + Additional mess comes from message catalog: Our catalog source en_US.mc file + (generated by message-converter.pl) contains only "\n" characters, but + en_US_msg_1033.bin file (produced by mc.exe) may contain "\r\n" or just "\n". + This mess goes from en_US_msg_1033.bin file to message catalog, + libompui.dll. For example, message - Error + Error - (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while + (there is "\n" at the end) is compiled by mc.exe to "Error\r\n", while - OMP: Error %1!d!: %2!s!\n + OMP: Error %1!d!: %2!s!\n - (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: %2!s!\r\n\n". + (there is "\n" at the end as well) is compiled to "OMP: Error %1!d!: + %2!s!\r\n\n". - Thus, stripping all "\r" normalizes string and returns it to canonical form, so printf() will - produce correct end-of-line sequences. + Thus, stripping all "\r" normalizes string and returns it to canonical form, + so printf() will produce correct end-of-line sequences. - ___strip_crs() serves for this purpose: it removes all the occurrences of "\r" in-place and - returns new length of string. -*/ -static -int -___strip_crs( - char * str -) { - int in = 0; // Input character index. - int out = 0; // Output character index. - for ( ; ; ) { - if ( str[ in ] != '\r' ) { - str[ out ] = str[ in ]; - ++ out; - }; // if - if ( str[ in ] == 0 ) { - break; - }; // if - ++ in; - }; // forever - return out - 1; + ___strip_crs() serves for this purpose: it removes all the occurrences of + "\r" in-place and returns new length of string. */ +static int ___strip_crs(char *str) { + int in = 0; // Input character index. + int out = 0; // Output character index. + for (;;) { + if (str[in] != '\r') { + str[out] = str[in]; + ++out; + }; // if + if (str[in] == 0) { + break; + }; // if + ++in; + }; // forever + return out - 1; } // func __strip_crs +static char const *___catgets(kmp_i18n_id_t id) { + + char *result = NULL; + PVOID addr = NULL; + wchar_t *wmsg = NULL; + DWORD wlen = 0; + char *msg = NULL; + int len = 0; + int rc; + + KMP_DEBUG_ASSERT(cat != KMP_I18N_NULLCAT); + wlen = // wlen does *not* include terminating null. + FormatMessageW(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_HMODULE | + FORMAT_MESSAGE_IGNORE_INSERTS, + cat, id, + 0, // LangId + (LPWSTR)&addr, + 0, // Size in elements, not in bytes. + NULL); + if (wlen <= 0) { + goto end; + }; // if + wmsg = (wchar_t *)addr; // Warning: wmsg may be not nul-terminated! + + // Calculate length of multibyte message. + // Since wlen does not include terminating null, len does not include it also. 
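To make the "\r" handling above concrete: a catalog entry that mc.exe compiled to "OMP: Error %1!d!: %2!s!\r\n\n" goes through ___strip_crs() and the trailing-newline trim in ___catgets() like this (hypothetical driver code, intermediate values shown in the comments):

char msg[] = "OMP: Error %1!d!: %2!s!\r\n\n";
int len = ___strip_crs(msg);            /* in place: every '\r' removed, new length returned */
/* msg == "OMP: Error %1!d!: %2!s!\n\n", len == 25 */
if (len >= 1 && msg[len - 1] == '\n') { /* ___catgets() then strips the final '\n' */
  --len;
  msg[len] = '\0';
}
/* msg == "OMP: Error %1!d!: %2!s!\n", so printf() emits exactly one "\r\n" per line. */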
+ len = WideCharToMultiByte(code_page, + 0, // Flags. + wmsg, wlen, // Wide buffer and size. + NULL, 0, // Buffer and size. + NULL, NULL // Default char and used default char. + ); + if (len <= 0) { + goto end; + }; // if + + // Allocate memory. + msg = (char *)KMP_INTERNAL_MALLOC(len + 1); + + // Convert wide message to multibyte one. + rc = WideCharToMultiByte(code_page, + 0, // Flags. + wmsg, wlen, // Wide buffer and size. + msg, len, // Buffer and size. + NULL, NULL // Default char and used default char. + ); + if (rc <= 0 || rc > len) { + goto end; + }; // if + KMP_DEBUG_ASSERT(rc == len); + len = rc; + msg[len] = 0; // Put terminating null to the end. + + // Stripping all "\r" before stripping last end-of-line simplifies the task. + len = ___strip_crs(msg); + + // Every message in catalog is terminated with "\n". Strip it. + if (len >= 1 && msg[len - 1] == '\n') { + --len; + msg[len] = 0; + }; // if + + // Everything looks ok. + result = msg; + msg = NULL; + +end: + + if (msg != NULL) { + KMP_INTERNAL_FREE(msg); + }; // if + if (wmsg != NULL) { + LocalFree(wmsg); + }; // if + + return result; -static -char const * -___catgets( - kmp_i18n_id_t id -) { - - char * result = NULL; - PVOID addr = NULL; - wchar_t * wmsg = NULL; - DWORD wlen = 0; - char * msg = NULL; - int len = 0; - int rc; - - KMP_DEBUG_ASSERT( cat != KMP_I18N_NULLCAT ); - wlen = // wlen does *not* include terminating null. - FormatMessageW( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_HMODULE | - FORMAT_MESSAGE_IGNORE_INSERTS, - cat, - id, - 0, // LangId - (LPWSTR) & addr, - 0, // Size in elements, not in bytes. - NULL - ); - if ( wlen <= 0 ) { - goto end; - }; // if - wmsg = (wchar_t *) addr; // Warning: wmsg may be not nul-terminated! - - // Calculate length of multibyte message. - len = // Since wlen does not include terminating null, len does not include it also. - WideCharToMultiByte( - code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - NULL, 0, // Buffer and size. - NULL, NULL // Default char and used default char. - ); - if ( len <= 0 ) { - goto end; - }; // if +} // ___catgets - // Allocate memory. - msg = (char *) KMP_INTERNAL_MALLOC( len + 1 ); - - // Convert wide message to multibyte one. - rc = - WideCharToMultiByte( - code_page, - 0, // Flags. - wmsg, wlen, // Wide buffer and size. - msg, len, // Buffer and size. - NULL, NULL // Default char and used default char. 
- ); - if ( rc <= 0 || rc > len ) { - goto end; +char const *__kmp_i18n_catgets(kmp_i18n_id_t id) { + + int section = get_section(id); + int number = get_number(id); + char const *message = NULL; + + if (1 <= section && section <= __kmp_i18n_default_table.size) { + if (1 <= number && number <= __kmp_i18n_default_table.sect[section].size) { + if (status == KMP_I18N_CLOSED) { + __kmp_i18n_catopen(); + }; // if + if (cat != KMP_I18N_NULLCAT) { + if (table.size == 0) { + table.sect = (kmp_i18n_section_t *)KMP_INTERNAL_CALLOC( + (__kmp_i18n_default_table.size + 2), sizeof(kmp_i18n_section_t)); + table.size = __kmp_i18n_default_table.size; + }; // if + if (table.sect[section].size == 0) { + table.sect[section].str = (const char **)KMP_INTERNAL_CALLOC( + __kmp_i18n_default_table.sect[section].size + 2, + sizeof(char const *)); + table.sect[section].size = + __kmp_i18n_default_table.sect[section].size; + }; // if + if (table.sect[section].str[number] == NULL) { + table.sect[section].str[number] = ___catgets(id); + }; // if + message = table.sect[section].str[number]; + }; // if + if (message == NULL) { + // Catalog is not opened or message is not found, return default + // message. + message = __kmp_i18n_default_table.sect[section].str[number]; + }; // if }; // if - KMP_DEBUG_ASSERT( rc == len ); - len = rc; - msg[ len ] = 0; // Put terminating null to the end. + }; // if + if (message == NULL) { + message = no_message_available; + }; // if + return message; - // Stripping all "\r" before stripping last end-of-line simplifies the task. - len = ___strip_crs( msg ); +} // func __kmp_i18n_catgets - // Every message in catalog is terminated with "\n". Strip it. - if ( len >= 1 && msg[ len - 1 ] == '\n' ) { - -- len; - msg[ len ] = 0; - }; // if +#endif // KMP_OS_WINDOWS - // Everything looks ok. - result = msg; - msg = NULL; +// ----------------------------------------------------------------------------- - end: +#ifndef KMP_I18N_OK +#error I18n support is not implemented for this OS. 
+#endif // KMP_I18N_OK - if ( msg != NULL ) { - KMP_INTERNAL_FREE( msg ); - }; // if - if ( wmsg != NULL ) { - LocalFree( wmsg ); - }; // if +// ----------------------------------------------------------------------------- - return result; +void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer) { -} // ___catgets + struct kmp_i18n_id_range_t { + kmp_i18n_id_t first; + kmp_i18n_id_t last; + }; // struct kmp_i18n_id_range_t + static struct kmp_i18n_id_range_t ranges[] = { + {kmp_i18n_prp_first, kmp_i18n_prp_last}, + {kmp_i18n_str_first, kmp_i18n_str_last}, + {kmp_i18n_fmt_first, kmp_i18n_fmt_last}, + {kmp_i18n_msg_first, kmp_i18n_msg_last}, + {kmp_i18n_hnt_first, kmp_i18n_hnt_last}}; // ranges -char const * -__kmp_i18n_catgets( - kmp_i18n_id_t id -) { - - int section = get_section( id ); - int number = get_number( id ); - char const * message = NULL; - - if ( 1 <= section && section <= __kmp_i18n_default_table.size ) { - if ( 1 <= number && number <= __kmp_i18n_default_table.sect[ section ].size ) { - if ( status == KMP_I18N_CLOSED ) { - __kmp_i18n_catopen(); - }; // if - if ( cat != KMP_I18N_NULLCAT ) { - if ( table.size == 0 ) { - table.sect = (kmp_i18n_section_t *) - KMP_INTERNAL_CALLOC( - ( __kmp_i18n_default_table.size + 2 ), - sizeof( kmp_i18n_section_t ) - ); - table.size = __kmp_i18n_default_table.size; - }; // if - if ( table.sect[ section ].size == 0 ) { - table.sect[ section ].str = (const char **) - KMP_INTERNAL_CALLOC( - __kmp_i18n_default_table.sect[ section ].size + 2, - sizeof( char const * ) - ); - table.sect[ section ].size = __kmp_i18n_default_table.sect[ section ].size; - }; // if - if ( table.sect[ section ].str[ number ] == NULL ) { - table.sect[ section ].str[ number ] = ___catgets( id ); - }; // if - message = table.sect[ section ].str[ number ]; - }; // if - if ( message == NULL ) { - // Catalog is not opened or message is not found, return default message. - message = __kmp_i18n_default_table.sect[ section ].str[ number ]; - }; // if - }; // if - }; // if - if ( message == NULL ) { - message = no_message_available; - }; // if - return message; + int num_of_ranges = sizeof(ranges) / sizeof(struct kmp_i18n_id_range_t); + int range; + kmp_i18n_id_t id; -} // func __kmp_i18n_catgets + for (range = 0; range < num_of_ranges; ++range) { + __kmp_str_buf_print(buffer, "*** Set #%d ***\n", range + 1); + for (id = (kmp_i18n_id_t)(ranges[range].first + 1); id < ranges[range].last; + id = (kmp_i18n_id_t)(id + 1)) { + __kmp_str_buf_print(buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets(id)); + }; // for id + }; // for range + __kmp_printf("%s", buffer->str); -#endif // KMP_OS_WINDOWS +} // __kmp_i18n_dump_catalog -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- +kmp_msg_t __kmp_msg_format(unsigned id_arg, ...) { -#ifndef KMP_I18N_OK - #error I18n support is not implemented for this OS. 
-#endif // KMP_I18N_OK + kmp_msg_t msg; + va_list args; + kmp_str_buf_t buffer; + __kmp_str_buf_init(&buffer); -// ------------------------------------------------------------------------------------------------- - -void -__kmp_i18n_dump_catalog( - kmp_str_buf_t * buffer -) { - - struct kmp_i18n_id_range_t { - kmp_i18n_id_t first; - kmp_i18n_id_t last; - }; // struct kmp_i18n_id_range_t - - static struct kmp_i18n_id_range_t ranges[] = { - { kmp_i18n_prp_first, kmp_i18n_prp_last }, - { kmp_i18n_str_first, kmp_i18n_str_last }, - { kmp_i18n_fmt_first, kmp_i18n_fmt_last }, - { kmp_i18n_msg_first, kmp_i18n_msg_last }, - { kmp_i18n_hnt_first, kmp_i18n_hnt_last } - }; // ranges - - int num_of_ranges = sizeof( ranges ) / sizeof( struct kmp_i18n_id_range_t ); - int range; - kmp_i18n_id_t id; - - for ( range = 0; range < num_of_ranges; ++ range ) { - __kmp_str_buf_print( buffer, "*** Set #%d ***\n", range + 1 ); - for ( id = (kmp_i18n_id_t)( ranges[ range ].first + 1 ); - id < ranges[ range ].last; - id = (kmp_i18n_id_t)( id + 1 ) ) { - __kmp_str_buf_print( buffer, "%d: <<%s>>\n", id, __kmp_i18n_catgets( id ) ); - }; // for id - }; // for range - - __kmp_printf( "%s", buffer->str ); + va_start(args, id_arg); -} // __kmp_i18n_dump_catalog + // We use unsigned for the ID argument and explicitly cast it here to the + // right enumerator because variadic functions are not compatible with + // default promotions. + kmp_i18n_id_t id = (kmp_i18n_id_t)id_arg; -// ------------------------------------------------------------------------------------------------- - -kmp_msg_t -__kmp_msg_format( - unsigned id_arg, - ... -) { - - kmp_msg_t msg; - va_list args; - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - - va_start( args, id_arg ); - - // We use unsigned for the ID argument and explicitly cast it here to the - // right enumerator because variadic functions are not compatible with - // default promotions. - kmp_i18n_id_t id = (kmp_i18n_id_t)id_arg; - - #if KMP_OS_UNIX - // On Linux* OS and OS X*, printf() family functions process parameter numbers, for example: - // "%2$s %1$s". - __kmp_str_buf_vprint( & buffer, __kmp_i18n_catgets( id ), args ); - #elif KMP_OS_WINDOWS - // On Winodws, printf() family functions does not recognize GNU style parameter numbers, - // so we have to use FormatMessage() instead. It recognizes parameter numbers, e. g.: - // "%2!s! "%1!s!". - { - LPTSTR str = NULL; - int len; - FormatMessage( - FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, - __kmp_i18n_catgets( id ), - 0, 0, - (LPTSTR)( & str ), - 0, - & args - ); - len = ___strip_crs( str ); - __kmp_str_buf_cat( & buffer, str, len ); - LocalFree( str ); - } - #else - #error - #endif - va_end( args ); - __kmp_str_buf_detach( & buffer ); - - msg.type = (kmp_msg_type_t)( id >> 16 ); - msg.num = id & 0xFFFF; - msg.str = buffer.str; - msg.len = buffer.used; - - return msg; +#if KMP_OS_UNIX + // On Linux* OS and OS X*, printf() family functions process parameter + // numbers, for example: "%2$s %1$s". + __kmp_str_buf_vprint(&buffer, __kmp_i18n_catgets(id), args); +#elif KMP_OS_WINDOWS + // On Winodws, printf() family functions does not recognize GNU style + // parameter numbers, so we have to use FormatMessage() instead. It recognizes + // parameter numbers, e. g.: "%2!s! "%1!s!". 
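The contrast drawn in the comment above is easiest to see side by side: both calls below print "hello world" even though the arguments are supplied in the opposite order. The strings are illustrative, not catalog messages; the POSIX branch relies on the numbered-conversion extension ("%n$s"), the Windows branch on FormatMessage's "%n!printf-type!" inserts fed from an argument array:

#include <stdarg.h>
#include <stdio.h>
#if defined(_WIN32)
#include <windows.h>
#endif

int main(void) {
#if !defined(_WIN32)
  /* POSIX printf-style numbered parameters, as consumed through
     __kmp_str_buf_vprint() on Linux* OS and OS X*. */
  printf("%2$s %1$s\n", "world", "hello");
#else
  /* FormatMessage-style numbered inserts on Windows* OS. */
  char buf[64];
  DWORD_PTR inserts[] = {(DWORD_PTR)"world", (DWORD_PTR)"hello"};
  FormatMessageA(FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ARGUMENT_ARRAY,
                 "%2!s! %1!s!", 0, 0, buf, (DWORD)sizeof(buf),
                 (va_list *)inserts);
  printf("%s\n", buf);
#endif
  return 0;
}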
+ { + LPTSTR str = NULL; + int len; + FormatMessage(FORMAT_MESSAGE_FROM_STRING | FORMAT_MESSAGE_ALLOCATE_BUFFER, + __kmp_i18n_catgets(id), 0, 0, (LPTSTR)(&str), 0, &args); + len = ___strip_crs(str); + __kmp_str_buf_cat(&buffer, str, len); + LocalFree(str); + } +#else +#error +#endif + va_end(args); + __kmp_str_buf_detach(&buffer); + + msg.type = (kmp_msg_type_t)(id >> 16); + msg.num = id & 0xFFFF; + msg.str = buffer.str; + msg.len = buffer.used; + + return msg; } // __kmp_msg_format -// ------------------------------------------------------------------------------------------------- - -static -char * -sys_error( - int err -) { - - char * message = NULL; - - #if KMP_OS_WINDOWS - - LPVOID buffer = NULL; - int len; - DWORD rc; - rc = - FormatMessage( - FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, - NULL, - err, - MAKELANGID( LANG_NEUTRAL, SUBLANG_DEFAULT ), // Default language. - (LPTSTR) & buffer, - 0, - NULL - ); - if ( rc > 0 ) { - // Message formatted. Copy it (so we can free it later with normal free(). - message = __kmp_str_format( "%s", (char *) buffer ); - len = ___strip_crs( message ); // Delete carriage returns if any. - // Strip trailing newlines. - while ( len > 0 && message[ len - 1 ] == '\n' ) { - -- len; - }; // while - message[ len ] = 0; - } else { - // FormatMessage() failed to format system error message. GetLastError() would give us - // error code, which we would convert to message... this it dangerous recursion, which - // cannot clarify original error, so we will not even start it. - }; // if - if ( buffer != NULL ) { - LocalFree( buffer ); - }; // if +// ----------------------------------------------------------------------------- +static char *sys_error(int err) { - #else // Non-Windows* OS: Linux* OS or OS X* - - /* - There are 2 incompatible versions of strerror_r: - - char * strerror_r( int, char *, size_t ); // GNU version - int strerror_r( int, char *, size_t ); // XSI version - */ - - #if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || \ - (defined(__BIONIC__) && defined(_GNU_SOURCE) && \ - __ANDROID_API__ >= __ANDROID_API_M__) - - // GNU version of strerror_r. - - char buffer[ 2048 ]; - char * const err_msg = strerror_r( err, buffer, sizeof( buffer ) ); - // Do not eliminate this assignment to temporary variable, otherwise compiler would - // not issue warning if strerror_r() returns `int' instead of expected `char *'. - message = __kmp_str_format( "%s", err_msg ); - - #else // OS X*, FreeBSD* etc. - - // XSI version of strerror_r. - - int size = 2048; - char * buffer = (char *) KMP_INTERNAL_MALLOC( size ); - int rc; - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - rc = strerror_r( err, buffer, size ); - if ( rc == -1 ) { - rc = errno; // XSI version sets errno. - }; // if - while ( rc == ERANGE ) { // ERANGE means the buffer is too small. - KMP_INTERNAL_FREE( buffer ); - size *= 2; - buffer = (char *) KMP_INTERNAL_MALLOC( size ); - if (buffer == NULL) { - KMP_FATAL(MemoryAllocFailed); - } - rc = strerror_r( err, buffer, size ); - if ( rc == -1 ) { - rc = errno; // XSI version sets errno. - }; // if - }; // while - if ( rc == 0 ) { - message = buffer; - } else { - // Buffer is unused. Free it. - KMP_INTERNAL_FREE( buffer ); - }; // if - - #endif - - #endif /* KMP_OS_WINDOWS */ - - if ( message == NULL ) { - // TODO: I18n this message. 
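// The "id >> 16" / "id & 0xFFFF" split in __kmp_msg_format() above suggests that each
// message id packs the message type in its high 16 bits and the message number in its
// low 16 bits. A tiny self-contained sketch of that encoding (pack_id and friends are
// hypothetical helpers, not runtime functions):
#include <cassert>

static unsigned pack_id(unsigned type, unsigned num) {
  return (type << 16) | (num & 0xFFFF);
}
static unsigned id_type(unsigned id) { return id >> 16; }
static unsigned id_num(unsigned id) { return id & 0xFFFF; }

int main() {
  unsigned id = pack_id(4 /* kmp_mt_mesg */, 37);
  assert(id_type(id) == 4 && id_num(id) == 37);
  return 0;
}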
- message = __kmp_str_format( "%s", "(No system error message available)" ); - }; // if - return message; + char *message = NULL; -} // sys_error +#if KMP_OS_WINDOWS + + LPVOID buffer = NULL; + int len; + DWORD rc; + rc = FormatMessage( + FORMAT_MESSAGE_ALLOCATE_BUFFER | FORMAT_MESSAGE_FROM_SYSTEM, NULL, err, + MAKELANGID(LANG_NEUTRAL, SUBLANG_DEFAULT), // Default language. + (LPTSTR)&buffer, 0, NULL); + if (rc > 0) { + // Message formatted. Copy it (so we can free it later with normal free(). + message = __kmp_str_format("%s", (char *)buffer); + len = ___strip_crs(message); // Delete carriage returns if any. + // Strip trailing newlines. + while (len > 0 && message[len - 1] == '\n') { + --len; + }; // while + message[len] = 0; + } else { + // FormatMessage() failed to format system error message. GetLastError() + // would give us error code, which we would convert to message... this it + // dangerous recursion, which cannot clarify original error, so we will not + // even start it. + }; // if + if (buffer != NULL) { + LocalFree(buffer); + }; // if + +#else // Non-Windows* OS: Linux* OS or OS X* + +/* There are 2 incompatible versions of strerror_r: + + char * strerror_r( int, char *, size_t ); // GNU version + int strerror_r( int, char *, size_t ); // XSI version +*/ -// ------------------------------------------------------------------------------------------------- +#if (defined(__GLIBC__) && defined(_GNU_SOURCE)) || \ + (defined(__BIONIC__) && defined(_GNU_SOURCE) && \ + __ANDROID_API__ >= __ANDROID_API_M__) + // GNU version of strerror_r. + + char buffer[2048]; + char *const err_msg = strerror_r(err, buffer, sizeof(buffer)); + // Do not eliminate this assignment to temporary variable, otherwise compiler + // would not issue warning if strerror_r() returns `int' instead of expected + // `char *'. + message = __kmp_str_format("%s", err_msg); + +#else // OS X*, FreeBSD* etc. + // XSI version of strerror_r. + int size = 2048; + char *buffer = (char *)KMP_INTERNAL_MALLOC(size); + int rc; + if (buffer == NULL) { + KMP_FATAL(MemoryAllocFailed); + } + rc = strerror_r(err, buffer, size); + if (rc == -1) { + rc = errno; // XSI version sets errno. + }; // if + while (rc == ERANGE) { // ERANGE means the buffer is too small. + KMP_INTERNAL_FREE(buffer); + size *= 2; + buffer = (char *)KMP_INTERNAL_MALLOC(size); + if (buffer == NULL) { + KMP_FATAL(MemoryAllocFailed); + } + rc = strerror_r(err, buffer, size); + if (rc == -1) { + rc = errno; // XSI version sets errno. + }; // if + }; // while + if (rc == 0) { + message = buffer; + } else { // Buffer is unused. Free it. + KMP_INTERNAL_FREE(buffer); + }; // if + +#endif + +#endif /* KMP_OS_WINDOWS */ + + if (message == NULL) { + // TODO: I18n this message. 
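// A reduced sketch of how sys_error() above copes with the two incompatible strerror_r()
// flavours: the GNU variant returns a char * (which need not point into the caller's
// buffer), while the XSI variant returns an int, fills the buffer, and reports ERANGE
// when the buffer is too small. The feature test below is a simplification of the one
// used in the patch, and demo_sys_error is a hypothetical name.
#include <cerrno>
#include <string.h>
#include <string>

static std::string demo_sys_error(int err) {
#if defined(__GLIBC__) && defined(_GNU_SOURCE)
  // GNU version: use the returned pointer, not necessarily buf itself.
  char buf[2048];
  char *msg = strerror_r(err, buf, sizeof(buf));
  return std::string(msg);
#else
  // XSI version: grow the buffer until the call stops reporting ERANGE.
  std::string buf(256, '\0');
  for (;;) {
    int rc = strerror_r(err, &buf[0], buf.size());
    if (rc == -1)
      rc = errno; // some implementations report the error via errno instead
    if (rc == 0) {
      buf.resize(strlen(buf.c_str()));
      return buf;
    }
    if (rc != ERANGE)
      return "(No system error message available)";
    buf.resize(buf.size() * 2); // ERANGE: buffer too small, retry with a larger one
  }
#endif
}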
+ message = __kmp_str_format("%s", "(No system error message available)"); + }; // if + return message; +} // sys_error -kmp_msg_t -__kmp_msg_error_code( - int code -) { +// ----------------------------------------------------------------------------- +kmp_msg_t __kmp_msg_error_code(int code) { - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = code; - msg.str = sys_error( code ); - msg.len = KMP_STRLEN( msg.str ); - return msg; + kmp_msg_t msg; + msg.type = kmp_mt_syserr; + msg.num = code; + msg.str = sys_error(code); + msg.len = KMP_STRLEN(msg.str); + return msg; } // __kmp_msg_error_code -// ------------------------------------------------------------------------------------------------- - -kmp_msg_t -__kmp_msg_error_mesg( - char const * mesg -) { +// ----------------------------------------------------------------------------- +kmp_msg_t __kmp_msg_error_mesg(char const *mesg) { - kmp_msg_t msg; - msg.type = kmp_mt_syserr; - msg.num = 0; - msg.str = __kmp_str_format( "%s", mesg ); - msg.len = KMP_STRLEN( msg.str ); - return msg; + kmp_msg_t msg; + msg.type = kmp_mt_syserr; + msg.num = 0; + msg.str = __kmp_str_format("%s", mesg); + msg.len = KMP_STRLEN(msg.str); + return msg; } // __kmp_msg_error_mesg -// ------------------------------------------------------------------------------------------------- - -void -__kmp_msg( - kmp_msg_severity_t severity, - kmp_msg_t message, - ... -) { - - va_list args; - kmp_i18n_id_t format; // format identifier - kmp_msg_t fmsg; // formatted message - kmp_str_buf_t buffer; - - if ( severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off ) - return; // no reason to form a string in order to not print it - - __kmp_str_buf_init( & buffer ); - - // Format the primary message. - switch ( severity ) { - case kmp_ms_inform : { - format = kmp_i18n_fmt_Info; - } break; - case kmp_ms_warning : { - format = kmp_i18n_fmt_Warning; - } break; - case kmp_ms_fatal : { - format = kmp_i18n_fmt_Fatal; - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - }; +// ----------------------------------------------------------------------------- +void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...) { + + va_list args; + kmp_i18n_id_t format; // format identifier + kmp_msg_t fmsg; // formatted message + kmp_str_buf_t buffer; + + if (severity != kmp_ms_fatal && __kmp_generate_warnings == kmp_warnings_off) + return; // no reason to form a string in order to not print it + + __kmp_str_buf_init(&buffer); + + // Format the primary message. + switch (severity) { + case kmp_ms_inform: { + format = kmp_i18n_fmt_Info; + } break; + case kmp_ms_warning: { + format = kmp_i18n_fmt_Warning; + } break; + case kmp_ms_fatal: { + format = kmp_i18n_fmt_Fatal; + } break; + default: { KMP_DEBUG_ASSERT(0); }; + }; // switch + fmsg = __kmp_msg_format(format, message.num, message.str); + __kmp_str_free(&message.str); + __kmp_str_buf_cat(&buffer, fmsg.str, fmsg.len); + __kmp_str_free(&fmsg.str); + + // Format other messages. 
+ va_start(args, message); + for (;;) { + message = va_arg(args, kmp_msg_t); + if (message.type == kmp_mt_dummy && message.str == NULL) { + break; + }; // if + if (message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str) { + continue; + }; // if + switch (message.type) { + case kmp_mt_hint: { + format = kmp_i18n_fmt_Hint; + } break; + case kmp_mt_syserr: { + format = kmp_i18n_fmt_SysErr; + } break; + default: { KMP_DEBUG_ASSERT(0); }; }; // switch - fmsg = __kmp_msg_format( format, message.num, message.str ); + fmsg = __kmp_msg_format(format, message.num, message.str); __kmp_str_free(&message.str); - __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); + __kmp_str_buf_cat(&buffer, fmsg.str, fmsg.len); __kmp_str_free(&fmsg.str); + }; // forever + va_end(args); - // Format other messages. - va_start( args, message ); - for ( ; ; ) { - message = va_arg( args, kmp_msg_t ); - if ( message.type == kmp_mt_dummy && message.str == NULL ) { - break; - }; // if - if ( message.type == kmp_mt_dummy && message.str == __kmp_msg_empty.str ) { - continue; - }; // if - switch ( message.type ) { - case kmp_mt_hint : { - format = kmp_i18n_fmt_Hint; - } break; - case kmp_mt_syserr : { - format = kmp_i18n_fmt_SysErr; - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - }; - }; // switch - fmsg = __kmp_msg_format( format, message.num, message.str ); - __kmp_str_free(&message.str); - __kmp_str_buf_cat( & buffer, fmsg.str, fmsg.len ); - __kmp_str_free(&fmsg.str); - }; // forever - va_end( args ); - - // Print formatted messages. - // This lock prevents multiple fatal errors on the same problem. - // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests to hang on OS X*. - __kmp_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); - - if ( severity == kmp_ms_fatal ) { - #if KMP_OS_WINDOWS - __kmp_thread_sleep( 500 ); /* Delay to give message a chance to appear before reaping */ - #endif - __kmp_abort_process(); - }; // if + // Print formatted messages. + // This lock prevents multiple fatal errors on the same problem. + // __kmp_acquire_bootstrap_lock( & lock ); // GEH - This lock causing tests + // to hang on OS X*. + __kmp_printf("%s", buffer.str); + __kmp_str_buf_free(&buffer); - // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests to hang on OS X*. + if (severity == kmp_ms_fatal) { +#if KMP_OS_WINDOWS + __kmp_thread_sleep( + 500); /* Delay to give message a chance to appear before reaping */ +#endif + __kmp_abort_process(); + }; // if -} // __kmp_msg + // __kmp_release_bootstrap_lock( & lock ); // GEH - this lock causing tests + // to hang on OS X*. -// ------------------------------------------------------------------------------------------------- +} // __kmp_msg // end of file // diff --git a/openmp/runtime/src/kmp_i18n.h b/openmp/runtime/src/kmp_i18n.h index 3d28da7..c2b28f7 100644 --- a/openmp/runtime/src/kmp_i18n.h +++ b/openmp/runtime/src/kmp_i18n.h @@ -19,173 +19,164 @@ #include "kmp_str.h" #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -/* - kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with identifiers of all the - messages in the catalog. There is one special identifier: kmp_i18n_null, which denotes absence - of message. -*/ +/* kmp_i18n_id.inc defines kmp_i18n_id_t type. It is an enumeration with + identifiers of all the messages in the catalog. There is one special + identifier: kmp_i18n_null, which denotes absence of message. */ #include "kmp_i18n_id.inc" // Generated file. Do not edit it manually. 
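// __kmp_msg() in kmp_i18n.cpp above consumes its variadic arguments as whole kmp_msg_t
// structures until it meets the __kmp_msg_null sentinel. A reduced sketch of that
// sentinel-terminated pattern with a hypothetical struct (demo_* names are not part of
// the runtime):
#include <cstdarg>
#include <cstdio>

struct demo_msg_t {
  int type;        // 0 plays the role of the "dummy" type
  const char *str; // NULL together with type == 0 terminates the list
};

static const demo_msg_t demo_null = {0, nullptr};

static void demo_emit(demo_msg_t first, ...) {
  std::printf("%s\n", first.str);
  va_list args;
  va_start(args, first);
  for (;;) {
    demo_msg_t m = va_arg(args, demo_msg_t); // read one whole struct per iteration
    if (m.type == 0 && m.str == nullptr)
      break; // sentinel reached: end of the list
    std::printf("  + %s\n", m.str);
  }
  va_end(args);
}

int main() {
  demo_msg_t a = {1, "primary message"}, b = {2, "hint"};
  demo_emit(a, b, demo_null); // the trailing sentinel ends the argument list
  return 0;
}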
-/* - Low-level functions handling message catalog. __kmp_i18n_open() opens message catalog, - __kmp_i18n_closes() it. Explicit opening is not required: if message catalog is not yet open, - __kmp_i18n_catgets() will open it implicitly. However, catalog should be explicitly closed, - otherwise resources (mamory, handles) may leak. +/* Low-level functions handling message catalog. __kmp_i18n_open() opens message + catalog, __kmp_i18n_closes() it. Explicit opening is not required: if message + catalog is not yet open, __kmp_i18n_catgets() will open it implicitly. + However, catalog should be explicitly closed, otherwise resources (mamory, + handles) may leak. - __kmp_i18n_catgets() returns read-only string. It should not be freed. + __kmp_i18n_catgets() returns read-only string. It should not be freed. - KMP_I18N_STR macro simplifies acces to strings in message catalog a bit. Following two lines are - equivalent: + KMP_I18N_STR macro simplifies acces to strings in message catalog a bit. + Following two lines are equivalent: - __kmp_i18n_catgets( kmp_i18n_str_Warning ) - KMP_I18N_STR( Warning ) + __kmp_i18n_catgets( kmp_i18n_str_Warning ) + KMP_I18N_STR( Warning ) */ -void __kmp_i18n_catopen(); -void __kmp_i18n_catclose(); -char const * __kmp_i18n_catgets( kmp_i18n_id_t id ); +void __kmp_i18n_catopen(); +void __kmp_i18n_catclose(); +char const *__kmp_i18n_catgets(kmp_i18n_id_t id); -#define KMP_I18N_STR( id ) __kmp_i18n_catgets( kmp_i18n_str_ ## id ) - - -/* - ------------------------------------------------------------------------------------------------ +#define KMP_I18N_STR(id) __kmp_i18n_catgets(kmp_i18n_str_##id) - High-level interface for printing strings targeted to the user. +/* High-level interface for printing strings targeted to the user. - All the strings are divided into 3 types: + All the strings are divided into 3 types: + * messages, + * hints, + * system errors. - * messages, - * hints, - * system errors. + There are 3 kind of message severities: + * informational messages, + * warnings (non-fatal errors), + * fatal errors. - There are 3 kind of message severities: + For example: + OMP: Warning #2: Cannot open message catalog "libguide.cat": (1) + OMP: System error #2: No such file or directory (2) + OMP: Hint: Please check NLSPATH environment variable. (3) + OMP: Info #3: Default messages will be used. (4) - * informational messages, - * warnings (non-fatal errors), - * fatal errors. - - For example: - - OMP: Warning #2: Cannot open message catalog "libguide.cat": (1) - OMP: System error #2: No such file or directory (2) - OMP: Hint: Please check NLSPATH environment variable. (3) - OMP: Info #3: Default messages will be used. (4) - - where - - (1) is a message of warning severity, - (2) is a system error caused the previous warning, - (3) is a hint for the user how to fix the problem, - (4) is a message of informational severity. + where + (1) is a message of warning severity, + (2) is a system error caused the previous warning, + (3) is a hint for the user how to fix the problem, + (4) is a message of informational severity. Usage in complex cases (message is accompanied with hints and system errors): - int error = errno; // We need save errno immediately, because it may be changed. 
- __kmp_msg( - kmp_ms_warning, // Severity - KMP_MSG( CantOpenMessageCatalog, name ), // Primary message - KMP_ERR( error ), // System error - KMP_HNT( CheckNLSPATH ), // Hint - __kmp_msg_null // Variadic argument list finisher - ); - - Usage in simple cases (just a message, no system errors or hints): - - KMP_INFORM( WillUseDefaultMessages ); - KMP_WARNING( CantOpenMessageCatalog, name ); - KMP_FATAL( StackOverlap ); - KMP_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL( "pthread_create", status ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - - ------------------------------------------------------------------------------------------------ + int error = errno; // We need save errno immediately, because it may + // be changed. + __kmp_msg( + kmp_ms_warning, // Severity + KMP_MSG( CantOpenMessageCatalog, name ), // Primary message + KMP_ERR( error ), // System error + KMP_HNT( CheckNLSPATH ), // Hint + __kmp_msg_null // Variadic argument list finisher + ); + + Usage in simple cases (just a message, no system errors or hints): + KMP_INFORM( WillUseDefaultMessages ); + KMP_WARNING( CantOpenMessageCatalog, name ); + KMP_FATAL( StackOverlap ); + KMP_SYSFAIL( "pthread_create", status ); + KMP_CHECK_SYSFAIL( "pthread_create", status ); + KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); */ enum kmp_msg_type { - kmp_mt_dummy = 0, // Special type for internal purposes. - kmp_mt_mesg = 4, // Primary OpenMP message, could be information, warning, or fatal. - kmp_mt_hint = 5, // Hint to the user. - kmp_mt_syserr = -1 // System error message. + kmp_mt_dummy = 0, // Special type for internal purposes. + kmp_mt_mesg = + 4, // Primary OpenMP message, could be information, warning, or fatal. + kmp_mt_hint = 5, // Hint to the user. + kmp_mt_syserr = -1 // System error message. }; // enum kmp_msg_type -typedef enum kmp_msg_type kmp_msg_type_t; +typedef enum kmp_msg_type kmp_msg_type_t; struct kmp_msg { - kmp_msg_type_t type; - int num; - char const * str; - int len; + kmp_msg_type_t type; + int num; + char const *str; + int len; }; // struct kmp_message -typedef struct kmp_msg kmp_msg_t; +typedef struct kmp_msg kmp_msg_t; // Two special messages. -extern kmp_msg_t __kmp_msg_empty; // Can be used in place where message is required syntactically. -extern kmp_msg_t __kmp_msg_null; // Denotes the end of variadic list of arguments. - -// Helper functions. Creates messages either from message catalog or from system. Note: these -// functions allocate memory. You should pass created messages to __kmp_msg() function, it will -// print messages and destroy them. -kmp_msg_t __kmp_msg_format( unsigned id_arg, ... ); -kmp_msg_t __kmp_msg_error_code( int code ); -kmp_msg_t __kmp_msg_error_mesg( char const * mesg ); +extern kmp_msg_t __kmp_msg_empty; // Can be used in place where message is +// required syntactically. +extern kmp_msg_t + __kmp_msg_null; // Denotes the end of variadic list of arguments. + +// Helper functions. Creates messages either from message catalog or from +// system. Note: these functions allocate memory. You should pass created +// messages to __kmp_msg() function, it will print messages and destroy them. +kmp_msg_t __kmp_msg_format(unsigned id_arg, ...); +kmp_msg_t __kmp_msg_error_code(int code); +kmp_msg_t __kmp_msg_error_mesg(char const *mesg); // Helper macros to make calls shorter. -#define KMP_MSG( ... ) __kmp_msg_format( kmp_i18n_msg_ ## __VA_ARGS__ ) -#define KMP_HNT( ... 
) __kmp_msg_format( kmp_i18n_hnt_ ## __VA_ARGS__ ) -#define KMP_SYSERRCODE( code ) __kmp_msg_error_code( code ) -#define KMP_SYSERRMESG( mesg ) __kmp_msg_error_mesg( mesg ) +#define KMP_MSG(...) __kmp_msg_format(kmp_i18n_msg_##__VA_ARGS__) +#define KMP_HNT(...) __kmp_msg_format(kmp_i18n_hnt_##__VA_ARGS__) +#define KMP_SYSERRCODE(code) __kmp_msg_error_code(code) +#define KMP_SYSERRMESG(mesg) __kmp_msg_error_mesg(mesg) #define KMP_ERR KMP_SYSERRCODE // Message severity. enum kmp_msg_severity { - kmp_ms_inform, // Just information for the user. - kmp_ms_warning, // Non-fatal error, execution continues. - kmp_ms_fatal // Fatal error, program aborts. + kmp_ms_inform, // Just information for the user. + kmp_ms_warning, // Non-fatal error, execution continues. + kmp_ms_fatal // Fatal error, program aborts. }; // enum kmp_msg_severity -typedef enum kmp_msg_severity kmp_msg_severity_t; +typedef enum kmp_msg_severity kmp_msg_severity_t; -// Primary function for printing messages for the user. The first message is mandatory. Any number -// of system errors and hints may be specified. Argument list must be finished with __kmp_msg_null. -void __kmp_msg( kmp_msg_severity_t severity, kmp_msg_t message, ... ); +// Primary function for printing messages for the user. The first message is +// mandatory. Any number of system errors and hints may be specified. Argument +// list must be finished with __kmp_msg_null. +void __kmp_msg(kmp_msg_severity_t severity, kmp_msg_t message, ...); // Helper macros to make calls shorter in simple cases. -#define KMP_INFORM( ... ) __kmp_msg( kmp_ms_inform, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_WARNING( ... ) __kmp_msg( kmp_ms_warning, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_FATAL( ... ) __kmp_msg( kmp_ms_fatal, KMP_MSG( __VA_ARGS__ ), __kmp_msg_null ) -#define KMP_SYSFAIL( func, error ) \ - __kmp_msg( \ - kmp_ms_fatal, \ - KMP_MSG( FunctionError, func ), \ - KMP_SYSERRCODE( error ), \ - __kmp_msg_null \ - ) +#define KMP_INFORM(...) \ + __kmp_msg(kmp_ms_inform, KMP_MSG(__VA_ARGS__), __kmp_msg_null) +#define KMP_WARNING(...) \ + __kmp_msg(kmp_ms_warning, KMP_MSG(__VA_ARGS__), __kmp_msg_null) +#define KMP_FATAL(...) \ + __kmp_msg(kmp_ms_fatal, KMP_MSG(__VA_ARGS__), __kmp_msg_null) +#define KMP_SYSFAIL(func, error) \ + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, func), KMP_SYSERRCODE(error), \ + __kmp_msg_null) // Check error, if not zero, generate fatal error message. -#define KMP_CHECK_SYSFAIL( func, error ) \ - { \ - if ( error ) { \ - KMP_SYSFAIL( func, error ); \ - }; \ - } +#define KMP_CHECK_SYSFAIL(func, error) \ + { \ + if (error) { \ + KMP_SYSFAIL(func, error); \ + }; \ + } // Check status, if not zero, generate fatal error message using errno. 
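// KMP_MSG() and KMP_HNT() above rely on token pasting: the macro glues a catalog prefix
// onto its first argument, so KMP_MSG(CantOpenMessageCatalog, name) becomes
// __kmp_msg_format(kmp_i18n_msg_CantOpenMessageCatalog, name). A self-contained sketch
// of the same trick with hypothetical demo_* names:
#include <cstdio>

enum { demo_id_Hello = 1, demo_id_Bye = 2 };

static void demo_format(int id, const char *arg) {
  std::printf("msg %d: %s\n", id, arg);
}

// Pasting demo_id_ onto the first variadic token turns a short name into an enumerator.
#define DEMO_MSG(...) demo_format(demo_id_##__VA_ARGS__)

int main() {
  DEMO_MSG(Hello, "world"); // expands to demo_format(demo_id_Hello, "world")
  return 0;
}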
-#define KMP_CHECK_SYSFAIL_ERRNO( func, status ) \ - { \ - if ( status != 0 ) { \ - int error = errno; \ - KMP_SYSFAIL( func, error ); \ - }; \ - } +#define KMP_CHECK_SYSFAIL_ERRNO(func, status) \ + { \ + if (status != 0) { \ + int error = errno; \ + KMP_SYSFAIL(func, error); \ + }; \ + } #ifdef KMP_DEBUG - void __kmp_i18n_dump_catalog( kmp_str_buf_t * buffer ); +void __kmp_i18n_dump_catalog(kmp_str_buf_t *buffer); #endif // KMP_DEBUG #ifdef __cplusplus - }; // extern "C" +}; // extern "C" #endif // __cplusplus #endif // KMP_I18N_H diff --git a/openmp/runtime/src/kmp_import.cpp b/openmp/runtime/src/kmp_import.cpp index fc4bdae..94b8842 100644 --- a/openmp/runtime/src/kmp_import.cpp +++ b/openmp/runtime/src/kmp_import.cpp @@ -13,26 +13,20 @@ //===----------------------------------------------------------------------===// -/* - ------------------------------------------------------------------------------------------------ - Object generated from this source file is linked to Windows* OS DLL import library (libompmd.lib) - only! It is not a part of regular static or dynamic OpenMP RTL. Any code that just needs to go - in the libompmd.lib (but not in libompmt.lib and libompmd.dll) should be placed in this - file. - ------------------------------------------------------------------------------------------------ -*/ +/* Object generated from this source file is linked to Windows* OS DLL import + library (libompmd.lib) only! It is not a part of regular static or dynamic + OpenMP RTL. Any code that just needs to go in the libompmd.lib (but not in + libompmt.lib and libompmd.dll) should be placed in this file. */ #ifdef __cplusplus extern "C" { #endif -/* - These symbols are required for mutual exclusion with Microsoft OpenMP RTL (and compatibility - with MS Compiler). -*/ +/*These symbols are required for mutual exclusion with Microsoft OpenMP RTL + (and compatibility with MS Compiler). 
*/ int _You_must_link_with_exactly_one_OpenMP_library = 1; -int _You_must_link_with_Intel_OpenMP_library = 1; +int _You_must_link_with_Intel_OpenMP_library = 1; int _You_must_link_with_Microsoft_OpenMP_library = 1; #ifdef __cplusplus diff --git a/openmp/runtime/src/kmp_io.cpp b/openmp/runtime/src/kmp_io.cpp index 88a2c15..61ac1ca 100644 --- a/openmp/runtime/src/kmp_io.cpp +++ b/openmp/runtime/src/kmp_io.cpp @@ -13,236 +13,218 @@ //===----------------------------------------------------------------------===// +#include +#include #include #include -#include -#include #include #ifndef __ABSOFT_WIN -# include +#include #endif -#include "kmp_os.h" +#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc +#include "kmp_io.h" #include "kmp_lock.h" +#include "kmp_os.h" #include "kmp_str.h" -#include "kmp_io.h" -#include "kmp.h" // KMP_GTID_DNE, __kmp_debug_buf, etc #if KMP_OS_WINDOWS -# pragma warning( push ) -# pragma warning( disable: 271 310 ) -# include -# pragma warning( pop ) +#pragma warning(push) +#pragma warning(disable : 271 310) +#include +#pragma warning(pop) #endif /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_stdio_lock ); /* Control stdio functions */ -kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_console_lock ); /* Control console initialization */ +kmp_bootstrap_lock_t __kmp_stdio_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( + __kmp_stdio_lock); /* Control stdio functions */ +kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( + __kmp_console_lock); /* Control console initialization */ #if KMP_OS_WINDOWS - # ifdef KMP_DEBUG - /* __kmp_stdout is used only for dev build */ - static HANDLE __kmp_stdout = NULL; - # endif - static HANDLE __kmp_stderr = NULL; - static int __kmp_console_exists = FALSE; - static kmp_str_buf_t __kmp_console_buf; - - static int - is_console( void ) - { - char buffer[ 128 ]; - DWORD rc = 0; - DWORD err = 0; - // Try to get console title. - SetLastError( 0 ); - // GetConsoleTitle does not reset last error in case of success or short buffer, - // so we need to clear it explicitly. - rc = GetConsoleTitle( buffer, sizeof( buffer ) ); - if ( rc == 0 ) { - // rc == 0 means getting console title failed. Let us find out why. - err = GetLastError(); - // err == 0 means buffer too short (we suppose console exists). - // In Window applications we usually have err == 6 (invalid handle). - }; // if - return rc > 0 || err == 0; - } - - void - __kmp_close_console( void ) - { - /* wait until user presses return before closing window */ - /* TODO only close if a window was opened */ - if( __kmp_console_exists ) { - #ifdef KMP_DEBUG - /* standard out is used only in dev build */ - __kmp_stdout = NULL; - #endif - __kmp_stderr = NULL; - __kmp_str_buf_free( &__kmp_console_buf ); - __kmp_console_exists = FALSE; - } - } +#ifdef KMP_DEBUG +/* __kmp_stdout is used only for dev build */ +static HANDLE __kmp_stdout = NULL; +#endif +static HANDLE __kmp_stderr = NULL; +static int __kmp_console_exists = FALSE; +static kmp_str_buf_t __kmp_console_buf; + +static int is_console(void) { + char buffer[128]; + DWORD rc = 0; + DWORD err = 0; + // Try to get console title. + SetLastError(0); + // GetConsoleTitle does not reset last error in case of success or short + // buffer, so we need to clear it explicitly. 
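// The check completed a few lines below treats either "a title was returned" (rc > 0) or
// "the call failed but no error was recorded" (err == 0, i.e. the buffer was merely too
// short) as evidence that a console exists. A stand-alone restatement of that predicate
// with stand-in values instead of real Win32 calls (demo_console_detected is hypothetical):
static bool demo_console_detected(unsigned long rc, unsigned long err) {
  // rc  - what GetConsoleTitle() returned: 0 on failure, the title length otherwise
  // err - GetLastError() observed after a failure; 0 is taken to mean "buffer too short"
  return rc > 0 || err == 0;
}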
+ rc = GetConsoleTitle(buffer, sizeof(buffer)); + if (rc == 0) { + // rc == 0 means getting console title failed. Let us find out why. + err = GetLastError(); + // err == 0 means buffer too short (we suppose console exists). + // In Window applications we usually have err == 6 (invalid handle). + }; // if + return rc > 0 || err == 0; +} - /* For windows, call this before stdout, stderr, or stdin are used. - * It opens a console window and starts processing */ - static void - __kmp_redirect_output( void ) - { - __kmp_acquire_bootstrap_lock( &__kmp_console_lock ); +void __kmp_close_console(void) { + /* wait until user presses return before closing window */ + /* TODO only close if a window was opened */ + if (__kmp_console_exists) { +#ifdef KMP_DEBUG + /* standard out is used only in dev build */ + __kmp_stdout = NULL; +#endif + __kmp_stderr = NULL; + __kmp_str_buf_free(&__kmp_console_buf); + __kmp_console_exists = FALSE; + } +} - if( ! __kmp_console_exists ) { - #ifdef KMP_DEBUG - /* standard out is used only in dev build */ - HANDLE ho; - #endif - HANDLE he; +/* For windows, call this before stdout, stderr, or stdin are used. + It opens a console window and starts processing */ +static void __kmp_redirect_output(void) { + __kmp_acquire_bootstrap_lock(&__kmp_console_lock); - __kmp_str_buf_init( &__kmp_console_buf ); + if (!__kmp_console_exists) { +#ifdef KMP_DEBUG + /* standard out is used only in dev build */ + HANDLE ho; +#endif + HANDLE he; - AllocConsole(); - // We do not check the result of AllocConsole because - // 1. the call is harmless - // 2. it is not clear how to communicate failue - // 3. we will detect failure later when we get handle(s) + __kmp_str_buf_init(&__kmp_console_buf); - #ifdef KMP_DEBUG - ho = GetStdHandle( STD_OUTPUT_HANDLE ); - if ( ho == INVALID_HANDLE_VALUE || ho == NULL ) { + AllocConsole(); +// We do not check the result of AllocConsole because +// 1. the call is harmless +// 2. it is not clear how to communicate failue +// 3. we will detect failure later when we get handle(s) - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stdout = NULL; +#ifdef KMP_DEBUG + ho = GetStdHandle(STD_OUTPUT_HANDLE); + if (ho == INVALID_HANDLE_VALUE || ho == NULL) { - } else { + DWORD err = GetLastError(); + // TODO: output error somehow (maybe message box) + __kmp_stdout = NULL; - __kmp_stdout = ho; // temporary code, need new global for ho + } else { - } - #endif - he = GetStdHandle( STD_ERROR_HANDLE ); - if ( he == INVALID_HANDLE_VALUE || he == NULL ) { + __kmp_stdout = ho; // temporary code, need new global for ho + } +#endif + he = GetStdHandle(STD_ERROR_HANDLE); + if (he == INVALID_HANDLE_VALUE || he == NULL) { - DWORD err = GetLastError(); - // TODO: output error somehow (maybe message box) - __kmp_stderr = NULL; + DWORD err = GetLastError(); + // TODO: output error somehow (maybe message box) + __kmp_stderr = NULL; - } else { + } else { - __kmp_stderr = he; // temporary code, need new global - } - __kmp_console_exists = TRUE; - } - __kmp_release_bootstrap_lock( &__kmp_console_lock ); + __kmp_stderr = he; // temporary code, need new global } + __kmp_console_exists = TRUE; + } + __kmp_release_bootstrap_lock(&__kmp_console_lock); +} #else - #define __kmp_stderr (stderr) +#define __kmp_stderr (stderr) #endif /* KMP_OS_WINDOWS */ -void -__kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ) -{ - #if KMP_OS_WINDOWS - if( !__kmp_console_exists ) { - __kmp_redirect_output(); - } - if( ! 
__kmp_stderr && __kmp_io == kmp_err ) { - return; - } - #ifdef KMP_DEBUG - if( ! __kmp_stdout && __kmp_io == kmp_out ) { - return; - } - #endif - #endif /* KMP_OS_WINDOWS */ - - if ( __kmp_debug_buf && __kmp_debug_buffer != NULL ) { - - int dc = ( __kmp_debug_buf_atomic ? - KMP_TEST_THEN_INC32( & __kmp_debug_count) : __kmp_debug_count++ ) - % __kmp_debug_buf_lines; - char *db = & __kmp_debug_buffer[ dc * __kmp_debug_buf_chars ]; - int chars = 0; - - #ifdef KMP_DEBUG_PIDS - chars = KMP_SNPRINTF( db, __kmp_debug_buf_chars, "pid=%d: ", (kmp_int32)getpid() ); - #endif - chars += KMP_VSNPRINTF( db, __kmp_debug_buf_chars, format, ap ); - - if ( chars + 1 > __kmp_debug_buf_chars ) { - if ( chars + 1 > __kmp_debug_buf_warn_chars ) { - #if KMP_OS_WINDOWS - DWORD count; - __kmp_str_buf_print( &__kmp_console_buf, - "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1 ); - WriteFile( __kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, &count, NULL ); - __kmp_str_buf_clear( &__kmp_console_buf ); - #else - fprintf( __kmp_stderr, - "OMP warning: Debugging buffer overflow; increase KMP_DEBUG_BUF_CHARS to %d\n", - chars + 1 ); - fflush( __kmp_stderr ); - #endif - __kmp_debug_buf_warn_chars = chars + 1; - } - /* terminate string if overflow occurred */ - db[ __kmp_debug_buf_chars - 2 ] = '\n'; - db[ __kmp_debug_buf_chars - 1 ] = '\0'; - } - } else { - #if KMP_OS_WINDOWS - DWORD count; - #ifdef KMP_DEBUG_PIDS - __kmp_str_buf_print( &__kmp_console_buf, "pid=%d: ", - (kmp_int32)getpid() ); - #endif - __kmp_str_buf_vprint( &__kmp_console_buf, format, ap ); - WriteFile( - __kmp_stderr, - __kmp_console_buf.str, - __kmp_console_buf.used, - &count, - NULL - ); - __kmp_str_buf_clear( &__kmp_console_buf ); - #else - #ifdef KMP_DEBUG_PIDS - fprintf( __kmp_stderr, "pid=%d: ", (kmp_int32)getpid() ); - #endif - vfprintf( __kmp_stderr, format, ap ); - fflush( __kmp_stderr ); - #endif +void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) { +#if KMP_OS_WINDOWS + if (!__kmp_console_exists) { + __kmp_redirect_output(); + } + if (!__kmp_stderr && __kmp_io == kmp_err) { + return; + } +#ifdef KMP_DEBUG + if (!__kmp_stdout && __kmp_io == kmp_out) { + return; + } +#endif +#endif /* KMP_OS_WINDOWS */ + + if (__kmp_debug_buf && __kmp_debug_buffer != NULL) { + + int dc = (__kmp_debug_buf_atomic ? 
KMP_TEST_THEN_INC32(&__kmp_debug_count) + : __kmp_debug_count++) % + __kmp_debug_buf_lines; + char *db = &__kmp_debug_buffer[dc * __kmp_debug_buf_chars]; + int chars = 0; + +#ifdef KMP_DEBUG_PIDS + chars = KMP_SNPRINTF(db, __kmp_debug_buf_chars, "pid=%d: ", + (kmp_int32)getpid()); +#endif + chars += KMP_VSNPRINTF(db, __kmp_debug_buf_chars, format, ap); + + if (chars + 1 > __kmp_debug_buf_chars) { + if (chars + 1 > __kmp_debug_buf_warn_chars) { +#if KMP_OS_WINDOWS + DWORD count; + __kmp_str_buf_print(&__kmp_console_buf, "OMP warning: Debugging buffer " + "overflow; increase " + "KMP_DEBUG_BUF_CHARS to %d\n", + chars + 1); + WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, + &count, NULL); + __kmp_str_buf_clear(&__kmp_console_buf); +#else + fprintf(__kmp_stderr, "OMP warning: Debugging buffer overflow; " + "increase KMP_DEBUG_BUF_CHARS to %d\n", + chars + 1); + fflush(__kmp_stderr); +#endif + __kmp_debug_buf_warn_chars = chars + 1; + } + /* terminate string if overflow occurred */ + db[__kmp_debug_buf_chars - 2] = '\n'; + db[__kmp_debug_buf_chars - 1] = '\0'; } + } else { +#if KMP_OS_WINDOWS + DWORD count; +#ifdef KMP_DEBUG_PIDS + __kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid()); +#endif + __kmp_str_buf_vprint(&__kmp_console_buf, format, ap); + WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, + &count, NULL); + __kmp_str_buf_clear(&__kmp_console_buf); +#else +#ifdef KMP_DEBUG_PIDS + fprintf(__kmp_stderr, "pid=%d: ", (kmp_int32)getpid()); +#endif + vfprintf(__kmp_stderr, format, ap); + fflush(__kmp_stderr); +#endif + } } -void -__kmp_printf( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); +void __kmp_printf(char const *format, ...) { + va_list ap; + va_start(ap, format); - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, format, ap ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_vprintf(kmp_err, format, ap); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - va_end( ap ); + va_end(ap); } -void -__kmp_printf_no_lock( char const * format, ... ) -{ - va_list ap; - va_start( ap, format ); +void __kmp_printf_no_lock(char const *format, ...) { + va_list ap; + va_start(ap, format); - __kmp_vprintf( kmp_err, format, ap ); + __kmp_vprintf(kmp_err, format, ap); - va_end( ap ); + va_end(ap); } - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_io.h b/openmp/runtime/src/kmp_io.h index a0caa64..7b6e813 100644 --- a/openmp/runtime/src/kmp_io.h +++ b/openmp/runtime/src/kmp_io.h @@ -21,24 +21,20 @@ extern "C" { #endif /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -enum kmp_io { - kmp_out = 0, - kmp_err -}; +enum kmp_io { kmp_out = 0, kmp_err }; -extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ -extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */ +extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ +extern kmp_bootstrap_lock_t + __kmp_console_lock; /* Control console initialization */ -extern void __kmp_vprintf( enum kmp_io __kmp_io, char const * format, va_list ap ); -extern void __kmp_printf( char const * format, ... ); -extern void __kmp_printf_no_lock( char const * format, ... 
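// When __kmp_debug_buf is enabled, __kmp_vprintf() above writes into a ring of
// fixed-size lines: an (optionally atomic) counter is incremented and taken modulo the
// number of lines to pick a slot, and an overflowing line is re-terminated with "\n\0"
// in its last two characters. A reduced sketch of that ring buffer (DemoDebugBuf is a
// hypothetical stand-in, not the runtime's type):
#include <atomic>
#include <cstdio>
#include <vector>

struct DemoDebugBuf {
  int lines;                 // number of slots (cf. __kmp_debug_buf_lines)
  int chars;                 // bytes per slot  (cf. __kmp_debug_buf_chars)
  std::vector<char> storage; // lines * chars bytes of backing store
  std::atomic<int> count{0};

  DemoDebugBuf(int l, int c) : lines(l), chars(c), storage(l * c, '\0') {}

  void append(const char *text) {
    int dc = count.fetch_add(1) % lines; // pick the next slot, wrapping around
    char *db = &storage[dc * chars];
    int written = std::snprintf(db, chars, "%s", text);
    if (written + 1 > chars) { // truncated: keep the slot readable and terminated
      db[chars - 2] = '\n';
      db[chars - 1] = '\0';
    }
  }
};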
); -extern void __kmp_close_console( void ); +extern void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap); +extern void __kmp_printf(char const *format, ...); +extern void __kmp_printf_no_lock(char const *format, ...); +extern void __kmp_close_console(void); #ifdef __cplusplus } #endif #endif /* KMP_IO_H */ - diff --git a/openmp/runtime/src/kmp_itt.cpp b/openmp/runtime/src/kmp_itt.cpp index 56d7e67..dec5990 100644 --- a/openmp/runtime/src/kmp_itt.cpp +++ b/openmp/runtime/src/kmp_itt.cpp @@ -19,145 +19,133 @@ #include "kmp_itt.h" #if KMP_DEBUG - #include "kmp_itt.inl" +#include "kmp_itt.inl" #endif - #if USE_ITT_NOTIFY - kmp_int32 __kmp_barrier_domain_count; - kmp_int32 __kmp_region_domain_count; - __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; - __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; - __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; - kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; - __itt_domain * metadata_domain = NULL; - __itt_string_handle * string_handle_imbl = NULL; - __itt_string_handle * string_handle_loop = NULL; - __itt_string_handle * string_handle_sngl = NULL; - - #include "kmp_version.h" - #include "kmp_i18n.h" - #include "kmp_str.h" +kmp_int32 __kmp_barrier_domain_count; +kmp_int32 __kmp_region_domain_count; +__itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; +__itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; +__itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; +kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; +__itt_domain *metadata_domain = NULL; +__itt_string_handle *string_handle_imbl = NULL; +__itt_string_handle *string_handle_loop = NULL; +__itt_string_handle *string_handle_sngl = NULL; - KMP_BUILD_ASSERT( sizeof( kmp_itt_mark_t ) == sizeof( __itt_mark_type ) ); +#include "kmp_i18n.h" +#include "kmp_str.h" +#include "kmp_version.h" - /* - Previously used warnings: +KMP_BUILD_ASSERT(sizeof(kmp_itt_mark_t) == sizeof(__itt_mark_type)); - KMP_WARNING( IttAllNotifDisabled ); - KMP_WARNING( IttObjNotifDisabled ); - KMP_WARNING( IttMarkNotifDisabled ); - KMP_WARNING( IttUnloadLibFailed, libittnotify ); - */ +/* Previously used warnings: + KMP_WARNING( IttAllNotifDisabled ); + KMP_WARNING( IttObjNotifDisabled ); + KMP_WARNING( IttMarkNotifDisabled ); + KMP_WARNING( IttUnloadLibFailed, libittnotify ); +*/ - kmp_int32 __kmp_itt_prepare_delay = 0; - kmp_bootstrap_lock_t __kmp_itt_debug_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_itt_debug_lock ); +kmp_int32 __kmp_itt_prepare_delay = 0; +kmp_bootstrap_lock_t __kmp_itt_debug_lock = + KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_itt_debug_lock); #endif // USE_ITT_NOTIFY void __kmp_itt_initialize() { - // ITTNotify library is loaded and initialized at first call to any ittnotify function, - // so we do not need to explicitly load it any more. - // Jusr report OMP RTL version to ITTNotify. - - #if USE_ITT_NOTIFY - // Report OpenMP RTL version. 
- kmp_str_buf_t buf; - __itt_mark_type version; - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print( - & buf, - "OMP RTL Version %d.%d.%d", - __kmp_version_major, - __kmp_version_minor, - __kmp_version_build - ); - if ( __itt_api_version_ptr != NULL ) { - __kmp_str_buf_print( & buf, ":%s", __itt_api_version() ); - }; // if - version = __itt_mark_create( buf.str ); - __itt_mark( version, NULL ); - __kmp_str_buf_free( & buf ); - #endif +// ITTNotify library is loaded and initialized at first call to any ittnotify +// function, so we do not need to explicitly load it any more. Just report OMP +// RTL version to ITTNotify. -} // __kmp_itt_initialize +#if USE_ITT_NOTIFY + // Report OpenMP RTL version. + kmp_str_buf_t buf; + __itt_mark_type version; + __kmp_str_buf_init(&buf); + __kmp_str_buf_print(&buf, "OMP RTL Version %d.%d.%d", __kmp_version_major, + __kmp_version_minor, __kmp_version_build); + if (__itt_api_version_ptr != NULL) { + __kmp_str_buf_print(&buf, ":%s", __itt_api_version()); + }; // if + version = __itt_mark_create(buf.str); + __itt_mark(version, NULL); + __kmp_str_buf_free(&buf); +#endif +} // __kmp_itt_initialize void __kmp_itt_destroy() { - #if USE_ITT_NOTIFY - __kmp_itt_fini_ittlib(); - #endif +#if USE_ITT_NOTIFY + __kmp_itt_fini_ittlib(); +#endif } // __kmp_itt_destroy +extern "C" void __itt_error_handler(__itt_error_code err, va_list args) { -extern "C" -void -__itt_error_handler( - __itt_error_code err, - va_list args -) { - - switch ( err ) { - case __itt_error_no_module : { - char const * library = va_arg( args, char const * ); + switch (err) { + case __itt_error_no_module: { + char const *library = va_arg(args, char const *); #if KMP_OS_WINDOWS - int sys_err = va_arg( args, int ); - kmp_msg_t err_code = KMP_SYSERRCODE( sys_err ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), err_code, __kmp_msg_null ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } + int sys_err = va_arg(args, int); + kmp_msg_t err_code = KMP_SYSERRCODE(sys_err); + __kmp_msg(kmp_ms_warning, KMP_MSG(IttLoadLibFailed, library), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } #else - char const * sys_err = va_arg( args, char const * ); - kmp_msg_t err_code = KMP_SYSERRMESG( sys_err ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttLoadLibFailed, library ), err_code, __kmp_msg_null ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } + char const *sys_err = va_arg(args, char const *); + kmp_msg_t err_code = KMP_SYSERRMESG(sys_err); + __kmp_msg(kmp_ms_warning, KMP_MSG(IttLoadLibFailed, library), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } #endif - } break; - case __itt_error_no_symbol : { - char const * library = va_arg( args, char const * ); - char const * symbol = va_arg( args, char const * ); - KMP_WARNING( IttLookupFailed, symbol, library ); - } break; - case __itt_error_unknown_group : { - char const * var = va_arg( args, char const * ); - char const * group = va_arg( args, char const * ); - KMP_WARNING( IttUnknownGroup, var, group ); - } break; - case __itt_error_env_too_long : { - char const * var = va_arg( args, char const * ); - size_t act_len = va_arg( args, size_t ); - size_t max_len = va_arg( args, size_t ); - KMP_WARNING( IttEnvVarTooLong, var, (unsigned long) act_len, (unsigned long) max_len ); - } break; - case __itt_error_cant_read_env : { - char const 
* var = va_arg( args, char const * ); - int sys_err = va_arg( args, int ); - kmp_msg_t err_code = KMP_ERR( sys_err ); - __kmp_msg( kmp_ms_warning, KMP_MSG( CantGetEnvVar, var ), err_code, __kmp_msg_null ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } break; - case __itt_error_system : { - char const * func = va_arg( args, char const * ); - int sys_err = va_arg( args, int ); - kmp_msg_t err_code = KMP_SYSERRCODE( sys_err ); - __kmp_msg( kmp_ms_warning, KMP_MSG( IttFunctionError, func ), err_code, __kmp_msg_null ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } break; - default : { - KMP_WARNING( IttUnknownError, err ); - }; - }; // switch - + } break; + case __itt_error_no_symbol: { + char const *library = va_arg(args, char const *); + char const *symbol = va_arg(args, char const *); + KMP_WARNING(IttLookupFailed, symbol, library); + } break; + case __itt_error_unknown_group: { + char const *var = va_arg(args, char const *); + char const *group = va_arg(args, char const *); + KMP_WARNING(IttUnknownGroup, var, group); + } break; + case __itt_error_env_too_long: { + char const *var = va_arg(args, char const *); + size_t act_len = va_arg(args, size_t); + size_t max_len = va_arg(args, size_t); + KMP_WARNING(IttEnvVarTooLong, var, (unsigned long)act_len, + (unsigned long)max_len); + } break; + case __itt_error_cant_read_env: { + char const *var = va_arg(args, char const *); + int sys_err = va_arg(args, int); + kmp_msg_t err_code = KMP_ERR(sys_err); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantGetEnvVar, var), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + } break; + case __itt_error_system: { + char const *func = va_arg(args, char const *); + int sys_err = va_arg(args, int); + kmp_msg_t err_code = KMP_SYSERRCODE(sys_err); + __kmp_msg(kmp_ms_warning, KMP_MSG(IttFunctionError, func), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + } break; + default: { KMP_WARNING(IttUnknownError, err); }; + }; // switch } // __itt_error_handler #endif /* USE_ITT_BUILD */ diff --git a/openmp/runtime/src/kmp_itt.h b/openmp/runtime/src/kmp_itt.h index 286023d..92dacd6 100644 --- a/openmp/runtime/src/kmp_itt.h +++ b/openmp/runtime/src/kmp_itt.h @@ -24,104 +24,121 @@ #include "legacy/ittnotify.h" #if KMP_DEBUG - #define __kmp_inline // Turn off inlining in debug mode. +#define __kmp_inline // Turn off inlining in debug mode. #else - #define __kmp_inline static inline +#define __kmp_inline static inline #endif #if USE_ITT_NOTIFY - extern kmp_int32 __kmp_itt_prepare_delay; -# ifdef __cplusplus - extern "C" void __kmp_itt_fini_ittlib(void); -# else - extern void __kmp_itt_fini_ittlib(void); -# endif +extern kmp_int32 __kmp_itt_prepare_delay; +#ifdef __cplusplus +extern "C" void __kmp_itt_fini_ittlib(void); +#else +extern void __kmp_itt_fini_ittlib(void); +#endif #endif -// Simplify the handling of an argument that is only required when USE_ITT_BUILD is enabled. -#define USE_ITT_BUILD_ARG(x) ,x +// Simplify the handling of an argument that is only required when USE_ITT_BUILD +// is enabled. 
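// USE_ITT_BUILD_ARG(x), defined just below, expands to ", x" when ITT support is
// compiled in, so an extra parameter (and the matching argument at call sites) can be
// spliced into a signature without sprinkling #if blocks everywhere. A reduced sketch of
// the same trick with a hypothetical DEMO_TRACE_ARG macro:
#include <cstdio>

#define DEMO_USE_TRACE 1

#if DEMO_USE_TRACE
#define DEMO_TRACE_ARG(x) , x
#else
#define DEMO_TRACE_ARG(x)
#endif

// With tracing on this declares demo_barrier(int gtid, void *itt_sync_obj);
// with tracing off it declares demo_barrier(int gtid).
static void demo_barrier(int gtid DEMO_TRACE_ARG(void *itt_sync_obj)) {
#if DEMO_USE_TRACE
  std::printf("barrier: gtid=%d sync=%p\n", gtid, itt_sync_obj);
#else
  std::printf("barrier: gtid=%d\n", gtid);
#endif
}

int main() {
  void *obj = nullptr;
  demo_barrier(0 DEMO_TRACE_ARG(obj)); // the comma comes from the macro expansion
  return 0;
}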
+#define USE_ITT_BUILD_ARG(x) , x void __kmp_itt_initialize(); void __kmp_itt_destroy(); -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // New stuff for reporting high-level constructs. -// ------------------------------------------------------------------------------------------------- // Note the naming convention: // __kmp_itt_xxxing() function should be called before action, while // __kmp_itt_xxxed() function should be called after action. // --- Parallel region reporting --- -__kmp_inline void __kmp_itt_region_forking( int gtid, int team_size, int barriers ); // Master only, before forking threads. -__kmp_inline void __kmp_itt_region_joined( int gtid ); // Master only, after joining threads. - // (*) Note: A thread may execute tasks after this point, though. +__kmp_inline void +__kmp_itt_region_forking(int gtid, int team_size, + int barriers); // Master only, before forking threads. +__kmp_inline void +__kmp_itt_region_joined(int gtid); // Master only, after joining threads. +// (*) Note: A thread may execute tasks after this point, though. // --- Frame reporting --- -// region = 0 - no regions, region = 1 - parallel, region = 2 - serialized parallel -__kmp_inline void __kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t *loc, int team_size, int region = 0 ); +// region=0: no regions, region=1: parallel, region=2: serialized parallel +__kmp_inline void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, + __itt_timestamp end, int imbalance, + ident_t *loc, int team_size, + int region = 0); // --- Metadata reporting --- -// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated wait time value, reduction -if this is a reduction barrier -__kmp_inline void __kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ); -// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); iterations - loop trip count, chunk - chunk size -__kmp_inline void __kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ); -__kmp_inline void __kmp_itt_metadata_single( ident_t * loc ); +// begin/end - begin/end timestamps of a barrier frame, imbalance - aggregated +// wait time value, reduction -if this is a reduction barrier +__kmp_inline void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin, + kmp_uint64 end, + kmp_uint64 imbalance, + kmp_uint64 reduction); +// sched_type: 0 - static, 1 - dynamic, 2 - guided, 3 - custom (all others); +// iterations - loop trip count, chunk - chunk size +__kmp_inline void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, + kmp_uint64 iterations, + kmp_uint64 chunk); +__kmp_inline void __kmp_itt_metadata_single(ident_t *loc); // --- Barrier reporting --- -__kmp_inline void * __kmp_itt_barrier_object( int gtid, int bt, int set_name = 0, int delta = 0 ); -__kmp_inline void __kmp_itt_barrier_starting( int gtid, void * object ); -__kmp_inline void __kmp_itt_barrier_middle( int gtid, void * object ); -__kmp_inline void __kmp_itt_barrier_finished( int gtid, void * object ); +__kmp_inline void *__kmp_itt_barrier_object(int gtid, int bt, int set_name = 0, + int delta = 0); +__kmp_inline void __kmp_itt_barrier_starting(int gtid, void *object); +__kmp_inline void __kmp_itt_barrier_middle(int gtid, void *object); +__kmp_inline void 
__kmp_itt_barrier_finished(int gtid, void *object); // --- Taskwait reporting --- -__kmp_inline void * __kmp_itt_taskwait_object( int gtid ); -__kmp_inline void __kmp_itt_taskwait_starting( int gtid, void * object ); -__kmp_inline void __kmp_itt_taskwait_finished( int gtid, void * object ); +__kmp_inline void *__kmp_itt_taskwait_object(int gtid); +__kmp_inline void __kmp_itt_taskwait_starting(int gtid, void *object); +__kmp_inline void __kmp_itt_taskwait_finished(int gtid, void *object); // --- Task reporting --- -__kmp_inline void __kmp_itt_task_starting( void * object ); -__kmp_inline void __kmp_itt_task_finished( void * object ); +__kmp_inline void __kmp_itt_task_starting(void *object); +__kmp_inline void __kmp_itt_task_finished(void *object); // --- Lock reporting --- #if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t * ); +__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock, + const ident_t *); #else -__kmp_inline void __kmp_itt_lock_creating( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_creating(kmp_user_lock_p lock); #endif -__kmp_inline void __kmp_itt_lock_acquiring( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_acquired( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_releasing( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_cancelled( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_lock_destroyed( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_lock_acquiring(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_lock_acquired(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_lock_releasing(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_lock_cancelled(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_lock_destroyed(kmp_user_lock_p lock); // --- Critical reporting --- #if KMP_USE_DYNAMIC_LOCK -__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t * ); +__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock, + const ident_t *); #else -__kmp_inline void __kmp_itt_critical_creating( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_critical_creating(kmp_user_lock_p lock); #endif -__kmp_inline void __kmp_itt_critical_acquiring( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_acquired( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_releasing( kmp_user_lock_p lock ); -__kmp_inline void __kmp_itt_critical_destroyed( kmp_user_lock_p lock ); +__kmp_inline void __kmp_itt_critical_acquiring(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_critical_acquired(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_critical_releasing(kmp_user_lock_p lock); +__kmp_inline void __kmp_itt_critical_destroyed(kmp_user_lock_p lock); // --- Single reporting --- -__kmp_inline void __kmp_itt_single_start( int gtid ); -__kmp_inline void __kmp_itt_single_end( int gtid ); +__kmp_inline void __kmp_itt_single_start(int gtid); +__kmp_inline void __kmp_itt_single_end(int gtid); // --- Ordered reporting --- -__kmp_inline void __kmp_itt_ordered_init( int gtid ); -__kmp_inline void __kmp_itt_ordered_prep( int gtid ); -__kmp_inline void __kmp_itt_ordered_start( int gtid ); -__kmp_inline void __kmp_itt_ordered_end( int gtid ); +__kmp_inline void __kmp_itt_ordered_init(int gtid); +__kmp_inline void __kmp_itt_ordered_prep(int gtid); +__kmp_inline void __kmp_itt_ordered_start(int gtid); +__kmp_inline void __kmp_itt_ordered_end(int gtid); // --- Threads reporting --- -__kmp_inline void __kmp_itt_thread_ignore(); -__kmp_inline 
void __kmp_itt_thread_name( int gtid ); +__kmp_inline void __kmp_itt_thread_ignore(); +__kmp_inline void __kmp_itt_thread_name(int gtid); // --- System objects --- -__kmp_inline void __kmp_itt_system_object_created( void * object, char const * name ); +__kmp_inline void __kmp_itt_system_object_created(void *object, + char const *name); // --- Stack stitching --- __kmp_inline __itt_caller __kmp_itt_stack_caller_create(void); @@ -129,184 +146,189 @@ __kmp_inline void __kmp_itt_stack_caller_destroy(__itt_caller); __kmp_inline void __kmp_itt_stack_callee_enter(__itt_caller); __kmp_inline void __kmp_itt_stack_callee_leave(__itt_caller); -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Old stuff for reporting low-level internal synchronization. -// ------------------------------------------------------------------------------------------------- #if USE_ITT_NOTIFY - /* - * Support for SSC marks, which are used by SDE - * http://software.intel.com/en-us/articles/intel-software-development-emulator - * to mark points in instruction traces that represent spin-loops and are - * therefore uninteresting when collecting traces for architecture simulation. - */ - #ifndef INCLUDE_SSC_MARKS - # define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) - #endif - - /* Linux 64 only for now */ - #if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) - // Portable (at least for gcc and icc) code to insert the necessary instructions - // to set %ebx and execute the unlikely no-op. - #if defined( __INTEL_COMPILER ) - # define INSERT_SSC_MARK(tag) __SSC_MARK(tag) - #else - # define INSERT_SSC_MARK(tag) \ - __asm__ __volatile__ ("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag):"%ebx") - #endif - #else - # define INSERT_SSC_MARK(tag) ((void)0) - #endif - - /* Markers for the start and end of regions that represent polling and - * are therefore uninteresting to architectural simulations 0x4376 and - * 0x4377 are arbitrary numbers that should be unique in the space of - * SSC tags, but there is no central issuing authority rather - * randomness is expected to work. - */ - #define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376) - #define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377) - - // Markers for architecture simulation. - // FORKING : Before the master thread forks. - // JOINING : At the start of the join. - // INVOKING : Before the threads invoke microtasks. - // DISPATCH_INIT: At the start of dynamically scheduled loop. - // DISPATCH_NEXT: After claming next iteration of dynamically scheduled loop. - #define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693) - #define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694) - #define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695) - #define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696) - #define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697) - - // The object is an address that associates a specific set of the prepare, acquire, release, - // and cancel operations. - - /* Sync prepare indicates a thread is going to start waiting for another thread - to send a release event. 
This operation should be done just before the thread - begins checking for the existence of the release event */ - - /* Sync cancel indicates a thread is cancelling a wait on another thread anc - continuing execution without waiting for the other thread to release it */ - - /* Sync acquired indicates a thread has received a release event from another - thread and has stopped waiting. This operation must occur only after the release - event is received. */ - - /* Sync release indicates a thread is going to send a release event to another thread - so it will stop waiting and continue execution. This operation must just happen before - the release event. */ - - #define KMP_FSYNC_PREPARE( obj ) __itt_fsync_prepare( (void *)( obj ) ) - #define KMP_FSYNC_CANCEL( obj ) __itt_fsync_cancel( (void *)( obj ) ) - #define KMP_FSYNC_ACQUIRED( obj ) __itt_fsync_acquired( (void *)( obj ) ) - #define KMP_FSYNC_RELEASING( obj ) __itt_fsync_releasing( (void *)( obj ) ) - - /* - In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called with a delay - (and not called at all if waiting time is small). So, in spin loops, do not use - KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before spin loop), - KMP_FSYNC_SPIN_PREPARE() (whithin the spin loop), and KMP_FSYNC_SPIN_ACQUIRED(). - See KMP_WAIT_YIELD() for example. - */ - - #undef KMP_FSYNC_SPIN_INIT - #define KMP_FSYNC_SPIN_INIT( obj, spin ) \ - int sync_iters = 0; \ - if ( __itt_fsync_prepare_ptr ) { \ - if ( obj == NULL ) { \ - obj = spin; \ - } /* if */ \ - } /* if */ \ - SSC_MARK_SPIN_START() - - #undef KMP_FSYNC_SPIN_PREPARE - #define KMP_FSYNC_SPIN_PREPARE( obj ) do { \ - if ( __itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay ) { \ - ++ sync_iters; \ - if ( sync_iters >= __kmp_itt_prepare_delay ) { \ - KMP_FSYNC_PREPARE( (void*) obj ); \ - } /* if */ \ - } /* if */ \ - } while (0) - #undef KMP_FSYNC_SPIN_ACQUIRED - #define KMP_FSYNC_SPIN_ACQUIRED( obj ) do { \ - SSC_MARK_SPIN_END(); \ - if ( sync_iters >= __kmp_itt_prepare_delay ) { \ - KMP_FSYNC_ACQUIRED( (void*) obj ); \ - } /* if */ \ - } while (0) - - /* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.: - KMP_ITT_IGNORE( - ptr = malloc( size ); - ); - */ - #define KMP_ITT_IGNORE( statement ) do { \ - __itt_state_t __itt_state_; \ - if ( __itt_state_get_ptr ) { \ - __itt_state_ = __itt_state_get(); \ - __itt_obj_mode_set( __itt_obj_prop_ignore, __itt_obj_state_set ); \ - } /* if */ \ - { statement } \ - if ( __itt_state_get_ptr ) { \ - __itt_state_set( __itt_state_ ); \ - } /* if */ \ - } while (0) - - const int KMP_MAX_FRAME_DOMAINS = 512; // Maximum number of frame domains to use (maps to - // different OpenMP regions in the user source code). 
- extern kmp_int32 __kmp_barrier_domain_count; - extern kmp_int32 __kmp_region_domain_count; - extern __itt_domain* __kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain* __kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain* __kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; - extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; - extern __itt_domain * metadata_domain; - extern __itt_string_handle * string_handle_imbl; - extern __itt_string_handle * string_handle_loop; - extern __itt_string_handle * string_handle_sngl; +/* Support for SSC marks, which are used by SDE + http://software.intel.com/en-us/articles/intel-software-development-emulator + to mark points in instruction traces that represent spin-loops and are + therefore uninteresting when collecting traces for architecture simulation. + */ +#ifndef INCLUDE_SSC_MARKS +#define INCLUDE_SSC_MARKS (KMP_OS_LINUX && KMP_ARCH_X86_64) +#endif + +/* Linux 64 only for now */ +#if (INCLUDE_SSC_MARKS && KMP_OS_LINUX && KMP_ARCH_X86_64) +// Portable (at least for gcc and icc) code to insert the necessary instructions +// to set %ebx and execute the unlikely no-op. +#if defined(__INTEL_COMPILER) +#define INSERT_SSC_MARK(tag) __SSC_MARK(tag) +#else +#define INSERT_SSC_MARK(tag) \ + __asm__ __volatile__("movl %0, %%ebx; .byte 0x64, 0x67, 0x90 " ::"i"(tag) \ + : "%ebx") +#endif +#else +#define INSERT_SSC_MARK(tag) ((void)0) +#endif + +/* Markers for the start and end of regions that represent polling and are + therefore uninteresting to architectural simulations 0x4376 and 0x4377 are + arbitrary numbers that should be unique in the space of SSC tags, but there + is no central issuing authority rather randomness is expected to work. */ +#define SSC_MARK_SPIN_START() INSERT_SSC_MARK(0x4376) +#define SSC_MARK_SPIN_END() INSERT_SSC_MARK(0x4377) + +// Markers for architecture simulation. +// FORKING : Before the master thread forks. +// JOINING : At the start of the join. +// INVOKING : Before the threads invoke microtasks. +// DISPATCH_INIT: At the start of dynamically scheduled loop. +// DISPATCH_NEXT: After claming next iteration of dynamically scheduled loop. +#define SSC_MARK_FORKING() INSERT_SSC_MARK(0xd693) +#define SSC_MARK_JOINING() INSERT_SSC_MARK(0xd694) +#define SSC_MARK_INVOKING() INSERT_SSC_MARK(0xd695) +#define SSC_MARK_DISPATCH_INIT() INSERT_SSC_MARK(0xd696) +#define SSC_MARK_DISPATCH_NEXT() INSERT_SSC_MARK(0xd697) + +// The object is an address that associates a specific set of the prepare, +// acquire, release, and cancel operations. + +/* Sync prepare indicates a thread is going to start waiting for another thread + to send a release event. This operation should be done just before the + thread begins checking for the existence of the release event */ + +/* Sync cancel indicates a thread is cancelling a wait on another thread and + continuing execution without waiting for the other thread to release it */ + +/* Sync acquired indicates a thread has received a release event from another + thread and has stopped waiting. This operation must occur only after the + release event is received. */ + +/* Sync release indicates a thread is going to send a release event to another + thread so it will stop waiting and continue execution. This operation must + just happen before the release event. 
*/ + +#define KMP_FSYNC_PREPARE(obj) __itt_fsync_prepare((void *)(obj)) +#define KMP_FSYNC_CANCEL(obj) __itt_fsync_cancel((void *)(obj)) +#define KMP_FSYNC_ACQUIRED(obj) __itt_fsync_acquired((void *)(obj)) +#define KMP_FSYNC_RELEASING(obj) __itt_fsync_releasing((void *)(obj)) + +/* In case of waiting in a spin loop, ITT wants KMP_FSYNC_PREPARE() to be called + with a delay (and not called at all if waiting time is small). So, in spin + loops, do not use KMP_FSYNC_PREPARE(), but use KMP_FSYNC_SPIN_INIT() (before + spin loop), KMP_FSYNC_SPIN_PREPARE() (whithin the spin loop), and + KMP_FSYNC_SPIN_ACQUIRED(). See KMP_WAIT_YIELD() for example. */ + +#undef KMP_FSYNC_SPIN_INIT +#define KMP_FSYNC_SPIN_INIT(obj, spin) \ + int sync_iters = 0; \ + if (__itt_fsync_prepare_ptr) { \ + if (obj == NULL) { \ + obj = spin; \ + } /* if */ \ + } /* if */ \ + SSC_MARK_SPIN_START() + +#undef KMP_FSYNC_SPIN_PREPARE +#define KMP_FSYNC_SPIN_PREPARE(obj) \ + do { \ + if (__itt_fsync_prepare_ptr && sync_iters < __kmp_itt_prepare_delay) { \ + ++sync_iters; \ + if (sync_iters >= __kmp_itt_prepare_delay) { \ + KMP_FSYNC_PREPARE((void *)obj); \ + } /* if */ \ + } /* if */ \ + } while (0) +#undef KMP_FSYNC_SPIN_ACQUIRED +#define KMP_FSYNC_SPIN_ACQUIRED(obj) \ + do { \ + SSC_MARK_SPIN_END(); \ + if (sync_iters >= __kmp_itt_prepare_delay) { \ + KMP_FSYNC_ACQUIRED((void *)obj); \ + } /* if */ \ + } while (0) + +/* ITT will not report objects created within KMP_ITT_IGNORE(), e. g.: + KMP_ITT_IGNORE( + ptr = malloc( size ); + ); +*/ +#define KMP_ITT_IGNORE(statement) \ + do { \ + __itt_state_t __itt_state_; \ + if (__itt_state_get_ptr) { \ + __itt_state_ = __itt_state_get(); \ + __itt_obj_mode_set(__itt_obj_prop_ignore, __itt_obj_state_set); \ + } /* if */ \ + { statement } \ + if (__itt_state_get_ptr) { \ + __itt_state_set(__itt_state_); \ + } /* if */ \ + } while (0) + +const int KMP_MAX_FRAME_DOMAINS = + 512; // Maximum number of frame domains to use (maps to +// different OpenMP regions in the user source code). +extern kmp_int32 __kmp_barrier_domain_count; +extern kmp_int32 __kmp_region_domain_count; +extern __itt_domain *__kmp_itt_barrier_domains[KMP_MAX_FRAME_DOMAINS]; +extern __itt_domain *__kmp_itt_region_domains[KMP_MAX_FRAME_DOMAINS]; +extern __itt_domain *__kmp_itt_imbalance_domains[KMP_MAX_FRAME_DOMAINS]; +extern kmp_int32 __kmp_itt_region_team_size[KMP_MAX_FRAME_DOMAINS]; +extern __itt_domain *metadata_domain; +extern __itt_string_handle *string_handle_imbl; +extern __itt_string_handle *string_handle_loop; +extern __itt_string_handle *string_handle_sngl; #else // Null definitions of the synchronization tracing functions. -# define KMP_FSYNC_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_CANCEL( obj ) ((void)0) -# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) -# define KMP_FSYNC_RELEASING( obj ) ((void)0) +#define KMP_FSYNC_PREPARE(obj) ((void)0) +#define KMP_FSYNC_CANCEL(obj) ((void)0) +#define KMP_FSYNC_ACQUIRED(obj) ((void)0) +#define KMP_FSYNC_RELEASING(obj) ((void)0) -# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) -# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) +#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0) +#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0) +#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0) -# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) +#define KMP_ITT_IGNORE(stmt) \ + do { \ + stmt \ + } while (0) #endif // USE_ITT_NOTIFY -#if ! KMP_DEBUG - // In release mode include definitions of inline functions. 
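For reference, the KMP_FSYNC_SPIN_* macros above are meant to bracket a polling loop, roughly as in the sketch below (a hypothetical helper, not code from this patch; the runtime's canonical user is KMP_WAIT_YIELD(), as the comment notes):

// Minimal sketch of the spin-wait protocol described above. The helper name
// and the polled flag are hypothetical; obj is the ITT sync object.
static inline void example_spin_wait(void *obj, volatile int *flag) {
  KMP_FSYNC_SPIN_INIT(obj, (void *)flag); // declares sync_iters; may substitute flag for obj
  while (*flag == 0) {
    KMP_FSYNC_SPIN_PREPARE(obj); // issues the "prepare" only after __kmp_itt_prepare_delay polls
    // a pause/yield between polls would go here
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj); // ends the SSC spin region; reports the acquire if a prepare was issued
}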
- #include "kmp_itt.inl" +#if !KMP_DEBUG +// In release mode include definitions of inline functions. +#include "kmp_itt.inl" #endif #endif // KMP_ITT_H -#else /* USE_ITT_BUILD */ +#else /* USE_ITT_BUILD */ // Null definitions of the synchronization tracing functions. // If USE_ITT_BULID is not enabled, USE_ITT_NOTIFY cannot be either. // By defining these we avoid unpleasant ifdef tests in many places. -# define KMP_FSYNC_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_CANCEL( obj ) ((void)0) -# define KMP_FSYNC_ACQUIRED( obj ) ((void)0) -# define KMP_FSYNC_RELEASING( obj ) ((void)0) +#define KMP_FSYNC_PREPARE(obj) ((void)0) +#define KMP_FSYNC_CANCEL(obj) ((void)0) +#define KMP_FSYNC_ACQUIRED(obj) ((void)0) +#define KMP_FSYNC_RELEASING(obj) ((void)0) -# define KMP_FSYNC_SPIN_INIT( obj, spin ) ((void)0) -# define KMP_FSYNC_SPIN_PREPARE( obj ) ((void)0) -# define KMP_FSYNC_SPIN_ACQUIRED( obj ) ((void)0) +#define KMP_FSYNC_SPIN_INIT(obj, spin) ((void)0) +#define KMP_FSYNC_SPIN_PREPARE(obj) ((void)0) +#define KMP_FSYNC_SPIN_ACQUIRED(obj) ((void)0) -# define KMP_ITT_IGNORE(stmt ) do { stmt } while (0) +#define KMP_ITT_IGNORE(stmt) \ + do { \ + stmt \ + } while (0) -# define USE_ITT_BUILD_ARG(x) +#define USE_ITT_BUILD_ARG(x) #endif /* USE_ITT_BUILD */ diff --git a/openmp/runtime/src/kmp_itt.inl b/openmp/runtime/src/kmp_itt.inl index fbf8490..258a19e 100644 --- a/openmp/runtime/src/kmp_itt.inl +++ b/openmp/runtime/src/kmp_itt.inl @@ -14,1134 +14,1030 @@ //===----------------------------------------------------------------------===// -// Inline function definitions. This file should be included into kmp_itt.h file for prodiction -// build (to let compliler inline functions) or into kmp_itt.c file for debug build (to reduce -// the number of files to recompile and save build time). - +// Inline function definitions. This file should be included into kmp_itt.h file +// for production build (to let compliler inline functions) or into kmp_itt.c +// file for debug build (to reduce the number of files to recompile and save +// build time). #include "kmp.h" #include "kmp_str.h" #if KMP_ITT_DEBUG - extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; - #define KMP_ITT_DEBUG_LOCK() { \ - __kmp_acquire_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } - #define KMP_ITT_DEBUG_PRINT( ... ) { \ - fprintf( stderr, "#%02d: ", __kmp_get_gtid() ); \ - fprintf( stderr, __VA_ARGS__ ); \ - fflush( stderr ); \ - __kmp_release_bootstrap_lock( & __kmp_itt_debug_lock ); \ - } +extern kmp_bootstrap_lock_t __kmp_itt_debug_lock; +#define KMP_ITT_DEBUG_LOCK() \ + { __kmp_acquire_bootstrap_lock(&__kmp_itt_debug_lock); } +#define KMP_ITT_DEBUG_PRINT(...) \ + { \ + fprintf(stderr, "#%02d: ", __kmp_get_gtid()); \ + fprintf(stderr, __VA_ARGS__); \ + fflush(stderr); \ + __kmp_release_bootstrap_lock(&__kmp_itt_debug_lock); \ + } #else - #define KMP_ITT_DEBUG_LOCK() - #define KMP_ITT_DEBUG_PRINT( ... ) +#define KMP_ITT_DEBUG_LOCK() +#define KMP_ITT_DEBUG_PRINT(...) #endif // KMP_ITT_DEBUG -// Ensure that the functions are static if they're supposed to be -// being inlined. Otherwise they cannot be used in more than one file, -// since there will be multiple definitions. +// Ensure that the functions are static if they're supposed to be being inlined. +// Otherwise they cannot be used in more than one file, since there will be +// multiple definitions. #if KMP_DEBUG -# define LINKAGE +#define LINKAGE #else -# define LINKAGE static inline +#define LINKAGE static inline #endif -// ZCA interface used by Intel(R) Inspector. 
Intel(R) Parallel Amplifier uses this -// API to support user-defined synchronization primitives, but does not use ZCA; -// it would be safe to turn this off until wider support becomes available. +// ZCA interface used by Intel(R) Inspector. Intel(R) Parallel Amplifier uses +// this API to support user-defined synchronization primitives, but does not use +// ZCA; it would be safe to turn this off until wider support becomes available. #if USE_ITT_ZCA #ifdef __INTEL_COMPILER -# if __INTEL_COMPILER >= 1200 -# undef __itt_sync_acquired -# undef __itt_sync_releasing -# define __itt_sync_acquired(addr) __notify_zc_intrinsic((char *)"sync_acquired", addr) -# define __itt_sync_releasing(addr) __notify_intrinsic((char *)"sync_releasing", addr) -# endif +#if __INTEL_COMPILER >= 1200 +#undef __itt_sync_acquired +#undef __itt_sync_releasing +#define __itt_sync_acquired(addr) \ + __notify_zc_intrinsic((char *)"sync_acquired", addr) +#define __itt_sync_releasing(addr) \ + __notify_intrinsic((char *)"sync_releasing", addr) +#endif #endif #endif -static kmp_bootstrap_lock_t metadata_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( metadata_lock ); - -/* - ------------------------------------------------------------------------------------------------ - Parallel region reporting. - - * __kmp_itt_region_forking should be called by master thread of a team. Exact moment of - call does not matter, but it should be completed before any thread of this team calls - __kmp_itt_region_starting. - * __kmp_itt_region_starting should be called by each thread of a team just before entering - parallel region body. - * __kmp_itt_region_finished should be called by each thread of a team right after returning - from parallel region body. - * __kmp_itt_region_joined should be called by master thread of a team, after all threads - called __kmp_itt_region_finished. - - Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can execute some more - user code -- such a thread can execute tasks. +static kmp_bootstrap_lock_t metadata_lock = + KMP_BOOTSTRAP_LOCK_INITIALIZER(metadata_lock); - Note: The overhead of logging region_starting and region_finished in each thread is too large, - so these calls are not used. +/* Parallel region reporting. + * __kmp_itt_region_forking should be called by master thread of a team. + Exact moment of call does not matter, but it should be completed before any + thread of this team calls __kmp_itt_region_starting. + * __kmp_itt_region_starting should be called by each thread of a team just + before entering parallel region body. + * __kmp_itt_region_finished should be called by each thread of a team right + after returning from parallel region body. + * __kmp_itt_region_joined should be called by master thread of a team, after + all threads called __kmp_itt_region_finished. - ------------------------------------------------------------------------------------------------ -*/ + Note: Thread waiting at join barrier (after __kmp_itt_region_finished) can + execute some more user code -- such a thread can execute tasks. -// ------------------------------------------------------------------------------------------------- + Note: The overhead of logging region_starting and region_finished in each + thread is too large, so these calls are not used. 
*/ -LINKAGE void -__kmp_itt_region_forking( int gtid, int team_size, int barriers ) { +LINKAGE void __kmp_itt_region_forking(int gtid, int team_size, int barriers) { #if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc) { - // Use the reserved_2 field to store the index to the region domain. - // Assume that reserved_2 contains zero initially. Since zero is special - // value here, store the index into domain array increased by 1. - if (loc->reserved_2 == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - //if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { - // frm = loc->reserved_2 - 1; // get value saved by other thread for same loc - //} // AC: this block is to replace next unsynchronized line - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. - loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$parallel@[file:][:]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - if( barriers ) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __kmp_str_free( &buff ); - // Save the barrier frame index to the high two bytes. - loc->reserved_2 |= (frm + 1) << 16; - } - } - __kmp_str_loc_free( &str_loc ); - __itt_frame_begin_v3(__kmp_itt_region_domains[ frm ], NULL); - } - } else { // Region domain exists for this location - // Check if team size was changed. 
Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); - } else { // Team size was not changed. Use existing domain. - __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + kmp_team_t *team = __kmp_team_from_gtid(gtid); + if (team->t.t_active_level > 1) { + // The frame notifications are only supported for the outermost teams. + return; + } + ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident; + if (loc) { + // Use the reserved_2 field to store the index to the region domain. + // Assume that reserved_2 contains zero initially. Since zero is special + // value here, store the index into domain array increased by 1. + if (loc->reserved_2 == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = + KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count + return; // loc->reserved_2 is still 0 + } + // if (!KMP_COMPARE_AND_STORE_ACQ32( &loc->reserved_2, 0, frm + 1 )) { + // frm = loc->reserved_2 - 1; // get value saved by other thread + // for same loc + //} // AC: this block is to replace next unsynchronized line + + // We need to save indexes for both region and barrier frames. We'll use + // loc->reserved_2 field but put region index to the low two bytes and + // barrier indexes to the high two bytes. It is OK because + // KMP_MAX_FRAME_DOMAINS = 512. + loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$parallel@[file:][:]" + const char *buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + + __kmp_str_free(&buff); + if (barriers) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( + &__kmp_barrier_domain_count); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( + &__kmp_barrier_domain_count); // revert the count + return; // loc->reserved_2 is still 0 } + const char *buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, + str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + __kmp_str_free(&buff); + // Save the barrier frame index to the high two bytes. 
+ loc->reserved_2 |= (frm + 1) << 16; + } } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm beg] gtid=%d, idx=%x, loc:%p\n", - gtid, loc->reserved_2, loc ); + __kmp_str_loc_free(&str_loc); + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } + } else { // Region domain exists for this location + // Check if team size was changed. Then create new region domain for this + // location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if (__kmp_itt_region_team_size[frm] != team_size) { + const char *buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } else { // Team size was not changed. Use existing domain. + __itt_frame_begin_v3(__kmp_itt_region_domains[frm], NULL); + } } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT("[frm beg] gtid=%d, idx=%x, loc:%p\n", gtid, + loc->reserved_2, loc); + } #endif } // __kmp_itt_region_forking -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_frame_submit( int gtid, __itt_timestamp begin, __itt_timestamp end, int imbalance, ident_t * loc, int team_size, int region ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_frame_submit(int gtid, __itt_timestamp begin, + __itt_timestamp end, int imbalance, + ident_t *loc, int team_size, int region) { #if USE_ITT_NOTIFY - if( region ) { - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - int serialized = ( region == 2 ? 1 : 0 ); - if (team->t.t_active_level + serialized > 1) - { - // The frame notifications are only supported for the outermost teams. - return; + if (region) { + kmp_team_t *team = __kmp_team_from_gtid(gtid); + int serialized = (region == 2 ? 1 : 0); + if (team->t.t_active_level + serialized > 1) { + // The frame notifications are only supported for the outermost teams. + return; + } + // Check region domain has not been created before. It's index is saved in + // the low two bytes. + if ((loc->reserved_2 & 0x0000FFFF) == 0) { + if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = + KMP_TEST_THEN_INC32(&__kmp_region_domain_count); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32(&__kmp_region_domain_count); // revert the count + return; // loc->reserved_2 is still 0 } - //Check region domain has not been created before. It's index is saved in the low two bytes. - if ((loc->reserved_2 & 0x0000FFFF) == 0) { - if (__kmp_region_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_region_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_region_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - - // We need to save indexes for both region and barrier frames. We'll use loc->reserved_2 - // field but put region index to the low two bytes and barrier indexes to the high - // two bytes. It is OK because KMP_MAX_FRAME_DOMAINS = 512. 
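Put differently, loc->reserved_2 packs two 16-bit indexes, with zero meaning "no index assigned yet"; a schematic restatement of the decoding used in this file (the helper names are hypothetical, ident_t comes from kmp.h):

// Low 16 bits: region frame index + 1; high 16 bits: barrier frame index + 1.
static inline int example_region_frm(ident_t *loc) {
  return (loc->reserved_2 & 0x0000FFFF) - 1;
}
static inline int example_barrier_frm(ident_t *loc) {
  return (loc->reserved_2 >> 16) - 1;
}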
- loc->reserved_2 |= (frm + 1); // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$parallel:team_size@[file:][:]" - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } else { // Region domain exists for this location - // Check if team size was changed. Then create new region domain for this location - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if( __kmp_itt_region_team_size[frm] != team_size ) { - const char * buff = NULL; - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", - str_loc.func, team_size, str_loc.file, - str_loc.line, str_loc.col); - - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_region_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - - __kmp_str_free( &buff ); - __kmp_str_loc_free( &str_loc ); - __kmp_itt_region_team_size[frm] = team_size; - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } else { // Team size was not changed. Use existing domain. - __itt_frame_submit_v3(__kmp_itt_region_domains[ frm ], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, region, loc, begin, end ); - return; - } else { // called for barrier reporting - if (loc) { - if ((loc->reserved_2 & 0xFFFF0000) == 0) { - if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { - int frm = KMP_TEST_THEN_INC32( & __kmp_barrier_domain_count ); // get "old" value - if (frm >= KMP_MAX_FRAME_DOMAINS) { - KMP_TEST_THEN_DEC32( & __kmp_barrier_domain_count ); // revert the count - return; // loc->reserved_2 is still 0 - } - // Save the barrier frame index to the high two bytes. 
- loc->reserved_2 |= (frm + 1) << 16; // save "new" value - - // Transform compiler-generated region location into the format - // that the tools more or less standardized on: - // "$omp$frame@[file:][:]" - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - if( imbalance ) { - const char * buff_imb = NULL; - buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", - str_loc.func, team_size, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_imbalance_domains[ frm ] = __itt_domain_create( buff_imb ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff_imb ); - } else { - const char * buff = NULL; - buff = __kmp_str_format("%s$omp$barrier@%s:%d", - str_loc.func, str_loc.file, str_loc.col); - __itt_suppress_push(__itt_suppress_memory_errors); - __kmp_itt_barrier_domains[ frm ] = __itt_domain_create( buff ); - __itt_suppress_pop(); - __itt_frame_submit_v3(__kmp_itt_barrier_domains[ frm ], NULL, begin, end ); - __kmp_str_free( &buff ); - } - __kmp_str_loc_free( &str_loc ); - } - } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS - if( imbalance ) { - __itt_frame_submit_v3(__kmp_itt_imbalance_domains[ (loc->reserved_2 >> 16) - 1 ], NULL, begin, end ); - } else { - __itt_frame_submit_v3(__kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, begin, end ); - } - } - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", - gtid, loc->reserved_2, loc, begin, end ); + + // We need to save indexes for both region and barrier frames. We'll use + // loc->reserved_2 field but put region index to the low two bytes and + // barrier indexes to the high two bytes. It is OK because + // KMP_MAX_FRAME_DOMAINS = 512. + loc->reserved_2 |= (frm + 1); // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$parallel:team_size@[file:][:]" + const char *buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); + } + } else { // Region domain exists for this location + // Check if team size was changed. Then create new region domain for this + // location + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if (__kmp_itt_region_team_size[frm] != team_size) { + const char *buff = NULL; + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + buff = __kmp_str_format("%s$omp$parallel:%d@%s:%d:%d", str_loc.func, + team_size, str_loc.file, str_loc.line, + str_loc.col); + + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_region_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + + __kmp_str_free(&buff); + __kmp_str_loc_free(&str_loc); + __kmp_itt_region_team_size[frm] = team_size; + __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); + } else { // Team size was not changed. Use existing domain. 
+ __itt_frame_submit_v3(__kmp_itt_region_domains[frm], NULL, begin, end); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( + "[reg sub] gtid=%d, idx=%x, region:%d, loc:%p, beg:%llu, end:%llu\n", + gtid, loc->reserved_2, region, loc, begin, end); + return; + } else { // called for barrier reporting + if (loc) { + if ((loc->reserved_2 & 0xFFFF0000) == 0) { + if (__kmp_barrier_domain_count < KMP_MAX_FRAME_DOMAINS) { + int frm = KMP_TEST_THEN_INC32( + &__kmp_barrier_domain_count); // get "old" value + if (frm >= KMP_MAX_FRAME_DOMAINS) { + KMP_TEST_THEN_DEC32( + &__kmp_barrier_domain_count); // revert the count + return; // loc->reserved_2 is still 0 + } + // Save the barrier frame index to the high two bytes. + loc->reserved_2 |= (frm + 1) << 16; // save "new" value + + // Transform compiler-generated region location into the format + // that the tools more or less standardized on: + // "$omp$frame@[file:][:]" + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + if (imbalance) { + const char *buff_imb = NULL; + buff_imb = __kmp_str_format("%s$omp$barrier-imbalance:%d@%s:%d", + str_loc.func, team_size, str_loc.file, + str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_imbalance_domains[frm] = __itt_domain_create(buff_imb); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_imbalance_domains[frm], NULL, begin, + end); + __kmp_str_free(&buff_imb); + } else { + const char *buff = NULL; + buff = __kmp_str_format("%s$omp$barrier@%s:%d", str_loc.func, + str_loc.file, str_loc.col); + __itt_suppress_push(__itt_suppress_memory_errors); + __kmp_itt_barrier_domains[frm] = __itt_domain_create(buff); + __itt_suppress_pop(); + __itt_frame_submit_v3(__kmp_itt_barrier_domains[frm], NULL, begin, + end); + __kmp_str_free(&buff); + } + __kmp_str_loc_free(&str_loc); } + } else { // if it is not 0 then it should be <= KMP_MAX_FRAME_DOMAINS + if (imbalance) { + __itt_frame_submit_v3( + __kmp_itt_imbalance_domains[(loc->reserved_2 >> 16) - 1], NULL, + begin, end); + } else { + __itt_frame_submit_v3( + __kmp_itt_barrier_domains[(loc->reserved_2 >> 16) - 1], NULL, + begin, end); + } + } + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT( + "[frm sub] gtid=%d, idx=%x, loc:%p, beg:%llu, end:%llu\n", gtid, + loc->reserved_2, loc, begin, end); } + } #endif } // __kmp_itt_frame_submit -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_imbalance( int gtid, kmp_uint64 begin, kmp_uint64 end, kmp_uint64 imbalance, kmp_uint64 reduction ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_metadata_imbalance(int gtid, kmp_uint64 begin, + kmp_uint64 end, kmp_uint64 imbalance, + kmp_uint64 reduction) { #if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - string_handle_imbl = __itt_string_handle_create( "omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create( "omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create( "omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); + if (metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock(&metadata_lock); + if (metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = 
__itt_domain_create("OMP Metadata"); + string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); + string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); + string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); + __itt_suppress_pop(); } + __kmp_release_bootstrap_lock(&metadata_lock); + } - kmp_uint64 imbalance_data[ 4 ]; - imbalance_data[ 0 ] = begin; - imbalance_data[ 1 ] = end; - imbalance_data[ 2 ] = imbalance; - imbalance_data[ 3 ] = reduction; + kmp_uint64 imbalance_data[4]; + imbalance_data[0] = begin; + imbalance_data[1] = end; + imbalance_data[2] = imbalance; + imbalance_data[3] = reduction; - __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl, __itt_metadata_u64, 4, imbalance_data); + __itt_metadata_add(metadata_domain, __itt_null, string_handle_imbl, + __itt_metadata_u64, 4, imbalance_data); #endif } // __kmp_itt_metadata_imbalance -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_loop( ident_t * loc, kmp_uint64 sched_type, kmp_uint64 iterations, kmp_uint64 chunk ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_metadata_loop(ident_t *loc, kmp_uint64 sched_type, + kmp_uint64 iterations, kmp_uint64 chunk) { #if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - string_handle_imbl = __itt_string_handle_create( "omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create( "omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create( "omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); + if (metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock(&metadata_lock); + if (metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create("OMP Metadata"); + string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); + string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); + string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); + __itt_suppress_pop(); } + __kmp_release_bootstrap_lock(&metadata_lock); + } - // Parse line and column from psource string: ";file;func;line;col;;" - char * s_line; - char * s_col; - KMP_DEBUG_ASSERT(loc->psource); + // Parse line and column from psource string: ";file;func;line;col;;" + char *s_line; + char *s_col; + KMP_DEBUG_ASSERT(loc->psource); #ifdef __cplusplus - s_line = strchr((char*)loc->psource, ';'); + s_line = strchr((char *)loc->psource, ';'); #else - s_line = strchr(loc->psource, ';'); + s_line = strchr(loc->psource, ';'); #endif - KMP_DEBUG_ASSERT(s_line); - s_line = strchr(s_line + 1, ';'); // 2-nd semicolon - KMP_DEBUG_ASSERT(s_line); - s_line = strchr(s_line + 1, ';'); // 3-rd semicolon - KMP_DEBUG_ASSERT(s_line); - s_col = strchr(s_line + 1, ';'); // 4-th semicolon - KMP_DEBUG_ASSERT(s_col); - - kmp_uint64 loop_data[ 5 ]; - loop_data[ 0 ] = atoi(s_line + 1); // read line - loop_data[ 1 ] = atoi(s_col + 1); // read column - loop_data[ 2 ] = sched_type; - loop_data[ 3 ] = iterations; - loop_data[ 4 ] = chunk; - - __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop, __itt_metadata_u64, 5, loop_data); + KMP_DEBUG_ASSERT(s_line); + s_line = strchr(s_line + 
1, ';'); // 2-nd semicolon + KMP_DEBUG_ASSERT(s_line); + s_line = strchr(s_line + 1, ';'); // 3-rd semicolon + KMP_DEBUG_ASSERT(s_line); + s_col = strchr(s_line + 1, ';'); // 4-th semicolon + KMP_DEBUG_ASSERT(s_col); + + kmp_uint64 loop_data[5]; + loop_data[0] = atoi(s_line + 1); // read line + loop_data[1] = atoi(s_col + 1); // read column + loop_data[2] = sched_type; + loop_data[3] = iterations; + loop_data[4] = chunk; + + __itt_metadata_add(metadata_domain, __itt_null, string_handle_loop, + __itt_metadata_u64, 5, loop_data); #endif } // __kmp_itt_metadata_loop -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_metadata_single( ident_t * loc ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_metadata_single(ident_t *loc) { #if USE_ITT_NOTIFY - if( metadata_domain == NULL) { - __kmp_acquire_bootstrap_lock( & metadata_lock ); - if( metadata_domain == NULL) { - __itt_suppress_push(__itt_suppress_memory_errors); - metadata_domain = __itt_domain_create( "OMP Metadata" ); - string_handle_imbl = __itt_string_handle_create( "omp_metadata_imbalance"); - string_handle_loop = __itt_string_handle_create( "omp_metadata_loop"); - string_handle_sngl = __itt_string_handle_create( "omp_metadata_single"); - __itt_suppress_pop(); - } - __kmp_release_bootstrap_lock( & metadata_lock ); + if (metadata_domain == NULL) { + __kmp_acquire_bootstrap_lock(&metadata_lock); + if (metadata_domain == NULL) { + __itt_suppress_push(__itt_suppress_memory_errors); + metadata_domain = __itt_domain_create("OMP Metadata"); + string_handle_imbl = __itt_string_handle_create("omp_metadata_imbalance"); + string_handle_loop = __itt_string_handle_create("omp_metadata_loop"); + string_handle_sngl = __itt_string_handle_create("omp_metadata_single"); + __itt_suppress_pop(); } + __kmp_release_bootstrap_lock(&metadata_lock); + } - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 1 ); - kmp_uint64 single_data[ 2 ]; - single_data[ 0 ] = str_loc.line; - single_data[ 1 ] = str_loc.col; + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 1); + kmp_uint64 single_data[2]; + single_data[0] = str_loc.line; + single_data[1] = str_loc.col; - __kmp_str_loc_free( &str_loc ); + __kmp_str_loc_free(&str_loc); - __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl, __itt_metadata_u64, 2, single_data); + __itt_metadata_add(metadata_domain, __itt_null, string_handle_sngl, + __itt_metadata_u64, 2, single_data); #endif } // __kmp_itt_metadata_single -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_starting( int gtid ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_region_starting(int gtid) { #if USE_ITT_NOTIFY #endif } // __kmp_itt_region_starting -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_finished( int gtid ) { +// ----------------------------------------------------------------------------- +LINKAGE void __kmp_itt_region_finished(int gtid) { #if USE_ITT_NOTIFY #endif } // __kmp_itt_region_finished -// ------------------------------------------------------------------------------------------------- - -LINKAGE void -__kmp_itt_region_joined( int gtid ) { +// ---------------------------------------------------------------------------- +LINKAGE void 
__kmp_itt_region_joined(int gtid) { #if USE_ITT_NOTIFY - kmp_team_t * team = __kmp_team_from_gtid( gtid ); - if (team->t.t_active_level > 1) - { - // The frame notifications are only supported for the outermost teams. - return; - } - ident_t * loc = __kmp_thread_from_gtid( gtid )->th.th_ident; - if (loc && loc->reserved_2) - { - int frm = (loc->reserved_2 & 0x0000FFFF) - 1; - if(frm < KMP_MAX_FRAME_DOMAINS) { - KMP_ITT_DEBUG_LOCK(); - __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); - KMP_ITT_DEBUG_PRINT( "[frm end] gtid=%d, idx=%x, loc:%p\n", - gtid, loc->reserved_2, loc ); - } + kmp_team_t *team = __kmp_team_from_gtid(gtid); + if (team->t.t_active_level > 1) { + // The frame notifications are only supported for the outermost teams. + return; + } + ident_t *loc = __kmp_thread_from_gtid(gtid)->th.th_ident; + if (loc && loc->reserved_2) { + int frm = (loc->reserved_2 & 0x0000FFFF) - 1; + if (frm < KMP_MAX_FRAME_DOMAINS) { + KMP_ITT_DEBUG_LOCK(); + __itt_frame_end_v3(__kmp_itt_region_domains[frm], NULL); + KMP_ITT_DEBUG_PRINT("[frm end] gtid=%d, idx=%x, loc:%p\n", gtid, + loc->reserved_2, loc); } + } #endif } // __kmp_itt_region_joined -/* - ------------------------------------------------------------------------------------------------ - Barriers reporting. - - A barrier consists of two phases: - - 1. Gather -- master waits for arriving of all the worker threads; each worker thread - registers arrival and goes further. - 2. Release -- each worker threads waits until master lets it go; master lets worker threads - go. - - Function should be called by each thread: - - * __kmp_itt_barrier_starting() -- before arriving to the gather phase. - * __kmp_itt_barrier_middle() -- between gather and release phases. - * __kmp_itt_barrier_finished() -- after release phase. - - Note: Call __kmp_itt_barrier_object() before call to __kmp_itt_barrier_starting() and save - result in local variable. __kmp_itt_barrier_object(), being called too late (e. g. after gather - phase) would return itt sync object for the next barrier! - - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have - barrier object or barrier data structure. Barrier is just a counter in team and thread - structures. We could use an address of team structure as an barrier sync object, but ITT wants - different objects for different barriers (even whithin the same team). So let us use - team address as barrier sync object for the first barrier, then increase it by one for the next - barrier, and so on (but wrap it not to use addresses outside of team structure). - - ------------------------------------------------------------------------------------------------ -*/ - -void * -__kmp_itt_barrier_object( - int gtid, - int bt, - int set_name, - int delta // 0 (current barrier) is default value; specify -1 to get previous barrier. -) { - void * object = NULL; +/* Barriers reporting. + + A barrier consists of two phases: + 1. Gather -- master waits for arriving of all the worker threads; each + worker thread registers arrival and goes further. + 2. Release -- each worker threads waits until master lets it go; master lets + worker threads go. + + Function should be called by each thread: + * __kmp_itt_barrier_starting() -- before arriving to the gather phase. + * __kmp_itt_barrier_middle() -- between gather and release phases. + * __kmp_itt_barrier_finished() -- after release phase. + + Note: Call __kmp_itt_barrier_object() before call to + __kmp_itt_barrier_starting() and save result in local variable. 
+ __kmp_itt_barrier_object(), being called too late (e. g. after gather phase) + would return itt sync object for the next barrier! + + ITT need an address (void *) to be specified as a sync object. OpenMP RTL + does not have barrier object or barrier data structure. Barrier is just a + counter in team and thread structures. We could use an address of team + structure as an barrier sync object, but ITT wants different objects for + different barriers (even whithin the same team). So let us use team address + as barrier sync object for the first barrier, then increase it by one for the + next barrier, and so on (but wrap it not to use addresses outside of team + structure). */ + +void *__kmp_itt_barrier_object(int gtid, int bt, int set_name, + int delta // 0 (current barrier) is default + // value; specify -1 to get previous + // barrier. + ) { + void *object = NULL; #if USE_ITT_NOTIFY - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - kmp_team_t * team = thr->th.th_team; - - // NOTE: - // If the function is called from __kmp_fork_barrier, team pointer can be NULL. This "if" - // helps to avoid crash. However, this is not complete solution, and reporting fork/join - // barriers to ITT should be revisited. - - if ( team != NULL ) { - - // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. Divide b_arrived - // by KMP_BARRIER_STATE_BUMP to get plain barrier counter. - kmp_uint64 counter = team->t.t_bar[ bt ].b_arrived / KMP_BARRIER_STATE_BUMP + delta; - // Now form the barrier id. Encode barrier type (bt) in barrier id too, so barriers of - // different types do not have the same ids. - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= bs_last_barrier ); - // This conditon is a must (we would have zero divide otherwise). - KMP_BUILD_ASSERT( sizeof( kmp_team_t ) >= 2 * bs_last_barrier ); - // More strong condition: make sure we have room at least for for two differtent ids - // (for each barrier type). - object = - reinterpret_cast< void * >( - kmp_uintptr_t( team ) - + counter % ( sizeof( kmp_team_t ) / bs_last_barrier ) * bs_last_barrier - + bt - ); - KMP_ITT_DEBUG_LOCK(); - KMP_ITT_DEBUG_PRINT( "[bar obj] type=%d, counter=%lld, object=%p\n", bt, counter, object ); - - if ( set_name ) { - ident_t const * loc = NULL; - char const * src = NULL; - char const * type = "OMP Barrier"; - switch ( bt ) { - case bs_plain_barrier : { - // For plain barrier compiler calls __kmpc_barrier() function, which saves - // location in thr->th.th_ident. - loc = thr->th.th_ident; - // Get the barrier type from flags provided by compiler. 
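Schematically, the per-thread call order prescribed by the Barriers-reporting comment above is as follows (illustrative only; gtid and the barrier type bt are assumed to be in scope, a nonzero set_name asks for the sync object to be named, and delta == 0 selects the current barrier):

void *obj = __kmp_itt_barrier_object(gtid, bt, /* set_name */ 1, /* delta */ 0);
__kmp_itt_barrier_starting(gtid, obj); // before arriving at the gather phase
// gather phase: workers register arrival, master waits for all of them
__kmp_itt_barrier_middle(gtid, obj); // between the gather and release phases
// release phase: master lets the worker threads go
__kmp_itt_barrier_finished(gtid, obj); // after the release phase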
- kmp_int32 expl = 0; - kmp_uint32 impl = 0; - if ( loc != NULL ) { - src = loc->psource; - expl = ( loc->flags & KMP_IDENT_BARRIER_EXPL ) != 0; - impl = ( loc->flags & KMP_IDENT_BARRIER_IMPL ) != 0; - }; // if - if ( impl ) { - switch ( loc->flags & KMP_IDENT_BARRIER_IMPL_MASK ) { - case KMP_IDENT_BARRIER_IMPL_FOR : { - type = "OMP For Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SECTIONS : { - type = "OMP Sections Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_SINGLE : { - type = "OMP Single Barrier"; - } break; - case KMP_IDENT_BARRIER_IMPL_WORKSHARE : { - type = "OMP Workshare Barrier"; - } break; - default : { - type = "OMP Implicit Barrier"; - KMP_DEBUG_ASSERT( 0 ); - }; - }; /* switch */ - } else if ( expl ) { - type = "OMP Explicit Barrier"; - }; /* if */ - } break; - case bs_forkjoin_barrier : { - // In case of fork/join barrier we can read thr->th.th_ident, because it - // contains location of last passed construct (while join barrier is not - // such one). Use th_ident of master thread instead -- __kmp_join_call() - // called by the master thread saves location. - // - // AC: cannot read from master because __kmp_join_call may be not called - // yet, so we read the location from team. This is the same location. - // And team is valid at the enter to join barrier where this happens. - loc = team->t.t_ident; - if ( loc != NULL ) { - src = loc->psource; - }; // if - type = "OMP Join Barrier"; - } break; - }; // switch - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, type, src, __itt_attr_barrier ); - KMP_ITT_DEBUG_PRINT( "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, type, src ); + kmp_info_t *thr = __kmp_thread_from_gtid(gtid); + kmp_team_t *team = thr->th.th_team; + + // NOTE: If the function is called from __kmp_fork_barrier, team pointer can + // be NULL. This "if" helps to avoid crash. However, this is not complete + // solution, and reporting fork/join barriers to ITT should be revisited. + + if (team != NULL) { + // Master thread increases b_arrived by KMP_BARRIER_STATE_BUMP each time. + // Divide b_arrived by KMP_BARRIER_STATE_BUMP to get plain barrier counter. + kmp_uint64 counter = + team->t.t_bar[bt].b_arrived / KMP_BARRIER_STATE_BUMP + delta; + // Now form the barrier id. Encode barrier type (bt) in barrier id too, so + // barriers of different types do not have the same ids. + KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= bs_last_barrier); + // This conditon is a must (we would have zero divide otherwise). + KMP_BUILD_ASSERT(sizeof(kmp_team_t) >= 2 * bs_last_barrier); + // More strong condition: make sure we have room at least for for two + // differtent ids (for each barrier type). + object = reinterpret_cast( + kmp_uintptr_t(team) + + counter % (sizeof(kmp_team_t) / bs_last_barrier) * bs_last_barrier + + bt); + KMP_ITT_DEBUG_LOCK(); + KMP_ITT_DEBUG_PRINT("[bar obj] type=%d, counter=%lld, object=%p\n", bt, + counter, object); + + if (set_name) { + ident_t const *loc = NULL; + char const *src = NULL; + char const *type = "OMP Barrier"; + switch (bt) { + case bs_plain_barrier: { + // For plain barrier compiler calls __kmpc_barrier() function, which + // saves location in thr->th.th_ident. + loc = thr->th.th_ident; + // Get the barrier type from flags provided by compiler. 
+ kmp_int32 expl = 0; + kmp_uint32 impl = 0; + if (loc != NULL) { + src = loc->psource; + expl = (loc->flags & KMP_IDENT_BARRIER_EXPL) != 0; + impl = (loc->flags & KMP_IDENT_BARRIER_IMPL) != 0; }; // if - + if (impl) { + switch (loc->flags & KMP_IDENT_BARRIER_IMPL_MASK) { + case KMP_IDENT_BARRIER_IMPL_FOR: { + type = "OMP For Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_SECTIONS: { + type = "OMP Sections Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_SINGLE: { + type = "OMP Single Barrier"; + } break; + case KMP_IDENT_BARRIER_IMPL_WORKSHARE: { + type = "OMP Workshare Barrier"; + } break; + default: { + type = "OMP Implicit Barrier"; + KMP_DEBUG_ASSERT(0); + }; + }; /* switch */ + } else if (expl) { + type = "OMP Explicit Barrier"; + }; /* if */ + } break; + case bs_forkjoin_barrier: { + // In case of fork/join barrier we can read thr->th.th_ident, because it + // contains location of last passed construct (while join barrier is not + // such one). Use th_ident of master thread instead -- __kmp_join_call() + // called by the master thread saves location. + // + // AC: cannot read from master because __kmp_join_call may be not called + // yet, so we read the location from team. This is the same location. + // And team is valid at the enter to join barrier where this happens. + loc = team->t.t_ident; + if (loc != NULL) { + src = loc->psource; + }; // if + type = "OMP Join Barrier"; + } break; + }; // switch + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create(object, type, src, __itt_attr_barrier); + KMP_ITT_DEBUG_PRINT( + "[bar sta] scre( %p, \"%s\", \"%s\", __itt_attr_barrier )\n", object, + type, src); }; // if + + }; // if #endif - return object; + return object; } // __kmp_itt_barrier_object -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_starting( int gtid, void * object ) { +// ----------------------------------------------------------------------------- +void __kmp_itt_barrier_starting(int gtid, void *object) { #if USE_ITT_NOTIFY - if ( !KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] srel( %p )\n", object ); - }; // if + if (!KMP_MASTER_GTID(gtid)) { KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[bar sta] spre( %p )\n", object ); + __itt_sync_releasing(object); + KMP_ITT_DEBUG_PRINT("[bar sta] srel( %p )\n", object); + }; // if + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare(object); + KMP_ITT_DEBUG_PRINT("[bar sta] spre( %p )\n", object); #endif } // __kmp_itt_barrier_starting -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_middle( int gtid, void * object ) { +// ----------------------------------------------------------------------------- +void __kmp_itt_barrier_middle(int gtid, void *object) { #if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_releasing( object ); - KMP_ITT_DEBUG_PRINT( "[bar mid] srel( %p )\n", object ); - } else { - }; // if + if (KMP_MASTER_GTID(gtid)) { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired(object); + KMP_ITT_DEBUG_PRINT("[bar mid] sacq( %p )\n", object); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_releasing(object); + KMP_ITT_DEBUG_PRINT("[bar mid] srel( %p )\n", object); + } else { + }; // if #endif } // __kmp_itt_barrier_middle -// 
------------------------------------------------------------------------------------------------- - -void -__kmp_itt_barrier_finished( int gtid, void * object ) { +// ----------------------------------------------------------------------------- +void __kmp_itt_barrier_finished(int gtid, void *object) { #if USE_ITT_NOTIFY - if ( KMP_MASTER_GTID( gtid ) ) { - } else { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[bar end] sacq( %p )\n", object ); - }; // if + if (KMP_MASTER_GTID(gtid)) { + } else { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired(object); + KMP_ITT_DEBUG_PRINT("[bar end] sacq( %p )\n", object); + }; // if #endif } // __kmp_itt_barrier_finished -/* - ------------------------------------------------------------------------------------------------ - Taskwait reporting. - - ITT need an address (void *) to be specified as a sync object. OpenMP RTL does not have taskwait - structure, so we need to construct something. - -*/ +/* Taskwait reporting. + ITT need an address (void *) to be specified as a sync object. OpenMP RTL + does not have taskwait structure, so we need to construct something. */ -void * -__kmp_itt_taskwait_object( int gtid ) { - void * object = NULL; +void *__kmp_itt_taskwait_object(int gtid) { + void *object = NULL; #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - object = - reinterpret_cast< void * >( - kmp_uintptr_t( taskdata ) + taskdata->td_taskwait_counter % sizeof( kmp_taskdata_t ) - ); - }; // if + if (__itt_sync_create_ptr) { + kmp_info_t *thread = __kmp_thread_from_gtid(gtid); + kmp_taskdata_t *taskdata = thread->th.th_current_task; + object = reinterpret_cast(kmp_uintptr_t(taskdata) + + taskdata->td_taskwait_counter % + sizeof(kmp_taskdata_t)); + }; // if #endif - return object; + return object; } // __kmp_itt_taskwait_object -void -__kmp_itt_taskwait_starting( - int gtid, - void * object -) { +void __kmp_itt_taskwait_starting(int gtid, void *object) { #if USE_ITT_NOTIFY - kmp_info_t * thread = __kmp_thread_from_gtid( gtid ); - kmp_taskdata_t * taskdata = thread -> th.th_current_task; - ident_t const * loc = taskdata->td_taskwait_ident; - char const * src = ( loc == NULL? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Taskwait", src, 0 ); - KMP_ITT_DEBUG_PRINT( "[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", object, src ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[twa sta] spre( %p )\n", object ); + kmp_info_t *thread = __kmp_thread_from_gtid(gtid); + kmp_taskdata_t *taskdata = thread->th.th_current_task; + ident_t const *loc = taskdata->td_taskwait_ident; + char const *src = (loc == NULL ? 
NULL : loc->psource); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create(object, "OMP Taskwait", src, 0); + KMP_ITT_DEBUG_PRINT("[twa sta] scre( %p, \"OMP Taskwait\", \"%s\", 0 )\n", + object, src); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare(object); + KMP_ITT_DEBUG_PRINT("[twa sta] spre( %p )\n", object); #endif } // __kmp_itt_taskwait_starting -void -__kmp_itt_taskwait_finished( - int gtid, - void * object -) { +void __kmp_itt_taskwait_finished(int gtid, void *object) { #if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_acquired( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sacq( %p )\n", object ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( object ); - KMP_ITT_DEBUG_PRINT( "[twa end] sdes( %p )\n", object ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_acquired(object); + KMP_ITT_DEBUG_PRINT("[twa end] sacq( %p )\n", object); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_destroy(object); + KMP_ITT_DEBUG_PRINT("[twa end] sdes( %p )\n", object); #endif } // __kmp_itt_taskwait_finished -/* - ------------------------------------------------------------------------------------------------ - Task reporting. - - Only those tasks are reported which are executed by a thread spinning at barrier (or taskwait). - Synch object passed to the function must be barrier of taskwait the threads waiting at. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_task_starting( - void * object // ITT sync object: barrier or taskwait. -) { +/* Task reporting. + Only those tasks are reported which are executed by a thread spinning at + barrier (or taskwait). Synch object passed to the function must be barrier of + taskwait the threads waiting at. */ + +void __kmp_itt_task_starting( + void *object // ITT sync object: barrier or taskwait. + ) { #if USE_ITT_NOTIFY - if ( object != NULL ) { - KMP_ITT_DEBUG_LOCK(); - __itt_sync_cancel( object ); - KMP_ITT_DEBUG_PRINT( "[tsk sta] scan( %p )\n", object ); - }; // if + if (object != NULL) { + KMP_ITT_DEBUG_LOCK(); + __itt_sync_cancel(object); + KMP_ITT_DEBUG_PRINT("[tsk sta] scan( %p )\n", object); + }; // if #endif } // __kmp_itt_task_starting -// ------------------------------------------------------------------------------------------------- - -void -__kmp_itt_task_finished( - void * object // ITT sync object: barrier or taskwait. -) { +// ----------------------------------------------------------------------------- +void __kmp_itt_task_finished( + void *object // ITT sync object: barrier or taskwait. + ) { #if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_prepare( object ); - KMP_ITT_DEBUG_PRINT( "[tsk end] spre( %p )\n", object ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_prepare(object); + KMP_ITT_DEBUG_PRINT("[tsk end] spre( %p )\n", object); #endif } // __kmp_itt_task_finished -// ------------------------------------------------------------------------------------------------- - -/* - ------------------------------------------------------------------------------------------------ - Lock reporting. - - * __kmp_itt_lock_creating( lock ) should be called *before* the first lock operation - (set/unset). It is not a real event shown to the user but just setting a name for - synchronization object. `lock' is an address of sync object, the same address should be - used in all subsequent calls. - - * __kmp_itt_lock_acquiring() should be called before setting the lock. - - * __kmp_itt_lock_acquired() should be called after setting the lock. - - * __kmp_itt_lock_realeasing() should be called before unsetting the lock. 
- - * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting for the lock. - - * __kmp_itt_lock_destroyed( lock ) should be called after the last lock operation. After - __kmp_itt_lock_destroyed() all the references to the same address will be considered - as another sync object, not related with the original one. - ------------------------------------------------------------------------------------------------ -*/ - -// ------------------------------------------------------------------------------------------------- +/* Lock reporting. + * __kmp_itt_lock_creating( lock ) should be called *before* the first lock + operation (set/unset). It is not a real event shown to the user but just + setting a name for synchronization object. `lock' is an address of sync + object, the same address should be used in all subsequent calls. + * __kmp_itt_lock_acquiring() should be called before setting the lock. + * __kmp_itt_lock_acquired() should be called after setting the lock. + * __kmp_itt_lock_realeasing() should be called before unsetting the lock. + * __kmp_itt_lock_cancelled() should be called after thread cancelled waiting + for the lock. + * __kmp_itt_lock_destroyed( lock ) should be called after the last lock + operation. After __kmp_itt_lock_destroyed() all the references to the same + address will be considered as another sync object, not related with the + original one. */ #if KMP_USE_DYNAMIC_LOCK // Takes location information directly -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const *type, const ident_t *loc ) { +__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type, + const ident_t *loc) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - char const * src = ( loc == NULL ? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - } + if (__itt_sync_create_ptr) { + char const *src = (loc == NULL ? NULL : loc->psource); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create(lock, type, src, 0); + KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, + src); + } #endif } #else // KMP_USE_DYNAMIC_LOCK -// Internal guts -- common code for locks and critical sections, do not call directly. -__kmp_inline -void -___kmp_itt_lock_init( kmp_user_lock_p lock, char const * type ) { +// Internal guts -- common code for locks and critical sections, do not call +// directly. +__kmp_inline void ___kmp_itt_lock_init(kmp_user_lock_p lock, char const *type) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - ident_t const * loc = NULL; - if ( __kmp_get_user_lock_location_ != NULL ) - loc = __kmp_get_user_lock_location_( (lock) ); - char const * src = ( loc == NULL ? NULL : loc->psource ); - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( lock, type, src, 0 ); - KMP_ITT_DEBUG_PRINT( "[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, src ); - }; // if + if (__itt_sync_create_ptr) { + ident_t const *loc = NULL; + if (__kmp_get_user_lock_location_ != NULL) + loc = __kmp_get_user_lock_location_((lock)); + char const *src = (loc == NULL ? NULL : loc->psource); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create(lock, type, src, 0); + KMP_ITT_DEBUG_PRINT("[lck ini] scre( %p, \"%s\", \"%s\", 0 )\n", lock, type, + src); + }; // if #endif } // ___kmp_itt_lock_init #endif // KMP_USE_DYNAMIC_LOCK -// Internal guts -- common code for locks and critical sections, do not call directly. 
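[Editor's aside, not part of the patch: the lock-reporting comment above fixes the order in which the __kmp_itt_lock_* hooks are meant to be called over a lock's lifetime. Purely as an illustration of the underlying ITT sync API those hooks forward to, here is a minimal, self-contained sketch that applies the same create / prepare / acquired / releasing / destroy sequence to an ordinary std::mutex. It assumes <ittnotify.h> is available and linked; the object and source-location strings are arbitrary labels.]

// Minimal sketch, not patch content: the annotation order documented for the
// __kmp_itt_lock_* wrappers, applied to a plain std::mutex.  Assumes
// <ittnotify.h>; every call is a no-op when no tool is attached.
#include <ittnotify.h>
#include <mutex>

static std::mutex g_mtx;

static void locked_work() {
  __itt_sync_prepare(&g_mtx);   // before setting the lock
  g_mtx.lock();
  __itt_sync_acquired(&g_mtx);  // after setting the lock
  // ... critical section ...
  __itt_sync_releasing(&g_mtx); // before unsetting the lock
  g_mtx.unlock();
}

int main() {
  // Name the sync object once, before the first set/unset; the address is its
  // identity.  The last argument is 0, as in ___kmp_itt_lock_init above.
  __itt_sync_create(&g_mtx, "User Lock", "hypothetical.cpp", 0);
  locked_work();
  __itt_sync_destroy(&g_mtx);   // after the last operation on this address
  return 0;
}

[A wait that is abandoned would be reported with __itt_sync_cancel(), which is what __kmp_itt_lock_cancelled() maps to.]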
-__kmp_inline -void -___kmp_itt_lock_fini( kmp_user_lock_p lock, char const * type ) { +// Internal guts -- common code for locks and critical sections, do not call +// directly. +__kmp_inline void ___kmp_itt_lock_fini(kmp_user_lock_p lock, char const *type) { #if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_destroy( lock ); - KMP_ITT_DEBUG_PRINT( "[lck dst] sdes( %p )\n", lock ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_destroy(lock); + KMP_ITT_DEBUG_PRINT("[lck dst] sdes( %p )\n", lock); #endif } // ___kmp_itt_lock_fini - -// ------------------------------------------------------------------------------------------------- - +// ----------------------------------------------------------------------------- #if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_lock_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Lock", loc ); +void __kmp_itt_lock_creating(kmp_user_lock_p lock, const ident_t *loc) { + ___kmp_itt_lock_init(lock, "OMP Lock", loc); } #else -void -__kmp_itt_lock_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Lock" ); +void __kmp_itt_lock_creating(kmp_user_lock_p lock) { + ___kmp_itt_lock_init(lock, "OMP Lock"); } // __kmp_itt_lock_creating #endif -void -__kmp_itt_lock_acquiring( kmp_user_lock_p lock ) { +void __kmp_itt_lock_acquiring(kmp_user_lock_p lock) { #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_prepare_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_prepare( ilk->lock ); - } else { - __itt_sync_prepare( lock ); - } + // postpone lock object access + if (__itt_sync_prepare_ptr) { + if (KMP_EXTRACT_D_TAG(lock) == 0) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_prepare(ilk->lock); + } else { + __itt_sync_prepare(lock); } + } #else - __itt_sync_prepare( lock ); + __itt_sync_prepare(lock); #endif } // __kmp_itt_lock_acquiring -void -__kmp_itt_lock_acquired( kmp_user_lock_p lock ) { +void __kmp_itt_lock_acquired(kmp_user_lock_p lock) { #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - // postpone lock object access - if ( __itt_sync_acquired_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_acquired( ilk->lock ); - } else { - __itt_sync_acquired( lock ); - } + // postpone lock object access + if (__itt_sync_acquired_ptr) { + if (KMP_EXTRACT_D_TAG(lock) == 0) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_acquired(ilk->lock); + } else { + __itt_sync_acquired(lock); } + } #else - __itt_sync_acquired( lock ); + __itt_sync_acquired(lock); #endif } // __kmp_itt_lock_acquired -void -__kmp_itt_lock_releasing( kmp_user_lock_p lock ) { +void __kmp_itt_lock_releasing(kmp_user_lock_p lock) { #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if ( __itt_sync_releasing_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_releasing( ilk->lock ); - } else { - __itt_sync_releasing( lock ); - } + if (__itt_sync_releasing_ptr) { + if (KMP_EXTRACT_D_TAG(lock) == 0) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_releasing(ilk->lock); + } else { + __itt_sync_releasing(lock); } + } #else - __itt_sync_releasing( lock ); + __itt_sync_releasing(lock); #endif } // __kmp_itt_lock_releasing -void -__kmp_itt_lock_cancelled( kmp_user_lock_p lock ) { +void __kmp_itt_lock_cancelled(kmp_user_lock_p lock) { #if KMP_USE_DYNAMIC_LOCK && USE_ITT_NOTIFY - if ( 
__itt_sync_cancel_ptr ) { - if ( KMP_EXTRACT_D_TAG(lock) == 0 ) { - kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); - __itt_sync_cancel( ilk->lock ); - } else { - __itt_sync_cancel( lock ); - } + if (__itt_sync_cancel_ptr) { + if (KMP_EXTRACT_D_TAG(lock) == 0) { + kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock); + __itt_sync_cancel(ilk->lock); + } else { + __itt_sync_cancel(lock); } + } #else - __itt_sync_cancel( lock ); + __itt_sync_cancel(lock); #endif } // __kmp_itt_lock_cancelled -void -__kmp_itt_lock_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Lock" ); +void __kmp_itt_lock_destroyed(kmp_user_lock_p lock) { + ___kmp_itt_lock_fini(lock, "OMP Lock"); } // __kmp_itt_lock_destroyed -/* - ------------------------------------------------------------------------------------------------ - Critical reporting. - - Critical sections are treated exactly as locks (but have different object type). - ------------------------------------------------------------------------------------------------ -*/ +/* Critical reporting. + Critical sections are treated exactly as locks (but have different object + type). */ #if KMP_USE_DYNAMIC_LOCK -void -__kmp_itt_critical_creating( kmp_user_lock_p lock, const ident_t *loc ) { - ___kmp_itt_lock_init( lock, "OMP Critical", loc); +void __kmp_itt_critical_creating(kmp_user_lock_p lock, const ident_t *loc) { + ___kmp_itt_lock_init(lock, "OMP Critical", loc); } #else -void -__kmp_itt_critical_creating( kmp_user_lock_p lock ) { - ___kmp_itt_lock_init( lock, "OMP Critical" ); +void __kmp_itt_critical_creating(kmp_user_lock_p lock) { + ___kmp_itt_lock_init(lock, "OMP Critical"); } // __kmp_itt_critical_creating #endif -void -__kmp_itt_critical_acquiring( kmp_user_lock_p lock ) { - __itt_sync_prepare( lock ); +void __kmp_itt_critical_acquiring(kmp_user_lock_p lock) { + __itt_sync_prepare(lock); } // __kmp_itt_critical_acquiring -void -__kmp_itt_critical_acquired( kmp_user_lock_p lock ) { - __itt_sync_acquired( lock ); +void __kmp_itt_critical_acquired(kmp_user_lock_p lock) { + __itt_sync_acquired(lock); } // __kmp_itt_critical_acquired -void -__kmp_itt_critical_releasing( kmp_user_lock_p lock ) { - __itt_sync_releasing( lock ); +void __kmp_itt_critical_releasing(kmp_user_lock_p lock) { + __itt_sync_releasing(lock); } // __kmp_itt_critical_releasing -void -__kmp_itt_critical_destroyed( kmp_user_lock_p lock ) { - ___kmp_itt_lock_fini( lock, "OMP Critical" ); +void __kmp_itt_critical_destroyed(kmp_user_lock_p lock) { + ___kmp_itt_lock_fini(lock, "OMP Critical"); } // __kmp_itt_critical_destroyed -/* - ------------------------------------------------------------------------------------------------ - Single reporting. - ------------------------------------------------------------------------------------------------ -*/ +/* Single reporting. */ -void -__kmp_itt_single_start( int gtid ) { +void __kmp_itt_single_start(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_mark_create_ptr || KMP_ITT_DEBUG ) { - kmp_info_t * thr = __kmp_thread_from_gtid( (gtid) ); - ident_t * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? 
NULL : loc->psource ); - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - __kmp_str_buf_print( & name, "OMP Single-%s", src ); - KMP_ITT_DEBUG_LOCK(); - thr->th.th_itt_mark_single = __itt_mark_create( name.str ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mcre( \"%s\") -> %d\n", name.str, thr->th.th_itt_mark_single ); - __kmp_str_buf_free( & name ); - KMP_ITT_DEBUG_LOCK(); - __itt_mark( thr->th.th_itt_mark_single, NULL ); - KMP_ITT_DEBUG_PRINT( "[sin sta] mark( %d, NULL )\n", thr->th.th_itt_mark_single ); - }; // if + if (__itt_mark_create_ptr || KMP_ITT_DEBUG) { + kmp_info_t *thr = __kmp_thread_from_gtid((gtid)); + ident_t *loc = thr->th.th_ident; + char const *src = (loc == NULL ? NULL : loc->psource); + kmp_str_buf_t name; + __kmp_str_buf_init(&name); + __kmp_str_buf_print(&name, "OMP Single-%s", src); + KMP_ITT_DEBUG_LOCK(); + thr->th.th_itt_mark_single = __itt_mark_create(name.str); + KMP_ITT_DEBUG_PRINT("[sin sta] mcre( \"%s\") -> %d\n", name.str, + thr->th.th_itt_mark_single); + __kmp_str_buf_free(&name); + KMP_ITT_DEBUG_LOCK(); + __itt_mark(thr->th.th_itt_mark_single, NULL); + KMP_ITT_DEBUG_PRINT("[sin sta] mark( %d, NULL )\n", + thr->th.th_itt_mark_single); + }; // if #endif } // __kmp_itt_single_start -void -__kmp_itt_single_end( int gtid ) { +void __kmp_itt_single_end(int gtid) { #if USE_ITT_NOTIFY - __itt_mark_type mark = __kmp_thread_from_gtid( gtid )->th.th_itt_mark_single; - KMP_ITT_DEBUG_LOCK(); - __itt_mark_off( mark ); - KMP_ITT_DEBUG_PRINT( "[sin end] moff( %d )\n", mark ); + __itt_mark_type mark = __kmp_thread_from_gtid(gtid)->th.th_itt_mark_single; + KMP_ITT_DEBUG_LOCK(); + __itt_mark_off(mark); + KMP_ITT_DEBUG_PRINT("[sin end] moff( %d )\n", mark); #endif } // __kmp_itt_single_end -/* - ------------------------------------------------------------------------------------------------ - Ordered reporting. - - __kmp_itt_ordered_init is called by each thread *before* first using sync - object. ITT team would like it to be called once, but it requires extra synchronization. - - __kmp_itt_ordered_prep is called when thread is going to enter ordered section - (before synchronization). +/* Ordered reporting. + * __kmp_itt_ordered_init is called by each thread *before* first using sync + object. ITT team would like it to be called once, but it requires extra + synchronization. + * __kmp_itt_ordered_prep is called when thread is going to enter ordered + section (before synchronization). + * __kmp_itt_ordered_start is called just before entering user code (after + synchronization). + * __kmp_itt_ordered_end is called after returning from user code. - __kmp_itt_ordered_start is called just before entering user code (after - synchronization). + Sync object is th->th.th_dispatch->th_dispatch_sh_current. + Events are not generated in case of serialized team. */ - __kmp_itt_ordered_end is called after returning from user code. - - Sync object is th->th.th_dispatch->th_dispatch_sh_current. - - Events are not generated in case of serialized team. - ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_ordered_init( int gtid ) { +void __kmp_itt_ordered_init(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_info_t * thr = __kmp_thread_from_gtid( gtid ); - ident_t const * loc = thr->th.th_ident; - char const * src = ( loc == NULL ? 
NULL : loc->psource ); - __itt_sync_create( - thr->th.th_dispatch->th_dispatch_sh_current, "OMP Ordered", src, 0 - ); - }; // if + if (__itt_sync_create_ptr) { + kmp_info_t *thr = __kmp_thread_from_gtid(gtid); + ident_t const *loc = thr->th.th_ident; + char const *src = (loc == NULL ? NULL : loc->psource); + __itt_sync_create(thr->th.th_dispatch->th_dispatch_sh_current, + "OMP Ordered", src, 0); + }; // if #endif } // __kmp_itt_ordered_init -void -__kmp_itt_ordered_prep( int gtid ) { +void __kmp_itt_ordered_prep(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_prepare( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if + if (__itt_sync_create_ptr) { + kmp_team_t *t = __kmp_team_from_gtid(gtid); + if (!t->t.t_serialized) { + kmp_info_t *th = __kmp_thread_from_gtid(gtid); + __itt_sync_prepare(th->th.th_dispatch->th_dispatch_sh_current); }; // if + }; // if #endif } // __kmp_itt_ordered_prep -void -__kmp_itt_ordered_start( int gtid ) { +void __kmp_itt_ordered_start(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_acquired( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if + if (__itt_sync_create_ptr) { + kmp_team_t *t = __kmp_team_from_gtid(gtid); + if (!t->t.t_serialized) { + kmp_info_t *th = __kmp_thread_from_gtid(gtid); + __itt_sync_acquired(th->th.th_dispatch->th_dispatch_sh_current); }; // if + }; // if #endif } // __kmp_itt_ordered_start -void -__kmp_itt_ordered_end( int gtid ) { +void __kmp_itt_ordered_end(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_sync_create_ptr ) { - kmp_team_t * t = __kmp_team_from_gtid( gtid ); - if ( ! t->t.t_serialized ) { - kmp_info_t * th = __kmp_thread_from_gtid( gtid ); - __itt_sync_releasing( th->th.th_dispatch->th_dispatch_sh_current ); - }; // if + if (__itt_sync_create_ptr) { + kmp_team_t *t = __kmp_team_from_gtid(gtid); + if (!t->t.t_serialized) { + kmp_info_t *th = __kmp_thread_from_gtid(gtid); + __itt_sync_releasing(th->th.th_dispatch->th_dispatch_sh_current); }; // if + }; // if #endif } // __kmp_itt_ordered_end +/* Threads reporting. */ -/* - ------------------------------------------------------------------------------------------------ - Threads reporting. 
- ------------------------------------------------------------------------------------------------ -*/ - -void -__kmp_itt_thread_ignore() { - __itt_thr_ignore(); +void __kmp_itt_thread_ignore() { + __itt_thr_ignore(); } // __kmp_itt_thread_ignore -void -__kmp_itt_thread_name( int gtid ) { +void __kmp_itt_thread_name(int gtid) { #if USE_ITT_NOTIFY - if ( __itt_thr_name_set_ptr ) { - kmp_str_buf_t name; - __kmp_str_buf_init( & name ); - if( KMP_MASTER_GTID(gtid) ) { - __kmp_str_buf_print( & name, "OMP Master Thread #%d", gtid ); - } else { - __kmp_str_buf_print( & name, "OMP Worker Thread #%d", gtid ); - } - KMP_ITT_DEBUG_LOCK(); - __itt_thr_name_set( name.str, name.used ); - KMP_ITT_DEBUG_PRINT( "[thr nam] name( \"%s\")\n", name.str ); - __kmp_str_buf_free( & name ); - }; // if + if (__itt_thr_name_set_ptr) { + kmp_str_buf_t name; + __kmp_str_buf_init(&name); + if (KMP_MASTER_GTID(gtid)) { + __kmp_str_buf_print(&name, "OMP Master Thread #%d", gtid); + } else { + __kmp_str_buf_print(&name, "OMP Worker Thread #%d", gtid); + } + KMP_ITT_DEBUG_LOCK(); + __itt_thr_name_set(name.str, name.used); + KMP_ITT_DEBUG_PRINT("[thr nam] name( \"%s\")\n", name.str); + __kmp_str_buf_free(&name); + }; // if #endif } // __kmp_itt_thread_name +/* System object reporting. + ITT catches operations with system sync objects (like Windows* OS on IA-32 + architecture API critical sections and events). We only need to specify + name ("OMP Scheduler") for the object to let ITT know it is an object used + by OpenMP RTL for internal purposes. */ -/* - -------------------------------------------------------------------------- - System object reporting. - - ITT catches operations with system sync objects (like Windows* OS on IA-32 - architecture API critical sections and events). We only need to specify - name ("OMP Scheduler") for the object to let ITT know it is an object used - by OpenMP RTL for internal purposes. - -------------------------------------------------------------------------- -*/ - -void -__kmp_itt_system_object_created( void * object, char const * name ) { +void __kmp_itt_system_object_created(void *object, char const *name) { #if USE_ITT_NOTIFY - KMP_ITT_DEBUG_LOCK(); - __itt_sync_create( object, "OMP Scheduler", name, 0 ); - KMP_ITT_DEBUG_PRINT( "[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", object, name ); + KMP_ITT_DEBUG_LOCK(); + __itt_sync_create(object, "OMP Scheduler", name, 0); + KMP_ITT_DEBUG_PRINT("[sys obj] scre( %p, \"OMP Scheduler\", \"%s\", 0 )\n", + object, name); #endif } // __kmp_itt_system_object_created +/* Stack stitching api. + Master calls "create" and put the stitching id into team structure. + Workers read the stitching id and call "enter" / "leave" api. + Master calls "destroy" at the end of the parallel region. */ -/* - ------------------------------------------------------------------------------------------------ - Stack stitching api. - - Master calls "create" and put the stitching id into team structure. - Workers read the stitching id and call "enter" / "leave" api. - Master calls "destroy" at the end of the parallel region. 
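[Editor's aside, not part of the patch: the stack-stitching description above reduces to one id created by the master and a matched enter/leave pair on every worker. A minimal, self-contained sketch of that pattern with plain std::thread workers, assuming <ittnotify.h> is available:]

// Sketch only: the master creates one stitching id per "region", each worker
// brackets its outlined work with callee_enter/leave against that id, and the
// master destroys it at the end.  All calls are no-ops without a tool attached.
#include <ittnotify.h>
#include <thread>
#include <vector>

static void worker(__itt_caller id) {
  __itt_stack_callee_enter(id); // stitch this thread's stack to the creator's
  // ... outlined parallel work ...
  __itt_stack_callee_leave(id);
}

int main() {
  __itt_caller id = __itt_stack_caller_create(); // master side, once per region
  std::vector<std::thread> team;
  for (int i = 0; i < 4; ++i)
    team.emplace_back(worker, id);
  for (auto &t : team)
    t.join();
  __itt_stack_caller_destroy(id);                // master side, end of region
  return 0;
}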
- ------------------------------------------------------------------------------------------------ -*/ - -__itt_caller -__kmp_itt_stack_caller_create() -{ +__itt_caller __kmp_itt_stack_caller_create() { #if USE_ITT_NOTIFY - if ( !__itt_stack_caller_create_ptr ) - return NULL; - KMP_ITT_DEBUG_LOCK(); - __itt_caller id = __itt_stack_caller_create(); - KMP_ITT_DEBUG_PRINT( "[stk cre] %p\n", id ); - return id; -#endif + if (!__itt_stack_caller_create_ptr) return NULL; + KMP_ITT_DEBUG_LOCK(); + __itt_caller id = __itt_stack_caller_create(); + KMP_ITT_DEBUG_PRINT("[stk cre] %p\n", id); + return id; +#endif + return NULL; } -void -__kmp_itt_stack_caller_destroy( __itt_caller id ) -{ +void __kmp_itt_stack_caller_destroy(__itt_caller id) { #if USE_ITT_NOTIFY - if ( __itt_stack_caller_destroy_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_caller_destroy( id ); - KMP_ITT_DEBUG_PRINT( "[stk des] %p\n", id ); - } + if (__itt_stack_caller_destroy_ptr) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_caller_destroy(id); + KMP_ITT_DEBUG_PRINT("[stk des] %p\n", id); + } #endif } -void -__kmp_itt_stack_callee_enter( __itt_caller id ) -{ +void __kmp_itt_stack_callee_enter(__itt_caller id) { #if USE_ITT_NOTIFY - if ( __itt_stack_callee_enter_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_enter( id ); - KMP_ITT_DEBUG_PRINT( "[stk ent] %p\n", id ); - } + if (__itt_stack_callee_enter_ptr) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_callee_enter(id); + KMP_ITT_DEBUG_PRINT("[stk ent] %p\n", id); + } #endif } -void -__kmp_itt_stack_callee_leave( __itt_caller id ) -{ +void __kmp_itt_stack_callee_leave(__itt_caller id) { #if USE_ITT_NOTIFY - if ( __itt_stack_callee_leave_ptr ) { - KMP_ITT_DEBUG_LOCK(); - __itt_stack_callee_leave( id ); - KMP_ITT_DEBUG_PRINT( "[stk lea] %p\n", id ); - } + if (__itt_stack_callee_leave_ptr) { + KMP_ITT_DEBUG_LOCK(); + __itt_stack_callee_leave(id); + KMP_ITT_DEBUG_PRINT("[stk lea] %p\n", id); + } #endif } diff --git a/openmp/runtime/src/kmp_lock.cpp b/openmp/runtime/src/kmp_lock.cpp index ed97d36..ef11a5a 100644 --- a/openmp/runtime/src/kmp_lock.cpp +++ b/openmp/runtime/src/kmp_lock.cpp @@ -17,55 +17,51 @@ #include #include "kmp.h" -#include "kmp_itt.h" #include "kmp_i18n.h" -#include "kmp_lock.h" #include "kmp_io.h" +#include "kmp_itt.h" +#include "kmp_lock.h" #include "tsan_annotations.h" #if KMP_USE_FUTEX -# include -# include -// We should really include , but that causes compatibility problems on different -// Linux* OS distributions that either require that you include (or break when you try to include) -// . -// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) -// we just define the constants here and don't include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif +#include +#include +// We should really include , but that causes compatibility problems on +// different Linux* OS distributions that either require that you include (or +// break when you try to include) . Since all we need is the two +// macros below (which are part of the kernel ABI, so can't change) we just +// define the constants here and don't include +#ifndef FUTEX_WAIT +#define FUTEX_WAIT 0 +#endif +#ifndef FUTEX_WAKE +#define FUTEX_WAKE 1 +#endif #endif /* Implement spin locks for internal library use. */ /* The algorithm implemented is Lamport's bakery lock [1974]. 
*/ -void -__kmp_validate_locks( void ) -{ - int i; - kmp_uint32 x, y; +void __kmp_validate_locks(void) { + int i; + kmp_uint32 x, y; - /* Check to make sure unsigned arithmetic does wraps properly */ - x = ~((kmp_uint32) 0) - 2; - y = x - 2; + /* Check to make sure unsigned arithmetic does wraps properly */ + x = ~((kmp_uint32)0) - 2; + y = x - 2; - for (i = 0; i < 8; ++i, ++x, ++y) { - kmp_uint32 z = (x - y); - KMP_ASSERT( z == 2 ); - } + for (i = 0; i < 8; ++i, ++x, ++y) { + kmp_uint32 z = (x - y); + KMP_ASSERT(z == 2); + } - KMP_ASSERT( offsetof( kmp_base_queuing_lock, tail_id ) % 8 == 0 ); + KMP_ASSERT(offsetof(kmp_base_queuing_lock, tail_id) % 8 == 0); } - /* ------------------------------------------------------------------------ */ /* test and set locks */ -// // For the non-nested locks, we can only assume that the first 4 bytes were // allocated, since gcc only allocates 4 bytes for omp_lock_t, and the Intel // compiler only allocates a 4 byte pointer on IA-32 architecture. On @@ -73,302 +69,253 @@ __kmp_validate_locks( void ) // // gcc reserves >= 8 bytes for nested locks, so we can assume that the // entire 8 bytes were allocated for nested locks on all 64-bit platforms. -// -static kmp_int32 -__kmp_get_tas_lock_owner( kmp_tas_lock_t *lck ) -{ - return KMP_LOCK_STRIP(TCR_4( lck->lk.poll )) - 1; +static kmp_int32 __kmp_get_tas_lock_owner(kmp_tas_lock_t *lck) { + return KMP_LOCK_STRIP(TCR_4(lck->lk.poll)) - 1; } -static inline bool -__kmp_is_tas_lock_nestable( kmp_tas_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; +static inline bool __kmp_is_tas_lock_nestable(kmp_tas_lock_t *lck) { + return lck->lk.depth_locked != -1; } __forceinline static int -__kmp_acquire_tas_lock_timed_template( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); +__kmp_acquire_tas_lock_timed_template(kmp_tas_lock_t *lck, kmp_int32 gtid) { + KMP_MB(); #ifdef USE_LOCK_PROFILE - kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) ); - if ( ( curr != 0 ) && ( curr != gtid + 1 ) ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." );*/ + kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); + if ((curr != 0) && (curr != gtid + 1)) + __kmp_printf("LOCK CONTENTION: %p\n", lck); +/* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ - if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { - KMP_FSYNC_ACQUIRED(lck); - return KMP_LOCK_ACQUIRED_FIRST; - } - - kmp_uint32 spins; - KMP_FSYNC_PREPARE( lck ); - KMP_INIT_YIELD( spins ); - if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ) { - KMP_YIELD( TRUE ); - } - else { - KMP_YIELD_SPIN( spins ); - } - - kmp_backoff_t backoff = __kmp_spin_backoff_params; - while ( ( lck->lk.poll != KMP_LOCK_FREE(tas) ) || - ( ! KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) ) { - - __kmp_spin_backoff(&backoff); - if ( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ) { - KMP_YIELD( TRUE ); - } - else { - KMP_YIELD_SPIN( spins ); - } - } - KMP_FSYNC_ACQUIRED( lck ); + if ((lck->lk.poll == KMP_LOCK_FREE(tas)) && + KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), + KMP_LOCK_BUSY(gtid + 1, tas))) { + KMP_FSYNC_ACQUIRED(lck); return KMP_LOCK_ACQUIRED_FIRST; + } + + kmp_uint32 spins; + KMP_FSYNC_PREPARE(lck); + KMP_INIT_YIELD(spins); + if (TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { + KMP_YIELD(TRUE); + } else { + KMP_YIELD_SPIN(spins); + } + + kmp_backoff_t backoff = __kmp_spin_backoff_params; + while ((lck->lk.poll != KMP_LOCK_FREE(tas)) || + (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), + KMP_LOCK_BUSY(gtid + 1, tas)))) { + + __kmp_spin_backoff(&backoff); + if (TCR_4(__kmp_nth) > + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { + KMP_YIELD(TRUE); + } else { + KMP_YIELD_SPIN(spins); + } + } + KMP_FSYNC_ACQUIRED(lck); + return KMP_LOCK_ACQUIRED_FIRST; } -int -__kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - int retval = __kmp_acquire_tas_lock_timed_template( lck, gtid ); +int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + int retval = __kmp_acquire_tas_lock_timed_template(lck, gtid); ANNOTATE_TAS_ACQUIRED(lck); return retval; } -static int -__kmp_acquire_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - return __kmp_acquire_tas_lock( lck, gtid ); +static int __kmp_acquire_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) == gtid)) { + KMP_FATAL(LockIsAlreadyOwned, func); + } + return __kmp_acquire_tas_lock(lck, gtid); } -int -__kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - if ( ( lck->lk.poll == KMP_LOCK_FREE(tas) ) - && KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(tas), KMP_LOCK_BUSY(gtid+1, tas) ) ) { - KMP_FSYNC_ACQUIRED( lck ); - return TRUE; - } - return FALSE; +int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + if ((lck->lk.poll == KMP_LOCK_FREE(tas)) && + KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(tas), + KMP_LOCK_BUSY(gtid + 1, tas))) { + KMP_FSYNC_ACQUIRED(lck); + return TRUE; + } + return FALSE; } -static int -__kmp_test_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - return __kmp_test_tas_lock( lck, gtid ); +static int __kmp_test_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + return __kmp_test_tas_lock(lck, gtid); } -int -__kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); /* Flush all pending memory write invalidates. */ +int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_FSYNC_RELEASING(lck); - ANNOTATE_TAS_RELEASED(lck); - KMP_ST_REL32( &(lck->lk.poll), KMP_LOCK_FREE(tas) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_FSYNC_RELEASING(lck); + ANNOTATE_TAS_RELEASED(lck); + KMP_ST_REL32(&(lck->lk.poll), KMP_LOCK_FREE(tas)); + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ); - return KMP_LOCK_RELEASED; + KMP_YIELD(TCR_4(__kmp_nth) > + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); + return KMP_LOCK_RELEASED; } -static int -__kmp_release_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_tas_lock_owner( lck ) >= 0 ) - && ( __kmp_get_tas_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_tas_lock( lck, gtid ); +static int __kmp_release_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_tas_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if ((gtid >= 0) && (__kmp_get_tas_lock_owner(lck) >= 0) && + (__kmp_get_tas_lock_owner(lck) != gtid)) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_tas_lock(lck, gtid); } -void -__kmp_init_tas_lock( kmp_tas_lock_t * lck ) -{ - TCW_4( lck->lk.poll, KMP_LOCK_FREE(tas) ); +void __kmp_init_tas_lock(kmp_tas_lock_t *lck) { + TCW_4(lck->lk.poll, KMP_LOCK_FREE(tas)); } -static void -__kmp_init_tas_lock_with_checks( kmp_tas_lock_t * lck ) -{ - __kmp_init_tas_lock( lck ); +static void __kmp_init_tas_lock_with_checks(kmp_tas_lock_t *lck) { + __kmp_init_tas_lock(lck); } -void -__kmp_destroy_tas_lock( kmp_tas_lock_t *lck ) -{ - lck->lk.poll = 0; -} +void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck) { lck->lk.poll = 0; } -static void -__kmp_destroy_tas_lock_with_checks( kmp_tas_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_tas_lock( lck ); +static void __kmp_destroy_tas_lock_with_checks(kmp_tas_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_tas_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_tas_lock(lck); } - -// // nested test and set locks -// -int -__kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_tas_lock_timed_template( lck, gtid ); - ANNOTATE_TAS_ACQUIRED(lck); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } + if (__kmp_get_tas_lock_owner(lck) == gtid) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } else { + __kmp_acquire_tas_lock_timed_template(lck, gtid); + 
ANNOTATE_TAS_ACQUIRED(lck); + lck->lk.depth_locked = 1; + return KMP_LOCK_ACQUIRED_FIRST; + } } -static int -__kmp_acquire_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( ! __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_tas_lock( lck, gtid ); +static int __kmp_acquire_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_nest_lock"; + if (!__kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_acquire_nested_tas_lock(lck, gtid); } -int -__kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - int retval; +int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + int retval; - KMP_DEBUG_ASSERT( gtid >= 0 ); + KMP_DEBUG_ASSERT(gtid >= 0); - if ( __kmp_get_tas_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_tas_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; + if (__kmp_get_tas_lock_owner(lck) == gtid) { + retval = ++lck->lk.depth_locked; + } else if (!__kmp_test_tas_lock(lck, gtid)) { + retval = 0; + } else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + } + return retval; } -static int -__kmp_test_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( ! __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_tas_lock( lck, gtid ); +static int __kmp_test_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_nest_lock"; + if (!__kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_test_nested_tas_lock(lck, gtid); } -int -__kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - __kmp_release_tas_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; + KMP_MB(); + if (--(lck->lk.depth_locked) == 0) { + __kmp_release_tas_lock(lck, gtid); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; } -static int -__kmp_release_nested_tas_lock_with_checks( kmp_tas_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ! 
__kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_tas_lock( lck, gtid ); +static int __kmp_release_nested_tas_lock_with_checks(kmp_tas_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (!__kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_tas_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_tas_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_nested_tas_lock(lck, gtid); } -void -__kmp_init_nested_tas_lock( kmp_tas_lock_t * lck ) -{ - __kmp_init_tas_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck) { + __kmp_init_tas_lock(lck); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } -static void -__kmp_init_nested_tas_lock_with_checks( kmp_tas_lock_t * lck ) -{ - __kmp_init_nested_tas_lock( lck ); +static void __kmp_init_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { + __kmp_init_nested_tas_lock(lck); } -void -__kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ) -{ - __kmp_destroy_tas_lock( lck ); - lck->lk.depth_locked = 0; +void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck) { + __kmp_destroy_tas_lock(lck); + lck->lk.depth_locked = 0; } -static void -__kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( ! __kmp_is_tas_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_tas_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_tas_lock( lck ); +static void __kmp_destroy_nested_tas_lock_with_checks(kmp_tas_lock_t *lck) { + char const *const func = "omp_destroy_nest_lock"; + if (!__kmp_is_tas_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_tas_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_nested_tas_lock(lck); } - #if KMP_USE_FUTEX /* ------------------------------------------------------------------------ */ @@ -379,1573 +326,1471 @@ __kmp_destroy_nested_tas_lock_with_checks( kmp_tas_lock_t *lck ) // set locks, and are allocated the same way (i.e. use the area allocated by // the compiler for non-nested locks / allocate nested locks on the heap). 
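[Editor's aside, not part of the patch: the futex lock routines that follow keep the whole lock state in one 32-bit word, fall back to the futex system call only when the lock is contended, and wake at most one sleeping waiter on release. A much simplified, Linux-only sketch of that wait/wake protocol (not the runtime's exact algorithm, which additionally encodes the owner's gtid and a low "waiters" bit): it assumes <linux/futex.h>, the raw syscall(2) interface, and the usual 4-byte layout of std::atomic<uint32_t>.]

// Simplified futex-backed lock: 0 = free, 1 = held, 2 = held with waiters.
// Uncontended acquire/release stay in user space; a contended release wakes
// exactly one sleeping waiter.  Linux-only sketch, not the patch's code.
#include <linux/futex.h>
#include <sys/syscall.h>
#include <unistd.h>
#include <atomic>
#include <cstdint>

static std::atomic<uint32_t> g_lock{0};

static long futex_op(uint32_t *uaddr, int op, uint32_t val) {
  return syscall(SYS_futex, uaddr, op, val, nullptr, nullptr, 0);
}

static uint32_t *addr() { // relies on std::atomic<uint32_t> being a bare word
  return reinterpret_cast<uint32_t *>(&g_lock);
}

static void lock_acquire() {
  uint32_t c = 0;
  if (g_lock.compare_exchange_strong(c, 1))
    return;                          // fast path: the lock was free
  if (c != 2)
    c = g_lock.exchange(2);          // announce that a waiter exists
  while (c != 0) {
    futex_op(addr(), FUTEX_WAIT, 2); // sleep only while the value is still 2
    c = g_lock.exchange(2);          // re-check; 0 means we now own the lock
  }
}

static void lock_release() {
  if (g_lock.exchange(0) != 1)       // value was 2: someone may be sleeping
    futex_op(addr(), FUTEX_WAKE, 1); // wake one waiter, if any
}

[An uncontended set/unset is a single compare-and-swap or exchange in user space, which is the point of preferring a futex over unconditionally entering the kernel.]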
-static kmp_int32 -__kmp_get_futex_lock_owner( kmp_futex_lock_t *lck ) -{ - return KMP_LOCK_STRIP(( TCR_4( lck->lk.poll ) >> 1 )) - 1; +static kmp_int32 __kmp_get_futex_lock_owner(kmp_futex_lock_t *lck) { + return KMP_LOCK_STRIP((TCR_4(lck->lk.poll) >> 1)) - 1; } -static inline bool -__kmp_is_futex_lock_nestable( kmp_futex_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; +static inline bool __kmp_is_futex_lock_nestable(kmp_futex_lock_t *lck) { + return lck->lk.depth_locked != -1; } __forceinline static int -__kmp_acquire_futex_lock_timed_template( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - kmp_int32 gtid_code = ( gtid + 1 ) << 1; +__kmp_acquire_futex_lock_timed_template(kmp_futex_lock_t *lck, kmp_int32 gtid) { + kmp_int32 gtid_code = (gtid + 1) << 1; - KMP_MB(); + KMP_MB(); #ifdef USE_LOCK_PROFILE - kmp_uint32 curr = KMP_LOCK_STRIP( TCR_4( lck->lk.poll ) ); - if ( ( curr != 0 ) && ( curr != gtid_code ) ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." );*/ + kmp_uint32 curr = KMP_LOCK_STRIP(TCR_4(lck->lk.poll)); + if ((curr != 0) && (curr != gtid_code)) + __kmp_printf("LOCK CONTENTION: %p\n", lck); +/* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ - KMP_FSYNC_PREPARE( lck ); - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid ) ); - - kmp_int32 poll_val; - - while ( ( poll_val = KMP_COMPARE_AND_STORE_RET32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), - KMP_LOCK_BUSY(gtid_code, futex) ) ) != KMP_LOCK_FREE(futex) ) { - - kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", - lck, gtid, poll_val, cond ) ); - - // - // NOTE: if you try to use the following condition for this branch - // - // if ( poll_val & 1 == 0 ) - // - // Then the 12.0 compiler has a bug where the following block will - // always be skipped, regardless of the value of the LSB of poll_val. - // - if ( ! cond ) { - // - // Try to set the lsb in the poll to indicate to the owner - // thread that they need to wake this thread up. - // - if ( ! KMP_COMPARE_AND_STORE_REL32( & ( lck->lk.poll ), poll_val, poll_val | KMP_LOCK_BUSY(1, futex) ) ) { - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", - lck, lck->lk.poll, gtid ) ); - continue; - } - poll_val |= KMP_LOCK_BUSY(1, futex); - - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", - lck, lck->lk.poll, gtid ) ); - } - - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", - lck, gtid, poll_val ) ); + KMP_FSYNC_PREPARE(lck); + KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d entering\n", + lck, lck->lk.poll, gtid)); - kmp_int32 rc; - if ( ( rc = syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAIT, - poll_val, NULL, NULL, 0 ) ) != 0 ) { - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) failed (rc=%d errno=%d)\n", - lck, gtid, poll_val, rc, errno ) ); - continue; - } + kmp_int32 poll_val; - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", - lck, gtid, poll_val ) ); - // - // This thread has now done a successful futex wait call and was - // entered on the OS futex queue. We must now perform a futex - // wake call when releasing the lock, as we have no idea how many - // other threads are in the queue. 
- // - gtid_code |= 1; - } + while ((poll_val = KMP_COMPARE_AND_STORE_RET32( + &(lck->lk.poll), KMP_LOCK_FREE(futex), + KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { - KMP_FSYNC_ACQUIRED( lck ); - KA_TRACE( 1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", - lck, lck->lk.poll, gtid ) ); - return KMP_LOCK_ACQUIRED_FIRST; -} + kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; + KA_TRACE( + 1000, + ("__kmp_acquire_futex_lock: lck:%p, T#%d poll_val = 0x%x cond = 0x%x\n", + lck, gtid, poll_val, cond)); -int -__kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - int retval = __kmp_acquire_futex_lock_timed_template( lck, gtid ); + // NOTE: if you try to use the following condition for this branch + // + // if ( poll_val & 1 == 0 ) + // + // Then the 12.0 compiler has a bug where the following block will + // always be skipped, regardless of the value of the LSB of poll_val. + if (!cond) { + // Try to set the lsb in the poll to indicate to the owner + // thread that they need to wake this thread up. + if (!KMP_COMPARE_AND_STORE_REL32(&(lck->lk.poll), poll_val, + poll_val | KMP_LOCK_BUSY(1, futex))) { + KA_TRACE( + 1000, + ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d can't set bit 0\n", + lck, lck->lk.poll, gtid)); + continue; + } + poll_val |= KMP_LOCK_BUSY(1, futex); + + KA_TRACE(1000, + ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d bit 0 set\n", lck, + lck->lk.poll, gtid)); + } + + KA_TRACE( + 1000, + ("__kmp_acquire_futex_lock: lck:%p, T#%d before futex_wait(0x%x)\n", + lck, gtid, poll_val)); + + kmp_int32 rc; + if ((rc = syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAIT, poll_val, NULL, + NULL, 0)) != 0) { + KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p, T#%d futex_wait(0x%x) " + "failed (rc=%d errno=%d)\n", + lck, gtid, poll_val, rc, errno)); + continue; + } + + KA_TRACE(1000, + ("__kmp_acquire_futex_lock: lck:%p, T#%d after futex_wait(0x%x)\n", + lck, gtid, poll_val)); + // This thread has now done a successful futex wait call and was entered on + // the OS futex queue. We must now perform a futex wake call when releasing + // the lock, as we have no idea how many other threads are in the queue. 
+ gtid_code |= 1; + } + + KMP_FSYNC_ACQUIRED(lck); + KA_TRACE(1000, ("__kmp_acquire_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, + lck->lk.poll, gtid)); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + int retval = __kmp_acquire_futex_lock_timed_template(lck, gtid); ANNOTATE_FUTEX_ACQUIRED(lck); return retval; } -static int -__kmp_acquire_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - return __kmp_acquire_futex_lock( lck, gtid ); +static int __kmp_acquire_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) == gtid)) { + KMP_FATAL(LockIsAlreadyOwned, func); + } + return __kmp_acquire_futex_lock(lck, gtid); } -int -__kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - if ( KMP_COMPARE_AND_STORE_ACQ32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex), KMP_LOCK_BUSY((gtid+1) << 1, futex) ) ) { - KMP_FSYNC_ACQUIRED( lck ); - return TRUE; - } - return FALSE; +int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + if (KMP_COMPARE_AND_STORE_ACQ32(&(lck->lk.poll), KMP_LOCK_FREE(futex), + KMP_LOCK_BUSY((gtid + 1) << 1, futex))) { + KMP_FSYNC_ACQUIRED(lck); + return TRUE; + } + return FALSE; } -static int -__kmp_test_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - return __kmp_test_futex_lock( lck, gtid ); +static int __kmp_test_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + return __kmp_test_futex_lock(lck, gtid); } -int -__kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_MB(); /* Flush all pending memory write invalidates. */ +int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", - lck, lck->lk.poll, gtid ) ); + KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d entering\n", + lck, lck->lk.poll, gtid)); - KMP_FSYNC_RELEASING(lck); - ANNOTATE_FUTEX_RELEASED(lck); + KMP_FSYNC_RELEASING(lck); + ANNOTATE_FUTEX_RELEASED(lck); - kmp_int32 poll_val = KMP_XCHG_FIXED32( & ( lck->lk.poll ), KMP_LOCK_FREE(futex) ); + kmp_int32 poll_val = KMP_XCHG_FIXED32(&(lck->lk.poll), KMP_LOCK_FREE(futex)); - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", - lck, gtid, poll_val ) ); + KA_TRACE(1000, + ("__kmp_release_futex_lock: lck:%p, T#%d released poll_val = 0x%x\n", + lck, gtid, poll_val)); - if ( KMP_LOCK_STRIP(poll_val) & 1 ) { - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", - lck, gtid ) ); - syscall( __NR_futex, & ( lck->lk.poll ), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), NULL, NULL, 0 ); - } + if (KMP_LOCK_STRIP(poll_val) & 1) { + KA_TRACE(1000, + ("__kmp_release_futex_lock: lck:%p, T#%d futex_wake 1 thread\n", + lck, gtid)); + syscall(__NR_futex, &(lck->lk.poll), FUTEX_WAKE, KMP_LOCK_BUSY(1, futex), + NULL, NULL, 0); + } - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", - lck, lck->lk.poll, gtid ) ); + KA_TRACE(1000, ("__kmp_release_futex_lock: lck:%p(0x%x), T#%d exiting\n", lck, + lck->lk.poll, gtid)); - KMP_YIELD( TCR_4( __kmp_nth ) > ( __kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ); - return KMP_LOCK_RELEASED; + KMP_YIELD(TCR_4(__kmp_nth) > + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); + return KMP_LOCK_RELEASED; } -static int -__kmp_release_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_futex_lock_owner( lck ) >= 0 ) - && ( __kmp_get_futex_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_futex_lock( lck, gtid ); +static int __kmp_release_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_futex_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if ((gtid >= 0) && (__kmp_get_futex_lock_owner(lck) >= 0) && + (__kmp_get_futex_lock_owner(lck) != gtid)) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_futex_lock(lck, gtid); } -void -__kmp_init_futex_lock( kmp_futex_lock_t * lck ) -{ - TCW_4( lck->lk.poll, KMP_LOCK_FREE(futex) ); +void __kmp_init_futex_lock(kmp_futex_lock_t *lck) { + TCW_4(lck->lk.poll, KMP_LOCK_FREE(futex)); } -static void -__kmp_init_futex_lock_with_checks( kmp_futex_lock_t * lck ) -{ - __kmp_init_futex_lock( lck ); +static void __kmp_init_futex_lock_with_checks(kmp_futex_lock_t *lck) { + __kmp_init_futex_lock(lck); } -void -__kmp_destroy_futex_lock( 
kmp_futex_lock_t *lck ) -{ - lck->lk.poll = 0; -} +void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck) { lck->lk.poll = 0; } -static void -__kmp_destroy_futex_lock_with_checks( kmp_futex_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( ( sizeof ( kmp_futex_lock_t ) <= OMP_LOCK_T_SIZE ) - && __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_futex_lock( lck ); +static void __kmp_destroy_futex_lock_with_checks(kmp_futex_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + if ((sizeof(kmp_futex_lock_t) <= OMP_LOCK_T_SIZE) && + __kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_futex_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_futex_lock(lck); } - -// // nested futex locks -// -int -__kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_futex_lock_timed_template( lck, gtid ); - ANNOTATE_FUTEX_ACQUIRED(lck); - lck->lk.depth_locked = 1; - return KMP_LOCK_ACQUIRED_FIRST; - } + if (__kmp_get_futex_lock_owner(lck) == gtid) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } else { + __kmp_acquire_futex_lock_timed_template(lck, gtid); + ANNOTATE_FUTEX_ACQUIRED(lck); + lck->lk.depth_locked = 1; + return KMP_LOCK_ACQUIRED_FIRST; + } } -static int -__kmp_acquire_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( ! __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_futex_lock( lck, gtid ); +static int __kmp_acquire_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_nest_lock"; + if (!__kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_acquire_nested_futex_lock(lck, gtid); } -int -__kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - int retval; +int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + int retval; - KMP_DEBUG_ASSERT( gtid >= 0 ); + KMP_DEBUG_ASSERT(gtid >= 0); - if ( __kmp_get_futex_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_futex_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - } - return retval; + if (__kmp_get_futex_lock_owner(lck) == gtid) { + retval = ++lck->lk.depth_locked; + } else if (!__kmp_test_futex_lock(lck, gtid)) { + retval = 0; + } else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + } + return retval; } -static int -__kmp_test_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_futex_lock( lck, gtid ); +static int __kmp_test_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_nest_lock"; + if (!__kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_test_nested_futex_lock(lck, gtid); } -int -__kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - __kmp_release_futex_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; + KMP_MB(); + if (--(lck->lk.depth_locked) == 0) { + __kmp_release_futex_lock(lck, gtid); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; } -static int -__kmp_release_nested_futex_lock_with_checks( kmp_futex_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( ! __kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_futex_lock( lck, gtid ); +static int __kmp_release_nested_futex_lock_with_checks(kmp_futex_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (!__kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_futex_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_futex_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_nested_futex_lock(lck, gtid); } -void -__kmp_init_nested_futex_lock( kmp_futex_lock_t * lck ) -{ - __kmp_init_futex_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck) { + __kmp_init_futex_lock(lck); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } -static void -__kmp_init_nested_futex_lock_with_checks( kmp_futex_lock_t * lck ) -{ - __kmp_init_nested_futex_lock( lck ); +static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { + __kmp_init_nested_futex_lock(lck); } -void -__kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ) -{ - __kmp_destroy_futex_lock( lck ); - lck->lk.depth_locked = 0; +void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck) { + __kmp_destroy_futex_lock(lck); + lck->lk.depth_locked = 0; } -static void -__kmp_destroy_nested_futex_lock_with_checks( kmp_futex_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( ! 
__kmp_is_futex_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_futex_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_futex_lock( lck ); +static void __kmp_destroy_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { + char const *const func = "omp_destroy_nest_lock"; + if (!__kmp_is_futex_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_futex_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_nested_futex_lock(lck); } #endif // KMP_USE_FUTEX - /* ------------------------------------------------------------------------ */ /* ticket (bakery) locks */ -static kmp_int32 -__kmp_get_ticket_lock_owner( kmp_ticket_lock_t *lck ) -{ - return std::atomic_load_explicit( &lck->lk.owner_id, std::memory_order_relaxed ) - 1; +static kmp_int32 __kmp_get_ticket_lock_owner(kmp_ticket_lock_t *lck) { + return std::atomic_load_explicit(&lck->lk.owner_id, + std::memory_order_relaxed) - + 1; } -static inline bool -__kmp_is_ticket_lock_nestable( kmp_ticket_lock_t *lck ) -{ - return std::atomic_load_explicit( &lck->lk.depth_locked, std::memory_order_relaxed ) != -1; +static inline bool __kmp_is_ticket_lock_nestable(kmp_ticket_lock_t *lck) { + return std::atomic_load_explicit(&lck->lk.depth_locked, + std::memory_order_relaxed) != -1; } -static kmp_uint32 -__kmp_bakery_check( void *now_serving, kmp_uint32 my_ticket ) -{ - return std::atomic_load_explicit( (std::atomic *)now_serving, std::memory_order_acquire ) == my_ticket; +static kmp_uint32 __kmp_bakery_check(void *now_serving, kmp_uint32 my_ticket) { + return std::atomic_load_explicit((std::atomic *)now_serving, + std::memory_order_acquire) == my_ticket; } __forceinline static int -__kmp_acquire_ticket_lock_timed_template( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( &lck->lk.next_ticket, 1U, std::memory_order_relaxed ); +__kmp_acquire_ticket_lock_timed_template(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + kmp_uint32 my_ticket = std::atomic_fetch_add_explicit( + &lck->lk.next_ticket, 1U, std::memory_order_relaxed); #ifdef USE_LOCK_PROFILE - if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) != my_ticket ) - __kmp_printf( "LOCK CONTENTION: %p\n", lck ); - /* else __kmp_printf( "." );*/ + if (std::atomic_load_explicit(&lck->lk.now_serving, + std::memory_order_relaxed) != my_ticket) + __kmp_printf("LOCK CONTENTION: %p\n", lck); +/* else __kmp_printf( "." 
);*/ #endif /* USE_LOCK_PROFILE */ - if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_acquire ) == my_ticket ) { - return KMP_LOCK_ACQUIRED_FIRST; - } - KMP_WAIT_YIELD_PTR( &lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck ); + if (std::atomic_load_explicit(&lck->lk.now_serving, + std::memory_order_acquire) == my_ticket) { return KMP_LOCK_ACQUIRED_FIRST; + } + KMP_WAIT_YIELD_PTR(&lck->lk.now_serving, my_ticket, __kmp_bakery_check, lck); + return KMP_LOCK_ACQUIRED_FIRST; } -int -__kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - int retval = __kmp_acquire_ticket_lock_timed_template( lck, gtid ); +int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + int retval = __kmp_acquire_ticket_lock_timed_template(lck, gtid); ANNOTATE_TICKET_ACQUIRED(lck); return retval; } -static int -__kmp_acquire_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_ticket_lock( lck, gtid ); - - std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); - return KMP_LOCK_ACQUIRED_FIRST; -} - -int -__kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 my_ticket = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ); - - if ( std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ) == my_ticket ) { - kmp_uint32 next_ticket = my_ticket + 1; - if ( std::atomic_compare_exchange_strong_explicit( &lck->lk.next_ticket, - &my_ticket, next_ticket, std::memory_order_acquire, std::memory_order_acquire )) { - return TRUE; - } - } - return FALSE; -} - -static int -__kmp_test_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - - int retval = __kmp_test_ticket_lock( lck, gtid ); - - if ( retval ) { - std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); - } - return retval; -} - -int -__kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 distance = std::atomic_load_explicit( &lck->lk.next_ticket, std::memory_order_relaxed ) - std::atomic_load_explicit( &lck->lk.now_serving, std::memory_order_relaxed ); - - ANNOTATE_TICKET_RELEASED(lck); - std::atomic_fetch_add_explicit( &lck->lk.now_serving, 1U, std::memory_order_release ); - - KMP_YIELD( distance - > (kmp_uint32) (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - - if ( ! 
std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_ticket_lock_owner( lck ) >= 0 ) - && ( __kmp_get_ticket_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); - return __kmp_release_ticket_lock( lck, gtid ); -} - -void -__kmp_init_ticket_lock( kmp_ticket_lock_t * lck ) -{ - lck->lk.location = NULL; - lck->lk.self = lck; - std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); // no thread owns the lock. - std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); // -1 => not a nested lock. - std::atomic_store_explicit( &lck->lk.initialized, true, std::memory_order_release ); -} - -static void -__kmp_init_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) -{ - __kmp_init_ticket_lock( lck ); -} - -void -__kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ) -{ - std::atomic_store_explicit( &lck->lk.initialized, false, std::memory_order_release ); - lck->lk.self = NULL; - lck->lk.location = NULL; - std::atomic_store_explicit( &lck->lk.next_ticket, 0U, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.now_serving, 0U, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ); +static int __kmp_acquire_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) == gtid)) { + KMP_FATAL(LockIsAlreadyOwned, func); + } + + __kmp_acquire_ticket_lock(lck, gtid); + + std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, + std::memory_order_relaxed); + return KMP_LOCK_ACQUIRED_FIRST; +} + +int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + kmp_uint32 my_ticket = std::atomic_load_explicit(&lck->lk.next_ticket, + std::memory_order_relaxed); + + if (std::atomic_load_explicit(&lck->lk.now_serving, + std::memory_order_relaxed) == my_ticket) { + kmp_uint32 next_ticket = my_ticket + 1; + if (std::atomic_compare_exchange_strong_explicit( + &lck->lk.next_ticket, &my_ticket, next_ticket, + std::memory_order_acquire, std::memory_order_acquire)) { + return TRUE; + } + } + return FALSE; +} + +static int __kmp_test_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if 
(__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + + int retval = __kmp_test_ticket_lock(lck, gtid); + + if (retval) { + std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, + std::memory_order_relaxed); + } + return retval; } -static void -__kmp_destroy_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_ticket_lock( lck ); +int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + kmp_uint32 distance = std::atomic_load_explicit(&lck->lk.next_ticket, + std::memory_order_relaxed) - + std::atomic_load_explicit(&lck->lk.now_serving, + std::memory_order_relaxed); + + ANNOTATE_TICKET_RELEASED(lck); + std::atomic_fetch_add_explicit(&lck->lk.now_serving, 1U, + std::memory_order_release); + + KMP_YIELD(distance > + (kmp_uint32)(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); + return KMP_LOCK_RELEASED; +} + +static int __kmp_release_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_ticket_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if ((gtid >= 0) && (__kmp_get_ticket_lock_owner(lck) >= 0) && + (__kmp_get_ticket_lock_owner(lck) != gtid)) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); + return __kmp_release_ticket_lock(lck, gtid); +} + +void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck) { + lck->lk.location = NULL; + lck->lk.self = lck; + std::atomic_store_explicit(&lck->lk.next_ticket, 0U, + std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.now_serving, 0U, + std::memory_order_relaxed); + std::atomic_store_explicit( + &lck->lk.owner_id, 0, + std::memory_order_relaxed); // no thread owns the lock. + std::atomic_store_explicit( + &lck->lk.depth_locked, -1, + std::memory_order_relaxed); // -1 => not a nested lock. 
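[Editorial note on the ticket-lock protocol being reformatted above: the acquire and release paths reduce to two counters. A thread takes a ticket with a relaxed fetch_add on next_ticket, spins with acquire loads until now_serving equals its ticket, and releases by bumping now_serving with release ordering. The standalone sketch below restates that protocol with plain std::atomic under those assumptions; TicketLockSketch and its member names are illustrative only and are not runtime types.]

#include <atomic>
#include <cstdint>

struct TicketLockSketch {
  std::atomic<std::uint32_t> next_ticket{0}; // cf. lk.next_ticket
  std::atomic<std::uint32_t> now_serving{0}; // cf. lk.now_serving

  void lock() {
    // Take a ticket; relaxed is enough because the acquire happens on the
    // now_serving load below.
    std::uint32_t my_ticket =
        next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (now_serving.load(std::memory_order_acquire) != my_ticket) {
      // Busy-wait; the runtime instead backs off via KMP_WAIT_YIELD_PTR.
    }
  }

  void unlock() {
    // Hand the lock to the next ticket holder; release ordering publishes
    // the critical section's writes to it.
    now_serving.fetch_add(1, std::memory_order_release);
  }
};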
+ std::atomic_store_explicit(&lck->lk.initialized, true, + std::memory_order_release); +} + +static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { + __kmp_init_ticket_lock(lck); +} + +void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck) { + std::atomic_store_explicit(&lck->lk.initialized, false, + std::memory_order_release); + lck->lk.self = NULL; + lck->lk.location = NULL; + std::atomic_store_explicit(&lck->lk.next_ticket, 0U, + std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.now_serving, 0U, + std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.depth_locked, -1, + std::memory_order_relaxed); +} + +static void __kmp_destroy_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_ticket_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_ticket_lock(lck); } - -// // nested ticket locks -// - -int -__kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { - std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_ticket_lock_timed_template( lck, gtid ); - ANNOTATE_TICKET_ACQUIRED(lck); - std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static int -__kmp_acquire_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_ticket_lock( lck, gtid ); -} - -int -__kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - if ( __kmp_get_ticket_lock_owner( lck ) == gtid ) { - retval = std::atomic_fetch_add_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ) + 1; - } - else if ( !__kmp_test_ticket_lock( lck, gtid ) ) { - retval = 0; - } - else { - std::atomic_store_explicit( &lck->lk.depth_locked, 1, std::memory_order_relaxed ); - std::atomic_store_explicit( &lck->lk.owner_id, gtid + 1, std::memory_order_relaxed ); - retval = 1; - } - return retval; +int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); + + if (__kmp_get_ticket_lock_owner(lck) == gtid) { + std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, + std::memory_order_relaxed); + return KMP_LOCK_ACQUIRED_NEXT; + } else { + __kmp_acquire_ticket_lock_timed_template(lck, gtid); + ANNOTATE_TICKET_ACQUIRED(lck); + std::atomic_store_explicit(&lck->lk.depth_locked, 1, + std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, + std::memory_order_relaxed); + return KMP_LOCK_ACQUIRED_FIRST; + } +} + +static int __kmp_acquire_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_nest_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_acquire_nested_ticket_lock(lck, gtid); +} + +int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + int retval; + + KMP_DEBUG_ASSERT(gtid >= 0); + + if (__kmp_get_ticket_lock_owner(lck) == gtid) { + retval = std::atomic_fetch_add_explicit(&lck->lk.depth_locked, 1, + std::memory_order_relaxed) + + 1; + } else if (!__kmp_test_ticket_lock(lck, gtid)) { + retval = 0; + } else { + std::atomic_store_explicit(&lck->lk.depth_locked, 1, + std::memory_order_relaxed); + std::atomic_store_explicit(&lck->lk.owner_id, gtid + 1, + std::memory_order_relaxed); + retval = 1; + } + return retval; } -static int -__kmp_test_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; +static int __kmp_test_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_nest_lock"; - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_ticket_lock( lck, gtid ); + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_test_nested_ticket_lock(lck, gtid); } -int -__kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - if ( ( std::atomic_fetch_add_explicit( &lck->lk.depth_locked, -1, std::memory_order_relaxed ) - 1 ) == 0 ) { - std::atomic_store_explicit( &lck->lk.owner_id, 0, std::memory_order_relaxed ); - __kmp_release_ticket_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; + if ((std::atomic_fetch_add_explicit(&lck->lk.depth_locked, -1, + std::memory_order_relaxed) - + 1) == 0) { + std::atomic_store_explicit(&lck->lk.owner_id, 0, std::memory_order_relaxed); + __kmp_release_ticket_lock(lck, gtid); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; } -static int -__kmp_release_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; +static int __kmp_release_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_nest_lock"; - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_ticket_lock( lck, gtid ); + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_ticket_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_ticket_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_nested_ticket_lock(lck, gtid); } -void -__kmp_init_nested_ticket_lock( kmp_ticket_lock_t * lck ) -{ - __kmp_init_ticket_lock( lck ); - std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); // >= 0 for nestable locks, -1 for simple locks +void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck) { + __kmp_init_ticket_lock(lck); + std::atomic_store_explicit(&lck->lk.depth_locked, 0, + std::memory_order_relaxed); // >= 0 for nestable + // locks, -1 for simple + // locks } -static void -__kmp_init_nested_ticket_lock_with_checks( kmp_ticket_lock_t * lck ) -{ - __kmp_init_nested_ticket_lock( lck ); +static void __kmp_init_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { + __kmp_init_nested_ticket_lock(lck); } -void -__kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); - std::atomic_store_explicit( &lck->lk.depth_locked, 0, std::memory_order_relaxed ); +void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck) { + __kmp_destroy_ticket_lock(lck); + std::atomic_store_explicit(&lck->lk.depth_locked, 0, + std::memory_order_relaxed); } static void -__kmp_destroy_nested_ticket_lock_with_checks( kmp_ticket_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - - if ( ! std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( lck->lk.self != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_ticket_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_ticket_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_ticket_lock( lck ); +__kmp_destroy_nested_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { + char const *const func = "omp_destroy_nest_lock"; + + if (!std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (lck->lk.self != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_ticket_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_ticket_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_nested_ticket_lock(lck); } - -// // access functions to fields which don't exist for all lock kinds. 
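[Editorial note on the nested ticket-lock routines above: they layer an owner id and a depth counter on top of the simple lock. A re-acquisition by the owning thread only increments the depth, the underlying lock is released only when the depth falls back to zero, and the *_with_checks variants verify ownership first. The sketch below shows that bookkeeping in isolation; NestableLockSketch, its std::mutex stand-in for the simple lock, and the gtid parameter are illustrative assumptions, not runtime types.]

#include <atomic>
#include <cassert>
#include <mutex>

struct NestableLockSketch {
  std::mutex inner;           // stands in for the underlying simple lock
  std::atomic<int> owner{-1}; // -1 means no owner (cf. owner_id - 1 above)
  int depth = 0;              // cf. lk.depth_locked

  void lock(int gtid) {
    if (owner.load(std::memory_order_relaxed) == gtid) {
      ++depth; // re-acquisition by the owner: just go one level deeper
      return;
    }
    inner.lock();
    owner.store(gtid, std::memory_order_relaxed);
    depth = 1;
  }

  void unlock(int gtid) {
    // Mirrors the LockUnsettingSetByAnother consistency check.
    assert(owner.load(std::memory_order_relaxed) == gtid &&
           "unset by a thread that does not own the lock");
    if (--depth == 0) { // outermost release: really give the lock up
      owner.store(-1, std::memory_order_relaxed);
      inner.unlock();
    }
  }
};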
-// -static int -__kmp_is_ticket_lock_initialized( kmp_ticket_lock_t *lck ) -{ - return std::atomic_load_explicit( &lck->lk.initialized, std::memory_order_relaxed ) && ( lck->lk.self == lck); +static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { + return std::atomic_load_explicit(&lck->lk.initialized, + std::memory_order_relaxed) && + (lck->lk.self == lck); } -static const ident_t * -__kmp_get_ticket_lock_location( kmp_ticket_lock_t *lck ) -{ - return lck->lk.location; +static const ident_t *__kmp_get_ticket_lock_location(kmp_ticket_lock_t *lck) { + return lck->lk.location; } -static void -__kmp_set_ticket_lock_location( kmp_ticket_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; +static void __kmp_set_ticket_lock_location(kmp_ticket_lock_t *lck, + const ident_t *loc) { + lck->lk.location = loc; } -static kmp_lock_flags_t -__kmp_get_ticket_lock_flags( kmp_ticket_lock_t *lck ) -{ - return lck->lk.flags; +static kmp_lock_flags_t __kmp_get_ticket_lock_flags(kmp_ticket_lock_t *lck) { + return lck->lk.flags; } -static void -__kmp_set_ticket_lock_flags( kmp_ticket_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; +static void __kmp_set_ticket_lock_flags(kmp_ticket_lock_t *lck, + kmp_lock_flags_t flags) { + lck->lk.flags = flags; } /* ------------------------------------------------------------------------ */ /* queuing locks */ -/* - * First the states - * (head,tail) = 0, 0 means lock is unheld, nobody on queue - * UINT_MAX or -1, 0 means lock is held, nobody on queue - * h, h means lock is held or about to transition, 1 element on queue - * h, t h <> t, means lock is held or about to transition, >1 elements on queue - * - * Now the transitions - * Acquire(0,0) = -1 ,0 - * Release(0,0) = Error - * Acquire(-1,0) = h ,h h > 0 - * Release(-1,0) = 0 ,0 - * Acquire(h,h) = h ,t h > 0, t > 0, h <> t - * Release(h,h) = -1 ,0 h > 0 - * Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' - * Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t - * - * And pictorially - * - * - * +-----+ - * | 0, 0|------- release -------> Error - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * |-1, 0| - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * | h, h| - * +-----+ - * | ^ - * acquire| |release - * | | - * | | - * v | - * +-----+ - * | h, t|----- acquire, release loopback ---+ - * +-----+ | - * ^ | - * | | - * +------------------------------------+ - * +/* First the states + (head,tail) = 0, 0 means lock is unheld, nobody on queue + UINT_MAX or -1, 0 means lock is held, nobody on queue + h, h means lock held or about to transition, + 1 element on queue + h, t h <> t, means lock is held or about to + transition, >1 elements on queue + + Now the transitions + Acquire(0,0) = -1 ,0 + Release(0,0) = Error + Acquire(-1,0) = h ,h h > 0 + Release(-1,0) = 0 ,0 + Acquire(h,h) = h ,t h > 0, t > 0, h <> t + Release(h,h) = -1 ,0 h > 0 + Acquire(h,t) = h ,t' h > 0, t > 0, t' > 0, h <> t, h <> t', t <> t' + Release(h,t) = h',t h > 0, t > 0, h <> t, h <> h', h' maybe = t + + And pictorially + + +-----+ + | 0, 0|------- release -------> Error + +-----+ + | ^ + acquire| |release + | | + | | + v | + +-----+ + |-1, 0| + +-----+ + | ^ + acquire| |release + | | + | | + v | + +-----+ + | h, h| + +-----+ + | ^ + acquire| |release + | | + | | + v | + +-----+ + | h, t|----- acquire, release loopback ---+ + +-----+ | + ^ | + | | + +------------------------------------+ */ #ifdef DEBUG_QUEUING_LOCKS /* Stuff 
for circular trace buffer */ -#define TRACE_BUF_ELE 1024 -static char traces[TRACE_BUF_ELE][128] = { 0 } +#define TRACE_BUF_ELE 1024 +static char traces[TRACE_BUF_ELE][128] = {0}; static int tc = 0; -#define TRACE_LOCK(X,Y) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y ); -#define TRACE_LOCK_T(X,Y,Z) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X,Y,Z ); -#define TRACE_LOCK_HT(X,Y,Z,Q) KMP_SNPRINTF( traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, Z, Q ); - -static void -__kmp_dump_queuing_lock( kmp_info_t *this_thr, kmp_int32 gtid, - kmp_queuing_lock_t *lck, kmp_int32 head_id, kmp_int32 tail_id ) -{ - kmp_int32 t, i; - - __kmp_printf_no_lock( "\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! \n" ); - - i = tc % TRACE_BUF_ELE; - __kmp_printf_no_lock( "%s\n", traces[i] ); - i = (i+1) % TRACE_BUF_ELE; - while ( i != (tc % TRACE_BUF_ELE) ) { - __kmp_printf_no_lock( "%s", traces[i] ); - i = (i+1) % TRACE_BUF_ELE; - } - __kmp_printf_no_lock( "\n" ); - - __kmp_printf_no_lock( - "\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, next_wait:%d, head_id:%d, tail_id:%d\n", - gtid+1, this_thr->th.th_spin_here, this_thr->th.th_next_waiting, - head_id, tail_id ); - - __kmp_printf_no_lock( "\t\thead: %d ", lck->lk.head_id ); - - if ( lck->lk.head_id >= 1 ) { - t = __kmp_threads[lck->lk.head_id-1]->th.th_next_waiting; - while (t > 0) { - __kmp_printf_no_lock( "-> %d ", t ); - t = __kmp_threads[t-1]->th.th_next_waiting; - } - } - __kmp_printf_no_lock( "; tail: %d ", lck->lk.tail_id ); - __kmp_printf_no_lock( "\n\n" ); +#define TRACE_LOCK(X, Y) \ + KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s\n", X, Y); +#define TRACE_LOCK_T(X, Y, Z) \ + KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s%d\n", X, Y, Z); +#define TRACE_LOCK_HT(X, Y, Z, Q) \ + KMP_SNPRINTF(traces[tc++ % TRACE_BUF_ELE], 128, "t%d at %s %d,%d\n", X, Y, \ + Z, Q); + +static void __kmp_dump_queuing_lock(kmp_info_t *this_thr, kmp_int32 gtid, + kmp_queuing_lock_t *lck, kmp_int32 head_id, + kmp_int32 tail_id) { + kmp_int32 t, i; + + __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: TRACE BEGINS HERE! 
\n"); + + i = tc % TRACE_BUF_ELE; + __kmp_printf_no_lock("%s\n", traces[i]); + i = (i + 1) % TRACE_BUF_ELE; + while (i != (tc % TRACE_BUF_ELE)) { + __kmp_printf_no_lock("%s", traces[i]); + i = (i + 1) % TRACE_BUF_ELE; + } + __kmp_printf_no_lock("\n"); + + __kmp_printf_no_lock("\n__kmp_dump_queuing_lock: gtid+1:%d, spin_here:%d, " + "next_wait:%d, head_id:%d, tail_id:%d\n", + gtid + 1, this_thr->th.th_spin_here, + this_thr->th.th_next_waiting, head_id, tail_id); + + __kmp_printf_no_lock("\t\thead: %d ", lck->lk.head_id); + + if (lck->lk.head_id >= 1) { + t = __kmp_threads[lck->lk.head_id - 1]->th.th_next_waiting; + while (t > 0) { + __kmp_printf_no_lock("-> %d ", t); + t = __kmp_threads[t - 1]->th.th_next_waiting; + } + } + __kmp_printf_no_lock("; tail: %d ", lck->lk.tail_id); + __kmp_printf_no_lock("\n\n"); } #endif /* DEBUG_QUEUING_LOCKS */ -static kmp_int32 -__kmp_get_queuing_lock_owner( kmp_queuing_lock_t *lck ) -{ - return TCR_4( lck->lk.owner_id ) - 1; +static kmp_int32 __kmp_get_queuing_lock_owner(kmp_queuing_lock_t *lck) { + return TCR_4(lck->lk.owner_id) - 1; } -static inline bool -__kmp_is_queuing_lock_nestable( kmp_queuing_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; +static inline bool __kmp_is_queuing_lock_nestable(kmp_queuing_lock_t *lck) { + return lck->lk.depth_locked != -1; } /* Acquire a lock using a the queuing lock implementation */ template -/* [TLW] The unused template above is left behind because of what BEB believes is a - potential compiler problem with __forceinline. */ +/* [TLW] The unused template above is left behind because of what BEB believes + is a potential compiler problem with __forceinline. */ __forceinline static int -__kmp_acquire_queuing_lock_timed_template( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - register kmp_info_t *this_thr = __kmp_thread_from_gtid( gtid ); - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; - volatile kmp_uint32 *spin_here_p; - kmp_int32 need_mf = 1; +__kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + register kmp_info_t *this_thr = __kmp_thread_from_gtid(gtid); + volatile kmp_int32 *head_id_p = &lck->lk.head_id; + volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; + volatile kmp_uint32 *spin_here_p; + kmp_int32 need_mf = 1; #if OMPT_SUPPORT - ompt_state_t prev_state = ompt_state_undefined; + ompt_state_t prev_state = ompt_state_undefined; #endif - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); + KA_TRACE(1000, + ("__kmp_acquire_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); - KMP_FSYNC_PREPARE( lck ); - KMP_DEBUG_ASSERT( this_thr != NULL ); - spin_here_p = & this_thr->th.th_spin_here; + KMP_FSYNC_PREPARE(lck); + KMP_DEBUG_ASSERT(this_thr != NULL); + spin_here_p = &this_thr->th.th_spin_here; #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq ent" ); - if ( *spin_here_p ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); + TRACE_LOCK(gtid + 1, "acq ent"); + if (*spin_here_p) + __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); + if (this_thr->th.th_next_waiting != 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif - KMP_DEBUG_ASSERT( !*spin_here_p ); - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - - - /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to 
head_id_p - that may follow, not just in execution order, but also in visibility order. This way, - when a releasing thread observes the changes to the queue by this thread, it can - rightly assume that spin_here_p has already been set to TRUE, so that when it sets - spin_here_p to FALSE, it is not premature. If the releasing thread sets spin_here_p - to FALSE before this thread sets it to TRUE, this thread will hang. - */ - *spin_here_p = TRUE; /* before enqueuing to prevent race */ - - while( 1 ) { - kmp_int32 enqueued; - kmp_int32 head; - kmp_int32 tail; + KMP_DEBUG_ASSERT(!*spin_here_p); + KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); + + /* The following st.rel to spin_here_p needs to precede the cmpxchg.acq to + head_id_p that may follow, not just in execution order, but also in + visibility order. This way, when a releasing thread observes the changes to + the queue by this thread, it can rightly assume that spin_here_p has + already been set to TRUE, so that when it sets spin_here_p to FALSE, it is + not premature. If the releasing thread sets spin_here_p to FALSE before + this thread sets it to TRUE, this thread will hang. */ + *spin_here_p = TRUE; /* before enqueuing to prevent race */ + + while (1) { + kmp_int32 enqueued; + kmp_int32 head; + kmp_int32 tail; - head = *head_id_p; + head = *head_id_p; - switch ( head ) { + switch (head) { - case -1: - { + case -1: { #ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); + tail = *tail_id_p; + TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif - tail = 0; /* to make sure next link asynchronously read is not set accidentally; - this assignment prevents us from entering the if ( t > 0 ) - condition in the enqueued case below, which is not necessary for - this state transition */ - - need_mf = 0; - /* try (-1,0)->(tid,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ64( (volatile kmp_int64 *) tail_id_p, - KMP_PACK_64( -1, 0 ), - KMP_PACK_64( gtid+1, gtid+1 ) ); + tail = 0; /* to make sure next link asynchronously read is not set + accidentally; this assignment prevents us from entering the + if ( t > 0 ) condition in the enqueued case below, which is not + necessary for this state transition */ + + need_mf = 0; + /* try (-1,0)->(tid,tid) */ + enqueued = KMP_COMPARE_AND_STORE_ACQ64((volatile kmp_int64 *)tail_id_p, + KMP_PACK_64(-1, 0), + KMP_PACK_64(gtid + 1, gtid + 1)); #ifdef DEBUG_QUEUING_LOCKS - if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (-1,0)->(tid,tid)" ); + if (enqueued) + TRACE_LOCK(gtid + 1, "acq enq: (-1,0)->(tid,tid)"); #endif - } - break; + } break; - default: - { - tail = *tail_id_p; - KMP_DEBUG_ASSERT( tail != gtid + 1 ); + default: { + tail = *tail_id_p; + KMP_DEBUG_ASSERT(tail != gtid + 1); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); + TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif - if ( tail == 0 ) { - enqueued = FALSE; - } - else { - need_mf = 0; - /* try (h,t) or (h,h)->(h,tid) */ - enqueued = KMP_COMPARE_AND_STORE_ACQ32( tail_id_p, tail, gtid+1 ); + if (tail == 0) { + enqueued = FALSE; + } else { + need_mf = 0; + /* try (h,t) or (h,h)->(h,tid) */ + enqueued = KMP_COMPARE_AND_STORE_ACQ32(tail_id_p, tail, gtid + 1); #ifdef DEBUG_QUEUING_LOCKS - if ( enqueued ) TRACE_LOCK( gtid+1, "acq enq: (h,t)->(h,tid)" ); + if (enqueued) + TRACE_LOCK(gtid + 1, "acq enq: (h,t)->(h,tid)"); #endif - } - } - break; + } + } break; - case 0: /* empty queue */ - { - kmp_int32 grabbed_lock; + case 0: /* empty queue */ + { + kmp_int32 
grabbed_lock; #ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT( gtid+1, "acq read: ", head, tail ); + tail = *tail_id_p; + TRACE_LOCK_HT(gtid + 1, "acq read: ", head, tail); #endif - /* try (0,0)->(-1,0) */ + /* try (0,0)->(-1,0) */ - /* only legal transition out of head = 0 is head = -1 with no change to tail */ - grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ); + /* only legal transition out of head = 0 is head = -1 with no change to + * tail */ + grabbed_lock = KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1); - if ( grabbed_lock ) { + if (grabbed_lock) { - *spin_here_p = FALSE; + *spin_here_p = FALSE; - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", - lck, gtid )); + KA_TRACE( + 1000, + ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: no queuing\n", + lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "acq exit: ", head, 0 ); + TRACE_LOCK_HT(gtid + 1, "acq exit: ", head, 0); #endif #if OMPT_SUPPORT - if (ompt_enabled && prev_state != ompt_state_undefined) { - /* change the state before clearing wait_id */ - this_thr->th.ompt_thread_info.state = prev_state; - this_thr->th.ompt_thread_info.wait_id = 0; - } + if (ompt_enabled && prev_state != ompt_state_undefined) { + /* change the state before clearing wait_id */ + this_thr->th.ompt_thread_info.state = prev_state; + this_thr->th.ompt_thread_info.wait_id = 0; + } #endif - KMP_FSYNC_ACQUIRED( lck ); - return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ - } - enqueued = FALSE; - } - break; - } + KMP_FSYNC_ACQUIRED(lck); + return KMP_LOCK_ACQUIRED_FIRST; /* lock holder cannot be on queue */ + } + enqueued = FALSE; + } break; + } #if OMPT_SUPPORT - if (ompt_enabled && prev_state == ompt_state_undefined) { - /* this thread will spin; set wait_id before entering wait state */ - prev_state = this_thr->th.ompt_thread_info.state; - this_thr->th.ompt_thread_info.wait_id = (uint64_t) lck; - this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; - } + if (ompt_enabled && prev_state == ompt_state_undefined) { + /* this thread will spin; set wait_id before entering wait state */ + prev_state = this_thr->th.ompt_thread_info.state; + this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; + this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; + } #endif - if ( enqueued ) { - if ( tail > 0 ) { - kmp_info_t *tail_thr = __kmp_thread_from_gtid( tail - 1 ); - KMP_ASSERT( tail_thr != NULL ); - tail_thr->th.th_next_waiting = gtid+1; - /* corresponding wait for this write in release code */ - } - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", lck, gtid )); - - - /* ToDo: May want to consider using __kmp_wait_sleep or something that sleeps for - * throughput only here. - */ - KMP_MB(); - KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); + if (enqueued) { + if (tail > 0) { + kmp_info_t *tail_thr = __kmp_thread_from_gtid(tail - 1); + KMP_ASSERT(tail_thr != NULL); + tail_thr->th.th_next_waiting = gtid + 1; + /* corresponding wait for this write in release code */ + } + KA_TRACE(1000, + ("__kmp_acquire_queuing_lock: lck:%p, T#%d waiting for lock\n", + lck, gtid)); + + /* ToDo: May want to consider using __kmp_wait_sleep or something that + sleeps for throughput only here. 
*/ + KMP_MB(); + KMP_WAIT_YIELD(spin_here_p, FALSE, KMP_EQ, lck); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq spin" ); + TRACE_LOCK(gtid + 1, "acq spin"); - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); + if (this_thr->th.th_next_waiting != 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); - KA_TRACE( 1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after waiting on queue\n", - lck, gtid )); + KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); + KA_TRACE(1000, ("__kmp_acquire_queuing_lock: lck:%p, T#%d exiting: after " + "waiting on queue\n", + lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq exit 2" ); + TRACE_LOCK(gtid + 1, "acq exit 2"); #endif #if OMPT_SUPPORT - /* change the state before clearing wait_id */ - this_thr->th.ompt_thread_info.state = prev_state; - this_thr->th.ompt_thread_info.wait_id = 0; + /* change the state before clearing wait_id */ + this_thr->th.ompt_thread_info.state = prev_state; + this_thr->th.ompt_thread_info.wait_id = 0; #endif - /* got lock, we were dequeued by the thread that released lock */ - return KMP_LOCK_ACQUIRED_FIRST; - } + /* got lock, we were dequeued by the thread that released lock */ + return KMP_LOCK_ACQUIRED_FIRST; + } - /* Yield if number of threads > number of logical processors */ - /* ToDo: Not sure why this should only be in oversubscription case, - maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ - KMP_YIELD( TCR_4( __kmp_nth ) > (__kmp_avail_proc ? __kmp_avail_proc : - __kmp_xproc ) ); + /* Yield if number of threads > number of logical processors */ + /* ToDo: Not sure why this should only be in oversubscription case, + maybe should be traditional YIELD_INIT/YIELD_WHEN loop */ + KMP_YIELD(TCR_4(__kmp_nth) > + (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "acq retry" ); + TRACE_LOCK(gtid + 1, "acq retry"); #endif - - } - KMP_ASSERT2( 0, "should not get here" ); - return KMP_LOCK_ACQUIRED_FIRST; + } + KMP_ASSERT2(0, "should not get here"); + return KMP_LOCK_ACQUIRED_FIRST; } -int -__kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - int retval = __kmp_acquire_queuing_lock_timed_template( lck, gtid ); - ANNOTATE_QUEUING_ACQUIRED(lck); - return retval; + int retval = __kmp_acquire_queuing_lock_timed_template(lck, gtid); + ANNOTATE_QUEUING_ACQUIRED(lck); + return retval; } -static int -__kmp_acquire_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } +static int __kmp_acquire_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_queuing_lock_owner(lck) == gtid) { + KMP_FATAL(LockIsAlreadyOwned, func); + } - __kmp_acquire_queuing_lock( lck, gtid ); + __kmp_acquire_queuing_lock(lck, gtid); - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; } -int -__kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - kmp_int32 head; +int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + volatile kmp_int32 *head_id_p = &lck->lk.head_id; + kmp_int32 head; #ifdef KMP_DEBUG - kmp_info_t *this_thr; + kmp_info_t *this_thr; #endif - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid )); - KMP_DEBUG_ASSERT( gtid >= 0 ); + KA_TRACE(1000, ("__kmp_test_queuing_lock: T#%d entering\n", gtid)); + KMP_DEBUG_ASSERT(gtid >= 0); #ifdef KMP_DEBUG - this_thr = __kmp_thread_from_gtid( gtid ); - KMP_DEBUG_ASSERT( this_thr != NULL ); - KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); + this_thr = __kmp_thread_from_gtid(gtid); + KMP_DEBUG_ASSERT(this_thr != NULL); + KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); #endif - head = *head_id_p; - - if ( head == 0 ) { /* nobody on queue, nobody holding */ - - /* try (0,0)->(-1,0) */ + head = *head_id_p; - if ( KMP_COMPARE_AND_STORE_ACQ32( head_id_p, 0, -1 ) ) { - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid )); - KMP_FSYNC_ACQUIRED(lck); - ANNOTATE_QUEUING_ACQUIRED(lck); - return TRUE; - } + if (head == 0) { /* nobody on queue, nobody holding */ + /* try (0,0)->(-1,0) */ + if (KMP_COMPARE_AND_STORE_ACQ32(head_id_p, 0, -1)) { + KA_TRACE(1000, + ("__kmp_test_queuing_lock: T#%d exiting: holding lock\n", gtid)); + KMP_FSYNC_ACQUIRED(lck); + ANNOTATE_QUEUING_ACQUIRED(lck); + return TRUE; } + } - KA_TRACE( 1000, ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid )); - return FALSE; + KA_TRACE(1000, + ("__kmp_test_queuing_lock: T#%d exiting: without lock\n", gtid)); + return FALSE; } -static int 
-__kmp_test_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } +static int __kmp_test_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } - int retval = __kmp_test_queuing_lock( lck, gtid ); + int retval = __kmp_test_queuing_lock(lck, gtid); - if ( retval ) { - lck->lk.owner_id = gtid + 1; - } - return retval; + if (retval) { + lck->lk.owner_id = gtid + 1; + } + return retval; } -int -__kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - register kmp_info_t *this_thr; - volatile kmp_int32 *head_id_p = & lck->lk.head_id; - volatile kmp_int32 *tail_id_p = & lck->lk.tail_id; +int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + register kmp_info_t *this_thr; + volatile kmp_int32 *head_id_p = &lck->lk.head_id; + volatile kmp_int32 *tail_id_p = &lck->lk.tail_id; - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid )); - KMP_DEBUG_ASSERT( gtid >= 0 ); - this_thr = __kmp_thread_from_gtid( gtid ); - KMP_DEBUG_ASSERT( this_thr != NULL ); + KA_TRACE(1000, + ("__kmp_release_queuing_lock: lck:%p, T#%d entering\n", lck, gtid)); + KMP_DEBUG_ASSERT(gtid >= 0); + this_thr = __kmp_thread_from_gtid(gtid); + KMP_DEBUG_ASSERT(this_thr != NULL); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel ent" ); + TRACE_LOCK(gtid + 1, "rel ent"); - if ( this_thr->th.th_spin_here ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); - if ( this_thr->th.th_next_waiting != 0 ) - __kmp_dump_queuing_lock( this_thr, gtid, lck, *head_id_p, *tail_id_p ); + if (this_thr->th.th_spin_here) + __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); + if (this_thr->th.th_next_waiting != 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, *head_id_p, *tail_id_p); #endif - KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here ); - KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 ); + KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here); + KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0); - KMP_FSYNC_RELEASING(lck); - ANNOTATE_QUEUING_RELEASED(lck); + KMP_FSYNC_RELEASING(lck); + ANNOTATE_QUEUING_RELEASED(lck); - while( 1 ) { - kmp_int32 dequeued; - kmp_int32 head; - kmp_int32 tail; + while (1) { + kmp_int32 dequeued; + kmp_int32 head; + kmp_int32 tail; - head = *head_id_p; + head = *head_id_p; #ifdef DEBUG_QUEUING_LOCKS - tail = *tail_id_p; - TRACE_LOCK_HT( gtid+1, "rel read: ", head, tail ); - if ( head == 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); + tail = *tail_id_p; + TRACE_LOCK_HT(gtid + 1, "rel read: ", head, tail); + if (head == 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif - KMP_DEBUG_ASSERT( head != 0 ); /* holding the lock, head must be -1 or queue head */ - - if ( head == -1 ) { /* nobody on queue */ - - /* try (-1,0)->(0,0) */ - if ( KMP_COMPARE_AND_STORE_REL32( head_id_p, -1, 0 ) ) { - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", - lck, gtid )); + KMP_DEBUG_ASSERT(head != + 0); /* holding the lock, head must be -1 or queue head */ + + if (head == -1) { /* nobody on 
queue */ + /* try (-1,0)->(0,0) */ + if (KMP_COMPARE_AND_STORE_REL32(head_id_p, -1, 0)) { + KA_TRACE( + 1000, + ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: queue empty\n", + lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_HT( gtid+1, "rel exit: ", 0, 0 ); + TRACE_LOCK_HT(gtid + 1, "rel exit: ", 0, 0); #endif #if OMPT_SUPPORT - /* nothing to do - no other thread is trying to shift blame */ +/* nothing to do - no other thread is trying to shift blame */ #endif - - return KMP_LOCK_RELEASED; - } - dequeued = FALSE; - - } - else { - - tail = *tail_id_p; - if ( head == tail ) { /* only one thread on the queue */ - + return KMP_LOCK_RELEASED; + } + dequeued = FALSE; + } else { + tail = *tail_id_p; + if (head == tail) { /* only one thread on the queue */ #ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); + if (head <= 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif - KMP_DEBUG_ASSERT( head > 0 ); + KMP_DEBUG_ASSERT(head > 0); - /* try (h,h)->(-1,0) */ - dequeued = KMP_COMPARE_AND_STORE_REL64( (kmp_int64 *) tail_id_p, - KMP_PACK_64( head, head ), KMP_PACK_64( -1, 0 ) ); + /* try (h,h)->(-1,0) */ + dequeued = KMP_COMPARE_AND_STORE_REL64((kmp_int64 *)tail_id_p, + KMP_PACK_64(head, head), + KMP_PACK_64(-1, 0)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel deq: (h,h)->(-1,0)" ); + TRACE_LOCK(gtid + 1, "rel deq: (h,h)->(-1,0)"); #endif - } - else { - volatile kmp_int32 *waiting_id_p; - kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); - KMP_DEBUG_ASSERT( head_thr != NULL ); - waiting_id_p = & head_thr->th.th_next_waiting; + } else { + volatile kmp_int32 *waiting_id_p; + kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); + KMP_DEBUG_ASSERT(head_thr != NULL); + waiting_id_p = &head_thr->th.th_next_waiting; - /* Does this require synchronous reads? */ +/* Does this require synchronous reads? */ #ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); + if (head <= 0 || tail <= 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif - KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); + KMP_DEBUG_ASSERT(head > 0 && tail > 0); - /* try (h,t)->(h',t) or (t,t) */ - - KMP_MB(); - /* make sure enqueuing thread has time to update next waiting thread field */ - *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32*)waiting_id_p, 0, KMP_NEQ, NULL); + /* try (h,t)->(h',t) or (t,t) */ + KMP_MB(); + /* make sure enqueuing thread has time to update next waiting thread + * field */ + *head_id_p = KMP_WAIT_YIELD((volatile kmp_uint32 *)waiting_id_p, 0, + KMP_NEQ, NULL); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel deq: (h,t)->(h',t)" ); + TRACE_LOCK(gtid + 1, "rel deq: (h,t)->(h',t)"); #endif - dequeued = TRUE; - } - } + dequeued = TRUE; + } + } - if ( dequeued ) { - kmp_info_t *head_thr = __kmp_thread_from_gtid( head - 1 ); - KMP_DEBUG_ASSERT( head_thr != NULL ); + if (dequeued) { + kmp_info_t *head_thr = __kmp_thread_from_gtid(head - 1); + KMP_DEBUG_ASSERT(head_thr != NULL); - /* Does this require synchronous reads? */ +/* Does this require synchronous reads? */ #ifdef DEBUG_QUEUING_LOCKS - if ( head <= 0 || tail <= 0 ) __kmp_dump_queuing_lock( this_thr, gtid, lck, head, tail ); + if (head <= 0 || tail <= 0) + __kmp_dump_queuing_lock(this_thr, gtid, lck, head, tail); #endif - KMP_DEBUG_ASSERT( head > 0 && tail > 0 ); + KMP_DEBUG_ASSERT(head > 0 && tail > 0); - /* For clean code only. 
- * Thread not released until next statement prevents race with acquire code. - */ - head_thr->th.th_next_waiting = 0; + /* For clean code only. Thread not released until next statement prevents + race with acquire code. */ + head_thr->th.th_next_waiting = 0; #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK_T( gtid+1, "rel nw=0 for t=", head ); + TRACE_LOCK_T(gtid + 1, "rel nw=0 for t=", head); #endif - KMP_MB(); - /* reset spin value */ - head_thr->th.th_spin_here = FALSE; + KMP_MB(); + /* reset spin value */ + head_thr->th.th_spin_here = FALSE; - KA_TRACE( 1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after dequeuing\n", - lck, gtid )); + KA_TRACE(1000, ("__kmp_release_queuing_lock: lck:%p, T#%d exiting: after " + "dequeuing\n", + lck, gtid)); #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel exit 2" ); + TRACE_LOCK(gtid + 1, "rel exit 2"); #endif - return KMP_LOCK_RELEASED; - } - /* KMP_CPU_PAUSE( ); don't want to make releasing thread hold up acquiring threads */ + return KMP_LOCK_RELEASED; + } +/* KMP_CPU_PAUSE(); don't want to make releasing thread hold up acquiring + threads */ #ifdef DEBUG_QUEUING_LOCKS - TRACE_LOCK( gtid+1, "rel retry" ); + TRACE_LOCK(gtid + 1, "rel retry"); #endif - } /* while */ - KMP_ASSERT2( 0, "should not get here" ); - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - lck->lk.owner_id = 0; - return __kmp_release_queuing_lock( lck, gtid ); -} - -void -__kmp_init_queuing_lock( kmp_queuing_lock_t *lck ) -{ - lck->lk.location = NULL; - lck->lk.head_id = 0; - lck->lk.tail_id = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; // no thread owns the lock. - lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
- lck->lk.initialized = lck; - - KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); -} - -static void -__kmp_init_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) -{ - __kmp_init_queuing_lock( lck ); -} - -void -__kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ) -{ - lck->lk.initialized = NULL; - lck->lk.location = NULL; - lck->lk.head_id = 0; - lck->lk.tail_id = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; - lck->lk.depth_locked = -1; -} - -static void -__kmp_destroy_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_queuing_lock( lck ); + } /* while */ + KMP_ASSERT2(0, "should not get here"); + return KMP_LOCK_RELEASED; +} + +static int __kmp_release_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_queuing_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_queuing_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + lck->lk.owner_id = 0; + return __kmp_release_queuing_lock(lck, gtid); +} + +void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck) { + lck->lk.location = NULL; + lck->lk.head_id = 0; + lck->lk.tail_id = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; // no thread owns the lock. + lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
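[Editorial note: the queuing-lock initializer here starts from head_id = tail_id = 0, the unheld state in the (head, tail) table earlier in this file. As a worked reading of that encoding, the small hypothetical helper below labels the four states and walks the acquire path (0,0) -> (-1,0) -> (h,h) -> (h,t); QueueState and classify are illustrative names, not runtime code.]

#include <cstdio>

// head/tail hold 1-based gtids; 0 and -1 are the sentinels from the table.
enum class QueueState { Unheld, HeldNoWaiters, HeldOneWaiter, HeldManyWaiters };

static QueueState classify(int head, int tail) {
  if (head == 0)
    return QueueState::Unheld;        // (0,0): free; release would be an error
  if (head == -1)
    return QueueState::HeldNoWaiters; // (-1,0): held, nobody queued
  if (head == tail)
    return QueueState::HeldOneWaiter; // (h,h): one thread queued
  return QueueState::HeldManyWaiters; // (h,t), h != t: several threads queued
}

int main() {
  // Acquire transitions from the table: (0,0) -> (-1,0) -> (h,h) -> (h,t).
  std::printf("%d %d %d %d\n", static_cast<int>(classify(0, 0)),
              static_cast<int>(classify(-1, 0)),
              static_cast<int>(classify(7, 7)),
              static_cast<int>(classify(7, 9)));
  return 0;
}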
+ lck->lk.initialized = lck; + + KA_TRACE(1000, ("__kmp_init_queuing_lock: lock %p initialized\n", lck)); +} + +static void __kmp_init_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { + __kmp_init_queuing_lock(lck); +} + +void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck) { + lck->lk.initialized = NULL; + lck->lk.location = NULL; + lck->lk.head_id = 0; + lck->lk.tail_id = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; + lck->lk.depth_locked = -1; +} + +static void __kmp_destroy_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_queuing_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_queuing_lock(lck); } - -// // nested queuing locks -// -int -__kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_queuing_lock_timed_template( lck, gtid ); - ANNOTATE_QUEUING_ACQUIRED(lck); - KMP_MB(); - lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; - } + if (__kmp_get_queuing_lock_owner(lck) == gtid) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } else { + __kmp_acquire_queuing_lock_timed_template(lck, gtid); + ANNOTATE_QUEUING_ACQUIRED(lck); + KMP_MB(); + lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; + } } static int -__kmp_acquire_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_acquire_nested_queuing_lock( lck, gtid ); -} - -int -__kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_queuing_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_queuing_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; +__kmp_acquire_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_acquire_nested_queuing_lock(lck, gtid); +} + +int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + int retval; + + KMP_DEBUG_ASSERT(gtid >= 0); + + if (__kmp_get_queuing_lock_owner(lck) == gtid) { + retval = ++lck->lk.depth_locked; + } else if (!__kmp_test_queuing_lock(lck, gtid)) { + retval = 0; + } else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + } + return retval; } -static int -__kmp_test_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, - kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_queuing_lock( lck, gtid ); +static int __kmp_test_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_test_nested_queuing_lock(lck, gtid); } -int -__kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); +int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); + KMP_MB(); + if (--(lck->lk.depth_locked) == 0) { KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_queuing_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; + lck->lk.owner_id = 0; + __kmp_release_queuing_lock(lck, gtid); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; } static int -__kmp_release_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_queuing_lock( lck, gtid ); -} - -void -__kmp_init_nested_queuing_lock( kmp_queuing_lock_t * lck ) -{ - __kmp_init_queuing_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks +__kmp_release_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_queuing_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_queuing_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_nested_queuing_lock(lck, gtid); +} + +void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck) { + __kmp_init_queuing_lock(lck); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } static void -__kmp_init_nested_queuing_lock_with_checks( kmp_queuing_lock_t * lck ) -{ - __kmp_init_nested_queuing_lock( lck ); +__kmp_init_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { + __kmp_init_nested_queuing_lock(lck); } -void -__kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ) -{ - __kmp_destroy_queuing_lock( lck ); - lck->lk.depth_locked = 0; +void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck) { + __kmp_destroy_queuing_lock(lck); + lck->lk.depth_locked = 0; } static void -__kmp_destroy_nested_queuing_lock_with_checks( kmp_queuing_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_queuing_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_queuing_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_queuing_lock( lck ); +__kmp_destroy_nested_queuing_lock_with_checks(kmp_queuing_lock_t *lck) { + char const *const func = "omp_destroy_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_queuing_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_queuing_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_nested_queuing_lock(lck); } - -// // access functions to fields which don't exist for all lock kinds. 
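/* Aside: a minimal standalone sketch of the owner/depth bookkeeping that the
   nested (nestable) lock wrappers above layer on top of a simple lock. All
   names are illustrative stand-ins, not runtime identifiers: std::mutex plays
   the role of the underlying queuing lock and 'gtid' is a plain thread id.
   Assumes C++11; this is a simplified model, not the runtime's code. */
#include <atomic>
#include <mutex>

struct NestableLock {
  std::mutex simple;             // the underlying non-nestable lock
  std::atomic<int> owner{-1};    // gtid of the holder, -1 if unowned
  int depth = 0;                 // >= 1 while held; only the owner touches it

  void acquire(int gtid) {
    if (owner.load(std::memory_order_relaxed) == gtid) {
      ++depth;                   // re-entry by the owner: just count it
      return;
    }
    simple.lock();               // first acquisition: take the real lock
    depth = 1;
    owner.store(gtid, std::memory_order_relaxed);
  }

  bool release(int gtid) {       // returns true when the lock is fully freed
    (void)gtid;                  // a checked variant would verify ownership here
    if (--depth == 0) {
      owner.store(-1, std::memory_order_relaxed);
      simple.unlock();
      return true;               // analogous to KMP_LOCK_RELEASED
    }
    return false;                // analogous to KMP_LOCK_STILL_HELD
  }
};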
-// -static int -__kmp_is_queuing_lock_initialized( kmp_queuing_lock_t *lck ) -{ - return lck == lck->lk.initialized; +static int __kmp_is_queuing_lock_initialized(kmp_queuing_lock_t *lck) { + return lck == lck->lk.initialized; } -static const ident_t * -__kmp_get_queuing_lock_location( kmp_queuing_lock_t *lck ) -{ - return lck->lk.location; +static const ident_t *__kmp_get_queuing_lock_location(kmp_queuing_lock_t *lck) { + return lck->lk.location; } -static void -__kmp_set_queuing_lock_location( kmp_queuing_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; +static void __kmp_set_queuing_lock_location(kmp_queuing_lock_t *lck, + const ident_t *loc) { + lck->lk.location = loc; } -static kmp_lock_flags_t -__kmp_get_queuing_lock_flags( kmp_queuing_lock_t *lck ) -{ - return lck->lk.flags; +static kmp_lock_flags_t __kmp_get_queuing_lock_flags(kmp_queuing_lock_t *lck) { + return lck->lk.flags; } -static void -__kmp_set_queuing_lock_flags( kmp_queuing_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; +static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, + kmp_lock_flags_t flags) { + lck->lk.flags = flags; } #if KMP_USE_ADAPTIVE_LOCKS -/* - RTM Adaptive locks -*/ +/* RTM Adaptive locks */ #if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 #include -#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) +#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) #else // Values from the status register after failed speculation. -#define _XBEGIN_STARTED (~0u) -#define _XABORT_EXPLICIT (1 << 0) -#define _XABORT_RETRY (1 << 1) -#define _XABORT_CONFLICT (1 << 2) -#define _XABORT_CAPACITY (1 << 3) -#define _XABORT_DEBUG (1 << 4) -#define _XABORT_NESTED (1 << 5) -#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) +#define _XBEGIN_STARTED (~0u) +#define _XABORT_EXPLICIT (1 << 0) +#define _XABORT_RETRY (1 << 1) +#define _XABORT_CONFLICT (1 << 2) +#define _XABORT_CAPACITY (1 << 3) +#define _XABORT_DEBUG (1 << 4) +#define _XABORT_NESTED (1 << 5) +#define _XABORT_CODE(x) ((unsigned char)(((x) >> 24) & 0xFF)) // Aborts for which it's worth trying again immediately -#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) +#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) #define STRINGIZE_INTERNAL(arg) #arg #define STRINGIZE(arg) STRINGIZE_INTERNAL(arg) // Access to RTM instructions - -/* - A version of XBegin which returns -1 on speculation, and the value of EAX on an abort. - This is the same definition as the compiler intrinsic that will be supported at some point. -*/ -static __inline int _xbegin() -{ - int res = -1; +/*A version of XBegin which returns -1 on speculation, and the value of EAX on + an abort. This is the same definition as the compiler intrinsic that will be + supported at some point. */ +static __inline int _xbegin() { + int res = -1; #if KMP_OS_WINDOWS #if KMP_ARCH_X86_64 - _asm { + _asm { _emit 0xC7 _emit 0xF8 _emit 2 @@ -1955,9 +1800,9 @@ static __inline int _xbegin() jmp L2 mov res, eax L2: - } + } #else /* IA32 */ - _asm { + _asm { _emit 0xC7 _emit 0xF8 _emit 2 @@ -1967,68 +1812,58 @@ static __inline int _xbegin() jmp L2 mov res, eax L2: - } + } #endif // KMP_ARCH_X86_64 #else - /* Note that %eax must be noted as killed (clobbered), because - * the XSR is returned in %eax(%rax) on abort. Other register - * values are restored, so don't need to be killed. 
- * - * We must also mark 'res' as an input and an output, since otherwise - * 'res=-1' may be dropped as being dead, whereas we do need the - * assignment on the successful (i.e., non-abort) path. - */ - __asm__ volatile ("1: .byte 0xC7; .byte 0xF8;\n" - " .long 1f-1b-6\n" - " jmp 2f\n" - "1: movl %%eax,%0\n" - "2:" - :"+r"(res)::"memory","%eax"); + /* Note that %eax must be noted as killed (clobbered), because the XSR is + returned in %eax(%rax) on abort. Other register values are restored, so + don't need to be killed. + + We must also mark 'res' as an input and an output, since otherwise + 'res=-1' may be dropped as being dead, whereas we do need the assignment on + the successful (i.e., non-abort) path. */ + __asm__ volatile("1: .byte 0xC7; .byte 0xF8;\n" + " .long 1f-1b-6\n" + " jmp 2f\n" + "1: movl %%eax,%0\n" + "2:" + : "+r"(res)::"memory", "%eax"); #endif // KMP_OS_WINDOWS - return res; + return res; } -/* - Transaction end -*/ -static __inline void _xend() -{ +/* Transaction end */ +static __inline void _xend() { #if KMP_OS_WINDOWS - __asm { + __asm { _emit 0x0f _emit 0x01 _emit 0xd5 - } + } #else - __asm__ volatile (".byte 0x0f; .byte 0x01; .byte 0xd5" :::"memory"); + __asm__ volatile(".byte 0x0f; .byte 0x01; .byte 0xd5" ::: "memory"); #endif } -/* - This is a macro, the argument must be a single byte constant which - can be evaluated by the inline assembler, since it is emitted as a - byte into the assembly code. -*/ +/* This is a macro, the argument must be a single byte constant which can be + evaluated by the inline assembler, since it is emitted as a byte into the + assembly code. */ +// clang-format off #if KMP_OS_WINDOWS -#define _xabort(ARG) \ - _asm _emit 0xc6 \ - _asm _emit 0xf8 \ - _asm _emit ARG +#define _xabort(ARG) _asm _emit 0xc6 _asm _emit 0xf8 _asm _emit ARG #else -#define _xabort(ARG) \ - __asm__ volatile (".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG) :::"memory"); +#define _xabort(ARG) \ + __asm__ volatile(".byte 0xC6; .byte 0xF8; .byte " STRINGIZE(ARG):::"memory"); #endif - +// clang-format on #endif // KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 -// -// Statistics is collected for testing purpose -// +// Statistics is collected for testing purpose #if KMP_DEBUG_ADAPTIVE_LOCKS -// We accumulate speculative lock statistics when the lock is destroyed. -// We keep locks that haven't been destroyed in the liveLocks list -// so that we can grab their statistics too. +// We accumulate speculative lock statistics when the lock is destroyed. We +// keep locks that haven't been destroyed in the liveLocks list so that we can +// grab their statistics too. static kmp_adaptive_lock_statistics_t destroyedStats; // To hold the list of live locks. @@ -2038,1057 +1873,922 @@ static kmp_adaptive_lock_info_t liveLocks; static kmp_bootstrap_lock_t chain_lock; // Initialize the list of stats. 
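/* Aside: a minimal standalone sketch of how the _xbegin/_xend/_xabort
   primitives defined above are typically used for lock elision. Assumes
   GCC/Clang with -mrtm and RTM-capable hardware; 'fallback_held' and
   'shared_counter' are hypothetical stand-ins, not runtime state. */
#include <immintrin.h>
#include <atomic>

static std::atomic<bool> fallback_held{false}; // the real (non-speculative) lock word
static int shared_counter = 0;

static void elided_increment() {
  unsigned status = _xbegin();
  if (status == _XBEGIN_STARTED) {
    // Speculating: reading the lock word puts it in our read set, so a real
    // acquisition by another thread aborts this transaction automatically.
    if (fallback_held.load(std::memory_order_relaxed))
      _xabort(0x01);               // lock is visibly held; bail out
    ++shared_counter;              // the (elided) critical section
    _xend();                       // commit
    return;
  }
  // Aborted: 'status' carries the _XABORT_* bits; take the real lock instead.
  while (fallback_held.exchange(true, std::memory_order_acquire))
    ;                              // simple spin, standing in for the queuing lock
  ++shared_counter;
  fallback_held.store(false, std::memory_order_release);
}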
-void -__kmp_init_speculative_stats() -{ - kmp_adaptive_lock_info_t *lck = &liveLocks; - - memset( ( void * ) & ( lck->stats ), 0, sizeof( lck->stats ) ); - lck->stats.next = lck; - lck->stats.prev = lck; +void __kmp_init_speculative_stats() { + kmp_adaptive_lock_info_t *lck = &liveLocks; - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); + memset((void *)&(lck->stats), 0, sizeof(lck->stats)); + lck->stats.next = lck; + lck->stats.prev = lck; - __kmp_init_bootstrap_lock( &chain_lock ); + KMP_ASSERT(lck->stats.next->stats.prev == lck); + KMP_ASSERT(lck->stats.prev->stats.next == lck); + __kmp_init_bootstrap_lock(&chain_lock); } // Insert the lock into the circular list -static void -__kmp_remember_lock( kmp_adaptive_lock_info_t * lck ) -{ - __kmp_acquire_bootstrap_lock( &chain_lock ); +static void __kmp_remember_lock(kmp_adaptive_lock_info_t *lck) { + __kmp_acquire_bootstrap_lock(&chain_lock); - lck->stats.next = liveLocks.stats.next; - lck->stats.prev = &liveLocks; + lck->stats.next = liveLocks.stats.next; + lck->stats.prev = &liveLocks; - liveLocks.stats.next = lck; - lck->stats.next->stats.prev = lck; + liveLocks.stats.next = lck; + lck->stats.next->stats.prev = lck; - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); + KMP_ASSERT(lck->stats.next->stats.prev == lck); + KMP_ASSERT(lck->stats.prev->stats.next == lck); - __kmp_release_bootstrap_lock( &chain_lock ); + __kmp_release_bootstrap_lock(&chain_lock); } -static void -__kmp_forget_lock( kmp_adaptive_lock_info_t * lck ) -{ - KMP_ASSERT( lck->stats.next->stats.prev == lck ); - KMP_ASSERT( lck->stats.prev->stats.next == lck ); +static void __kmp_forget_lock(kmp_adaptive_lock_info_t *lck) { + KMP_ASSERT(lck->stats.next->stats.prev == lck); + KMP_ASSERT(lck->stats.prev->stats.next == lck); - kmp_adaptive_lock_info_t * n = lck->stats.next; - kmp_adaptive_lock_info_t * p = lck->stats.prev; + kmp_adaptive_lock_info_t *n = lck->stats.next; + kmp_adaptive_lock_info_t *p = lck->stats.prev; - n->stats.prev = p; - p->stats.next = n; + n->stats.prev = p; + p->stats.next = n; } -static void -__kmp_zero_speculative_stats( kmp_adaptive_lock_info_t * lck ) -{ - memset( ( void * )&lck->stats, 0, sizeof( lck->stats ) ); - __kmp_remember_lock( lck ); +static void __kmp_zero_speculative_stats(kmp_adaptive_lock_info_t *lck) { + memset((void *)&lck->stats, 0, sizeof(lck->stats)); + __kmp_remember_lock(lck); } -static void -__kmp_add_stats( kmp_adaptive_lock_statistics_t * t, kmp_adaptive_lock_info_t * lck ) -{ - kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; +static void __kmp_add_stats(kmp_adaptive_lock_statistics_t *t, + kmp_adaptive_lock_info_t *lck) { + kmp_adaptive_lock_statistics_t volatile *s = &lck->stats; - t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; - t->successfulSpeculations += s->successfulSpeculations; - t->hardFailedSpeculations += s->hardFailedSpeculations; - t->softFailedSpeculations += s->softFailedSpeculations; - t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; - t->lemmingYields += s->lemmingYields; + t->nonSpeculativeAcquireAttempts += lck->acquire_attempts; + t->successfulSpeculations += s->successfulSpeculations; + t->hardFailedSpeculations += s->hardFailedSpeculations; + t->softFailedSpeculations += s->softFailedSpeculations; + t->nonSpeculativeAcquires += s->nonSpeculativeAcquires; + t->lemmingYields += s->lemmingYields; } -static void -__kmp_accumulate_speculative_stats( kmp_adaptive_lock_info_t 
* lck) -{ - kmp_adaptive_lock_statistics_t *t = &destroyedStats; +static void __kmp_accumulate_speculative_stats(kmp_adaptive_lock_info_t *lck) { + kmp_adaptive_lock_statistics_t *t = &destroyedStats; - __kmp_acquire_bootstrap_lock( &chain_lock ); + __kmp_acquire_bootstrap_lock(&chain_lock); - __kmp_add_stats( &destroyedStats, lck ); - __kmp_forget_lock( lck ); + __kmp_add_stats(&destroyedStats, lck); + __kmp_forget_lock(lck); - __kmp_release_bootstrap_lock( &chain_lock ); + __kmp_release_bootstrap_lock(&chain_lock); } -static float -percent (kmp_uint32 count, kmp_uint32 total) -{ - return (total == 0) ? 0.0: (100.0 * count)/total; +static float percent(kmp_uint32 count, kmp_uint32 total) { + return (total == 0) ? 0.0 : (100.0 * count) / total; } -static -FILE * __kmp_open_stats_file() -{ - if (strcmp (__kmp_speculative_statsfile, "-") == 0) - return stdout; +static FILE *__kmp_open_stats_file() { + if (strcmp(__kmp_speculative_statsfile, "-") == 0) + return stdout; - size_t buffLen = KMP_STRLEN( __kmp_speculative_statsfile ) + 20; - char buffer[buffLen]; - KMP_SNPRINTF (&buffer[0], buffLen, __kmp_speculative_statsfile, - (kmp_int32)getpid()); - FILE * result = fopen(&buffer[0], "w"); + size_t buffLen = KMP_STRLEN(__kmp_speculative_statsfile) + 20; + char buffer[buffLen]; + KMP_SNPRINTF(&buffer[0], buffLen, __kmp_speculative_statsfile, + (kmp_int32)getpid()); + FILE *result = fopen(&buffer[0], "w"); - // Maybe we should issue a warning here... - return result ? result : stdout; + // Maybe we should issue a warning here... + return result ? result : stdout; } -void -__kmp_print_speculative_stats() -{ - if (__kmp_user_lock_kind != lk_adaptive) - return; +void __kmp_print_speculative_stats() { + if (__kmp_user_lock_kind != lk_adaptive) + return; - FILE * statsFile = __kmp_open_stats_file(); + FILE *statsFile = __kmp_open_stats_file(); - kmp_adaptive_lock_statistics_t total = destroyedStats; - kmp_adaptive_lock_info_t *lck; + kmp_adaptive_lock_statistics_t total = destroyedStats; + kmp_adaptive_lock_info_t *lck; - for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { - __kmp_add_stats( &total, lck ); - } - kmp_adaptive_lock_statistics_t *t = &total; - kmp_uint32 totalSections = t->nonSpeculativeAcquires + t->successfulSpeculations; - kmp_uint32 totalSpeculations = t->successfulSpeculations + t->hardFailedSpeculations + - t->softFailedSpeculations; - - fprintf ( statsFile, "Speculative lock statistics (all approximate!)\n"); - fprintf ( statsFile, " Lock parameters: \n" - " max_soft_retries : %10d\n" - " max_badness : %10d\n", - __kmp_adaptive_backoff_params.max_soft_retries, - __kmp_adaptive_backoff_params.max_badness); - fprintf( statsFile, " Non-speculative acquire attempts : %10d\n", t->nonSpeculativeAcquireAttempts ); - fprintf( statsFile, " Total critical sections : %10d\n", totalSections ); - fprintf( statsFile, " Successful speculations : %10d (%5.1f%%)\n", - t->successfulSpeculations, percent( t->successfulSpeculations, totalSections ) ); - fprintf( statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", - t->nonSpeculativeAcquires, percent( t->nonSpeculativeAcquires, totalSections ) ); - fprintf( statsFile, " Lemming yields : %10d\n\n", t->lemmingYields ); - - fprintf( statsFile, " Speculative acquire attempts : %10d\n", totalSpeculations ); - fprintf( statsFile, " Successes : %10d (%5.1f%%)\n", - t->successfulSpeculations, percent( t->successfulSpeculations, totalSpeculations ) ); - fprintf( statsFile, " Soft failures : %10d (%5.1f%%)\n", - 
t->softFailedSpeculations, percent( t->softFailedSpeculations, totalSpeculations ) ); - fprintf( statsFile, " Hard failures : %10d (%5.1f%%)\n", - t->hardFailedSpeculations, percent( t->hardFailedSpeculations, totalSpeculations ) ); - - if (statsFile != stdout) - fclose( statsFile ); -} - -# define KMP_INC_STAT(lck,stat) ( lck->lk.adaptive.stats.stat++ ) + for (lck = liveLocks.stats.next; lck != &liveLocks; lck = lck->stats.next) { + __kmp_add_stats(&total, lck); + } + kmp_adaptive_lock_statistics_t *t = &total; + kmp_uint32 totalSections = + t->nonSpeculativeAcquires + t->successfulSpeculations; + kmp_uint32 totalSpeculations = t->successfulSpeculations + + t->hardFailedSpeculations + + t->softFailedSpeculations; + + fprintf(statsFile, "Speculative lock statistics (all approximate!)\n"); + fprintf(statsFile, " Lock parameters: \n" + " max_soft_retries : %10d\n" + " max_badness : %10d\n", + __kmp_adaptive_backoff_params.max_soft_retries, + __kmp_adaptive_backoff_params.max_badness); + fprintf(statsFile, " Non-speculative acquire attempts : %10d\n", + t->nonSpeculativeAcquireAttempts); + fprintf(statsFile, " Total critical sections : %10d\n", + totalSections); + fprintf(statsFile, " Successful speculations : %10d (%5.1f%%)\n", + t->successfulSpeculations, + percent(t->successfulSpeculations, totalSections)); + fprintf(statsFile, " Non-speculative acquires : %10d (%5.1f%%)\n", + t->nonSpeculativeAcquires, + percent(t->nonSpeculativeAcquires, totalSections)); + fprintf(statsFile, " Lemming yields : %10d\n\n", + t->lemmingYields); + + fprintf(statsFile, " Speculative acquire attempts : %10d\n", + totalSpeculations); + fprintf(statsFile, " Successes : %10d (%5.1f%%)\n", + t->successfulSpeculations, + percent(t->successfulSpeculations, totalSpeculations)); + fprintf(statsFile, " Soft failures : %10d (%5.1f%%)\n", + t->softFailedSpeculations, + percent(t->softFailedSpeculations, totalSpeculations)); + fprintf(statsFile, " Hard failures : %10d (%5.1f%%)\n", + t->hardFailedSpeculations, + percent(t->hardFailedSpeculations, totalSpeculations)); + + if (statsFile != stdout) + fclose(statsFile); +} + +#define KMP_INC_STAT(lck, stat) (lck->lk.adaptive.stats.stat++) #else -# define KMP_INC_STAT(lck,stat) +#define KMP_INC_STAT(lck, stat) #endif // KMP_DEBUG_ADAPTIVE_LOCKS -static inline bool -__kmp_is_unlocked_queuing_lock( kmp_queuing_lock_t *lck ) -{ - // It is enough to check that the head_id is zero. - // We don't also need to check the tail. - bool res = lck->lk.head_id == 0; +static inline bool __kmp_is_unlocked_queuing_lock(kmp_queuing_lock_t *lck) { + // It is enough to check that the head_id is zero. + // We don't also need to check the tail. + bool res = lck->lk.head_id == 0; - // We need a fence here, since we must ensure that no memory operations - // from later in this thread float above that read. +// We need a fence here, since we must ensure that no memory operations +// from later in this thread float above that read. 
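/* Aside: the ordering requirement described in the comment above, expressed
   with C++11 atomics. An acquire load is the minimal constraint (nothing that
   follows in this thread may be reordered above the read); the mfence /
   __sync_synchronize below is a stronger full barrier. 'head_word' is a
   hypothetical stand-in for lck->lk.head_id, not runtime state. */
#include <atomic>

static std::atomic<unsigned> head_word{0};

static bool looks_unlocked() {
  return head_word.load(std::memory_order_acquire) == 0;
}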
#if KMP_COMPILER_ICC - _mm_mfence(); + _mm_mfence(); #else - __sync_synchronize(); + __sync_synchronize(); #endif - return res; + return res; } // Functions for manipulating the badness static __inline void -__kmp_update_badness_after_success( kmp_adaptive_lock_t *lck ) -{ - // Reset the badness to zero so we eagerly try to speculate again - lck->lk.adaptive.badness = 0; - KMP_INC_STAT(lck,successfulSpeculations); +__kmp_update_badness_after_success(kmp_adaptive_lock_t *lck) { + // Reset the badness to zero so we eagerly try to speculate again + lck->lk.adaptive.badness = 0; + KMP_INC_STAT(lck, successfulSpeculations); } // Create a bit mask with one more set bit. -static __inline void -__kmp_step_badness( kmp_adaptive_lock_t *lck ) -{ - kmp_uint32 newBadness = ( lck->lk.adaptive.badness << 1 ) | 1; - if ( newBadness > lck->lk.adaptive.max_badness) { - return; - } else { - lck->lk.adaptive.badness = newBadness; - } +static __inline void __kmp_step_badness(kmp_adaptive_lock_t *lck) { + kmp_uint32 newBadness = (lck->lk.adaptive.badness << 1) | 1; + if (newBadness > lck->lk.adaptive.max_badness) { + return; + } else { + lck->lk.adaptive.badness = newBadness; + } } // Check whether speculation should be attempted. -static __inline int -__kmp_should_speculate( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint32 badness = lck->lk.adaptive.badness; - kmp_uint32 attempts= lck->lk.adaptive.acquire_attempts; - int res = (attempts & badness) == 0; - return res; +static __inline int __kmp_should_speculate(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + kmp_uint32 badness = lck->lk.adaptive.badness; + kmp_uint32 attempts = lck->lk.adaptive.acquire_attempts; + int res = (attempts & badness) == 0; + return res; } // Attempt to acquire only the speculative lock. // Does not back off to the non-speculative lock. -// -static int -__kmp_test_adaptive_lock_only( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) -{ - int retries = lck->lk.adaptive.max_soft_retries; - - // We don't explicitly count the start of speculation, rather we record - // the results (success, hard fail, soft fail). The sum of all of those - // is the total number of times we started speculation since all - // speculations must end one of those ways. - do - { - kmp_uint32 status = _xbegin(); - // Switch this in to disable actual speculation but exercise - // at least some of the rest of the code. Useful for debugging... - // kmp_uint32 status = _XABORT_NESTED; - - if (status == _XBEGIN_STARTED ) - { /* We have successfully started speculation - * Check that no-one acquired the lock for real between when we last looked - * and now. This also gets the lock cache line into our read-set, - * which we need so that we'll abort if anyone later claims it for real. - */ - if (! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - // Lock is now visibly acquired, so someone beat us to it. - // Abort the transaction so we'll restart from _xbegin with the - // failure status. - _xabort(0x01); - KMP_ASSERT2( 0, "should not get here" ); - } - return 1; // Lock has been acquired (speculatively) - } else { - // We have aborted, update the statistics - if ( status & SOFT_ABORT_MASK) - { - KMP_INC_STAT(lck,softFailedSpeculations); - // and loop round to retry. - } - else - { - KMP_INC_STAT(lck,hardFailedSpeculations); - // Give up if we had a hard failure. - break; - } - } - } while( retries-- ); // Loop while we have retries, and didn't fail hard. 
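/* Aside: a standalone sketch of the "badness" back-off used above. badness is
   a mask of low bits (0, 1, 3, 7, ...); speculation is only attempted when
   (attempts & badness) == 0, i.e. on every 2^k-th attempt after k failures,
   up to a cap. Simplified relative to the runtime: here every attempt bumps
   the counter and every speculation is pretended to fail, just to show the
   exponential spacing. Names are illustrative. */
#include <cstdint>
#include <cstdio>

struct Backoff {
  std::uint32_t badness = 0;       // 0, 1, 3, 7, ... (low-bit mask)
  std::uint32_t max_badness = 7;   // cap, like max_badness above
  std::uint32_t attempts = 0;
};

static bool should_speculate(const Backoff &b) {
  return (b.attempts & b.badness) == 0;
}

static void step_badness(Backoff &b) {
  std::uint32_t next = (b.badness << 1) | 1; // set one more low bit
  if (next <= b.max_badness)
    b.badness = next;
}

int main() {
  Backoff b;
  for (int i = 0; i < 20; ++i) {
    ++b.attempts;
    if (should_speculate(b)) {
      std::printf("attempt %2u: speculate\n", b.attempts); // prints 1, 2, 4, 8, 16
      step_badness(b);             // pretend the speculation failed
    }
  }
  return 0;
}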
- - // Either we had a hard failure or we didn't succeed softly after - // the full set of attempts, so back off the badness. - __kmp_step_badness( lck ); - return 0; -} - -// Attempt to acquire the speculative lock, or back off to the non-speculative one -// if the speculative lock cannot be acquired. -// We can succeed speculatively, non-speculatively, or fail. -static int -__kmp_test_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - // First try to acquire the lock speculatively - if ( __kmp_should_speculate( lck, gtid ) && __kmp_test_adaptive_lock_only( lck, gtid ) ) - return 1; - - // Speculative acquisition failed, so try to acquire it non-speculatively. - // Count the non-speculative acquire attempt - lck->lk.adaptive.acquire_attempts++; - - // Use base, non-speculative lock. - if ( __kmp_test_queuing_lock( GET_QLK_PTR(lck), gtid ) ) - { - KMP_INC_STAT(lck,nonSpeculativeAcquires); - return 1; // Lock is acquired (non-speculatively) - } - else - { - return 0; // Failed to acquire the lock, it's already visibly locked. - } -} - -static int -__kmp_test_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - - int retval = __kmp_test_adaptive_lock( lck, gtid ); - - if ( retval ) { - lck->lk.qlk.owner_id = gtid + 1; - } - return retval; -} - -// Block until we can acquire a speculative, adaptive lock. -// We check whether we should be trying to speculate. -// If we should be, we check the real lock to see if it is free, -// and, if not, pause without attempting to acquire it until it is. -// Then we try the speculative acquire. -// This means that although we suffer from lemmings a little ( -// because all we can't acquire the lock speculatively until -// the queue of threads waiting has cleared), we don't get into a -// state where we can never acquire the lock speculatively (because we -// force the queue to clear by preventing new arrivals from entering the -// queue). -// This does mean that when we're trying to break lemmings, the lock -// is no longer fair. However OpenMP makes no guarantee that its -// locks are fair, so this isn't a real problem. -static void -__kmp_acquire_adaptive_lock( kmp_adaptive_lock_t * lck, kmp_int32 gtid ) -{ - if ( __kmp_should_speculate( lck, gtid ) ) - { - if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - if ( __kmp_test_adaptive_lock_only( lck , gtid ) ) - return; - // We tried speculation and failed, so give up. - } - else - { - // We can't try speculation until the lock is free, so we - // pause here (without suspending on the queueing lock, - // to allow it to drain, then try again. - // All other threads will also see the same result for - // shouldSpeculate, so will be doing the same if they - // try to claim the lock from now on. - while ( ! __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { - KMP_INC_STAT(lck,lemmingYields); - __kmp_yield (TRUE); - } - - if ( __kmp_test_adaptive_lock_only( lck, gtid ) ) - return; - } +static int __kmp_test_adaptive_lock_only(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + int retries = lck->lk.adaptive.max_soft_retries; + + // We don't explicitly count the start of speculation, rather we record the + // results (success, hard fail, soft fail). The sum of all of those is the + // total number of times we started speculation since all speculations must + // end one of those ways. 
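/* Aside: a standalone sketch of the retry policy implemented by the do/while
   below: soft aborts (retry/conflict/explicit) are retried up to a bound,
   hard aborts (capacity, nesting, ...) give up immediately. Assumes GCC/Clang
   with -mrtm; RETRY_WORTHY mirrors the SOFT_ABORT_MASK defined earlier and all
   names are illustrative, not runtime identifiers. */
#include <immintrin.h>

#define RETRY_WORTHY (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)

/* Returns 1 with a transaction open (the caller runs its critical section and
   then calls _xend()), or 0 if the caller should fall back to the real lock. */
static int try_speculation(int max_soft_retries) {
  do {
    unsigned status = _xbegin();
    if (status == _XBEGIN_STARTED)
      return 1;                    // speculating
    if (!(status & RETRY_WORTHY))
      break;                       // hard failure: not worth another try
    // soft failure: loop round and retry
  } while (max_soft_retries--);
  return 0;                        // back off to the non-speculative path
}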
+ do { + kmp_uint32 status = _xbegin(); + // Switch this in to disable actual speculation but exercise at least some + // of the rest of the code. Useful for debugging... + // kmp_uint32 status = _XABORT_NESTED; + + if (status == _XBEGIN_STARTED) { + /* We have successfully started speculation. Check that no-one acquired + the lock for real between when we last looked and now. This also gets + the lock cache line into our read-set, which we need so that we'll + abort if anyone later claims it for real. */ + if (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { + // Lock is now visibly acquired, so someone beat us to it. Abort the + // transaction so we'll restart from _xbegin with the failure status. + _xabort(0x01); + KMP_ASSERT2(0, "should not get here"); + } + return 1; // Lock has been acquired (speculatively) + } else { + // We have aborted, update the statistics + if (status & SOFT_ABORT_MASK) { + KMP_INC_STAT(lck, softFailedSpeculations); + // and loop round to retry. + } else { + KMP_INC_STAT(lck, hardFailedSpeculations); + // Give up if we had a hard failure. + break; + } } + } while (retries--); // Loop while we have retries, and didn't fail hard. - // Speculative acquisition failed, so acquire it non-speculatively. - // Count the non-speculative acquire attempt - lck->lk.adaptive.acquire_attempts++; - - __kmp_acquire_queuing_lock_timed_template( GET_QLK_PTR(lck), gtid ); - // We have acquired the base lock, so count that. - KMP_INC_STAT(lck,nonSpeculativeAcquires ); - ANNOTATE_QUEUING_ACQUIRED(lck); + // Either we had a hard failure or we didn't succeed softly after + // the full set of attempts, so back off the badness. + __kmp_step_badness(lck); + return 0; } -static void -__kmp_acquire_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == gtid ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } - - __kmp_acquire_adaptive_lock( lck, gtid ); - +// Attempt to acquire the speculative lock, or back off to the non-speculative +// one if the speculative lock cannot be acquired. +// We can succeed speculatively, non-speculatively, or fail. +static int __kmp_test_adaptive_lock(kmp_adaptive_lock_t *lck, kmp_int32 gtid) { + // First try to acquire the lock speculatively + if (__kmp_should_speculate(lck, gtid) && + __kmp_test_adaptive_lock_only(lck, gtid)) + return 1; + + // Speculative acquisition failed, so try to acquire it non-speculatively. + // Count the non-speculative acquire attempt + lck->lk.adaptive.acquire_attempts++; + + // Use base, non-speculative lock. + if (__kmp_test_queuing_lock(GET_QLK_PTR(lck), gtid)) { + KMP_INC_STAT(lck, nonSpeculativeAcquires); + return 1; // Lock is acquired (non-speculatively) + } else { + return 0; // Failed to acquire the lock, it's already visibly locked. 
+ } +} + +static int __kmp_test_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { + KMP_FATAL(LockIsUninitialized, func); + } + + int retval = __kmp_test_adaptive_lock(lck, gtid); + + if (retval) { lck->lk.qlk.owner_id = gtid + 1; + } + return retval; } -static int -__kmp_release_adaptive_lock( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - if ( __kmp_is_unlocked_queuing_lock( GET_QLK_PTR(lck) ) ) - { // If the lock doesn't look claimed we must be speculating. - // (Or the user's code is buggy and they're releasing without locking; - // if we had XTEST we'd be able to check that case...) - _xend(); // Exit speculation - __kmp_update_badness_after_success( lck ); - } - else - { // Since the lock *is* visibly locked we're not speculating, - // so should use the underlying lock's release scheme. - __kmp_release_queuing_lock( GET_QLK_PTR(lck), gtid ); - } - return KMP_LOCK_RELEASED; -} - -static int -__kmp_release_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); +// Block until we can acquire a speculative, adaptive lock. We check whether we +// should be trying to speculate. If we should be, we check the real lock to see +// if it is free, and, if not, pause without attempting to acquire it until it +// is. Then we try the speculative acquire. This means that although we suffer +// from lemmings a little (because all we can't acquire the lock speculatively +// until the queue of threads waiting has cleared), we don't get into a state +// where we can never acquire the lock speculatively (because we force the queue +// to clear by preventing new arrivals from entering the queue). This does mean +// that when we're trying to break lemmings, the lock is no longer fair. However +// OpenMP makes no guarantee that its locks are fair, so this isn't a real +// problem. +static void __kmp_acquire_adaptive_lock(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + if (__kmp_should_speculate(lck, gtid)) { + if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { + if (__kmp_test_adaptive_lock_only(lck, gtid)) + return; + // We tried speculation and failed, so give up. + } else { + // We can't try speculation until the lock is free, so we pause here + // (without suspending on the queueing lock, to allow it to drain, then + // try again. All other threads will also see the same result for + // shouldSpeculate, so will be doing the same if they try to claim the + // lock from now on. 
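/* Aside: a standalone sketch of the drain-then-speculate idea behind the loop
   below: rather than joining the queue (which would force every later thread
   onto the real lock as well, the "lemming" effect), wait politely until the
   lock looks free and only then attempt speculation. All names are
   illustrative stand-ins, not runtime state. */
#include <atomic>
#include <thread>

static std::atomic<int> real_lock_word{0}; // nonzero while held for real

// Stub for the speculative attempt (e.g. the RTM path sketched earlier);
// it always fails here only to keep the example self-contained.
static bool try_speculative_acquire() { return false; }

static bool drain_then_speculate() {
  while (real_lock_word.load(std::memory_order_acquire) != 0)
    std::this_thread::yield();     // stand-in for __kmp_yield(TRUE)
  return try_speculative_acquire(); // on false, the caller takes the real lock
}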
+ while (!__kmp_is_unlocked_queuing_lock(GET_QLK_PTR(lck))) { + KMP_INC_STAT(lck, lemmingYields); + __kmp_yield(TRUE); + } + + if (__kmp_test_adaptive_lock_only(lck, gtid)) + return; } - lck->lk.qlk.owner_id = 0; - __kmp_release_adaptive_lock( lck, gtid ); - return KMP_LOCK_RELEASED; -} - -static void -__kmp_init_adaptive_lock( kmp_adaptive_lock_t *lck ) -{ - __kmp_init_queuing_lock( GET_QLK_PTR(lck) ); - lck->lk.adaptive.badness = 0; - lck->lk.adaptive.acquire_attempts = 0; //nonSpeculativeAcquireAttempts = 0; - lck->lk.adaptive.max_soft_retries = __kmp_adaptive_backoff_params.max_soft_retries; - lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; + } + + // Speculative acquisition failed, so acquire it non-speculatively. + // Count the non-speculative acquire attempt + lck->lk.adaptive.acquire_attempts++; + + __kmp_acquire_queuing_lock_timed_template(GET_QLK_PTR(lck), gtid); + // We have acquired the base lock, so count that. + KMP_INC_STAT(lck, nonSpeculativeAcquires); + ANNOTATE_QUEUING_ACQUIRED(lck); +} + +static void __kmp_acquire_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == gtid) { + KMP_FATAL(LockIsAlreadyOwned, func); + } + + __kmp_acquire_adaptive_lock(lck, gtid); + + lck->lk.qlk.owner_id = gtid + 1; +} + +static int __kmp_release_adaptive_lock(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + if (__kmp_is_unlocked_queuing_lock(GET_QLK_PTR( + lck))) { // If the lock doesn't look claimed we must be speculating. + // (Or the user's code is buggy and they're releasing without locking; + // if we had XTEST we'd be able to check that case...) + _xend(); // Exit speculation + __kmp_update_badness_after_success(lck); + } else { // Since the lock *is* visibly locked we're not speculating, + // so should use the underlying lock's release scheme. 
+ __kmp_release_queuing_lock(GET_QLK_PTR(lck), gtid); + } + return KMP_LOCK_RELEASED; +} + +static int __kmp_release_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + lck->lk.qlk.owner_id = 0; + __kmp_release_adaptive_lock(lck, gtid); + return KMP_LOCK_RELEASED; +} + +static void __kmp_init_adaptive_lock(kmp_adaptive_lock_t *lck) { + __kmp_init_queuing_lock(GET_QLK_PTR(lck)); + lck->lk.adaptive.badness = 0; + lck->lk.adaptive.acquire_attempts = 0; // nonSpeculativeAcquireAttempts = 0; + lck->lk.adaptive.max_soft_retries = + __kmp_adaptive_backoff_params.max_soft_retries; + lck->lk.adaptive.max_badness = __kmp_adaptive_backoff_params.max_badness; #if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_zero_speculative_stats( &lck->lk.adaptive ); + __kmp_zero_speculative_stats(&lck->lk.adaptive); #endif - KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); + KA_TRACE(1000, ("__kmp_init_adaptive_lock: lock %p initialized\n", lck)); } -static void -__kmp_init_adaptive_lock_with_checks( kmp_adaptive_lock_t * lck ) -{ - __kmp_init_adaptive_lock( lck ); +static void __kmp_init_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { + __kmp_init_adaptive_lock(lck); } -static void -__kmp_destroy_adaptive_lock( kmp_adaptive_lock_t *lck ) -{ +static void __kmp_destroy_adaptive_lock(kmp_adaptive_lock_t *lck) { #if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_accumulate_speculative_stats( &lck->lk.adaptive ); + __kmp_accumulate_speculative_stats(&lck->lk.adaptive); #endif - __kmp_destroy_queuing_lock (GET_QLK_PTR(lck)); - // Nothing needed for the speculative part. + __kmp_destroy_queuing_lock(GET_QLK_PTR(lck)); + // Nothing needed for the speculative part. 
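/* Aside: a standalone sketch of the release-side decision made by
   __kmp_release_adaptive_lock above: if the lock never looked taken, this
   thread must still be inside a transaction, so commit it; otherwise hand the
   real lock back. 'real_lock_held' is a hypothetical lock word, not runtime
   state; assumes -mrtm as in the earlier sketches. */
#include <immintrin.h>
#include <atomic>

static std::atomic<bool> real_lock_held{false};

static void elided_release() {
  if (!real_lock_held.load(std::memory_order_relaxed)) {
    _xend();                                              // commit the speculative section
  } else {
    real_lock_held.store(false, std::memory_order_release); // real unlock
  }
}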
} -static void -__kmp_destroy_adaptive_lock_with_checks( kmp_adaptive_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.qlk.initialized != GET_QLK_PTR(lck) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_get_queuing_lock_owner( GET_QLK_PTR(lck) ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_adaptive_lock( lck ); +static void __kmp_destroy_adaptive_lock_with_checks(kmp_adaptive_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + if (lck->lk.qlk.initialized != GET_QLK_PTR(lck)) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_get_queuing_lock_owner(GET_QLK_PTR(lck)) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_adaptive_lock(lck); } - #endif // KMP_USE_ADAPTIVE_LOCKS - /* ------------------------------------------------------------------------ */ /* DRDPA ticket locks */ /* "DRDPA" means Dynamically Reconfigurable Distributed Polling Area */ -static kmp_int32 -__kmp_get_drdpa_lock_owner( kmp_drdpa_lock_t *lck ) -{ - return TCR_4( lck->lk.owner_id ) - 1; +static kmp_int32 __kmp_get_drdpa_lock_owner(kmp_drdpa_lock_t *lck) { + return TCR_4(lck->lk.owner_id) - 1; } -static inline bool -__kmp_is_drdpa_lock_nestable( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.depth_locked != -1; +static inline bool __kmp_is_drdpa_lock_nestable(kmp_drdpa_lock_t *lck) { + return lck->lk.depth_locked != -1; } __forceinline static int -__kmp_acquire_drdpa_lock_timed_template( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load +__kmp_acquire_drdpa_lock_timed_template(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + kmp_uint64 ticket = KMP_TEST_THEN_INC64((kmp_int64 *)&lck->lk.next_ticket); + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = + (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( + lck->lk.polls); // volatile load #ifdef USE_LOCK_PROFILE - if (TCR_8(polls[ticket & mask].poll) != ticket) - __kmp_printf("LOCK CONTENTION: %p\n", lck); - /* else __kmp_printf( "." );*/ + if (TCR_8(polls[ticket & mask].poll) != ticket) + __kmp_printf("LOCK CONTENTION: %p\n", lck); +/* else __kmp_printf( "." );*/ #endif /* USE_LOCK_PROFILE */ + // Now spin-wait, but reload the polls pointer and mask, in case the + // polling area has been reconfigured. Unless it is reconfigured, the + // reloads stay in L1 cache and are cheap. + // + // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! + // + // The current implementation of KMP_WAIT_YIELD doesn't allow for mask + // and poll to be re-read every spin iteration. + kmp_uint32 spins; + + KMP_FSYNC_PREPARE(lck); + KMP_INIT_YIELD(spins); + while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load + // If we are oversubscribed, + // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. + // CPU Pause is in the macros for yield. // - // Now spin-wait, but reload the polls pointer and mask, in case the - // polling area has been reconfigured. Unless it is reconfigured, the - // reloads stay in L1 cache and are cheap. - // - // Keep this code in sync with KMP_WAIT_YIELD, in kmp_dispatch.cpp !!! + KMP_YIELD(TCR_4(__kmp_nth) > + (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)); + KMP_YIELD_SPIN(spins); + + // Re-read the mask and the poll pointer from the lock structure. // - // The current implementation of KMP_WAIT_YIELD doesn't allow for mask - // and poll to be re-read every spin iteration. + // Make certain that "mask" is read before "polls" !!! // - kmp_uint32 spins; - - KMP_FSYNC_PREPARE(lck); - KMP_INIT_YIELD(spins); - while (TCR_8(polls[ticket & mask].poll) < ticket) { // volatile load - // If we are oversubscribed, - // or have waited a bit (and KMP_LIBRARY=turnaround), then yield. - // CPU Pause is in the macros for yield. - // - KMP_YIELD(TCR_4(__kmp_nth) - > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)); - KMP_YIELD_SPIN(spins); - - // Re-read the mask and the poll pointer from the lock structure. - // - // Make certain that "mask" is read before "polls" !!! - // - // If another thread picks reconfigures the polling area and updates - // their values, and we get the new value of mask and the old polls - // pointer, we could access memory beyond the end of the old polling - // area. - // - mask = TCR_8(lck->lk.mask); // volatile load + // If another thread picks reconfigures the polling area and updates their + // values, and we get the new value of mask and the old polls pointer, we + // could access memory beyond the end of the old polling area. + mask = TCR_8(lck->lk.mask); // volatile load + polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( + lck->lk.polls); // volatile load + } + + // Critical section starts here + KMP_FSYNC_ACQUIRED(lck); + KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", + ticket, lck)); + lck->lk.now_serving = ticket; // non-volatile store + + // Deallocate a garbage polling area if we know that we are the last + // thread that could possibly access it. + // + // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup + // ticket. + if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { + __kmp_free((void *)lck->lk.old_polls); + lck->lk.old_polls = NULL; + lck->lk.cleanup_ticket = 0; + } + + // Check to see if we should reconfigure the polling area. + // If there is still a garbage polling area to be deallocated from a + // previous reconfiguration, let a later thread reconfigure it. + if (lck->lk.old_polls == NULL) { + bool reconfigure = false; + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; + kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); + + if (TCR_4(__kmp_nth) > + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { + // We are in oversubscription mode. Contract the polling area + // down to a single location, if that hasn't been done already. + if (num_polls > 1) { + reconfigure = true; + num_polls = TCR_4(lck->lk.num_polls); + mask = 0; + num_polls = 1; + polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) + __kmp_allocate(num_polls * sizeof(*polls)); + polls[0].poll = ticket; + } + } else { + // We are in under/fully subscribed mode. Check the number of + // threads waiting on the lock. The size of the polling area + // should be at least the number of threads waiting. + kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; + if (num_waiting > num_polls) { + kmp_uint32 old_num_polls = num_polls; + reconfigure = true; + do { + mask = (mask << 1) | 1; + num_polls *= 2; + } while (num_polls <= num_waiting); + + // Allocate the new polling area, and copy the relevant portion + // of the old polling area to the new area. 
__kmp_allocate() + // zeroes the memory it allocates, and most of the old area is + // just zero padding, so we only copy the release counters. polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load + __kmp_allocate(num_polls * sizeof(*polls)); + kmp_uint32 i; + for (i = 0; i < old_num_polls; i++) { + polls[i].poll = old_polls[i].poll; + } + } } - // - // Critical section starts here - // - KMP_FSYNC_ACQUIRED(lck); - KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld acquired lock %p\n", - ticket, lck)); - lck->lk.now_serving = ticket; // non-volatile store + if (reconfigure) { + // Now write the updated fields back to the lock structure. + // + // Make certain that "polls" is written before "mask" !!! + // + // If another thread picks up the new value of mask and the old polls + // pointer , it could access memory beyond the end of the old polling + // area. + // + // On x86, we need memory fences. + KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring " + "lock %p to %d polls\n", + ticket, lck, num_polls)); - // - // Deallocate a garbage polling area if we know that we are the last - // thread that could possibly access it. - // - // The >= check is in case __kmp_test_drdpa_lock() allocated the cleanup - // ticket. - // - if ((lck->lk.old_polls != NULL) && (ticket >= lck->lk.cleanup_ticket)) { - __kmp_free((void *)lck->lk.old_polls); - lck->lk.old_polls = NULL; - lck->lk.cleanup_ticket = 0; - } + lck->lk.old_polls = old_polls; // non-volatile store + lck->lk.polls = polls; // volatile store - // - // Check to see if we should reconfigure the polling area. - // If there is still a garbage polling area to be deallocated from a - // previous reconfiguration, let a later thread reconfigure it. - // - if (lck->lk.old_polls == NULL) { - bool reconfigure = false; - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *old_polls = polls; - kmp_uint32 num_polls = TCR_4(lck->lk.num_polls); - - if (TCR_4(__kmp_nth) - > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { - // - // We are in oversubscription mode. Contract the polling area - // down to a single location, if that hasn't been done already. - // - if (num_polls > 1) { - reconfigure = true; - num_polls = TCR_4(lck->lk.num_polls); - mask = 0; - num_polls = 1; - polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(num_polls * sizeof(*polls)); - polls[0].poll = ticket; - } - } - else { - // - // We are in under/fully subscribed mode. Check the number of - // threads waiting on the lock. The size of the polling area - // should be at least the number of threads waiting. - // - kmp_uint64 num_waiting = TCR_8(lck->lk.next_ticket) - ticket - 1; - if (num_waiting > num_polls) { - kmp_uint32 old_num_polls = num_polls; - reconfigure = true; - do { - mask = (mask << 1) | 1; - num_polls *= 2; - } while (num_polls <= num_waiting); - - // - // Allocate the new polling area, and copy the relevant portion - // of the old polling area to the new area. __kmp_allocate() - // zeroes the memory it allocates, and most of the old area is - // just zero padding, so we only copy the release counters. - // - polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(num_polls * sizeof(*polls)); - kmp_uint32 i; - for (i = 0; i < old_num_polls; i++) { - polls[i].poll = old_polls[i].poll; - } - } - } + KMP_MB(); - if (reconfigure) { - // - // Now write the updated fields back to the lock structure. 
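/* Aside: the publication ordering called out in the comments above (write
   "polls" before "mask"; read "mask" before "polls"), expressed with C++11
   release/acquire atomics. If a waiter observes the enlarged mask, it is then
   guaranteed to observe the new (or a newer) polling area, so it never indexes
   the old, smaller area with the new mask. Names are illustrative stand-ins,
   and read_slot assumes publish_resized has run at least once. */
#include <atomic>
#include <cstdint>

struct PollSlot { std::atomic<std::uint64_t> value; };

static std::atomic<PollSlot *> polls_ptr{nullptr};
static std::atomic<std::uint64_t> mask_bits{0};

static void publish_resized(PollSlot *new_polls, std::uint64_t new_mask) {
  polls_ptr.store(new_polls, std::memory_order_release); // "polls" first ...
  mask_bits.store(new_mask, std::memory_order_release);  // ... then "mask"
}

static std::uint64_t read_slot(std::uint64_t ticket) {
  std::uint64_t m = mask_bits.load(std::memory_order_acquire); // "mask" first ...
  PollSlot *p = polls_ptr.load(std::memory_order_acquire);     // ... then "polls"
  return p[ticket & m].value.load(std::memory_order_acquire);
}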
- // - // Make certain that "polls" is written before "mask" !!! - // - // If another thread picks up the new value of mask and the old - // polls pointer , it could access memory beyond the end of the - // old polling area. - // - // On x86, we need memory fences. - // - KA_TRACE(1000, ("__kmp_acquire_drdpa_lock: ticket #%lld reconfiguring lock %p to %d polls\n", - ticket, lck, num_polls)); - - lck->lk.old_polls = old_polls; // non-volatile store - lck->lk.polls = polls; // volatile store - - KMP_MB(); - - lck->lk.num_polls = num_polls; // non-volatile store - lck->lk.mask = mask; // volatile store - - KMP_MB(); - - // - // Only after the new polling area and mask have been flushed - // to main memory can we update the cleanup ticket field. - // - // volatile load / non-volatile store - // - lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); - } + lck->lk.num_polls = num_polls; // non-volatile store + lck->lk.mask = mask; // volatile store + + KMP_MB(); + + // Only after the new polling area and mask have been flushed + // to main memory can we update the cleanup ticket field. + // + // volatile load / non-volatile store + lck->lk.cleanup_ticket = TCR_8(lck->lk.next_ticket); } - return KMP_LOCK_ACQUIRED_FIRST; + } + return KMP_LOCK_ACQUIRED_FIRST; } -int -__kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - int retval = __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); - ANNOTATE_DRDPA_ACQUIRED(lck); - return retval; +int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + int retval = __kmp_acquire_drdpa_lock_timed_template(lck, gtid); + ANNOTATE_DRDPA_ACQUIRED(lck); + return retval; } -static int -__kmp_acquire_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) ) { - KMP_FATAL( LockIsAlreadyOwned, func ); - } +static int __kmp_acquire_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) == gtid)) { + KMP_FATAL(LockIsAlreadyOwned, func); + } + + __kmp_acquire_drdpa_lock(lck, gtid); + + lck->lk.owner_id = gtid + 1; + return KMP_LOCK_ACQUIRED_FIRST; +} + +int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + // First get a ticket, then read the polls pointer and the mask. + // The polls pointer must be read before the mask!!! 
(See above) + kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = + (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( + lck->lk.polls); // volatile load + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + if (TCR_8(polls[ticket & mask].poll) == ticket) { + kmp_uint64 next_ticket = ticket + 1; + if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, ticket, + next_ticket)) { + KMP_FSYNC_ACQUIRED(lck); + KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", + ticket, lck)); + lck->lk.now_serving = ticket; // non-volatile store + + // Since no threads are waiting, there is no possibility that we would + // want to reconfigure the polling area. We might have the cleanup ticket + // value (which says that it is now safe to deallocate old_polls), but + // we'll let a later thread which calls __kmp_acquire_lock do that - this + // routine isn't supposed to block, and we would risk blocks if we called + // __kmp_free() to do the deallocation. + return TRUE; + } + } + return FALSE; +} + +static int __kmp_test_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + + int retval = __kmp_test_drdpa_lock(lck, gtid); + + if (retval) { + lck->lk.owner_id = gtid + 1; + } + return retval; +} + +int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + // Read the ticket value from the lock data struct, then the polls pointer and + // the mask. The polls pointer must be read before the mask!!! (See above) + kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load + volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls = + (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)TCR_PTR( + lck->lk.polls); // volatile load + kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load + KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", + ticket - 1, lck)); + KMP_FSYNC_RELEASING(lck); + ANNOTATE_DRDPA_RELEASED(lck); + KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store + return KMP_LOCK_RELEASED; +} + +static int __kmp_release_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_drdpa_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if ((gtid >= 0) && (__kmp_get_drdpa_lock_owner(lck) >= 0) && + (__kmp_get_drdpa_lock_owner(lck) != gtid)) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + lck->lk.owner_id = 0; + return __kmp_release_drdpa_lock(lck, gtid); +} + +void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck) { + lck->lk.location = NULL; + lck->lk.mask = 0; + lck->lk.num_polls = 1; + lck->lk.polls = + (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *)__kmp_allocate( + lck->lk.num_polls * sizeof(*(lck->lk.polls))); + lck->lk.cleanup_ticket = 0; + lck->lk.old_polls = NULL; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; // no thread owns the lock. + lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. 
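/* Aside: a fixed-size, standalone distillation of the ticket/polling scheme
   implemented above: each waiter spins on its own slot, and release publishes
   the next ticket into that ticket's slot. The dynamic resizing of the polling
   area (the reconfiguration code above) is deliberately omitted. All names are
   illustrative; assumes C++11, not the runtime's implementation. */
#include <atomic>
#include <cstdint>
#include <thread>

struct TicketPollLock {
  static constexpr std::uint64_t kPolls = 8;        // power of two
  static constexpr std::uint64_t kMask = kPolls - 1;
  std::atomic<std::uint64_t> next_ticket{0};
  std::atomic<std::uint64_t> polls[kPolls];

  TicketPollLock() {
    for (std::uint64_t i = 0; i < kPolls; ++i)
      polls[i].store(0, std::memory_order_relaxed); // ticket 0 may enter at once
  }

  std::uint64_t acquire() {                         // returns the ticket to release with
    std::uint64_t t = next_ticket.fetch_add(1, std::memory_order_relaxed);
    while (polls[t & kMask].load(std::memory_order_acquire) < t)
      std::this_thread::yield();                    // stand-in for KMP_YIELD
    return t;
  }

  bool try_acquire(std::uint64_t *out_ticket) {     // non-blocking, like the test routine
    std::uint64_t t = next_ticket.load(std::memory_order_relaxed);
    if (polls[t & kMask].load(std::memory_order_acquire) != t)
      return false;                                 // would have to wait
    if (!next_ticket.compare_exchange_strong(t, t + 1, std::memory_order_acquire))
      return false;                                 // someone else claimed this ticket
    *out_ticket = t;
    return true;
  }

  void release(std::uint64_t t) {
    // Hand the lock to ticket t+1 by publishing that value into its slot.
    polls[(t + 1) & kMask].store(t + 1, std::memory_order_release);
  }
};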
+ lck->lk.initialized = lck; + + KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); +} + +static void __kmp_init_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { + __kmp_init_drdpa_lock(lck); +} + +void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck) { + lck->lk.initialized = NULL; + lck->lk.location = NULL; + if (lck->lk.polls != NULL) { + __kmp_free((void *)lck->lk.polls); + lck->lk.polls = NULL; + } + if (lck->lk.old_polls != NULL) { + __kmp_free((void *)lck->lk.old_polls); + lck->lk.old_polls = NULL; + } + lck->lk.mask = 0; + lck->lk.num_polls = 0; + lck->lk.cleanup_ticket = 0; + lck->lk.next_ticket = 0; + lck->lk.now_serving = 0; + lck->lk.owner_id = 0; + lck->lk.depth_locked = -1; +} + +static void __kmp_destroy_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { + char const *const func = "omp_destroy_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } + if (__kmp_get_drdpa_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_drdpa_lock(lck); +} - __kmp_acquire_drdpa_lock( lck, gtid ); +// nested drdpa ticket locks + +int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); + if (__kmp_get_drdpa_lock_owner(lck) == gtid) { + lck->lk.depth_locked += 1; + return KMP_LOCK_ACQUIRED_NEXT; + } else { + __kmp_acquire_drdpa_lock_timed_template(lck, gtid); + ANNOTATE_DRDPA_ACQUIRED(lck); + KMP_MB(); + lck->lk.depth_locked = 1; + KMP_MB(); lck->lk.owner_id = gtid + 1; return KMP_LOCK_ACQUIRED_FIRST; + } } -int -__kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - // - // First get a ticket, then read the polls pointer and the mask. - // The polls pointer must be read before the mask!!! (See above) - // - kmp_uint64 ticket = TCR_8(lck->lk.next_ticket); // volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - if (TCR_8(polls[ticket & mask].poll) == ticket) { - kmp_uint64 next_ticket = ticket + 1; - if (KMP_COMPARE_AND_STORE_ACQ64((kmp_int64 *)&lck->lk.next_ticket, - ticket, next_ticket)) { - KMP_FSYNC_ACQUIRED(lck); - KA_TRACE(1000, ("__kmp_test_drdpa_lock: ticket #%lld acquired lock %p\n", - ticket, lck)); - lck->lk.now_serving = ticket; // non-volatile store - - // - // Since no threads are waiting, there is no possibility that - // we would want to reconfigure the polling area. We might - // have the cleanup ticket value (which says that it is now - // safe to deallocate old_polls), but we'll let a later thread - // which calls __kmp_acquire_lock do that - this routine - // isn't supposed to block, and we would risk blocks if we - // called __kmp_free() to do the deallocation. 
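The nested drdpa lock routines in this hunk boil down to an ownership check plus a re-entrancy counter: the owning thread only bumps depth_locked, any other thread acquires the underlying lock first, and only the outermost release hands it back. A minimal stand-alone sketch of that pattern, assuming plain C++ primitives (ReentrantLock and its members are invented names, and std::mutex stands in for the ticket lock):

#include <atomic>
#include <cassert>
#include <mutex>
#include <thread>

// Hypothetical re-entrant wrapper over a plain mutex, mirroring the
// owner-check + depth-counter scheme of the nested lock functions above.
class ReentrantLock {
  std::mutex base_;                                  // stands in for the simple lock
  std::atomic<std::thread::id> owner_{std::thread::id()}; // current owner, if any
  int depth_ = 0;                                    // nesting depth, owner-only state

public:
  void acquire() {
    if (owner_.load(std::memory_order_relaxed) == std::this_thread::get_id()) {
      ++depth_;                                      // already own it: just go deeper
      return;
    }
    base_.lock();                                    // first acquisition by this thread
    owner_.store(std::this_thread::get_id(), std::memory_order_relaxed);
    depth_ = 1;
  }

  void release() {
    assert(owner_.load(std::memory_order_relaxed) == std::this_thread::get_id());
    if (--depth_ == 0) {                             // outermost release really unlocks
      owner_.store(std::thread::id(), std::memory_order_relaxed);
      base_.unlock();
    }
  }
};

int main() {
  ReentrantLock l;
  l.acquire();
  l.acquire();   // nested acquire: depth becomes 2, no deadlock
  l.release();
  l.release();   // depth hits 0, underlying mutex released
}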
- // - return TRUE; - } - } - return FALSE; +static void __kmp_acquire_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_set_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + __kmp_acquire_nested_drdpa_lock(lck, gtid); } -static int -__kmp_test_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } +int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + int retval; - int retval = __kmp_test_drdpa_lock( lck, gtid ); + KMP_DEBUG_ASSERT(gtid >= 0); - if ( retval ) { - lck->lk.owner_id = gtid + 1; - } - return retval; + if (__kmp_get_drdpa_lock_owner(lck) == gtid) { + retval = ++lck->lk.depth_locked; + } else if (!__kmp_test_drdpa_lock(lck, gtid)) { + retval = 0; + } else { + KMP_MB(); + retval = lck->lk.depth_locked = 1; + KMP_MB(); + lck->lk.owner_id = gtid + 1; + } + return retval; } -int -__kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - // - // Read the ticket value from the lock data struct, then the polls - // pointer and the mask. The polls pointer must be read before the - // mask!!! (See above) - // - kmp_uint64 ticket = lck->lk.now_serving + 1; // non-volatile load - volatile struct kmp_base_drdpa_lock::kmp_lock_poll *polls - = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - TCR_PTR(lck->lk.polls); // volatile load - kmp_uint64 mask = TCR_8(lck->lk.mask); // volatile load - KA_TRACE(1000, ("__kmp_release_drdpa_lock: ticket #%lld released lock %p\n", - ticket - 1, lck)); - KMP_FSYNC_RELEASING(lck); - ANNOTATE_DRDPA_RELEASED(lck); - KMP_ST_REL64(&(polls[ticket & mask].poll), ticket); // volatile store - return KMP_LOCK_RELEASED; +static int __kmp_test_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_test_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + return __kmp_test_nested_drdpa_lock(lck, gtid); } -static int -__kmp_release_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( ( gtid >= 0 ) && ( __kmp_get_drdpa_lock_owner( lck ) >= 0 ) - && ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } +int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid) { + KMP_DEBUG_ASSERT(gtid >= 0); + + KMP_MB(); + if (--(lck->lk.depth_locked) == 0) { + KMP_MB(); lck->lk.owner_id = 0; - return __kmp_release_drdpa_lock( lck, gtid ); + __kmp_release_drdpa_lock(lck, gtid); + return KMP_LOCK_RELEASED; + } + return KMP_LOCK_STILL_HELD; } -void -__kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - lck->lk.location = NULL; - lck->lk.mask = 0; - lck->lk.num_polls = 1; - 
lck->lk.polls = (volatile struct kmp_base_drdpa_lock::kmp_lock_poll *) - __kmp_allocate(lck->lk.num_polls * sizeof(*(lck->lk.polls))); - lck->lk.cleanup_ticket = 0; - lck->lk.old_polls = NULL; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; // no thread owns the lock. - lck->lk.depth_locked = -1; // >= 0 for nestable locks, -1 for simple locks. - lck->lk.initialized = lck; - - KA_TRACE(1000, ("__kmp_init_drdpa_lock: lock %p initialized\n", lck)); -} - -static void -__kmp_init_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_drdpa_lock( lck ); -} - -void -__kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - lck->lk.initialized = NULL; - lck->lk.location = NULL; - if (lck->lk.polls != NULL) { - __kmp_free((void *)lck->lk.polls); - lck->lk.polls = NULL; - } - if (lck->lk.old_polls != NULL) { - __kmp_free((void *)lck->lk.old_polls); - lck->lk.old_polls = NULL; - } - lck->lk.mask = 0; - lck->lk.num_polls = 0; - lck->lk.cleanup_ticket = 0; - lck->lk.next_ticket = 0; - lck->lk.now_serving = 0; - lck->lk.owner_id = 0; - lck->lk.depth_locked = -1; -} - -static void -__kmp_destroy_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) -{ - char const * const func = "omp_destroy_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_drdpa_lock( lck ); -} - - -// -// nested drdpa ticket locks -// - -int -__kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { - lck->lk.depth_locked += 1; - return KMP_LOCK_ACQUIRED_NEXT; - } - else { - __kmp_acquire_drdpa_lock_timed_template( lck, gtid ); - ANNOTATE_DRDPA_ACQUIRED(lck); - KMP_MB(); - lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - return KMP_LOCK_ACQUIRED_FIRST; - } -} - -static void -__kmp_acquire_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_set_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - __kmp_acquire_nested_drdpa_lock( lck, gtid ); -} - -int -__kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - int retval; - - KMP_DEBUG_ASSERT( gtid >= 0 ); - - if ( __kmp_get_drdpa_lock_owner( lck ) == gtid ) { - retval = ++lck->lk.depth_locked; - } - else if ( !__kmp_test_drdpa_lock( lck, gtid ) ) { - retval = 0; - } - else { - KMP_MB(); - retval = lck->lk.depth_locked = 1; - KMP_MB(); - lck->lk.owner_id = gtid + 1; - } - return retval; +static int __kmp_release_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck, + kmp_int32 gtid) { + char const *const func = "omp_unset_nest_lock"; + KMP_MB(); /* in case another processor initialized lock */ + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_drdpa_lock_owner(lck) == -1) { + KMP_FATAL(LockUnsettingFree, func); + } + if (__kmp_get_drdpa_lock_owner(lck) != gtid) { + KMP_FATAL(LockUnsettingSetByAnother, func); + } + return __kmp_release_nested_drdpa_lock(lck, gtid); } -static int -__kmp_test_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_test_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - return __kmp_test_nested_drdpa_lock( lck, gtid ); +void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { + __kmp_init_drdpa_lock(lck); + lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks } -int -__kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( gtid >= 0 ); - - KMP_MB(); - if ( --(lck->lk.depth_locked) == 0 ) { - KMP_MB(); - lck->lk.owner_id = 0; - __kmp_release_drdpa_lock( lck, gtid ); - return KMP_LOCK_RELEASED; - } - return KMP_LOCK_STILL_HELD; +static void __kmp_init_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { + __kmp_init_nested_drdpa_lock(lck); } -static int -__kmp_release_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck, kmp_int32 gtid ) -{ - char const * const func = "omp_unset_nest_lock"; - KMP_MB(); /* in case another processor initialized lock */ - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! 
__kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) == -1 ) { - KMP_FATAL( LockUnsettingFree, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != gtid ) { - KMP_FATAL( LockUnsettingSetByAnother, func ); - } - return __kmp_release_nested_drdpa_lock( lck, gtid ); +void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck) { + __kmp_destroy_drdpa_lock(lck); + lck->lk.depth_locked = 0; } -void -__kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_drdpa_lock( lck ); - lck->lk.depth_locked = 0; // >= 0 for nestable locks, -1 for simple locks -} - -static void -__kmp_init_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t * lck ) -{ - __kmp_init_nested_drdpa_lock( lck ); +static void __kmp_destroy_nested_drdpa_lock_with_checks(kmp_drdpa_lock_t *lck) { + char const *const func = "omp_destroy_nest_lock"; + if (lck->lk.initialized != lck) { + KMP_FATAL(LockIsUninitialized, func); + } + if (!__kmp_is_drdpa_lock_nestable(lck)) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + if (__kmp_get_drdpa_lock_owner(lck) != -1) { + KMP_FATAL(LockStillOwned, func); + } + __kmp_destroy_nested_drdpa_lock(lck); } -void -__kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ) -{ - __kmp_destroy_drdpa_lock( lck ); - lck->lk.depth_locked = 0; -} - -static void -__kmp_destroy_nested_drdpa_lock_with_checks( kmp_drdpa_lock_t *lck ) -{ - char const * const func = "omp_destroy_nest_lock"; - if ( lck->lk.initialized != lck ) { - KMP_FATAL( LockIsUninitialized, func ); - } - if ( ! __kmp_is_drdpa_lock_nestable( lck ) ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - if ( __kmp_get_drdpa_lock_owner( lck ) != -1 ) { - KMP_FATAL( LockStillOwned, func ); - } - __kmp_destroy_nested_drdpa_lock( lck ); -} - - -// // access functions to fields which don't exist for all lock kinds. 
-// -static int -__kmp_is_drdpa_lock_initialized( kmp_drdpa_lock_t *lck ) -{ - return lck == lck->lk.initialized; +static int __kmp_is_drdpa_lock_initialized(kmp_drdpa_lock_t *lck) { + return lck == lck->lk.initialized; } -static const ident_t * -__kmp_get_drdpa_lock_location( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.location; +static const ident_t *__kmp_get_drdpa_lock_location(kmp_drdpa_lock_t *lck) { + return lck->lk.location; } -static void -__kmp_set_drdpa_lock_location( kmp_drdpa_lock_t *lck, const ident_t *loc ) -{ - lck->lk.location = loc; +static void __kmp_set_drdpa_lock_location(kmp_drdpa_lock_t *lck, + const ident_t *loc) { + lck->lk.location = loc; } -static kmp_lock_flags_t -__kmp_get_drdpa_lock_flags( kmp_drdpa_lock_t *lck ) -{ - return lck->lk.flags; +static kmp_lock_flags_t __kmp_get_drdpa_lock_flags(kmp_drdpa_lock_t *lck) { + return lck->lk.flags; } -static void -__kmp_set_drdpa_lock_flags( kmp_drdpa_lock_t *lck, kmp_lock_flags_t flags ) -{ - lck->lk.flags = flags; +static void __kmp_set_drdpa_lock_flags(kmp_drdpa_lock_t *lck, + kmp_lock_flags_t flags) { + lck->lk.flags = flags; } // Time stamp counter #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# define __kmp_tsc() __kmp_hardware_timestamp() +#define __kmp_tsc() __kmp_hardware_timestamp() // Runtime's default backoff parameters -kmp_backoff_t __kmp_spin_backoff_params = { 1, 4096, 100 }; +kmp_backoff_t __kmp_spin_backoff_params = {1, 4096, 100}; #else // Use nanoseconds for other platforms extern kmp_uint64 __kmp_now_nsec(); -kmp_backoff_t __kmp_spin_backoff_params = { 1, 256, 100 }; -# define __kmp_tsc() __kmp_now_nsec() +kmp_backoff_t __kmp_spin_backoff_params = {1, 256, 100}; +#define __kmp_tsc() __kmp_now_nsec() #endif // A useful predicate for dealing with timestamps that may wrap. -// Is a before b? -// Since the timestamps may wrap, this is asking whether it's +// Is a before b? Since the timestamps may wrap, this is asking whether it's // shorter to go clockwise from a to b around the clock-face, or anti-clockwise. // Times where going clockwise is less distance than going anti-clockwise -// are in the future, others are in the past. -// e.g.) a = MAX-1, b = MAX+1 (=0), then a > b (true) does not mean a reached b -// whereas signed(a) = -2, signed(b) = 0 captures the actual difference -static inline bool before(kmp_uint64 a, kmp_uint64 b) -{ - return ((kmp_int64)b - (kmp_int64)a) > 0; +// are in the future, others are in the past. e.g. a = MAX-1, b = MAX+1 (=0), +// then a > b (true) does not mean a reached b; whereas signed(a) = -2, +// signed(b) = 0 captures the actual difference +static inline bool before(kmp_uint64 a, kmp_uint64 b) { + return ((kmp_int64)b - (kmp_int64)a) > 0; } // Truncated binary exponential backoff function -void -__kmp_spin_backoff(kmp_backoff_t *boff) -{ - // We could flatten this loop, but making it a nested loop gives better result. - kmp_uint32 i; - for (i = boff->step; i > 0; i--) { - kmp_uint64 goal = __kmp_tsc() + boff->min_tick; - do { - KMP_CPU_PAUSE(); - } while (before(__kmp_tsc(), goal)); - } - boff->step = (boff->step<<1 | 1) & (boff->max_backoff-1); +void __kmp_spin_backoff(kmp_backoff_t *boff) { + // We could flatten this loop, but making it a nested loop gives better result + kmp_uint32 i; + for (i = boff->step; i > 0; i--) { + kmp_uint64 goal = __kmp_tsc() + boff->min_tick; + do { + KMP_CPU_PAUSE(); + } while (before(__kmp_tsc(), goal)); + } + boff->step = (boff->step << 1 | 1) & (boff->max_backoff - 1); } #if KMP_USE_DYNAMIC_LOCK -// Direct lock initializers. 
It simply writes a tag to the low 8 bits of the lock word. -static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) -{ - TCW_4(*lck, KMP_GET_D_TAG(seq)); - KA_TRACE(20, ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); +// Direct lock initializers. It simply writes a tag to the low 8 bits of the +// lock word. +static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, + kmp_dyna_lockseq_t seq) { + TCW_4(*lck, KMP_GET_D_TAG(seq)); + KA_TRACE( + 20, + ("__kmp_init_direct_lock: initialized direct lock with type#%d\n", seq)); } #if KMP_USE_TSX @@ -3097,207 +2797,183 @@ static void __kmp_init_direct_lock(kmp_dyna_lock_t *lck, kmp_dyna_lockseq_t seq) #define HLE_ACQUIRE ".byte 0xf2;" #define HLE_RELEASE ".byte 0xf3;" -static inline kmp_uint32 -swap4(kmp_uint32 volatile *p, kmp_uint32 v) -{ - __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" - : "+r"(v), "+m"(*p) - : - : "memory"); - return v; +static inline kmp_uint32 swap4(kmp_uint32 volatile *p, kmp_uint32 v) { + __asm__ volatile(HLE_ACQUIRE "xchg %1,%0" : "+r"(v), "+m"(*p) : : "memory"); + return v; } -static void -__kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) -{ - TCW_4(*lck, 0); -} +static void __kmp_destroy_hle_lock(kmp_dyna_lock_t *lck) { TCW_4(*lck, 0); } -static void -__kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - // Use gtid for KMP_LOCK_BUSY if necessary - if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { - int delay = 1; - do { - while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { - for (int i = delay; i != 0; --i) - KMP_CPU_PAUSE(); - delay = ((delay << 1) | 1) & 7; - } - } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); - } +static void __kmp_acquire_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { + // Use gtid for KMP_LOCK_BUSY if necessary + if (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)) { + int delay = 1; + do { + while (*(kmp_uint32 volatile *)lck != KMP_LOCK_FREE(hle)) { + for (int i = delay; i != 0; --i) + KMP_CPU_PAUSE(); + delay = ((delay << 1) | 1) & 7; + } + } while (swap4(lck, KMP_LOCK_BUSY(1, hle)) != KMP_LOCK_FREE(hle)); + } } -static void -__kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks +static void __kmp_acquire_hle_lock_with_checks(kmp_dyna_lock_t *lck, + kmp_int32 gtid) { + __kmp_acquire_hle_lock(lck, gtid); // TODO: add checks } -static int -__kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - __asm__ volatile(HLE_RELEASE "movl %1,%0" - : "=m"(*lck) - : "r"(KMP_LOCK_FREE(hle)) - : "memory"); - return KMP_LOCK_RELEASED; +static int __kmp_release_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { + __asm__ volatile(HLE_RELEASE "movl %1,%0" + : "=m"(*lck) + : "r"(KMP_LOCK_FREE(hle)) + : "memory"); + return KMP_LOCK_RELEASED; } -static int -__kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_release_hle_lock(lck, gtid); // TODO: add checks +static int __kmp_release_hle_lock_with_checks(kmp_dyna_lock_t *lck, + kmp_int32 gtid) { + return __kmp_release_hle_lock(lck, gtid); // TODO: add checks } -static int -__kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); +static int __kmp_test_hle_lock(kmp_dyna_lock_t *lck, kmp_int32 gtid) { + return swap4(lck, KMP_LOCK_BUSY(1, hle)) == KMP_LOCK_FREE(hle); } -static int -__kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, kmp_int32 gtid) -{ - return 
__kmp_test_hle_lock(lck, gtid); // TODO: add checks +static int __kmp_test_hle_lock_with_checks(kmp_dyna_lock_t *lck, + kmp_int32 gtid) { + return __kmp_test_hle_lock(lck, gtid); // TODO: add checks } -static void -__kmp_init_rtm_lock(kmp_queuing_lock_t *lck) -{ - __kmp_init_queuing_lock(lck); +static void __kmp_init_rtm_lock(kmp_queuing_lock_t *lck) { + __kmp_init_queuing_lock(lck); } -static void -__kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) -{ - __kmp_destroy_queuing_lock(lck); +static void __kmp_destroy_rtm_lock(kmp_queuing_lock_t *lck) { + __kmp_destroy_queuing_lock(lck); } -static void -__kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - unsigned retries=3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED) { - if (__kmp_is_unlocked_queuing_lock(lck)) - return; - _xabort(0xff); - } - if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { - // Wait until lock becomes free - while (! __kmp_is_unlocked_queuing_lock(lck)) - __kmp_yield(TRUE); - } - else if (!(status & _XABORT_RETRY)) - break; - } while (retries--); +static void __kmp_acquire_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + unsigned retries = 3, status; + do { + status = _xbegin(); + if (status == _XBEGIN_STARTED) { + if (__kmp_is_unlocked_queuing_lock(lck)) + return; + _xabort(0xff); + } + if ((status & _XABORT_EXPLICIT) && _XABORT_CODE(status) == 0xff) { + // Wait until lock becomes free + while (!__kmp_is_unlocked_queuing_lock(lck)) + __kmp_yield(TRUE); + } else if (!(status & _XABORT_RETRY)) + break; + } while (retries--); - // Fall-back non-speculative lock (xchg) - __kmp_acquire_queuing_lock(lck, gtid); + // Fall-back non-speculative lock (xchg) + __kmp_acquire_queuing_lock(lck, gtid); } -static void -__kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - __kmp_acquire_rtm_lock(lck, gtid); +static void __kmp_acquire_rtm_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + __kmp_acquire_rtm_lock(lck, gtid); } -static int -__kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - if (__kmp_is_unlocked_queuing_lock(lck)) { - // Releasing from speculation - _xend(); - } - else { - // Releasing from a real lock - __kmp_release_queuing_lock(lck, gtid); - } - return KMP_LOCK_RELEASED; +static int __kmp_release_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + if (__kmp_is_unlocked_queuing_lock(lck)) { + // Releasing from speculation + _xend(); + } else { + // Releasing from a real lock + __kmp_release_queuing_lock(lck, gtid); + } + return KMP_LOCK_RELEASED; } -static int -__kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_release_rtm_lock(lck, gtid); +static int __kmp_release_rtm_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + return __kmp_release_rtm_lock(lck, gtid); } -static int -__kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - unsigned retries=3, status; - do { - status = _xbegin(); - if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { - return 1; - } - if (!(status & _XABORT_RETRY)) - break; - } while (retries--); +static int __kmp_test_rtm_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid) { + unsigned retries = 3, status; + do { + status = _xbegin(); + if (status == _XBEGIN_STARTED && __kmp_is_unlocked_queuing_lock(lck)) { + return 1; + } + if (!(status & _XABORT_RETRY)) + break; + } while (retries--); - return (__kmp_is_unlocked_queuing_lock(lck))? 1: 0; + return (__kmp_is_unlocked_queuing_lock(lck)) ? 
1 : 0; } -static int -__kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, kmp_int32 gtid) -{ - return __kmp_test_rtm_lock(lck, gtid); +static int __kmp_test_rtm_lock_with_checks(kmp_queuing_lock_t *lck, + kmp_int32 gtid) { + return __kmp_test_rtm_lock(lck, gtid); } #endif // KMP_USE_TSX -// Entry functions for indirect locks (first element of direct lock jump tables). -static void __kmp_init_indirect_lock(kmp_dyna_lock_t * l, kmp_dyna_lockseq_t tag); -static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock); -static void __kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32); -static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); -static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32); - -// -// Jump tables for the indirect lock functions. -// Only fill in the odd entries, that avoids the need to shift out the low bit. -// +// Entry functions for indirect locks (first element of direct lock jump tables) +static void __kmp_init_indirect_lock(kmp_dyna_lock_t *l, + kmp_dyna_lockseq_t tag); +static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock); +static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); +static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); +static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32); +static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32); +static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32); +static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32); + +// Jump tables for the indirect lock functions +// Only fill in the odd entries, that avoids the need to shift out the low bit // init functions -#define expand(l, op) 0,__kmp_init_direct_lock, -void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) - = { __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init) }; +#define expand(l, op) 0, __kmp_init_direct_lock, +void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t) = { + __kmp_init_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, init)}; #undef expand // destroy functions -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, -void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) - = { __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy) }; +#define expand(l, op) 0, (void (*)(kmp_dyna_lock_t *))__kmp_##op##_##l##_lock, +void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *) = { + __kmp_destroy_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, destroy)}; #undef expand // set/acquire functions -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire) }; +#define expand(l, op) \ + 0, (void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, +static void (*direct_set[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_set_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand -#define expand(l, op) 0,(void (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_set_indirect_lock_with_checks, 0, 
KMP_FOREACH_D_LOCK(expand, acquire) }; +#define expand(l, op) \ + 0, (void (*)(kmp_dyna_lock_t *, \ + kmp_int32))__kmp_##op##_##l##_lock_with_checks, +static void (*direct_set_check[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_set_indirect_lock_with_checks, 0, + KMP_FOREACH_D_LOCK(expand, acquire)}; #undef expand // unset/release and test functions -#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, -static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release) }; -static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test) }; +#define expand(l, op) \ + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock, +static int (*direct_unset[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_unset_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, release)}; +static int (*direct_test[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_test_indirect_lock, 0, KMP_FOREACH_D_LOCK(expand, test)}; #undef expand -#define expand(l, op) 0,(int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, -static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_unset_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, release) }; -static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) - = { __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test) }; +#define expand(l, op) \ + 0, (int (*)(kmp_dyna_lock_t *, kmp_int32))__kmp_##op##_##l##_lock_with_checks, +static int (*direct_unset_check[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_unset_indirect_lock_with_checks, 0, + KMP_FOREACH_D_LOCK(expand, release)}; +static int (*direct_test_check[])(kmp_dyna_lock_t *, kmp_int32) = { + __kmp_test_indirect_lock_with_checks, 0, KMP_FOREACH_D_LOCK(expand, test)}; #undef expand // Exposes only one set of jump tables (*lock or *lock_with_checks). @@ -3305,30 +2981,40 @@ void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32) = 0; int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32) = 0; -// -// Jump tables for the indirect lock functions. 
-// -#define expand(l, op) (void (*)(kmp_user_lock_p))__kmp_##op##_##l##_##lock, -void (*__kmp_indirect_init[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, init) }; -void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { KMP_FOREACH_I_LOCK(expand, destroy) }; +// Jump tables for the indirect lock functions +#define expand(l, op) (void (*)(kmp_user_lock_p)) __kmp_##op##_##l##_##lock, +void (*__kmp_indirect_init[])(kmp_user_lock_p) = { + KMP_FOREACH_I_LOCK(expand, init)}; +void (*__kmp_indirect_destroy[])(kmp_user_lock_p) = { + KMP_FOREACH_I_LOCK(expand, destroy)}; #undef expand // set/acquire functions -#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, -static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; +#define expand(l, op) \ + (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, +static void (*indirect_set[])(kmp_user_lock_p, kmp_int32) = { + KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand -#define expand(l, op) (void (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, -static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, acquire) }; +#define expand(l, op) \ + (void (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, +static void (*indirect_set_check[])(kmp_user_lock_p, kmp_int32) = { + KMP_FOREACH_I_LOCK(expand, acquire)}; #undef expand // unset/release and test functions -#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock, -static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; -static int (*indirect_test[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; +#define expand(l, op) \ + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock, +static int (*indirect_unset[])(kmp_user_lock_p, kmp_int32) = { + KMP_FOREACH_I_LOCK(expand, release)}; +static int (*indirect_test[])(kmp_user_lock_p, + kmp_int32) = {KMP_FOREACH_I_LOCK(expand, test)}; #undef expand -#define expand(l, op) (int (*)(kmp_user_lock_p, kmp_int32))__kmp_##op##_##l##_##lock_with_checks, -static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, release) }; -static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { KMP_FOREACH_I_LOCK(expand, test) }; +#define expand(l, op) \ + (int (*)(kmp_user_lock_p, kmp_int32)) __kmp_##op##_##l##_##lock_with_checks, +static int (*indirect_unset_check[])(kmp_user_lock_p, kmp_int32) = { + KMP_FOREACH_I_LOCK(expand, release)}; +static int (*indirect_test_check[])(kmp_user_lock_p, kmp_int32) = { + KMP_FOREACH_I_LOCK(expand, test)}; #undef expand // Exposes only one jump tables (*lock or *lock_with_checks). @@ -3340,954 +3026,875 @@ int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32) = 0; kmp_indirect_lock_table_t __kmp_i_lock_table; // Size of indirect locks. -static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = { 0 }; +static kmp_uint32 __kmp_indirect_lock_size[KMP_NUM_I_LOCKS] = {0}; // Jump tables for lock accessor/modifier. 
-void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *) = { 0 }; -void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t) = { 0 }; -const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; -kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p) = { 0 }; +void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, + const ident_t *) = {0}; +void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, + kmp_lock_flags_t) = {0}; +const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( + kmp_user_lock_p) = {0}; +kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( + kmp_user_lock_p) = {0}; // Use different lock pools for different lock types. -static kmp_indirect_lock_t * __kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = { 0 }; - -// User lock allocator for dynamically dispatched indirect locks. -// Every entry of the indirect lock table holds the address and type of the allocated indrect lock -// (kmp_indirect_lock_t), and the size of the table doubles when it is full. A destroyed indirect lock -// object is returned to the reusable pool of locks, unique to each lock type. -kmp_indirect_lock_t * -__kmp_allocate_indirect_lock(void **user_lock, kmp_int32 gtid, kmp_indirect_locktag_t tag) -{ - kmp_indirect_lock_t *lck; - kmp_lock_index_t idx; - - __kmp_acquire_lock(&__kmp_global_lock, gtid); - - if (__kmp_indirect_lock_pool[tag] != NULL) { - // Reuse the allocated and destroyed lock object - lck = __kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) - idx = lck->lock->pool.index; - __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; - KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", lck)); - } else { - idx = __kmp_i_lock_table.next; - // Check capacity and double the size if it is full - if (idx == __kmp_i_lock_table.size) { - // Double up the space for block pointers - int row = __kmp_i_lock_table.size/KMP_I_LOCK_CHUNK; - kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; - __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(2*row*sizeof(kmp_indirect_lock_t *)); - KMP_MEMCPY(__kmp_i_lock_table.table, old_table, row*sizeof(kmp_indirect_lock_t *)); - __kmp_free(old_table); - // Allocate new objects in the new blocks - for (int i = row; i < 2*row; ++i) - *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *) - __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.size = 2*idx; - } - __kmp_i_lock_table.next++; - lck = KMP_GET_I_LOCK(idx); - // Allocate a new base lock object - lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); - KA_TRACE(20, ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); - } - - __kmp_release_lock(&__kmp_global_lock, gtid); - - lck->type = tag; +static kmp_indirect_lock_t *__kmp_indirect_lock_pool[KMP_NUM_I_LOCKS] = {0}; + +// User lock allocator for dynamically dispatched indirect locks. Every entry of +// the indirect lock table holds the address and type of the allocated indrect +// lock (kmp_indirect_lock_t), and the size of the table doubles when it is +// full. A destroyed indirect lock object is returned to the reusable pool of +// locks, unique to each lock type. 
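The allocator described in the comment above keeps a growable table of lock objects plus, per lock type, a pool of destroyed locks that are reused before the table grows. A compact sketch of that doubling-table-plus-free-pool idea, assuming simplified stand-ins (LockSlot, IndirectTable, and a flat std::vector are invented for illustration; the runtime grows a table of fixed-size blocks instead):

#include <cstddef>
#include <cstdio>
#include <vector>

// Hypothetical stand-in for an allocated indirect lock object.
struct LockSlot {
  int type;             // lock kind tag
  LockSlot *pool_next;  // chains destroyed slots of the same type
};

// Growable table of slots plus one free pool per lock type.
class IndirectTable {
  std::vector<LockSlot *> table_; // doubles when full
  std::vector<LockSlot *> pool_;  // head of free list, one per type
  std::size_t next_ = 0;

public:
  IndirectTable(std::size_t ntypes, std::size_t initial)
      : table_(initial, nullptr), pool_(ntypes, nullptr) {}

  LockSlot *allocate(int type) {
    if (pool_[type] != nullptr) {           // reuse a destroyed slot first
      LockSlot *s = pool_[type];
      pool_[type] = s->pool_next;
      return s;
    }
    if (next_ == table_.size())             // table full: double its size
      table_.resize(table_.size() * 2, nullptr);
    LockSlot *s = new LockSlot{type, nullptr};
    table_[next_++] = s;
    return s;
  }

  void destroy(LockSlot *s) {               // push onto the per-type pool
    s->pool_next = pool_[s->type];
    pool_[s->type] = s;
  }

  ~IndirectTable() {
    for (LockSlot *s : table_)              // the table owns every slot once
      delete s;
  }
};

int main() {
  IndirectTable t(/*ntypes=*/2, /*initial=*/1);
  LockSlot *a = t.allocate(0);
  t.destroy(a);                             // goes to pool[0]
  LockSlot *b = t.allocate(0);              // reuses the same slot
  std::printf("reused slot: %s\n", a == b ? "yes" : "no");
}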
+kmp_indirect_lock_t *__kmp_allocate_indirect_lock(void **user_lock, + kmp_int32 gtid, + kmp_indirect_locktag_t tag) { + kmp_indirect_lock_t *lck; + kmp_lock_index_t idx; + + __kmp_acquire_lock(&__kmp_global_lock, gtid); + + if (__kmp_indirect_lock_pool[tag] != NULL) { + // Reuse the allocated and destroyed lock object + lck = __kmp_indirect_lock_pool[tag]; + if (OMP_LOCK_T_SIZE < sizeof(void *)) + idx = lck->lock->pool.index; + __kmp_indirect_lock_pool[tag] = (kmp_indirect_lock_t *)lck->lock->pool.next; + KA_TRACE(20, ("__kmp_allocate_indirect_lock: reusing an existing lock %p\n", + lck)); + } else { + idx = __kmp_i_lock_table.next; + // Check capacity and double the size if it is full + if (idx == __kmp_i_lock_table.size) { + // Double up the space for block pointers + int row = __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; + kmp_indirect_lock_t **old_table = __kmp_i_lock_table.table; + __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate( + 2 * row * sizeof(kmp_indirect_lock_t *)); + KMP_MEMCPY(__kmp_i_lock_table.table, old_table, + row * sizeof(kmp_indirect_lock_t *)); + __kmp_free(old_table); + // Allocate new objects in the new blocks + for (int i = row; i < 2 * row; ++i) + *(__kmp_i_lock_table.table + i) = (kmp_indirect_lock_t *)__kmp_allocate( + KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); + __kmp_i_lock_table.size = 2 * idx; + } + __kmp_i_lock_table.next++; + lck = KMP_GET_I_LOCK(idx); + // Allocate a new base lock object + lck->lock = (kmp_user_lock_p)__kmp_allocate(__kmp_indirect_lock_size[tag]); + KA_TRACE(20, + ("__kmp_allocate_indirect_lock: allocated a new lock %p\n", lck)); + } + + __kmp_release_lock(&__kmp_global_lock, gtid); + + lck->type = tag; + + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + *((kmp_lock_index_t *)user_lock) = idx + << 1; // indirect lock word must be even + } else { + *((kmp_indirect_lock_t **)user_lock) = lck; + } + + return lck; +} +// User lock lookup for dynamically dispatched locks. +static __forceinline kmp_indirect_lock_t * +__kmp_lookup_indirect_lock(void **user_lock, const char *func) { + if (__kmp_env_consistency_check) { + kmp_indirect_lock_t *lck = NULL; + if (user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } if (OMP_LOCK_T_SIZE < sizeof(void *)) { - *((kmp_lock_index_t *)user_lock) = idx << 1; // indirect lock word must be even. + kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); + if (idx >= __kmp_i_lock_table.size) { + KMP_FATAL(LockIsUninitialized, func); + } + lck = KMP_GET_I_LOCK(idx); } else { - *((kmp_indirect_lock_t **)user_lock) = lck; + lck = *((kmp_indirect_lock_t **)user_lock); + } + if (lck == NULL) { + KMP_FATAL(LockIsUninitialized, func); } - return lck; -} - -// User lock lookup for dynamically dispatched locks. 
-static __forceinline -kmp_indirect_lock_t * -__kmp_lookup_indirect_lock(void **user_lock, const char *func) -{ - if (__kmp_env_consistency_check) { - kmp_indirect_lock_t *lck = NULL; - if (user_lock == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - kmp_lock_index_t idx = KMP_EXTRACT_I_INDEX(user_lock); - if (idx >= __kmp_i_lock_table.size) { - KMP_FATAL(LockIsUninitialized, func); - } - lck = KMP_GET_I_LOCK(idx); - } else { - lck = *((kmp_indirect_lock_t **)user_lock); - } - if (lck == NULL) { - KMP_FATAL(LockIsUninitialized, func); - } - return lck; + } else { + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); } else { - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - return KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(user_lock)); - } else { - return *((kmp_indirect_lock_t **)user_lock); - } + return *((kmp_indirect_lock_t **)user_lock); } + } } -static void -__kmp_init_indirect_lock(kmp_dyna_lock_t * lock, kmp_dyna_lockseq_t seq) -{ +static void __kmp_init_indirect_lock(kmp_dyna_lock_t *lock, + kmp_dyna_lockseq_t seq) { #if KMP_USE_ADAPTIVE_LOCKS - if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { - KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); - seq = lockseq_queuing; - } + if (seq == lockseq_adaptive && !__kmp_cpuinfo.rtm) { + KMP_WARNING(AdaptiveNotSupported, "kmp_lockseq_t", "adaptive"); + seq = lockseq_queuing; + } #endif #if KMP_USE_TSX - if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { - seq = lockseq_queuing; - } + if (seq == lockseq_rtm && !__kmp_cpuinfo.rtm) { + seq = lockseq_queuing; + } #endif - kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); - kmp_indirect_lock_t *l = __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); - KMP_I_LOCK_FUNC(l, init)(l->lock); - KA_TRACE(20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", seq)); + kmp_indirect_locktag_t tag = KMP_GET_I_TAG(seq); + kmp_indirect_lock_t *l = + __kmp_allocate_indirect_lock((void **)lock, __kmp_entry_gtid(), tag); + KMP_I_LOCK_FUNC(l, init)(l->lock); + KA_TRACE( + 20, ("__kmp_init_indirect_lock: initialized indirect lock with type#%d\n", + seq)); } -static void -__kmp_destroy_indirect_lock(kmp_dyna_lock_t * lock) -{ - kmp_uint32 gtid = __kmp_entry_gtid(); - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); - KMP_I_LOCK_FUNC(l, destroy)(l->lock); - kmp_indirect_locktag_t tag = l->type; +static void __kmp_destroy_indirect_lock(kmp_dyna_lock_t *lock) { + kmp_uint32 gtid = __kmp_entry_gtid(); + kmp_indirect_lock_t *l = + __kmp_lookup_indirect_lock((void **)lock, "omp_destroy_lock"); + KMP_I_LOCK_FUNC(l, destroy)(l->lock); + kmp_indirect_locktag_t tag = l->type; - __kmp_acquire_lock(&__kmp_global_lock, gtid); + __kmp_acquire_lock(&__kmp_global_lock, gtid); - // Use the base lock's space to keep the pool chain. - l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; - if (OMP_LOCK_T_SIZE < sizeof(void *)) { - l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); - } - __kmp_indirect_lock_pool[tag] = l; + // Use the base lock's space to keep the pool chain. 
+ l->lock->pool.next = (kmp_user_lock_p)__kmp_indirect_lock_pool[tag]; + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + l->lock->pool.index = KMP_EXTRACT_I_INDEX(lock); + } + __kmp_indirect_lock_pool[tag] = l; - __kmp_release_lock(&__kmp_global_lock, gtid); + __kmp_release_lock(&__kmp_global_lock, gtid); } -static void -__kmp_set_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); +static void __kmp_set_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } -static int -__kmp_unset_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); +static int __kmp_unset_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); } -static int -__kmp_test_indirect_lock(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); +static int __kmp_test_indirect_lock(kmp_dyna_lock_t *lock, kmp_int32 gtid) { + kmp_indirect_lock_t *l = KMP_LOOKUP_I_LOCK(lock); + return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } -static void -__kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); - KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); +static void __kmp_set_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32 gtid) { + kmp_indirect_lock_t *l = + __kmp_lookup_indirect_lock((void **)lock, "omp_set_lock"); + KMP_I_LOCK_FUNC(l, set)(l->lock, gtid); } -static int -__kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); - return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); +static int __kmp_unset_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32 gtid) { + kmp_indirect_lock_t *l = + __kmp_lookup_indirect_lock((void **)lock, "omp_unset_lock"); + return KMP_I_LOCK_FUNC(l, unset)(l->lock, gtid); } -static int -__kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t * lock, kmp_int32 gtid) -{ - kmp_indirect_lock_t *l = __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); - return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); +static int __kmp_test_indirect_lock_with_checks(kmp_dyna_lock_t *lock, + kmp_int32 gtid) { + kmp_indirect_lock_t *l = + __kmp_lookup_indirect_lock((void **)lock, "omp_test_lock"); + return KMP_I_LOCK_FUNC(l, test)(l->lock, gtid); } kmp_dyna_lockseq_t __kmp_user_lock_seq = lockseq_queuing; // This is used only in kmp_error.cpp when consistency checking is on. 
-kmp_int32 -__kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) -{ - switch (seq) { - case lockseq_tas: - case lockseq_nested_tas: - return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); +kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck, kmp_uint32 seq) { + switch (seq) { + case lockseq_tas: + case lockseq_nested_tas: + return __kmp_get_tas_lock_owner((kmp_tas_lock_t *)lck); #if KMP_USE_FUTEX - case lockseq_futex: - case lockseq_nested_futex: - return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); + case lockseq_futex: + case lockseq_nested_futex: + return __kmp_get_futex_lock_owner((kmp_futex_lock_t *)lck); #endif - case lockseq_ticket: - case lockseq_nested_ticket: - return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); - case lockseq_queuing: - case lockseq_nested_queuing: + case lockseq_ticket: + case lockseq_nested_ticket: + return __kmp_get_ticket_lock_owner((kmp_ticket_lock_t *)lck); + case lockseq_queuing: + case lockseq_nested_queuing: #if KMP_USE_ADAPTIVE_LOCKS - case lockseq_adaptive: + case lockseq_adaptive: #endif - return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); - case lockseq_drdpa: - case lockseq_nested_drdpa: - return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); - default: - return 0; - } + return __kmp_get_queuing_lock_owner((kmp_queuing_lock_t *)lck); + case lockseq_drdpa: + case lockseq_nested_drdpa: + return __kmp_get_drdpa_lock_owner((kmp_drdpa_lock_t *)lck); + default: + return 0; + } } // Initializes data for dynamic user locks. -void -__kmp_init_dynamic_user_locks() -{ - // Initialize jump table for the lock functions - if (__kmp_env_consistency_check) { - __kmp_direct_set = direct_set_check; - __kmp_direct_unset = direct_unset_check; - __kmp_direct_test = direct_test_check; - __kmp_indirect_set = indirect_set_check; - __kmp_indirect_unset = indirect_unset_check; - __kmp_indirect_test = indirect_test_check; - } - else { - __kmp_direct_set = direct_set; - __kmp_direct_unset = direct_unset; - __kmp_direct_test = direct_test; - __kmp_indirect_set = indirect_set; - __kmp_indirect_unset = indirect_unset; - __kmp_indirect_test = indirect_test; - } - // If the user locks have already been initialized, then return. - // Allow the switch between different KMP_CONSISTENCY_CHECK values, - // but do not allocate new lock tables if they have already been - // allocated. 
- if (__kmp_init_user_locks) - return; - - // Initialize lock index table - __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; - __kmp_i_lock_table.table = (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); - *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *) - __kmp_allocate(KMP_I_LOCK_CHUNK*sizeof(kmp_indirect_lock_t)); - __kmp_i_lock_table.next = 0; - - // Indirect lock size - __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); +void __kmp_init_dynamic_user_locks() { + // Initialize jump table for the lock functions + if (__kmp_env_consistency_check) { + __kmp_direct_set = direct_set_check; + __kmp_direct_unset = direct_unset_check; + __kmp_direct_test = direct_test_check; + __kmp_indirect_set = indirect_set_check; + __kmp_indirect_unset = indirect_unset_check; + __kmp_indirect_test = indirect_test_check; + } else { + __kmp_direct_set = direct_set; + __kmp_direct_unset = direct_unset; + __kmp_direct_test = direct_test; + __kmp_indirect_set = indirect_set; + __kmp_indirect_unset = indirect_unset; + __kmp_indirect_test = indirect_test; + } + // If the user locks have already been initialized, then return. Allow the + // switch between different KMP_CONSISTENCY_CHECK values, but do not allocate + // new lock tables if they have already been allocated. + if (__kmp_init_user_locks) + return; + + // Initialize lock index table + __kmp_i_lock_table.size = KMP_I_LOCK_CHUNK; + __kmp_i_lock_table.table = + (kmp_indirect_lock_t **)__kmp_allocate(sizeof(kmp_indirect_lock_t *)); + *(__kmp_i_lock_table.table) = (kmp_indirect_lock_t *)__kmp_allocate( + KMP_I_LOCK_CHUNK * sizeof(kmp_indirect_lock_t)); + __kmp_i_lock_table.next = 0; + + // Indirect lock size + __kmp_indirect_lock_size[locktag_ticket] = sizeof(kmp_ticket_lock_t); + __kmp_indirect_lock_size[locktag_queuing] = sizeof(kmp_queuing_lock_t); #if KMP_USE_ADAPTIVE_LOCKS - __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); + __kmp_indirect_lock_size[locktag_adaptive] = sizeof(kmp_adaptive_lock_t); #endif - __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); + __kmp_indirect_lock_size[locktag_drdpa] = sizeof(kmp_drdpa_lock_t); #if KMP_USE_TSX - __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); + __kmp_indirect_lock_size[locktag_rtm] = sizeof(kmp_queuing_lock_t); #endif - __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); + __kmp_indirect_lock_size[locktag_nested_tas] = sizeof(kmp_tas_lock_t); #if KMP_USE_FUTEX - __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); + __kmp_indirect_lock_size[locktag_nested_futex] = sizeof(kmp_futex_lock_t); #endif - __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); - __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); - __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); - - // Initialize lock accessor/modifier -#define fill_jumps(table, expand, sep) { \ - table[locktag##sep##ticket] = expand(ticket); \ - table[locktag##sep##queuing] = expand(queuing); \ - table[locktag##sep##drdpa] = expand(drdpa); \ -} + __kmp_indirect_lock_size[locktag_nested_ticket] = sizeof(kmp_ticket_lock_t); + __kmp_indirect_lock_size[locktag_nested_queuing] = sizeof(kmp_queuing_lock_t); + __kmp_indirect_lock_size[locktag_nested_drdpa] = sizeof(kmp_drdpa_lock_t); + +// Initialize lock accessor/modifier +#define fill_jumps(table, expand, sep) \ + { \ + 
table[locktag##sep##ticket] = expand(ticket); \ + table[locktag##sep##queuing] = expand(queuing); \ + table[locktag##sep##drdpa] = expand(drdpa); \ + } #if KMP_USE_ADAPTIVE_LOCKS -# define fill_table(table, expand) { \ - fill_jumps(table, expand, _); \ - table[locktag_adaptive] = expand(queuing); \ - fill_jumps(table, expand, _nested_); \ -} +#define fill_table(table, expand) \ + { \ + fill_jumps(table, expand, _); \ + table[locktag_adaptive] = expand(queuing); \ + fill_jumps(table, expand, _nested_); \ + } #else -# define fill_table(table, expand) { \ - fill_jumps(table, expand, _); \ - fill_jumps(table, expand, _nested_); \ -} +#define fill_table(table, expand) \ + { \ + fill_jumps(table, expand, _); \ + fill_jumps(table, expand, _nested_); \ + } #endif // KMP_USE_ADAPTIVE_LOCKS -#define expand(l) (void (*)(kmp_user_lock_p, const ident_t *))__kmp_set_##l##_lock_location - fill_table(__kmp_indirect_set_location, expand); +#define expand(l) \ + (void (*)(kmp_user_lock_p, const ident_t *)) __kmp_set_##l##_lock_location + fill_table(__kmp_indirect_set_location, expand); #undef expand -#define expand(l) (void (*)(kmp_user_lock_p, kmp_lock_flags_t))__kmp_set_##l##_lock_flags - fill_table(__kmp_indirect_set_flags, expand); +#define expand(l) \ + (void (*)(kmp_user_lock_p, kmp_lock_flags_t)) __kmp_set_##l##_lock_flags + fill_table(__kmp_indirect_set_flags, expand); #undef expand -#define expand(l) (const ident_t * (*)(kmp_user_lock_p))__kmp_get_##l##_lock_location - fill_table(__kmp_indirect_get_location, expand); +#define expand(l) \ + (const ident_t *(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_location + fill_table(__kmp_indirect_get_location, expand); #undef expand -#define expand(l) (kmp_lock_flags_t (*)(kmp_user_lock_p))__kmp_get_##l##_lock_flags - fill_table(__kmp_indirect_get_flags, expand); +#define expand(l) \ + (kmp_lock_flags_t(*)(kmp_user_lock_p)) __kmp_get_##l##_lock_flags + fill_table(__kmp_indirect_get_flags, expand); #undef expand - __kmp_init_user_locks = TRUE; + __kmp_init_user_locks = TRUE; } // Clean up the lock table. -void -__kmp_cleanup_indirect_user_locks() -{ - kmp_lock_index_t i; - int k; - - // Clean up locks in the pools first (they were already destroyed before going into the pools). - for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { - kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; - while (l != NULL) { - kmp_indirect_lock_t *ll = l; - l = (kmp_indirect_lock_t *)l->lock->pool.next; - KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", ll)); - __kmp_free(ll->lock); - ll->lock = NULL; - } - __kmp_indirect_lock_pool[k] = NULL; - } - // Clean up the remaining undestroyed locks. - for (i = 0; i < __kmp_i_lock_table.next; i++) { - kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); - if (l->lock != NULL) { - // Locks not destroyed explicitly need to be destroyed here. - KMP_I_LOCK_FUNC(l, destroy)(l->lock); - KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", l)); - __kmp_free(l->lock); - } - } - // Free the table - for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) - __kmp_free(__kmp_i_lock_table.table[i]); - __kmp_free(__kmp_i_lock_table.table); - - __kmp_init_user_locks = FALSE; +void __kmp_cleanup_indirect_user_locks() { + kmp_lock_index_t i; + int k; + + // Clean up locks in the pools first (they were already destroyed before going + // into the pools). 
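The cleanup routine above runs in two passes: locks already sitting in a reuse pool were destroyed when they were pooled, so only their storage is freed, while locks still live in the table are destroyed first and then freed, after which the table itself is released. A small sketch of that two-pass teardown, assuming invented Lock/destroy_lock stand-ins and, for simplicity, disjoint pool and table contents (the runtime reaches pooled entries through the table and nulls them out instead):

#include <cstdio>
#include <vector>

// Hypothetical lock object; 'destroyed' marks locks already torn down,
// i.e. the ones that were returned to a reuse pool.
struct Lock {
  bool destroyed;
};

static void destroy_lock(Lock *l) { // stand-in for the per-kind destroy fn
  l->destroyed = true;
}

// Two-pass teardown: free pooled (already destroyed) locks first, then
// destroy-and-free whatever is still live in the table.
static void cleanup(std::vector<Lock *> &pool, std::vector<Lock *> &table) {
  for (Lock *l : pool)                 // pass 1: pooled locks need no destroy call
    delete l;
  pool.clear();

  for (Lock *&l : table) {             // pass 2: destroy leftovers, then free them
    if (l == nullptr)
      continue;
    if (!l->destroyed)
      destroy_lock(l);
    delete l;
    l = nullptr;
  }
  table.clear();                       // finally release the table itself
}

int main() {
  std::vector<Lock *> pool{new Lock{true}};            // already destroyed
  std::vector<Lock *> table{new Lock{false}, nullptr}; // one still live
  cleanup(pool, table);
  std::printf("cleanup done\n");
}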
+ for (k = 0; k < KMP_NUM_I_LOCKS; ++k) { + kmp_indirect_lock_t *l = __kmp_indirect_lock_pool[k]; + while (l != NULL) { + kmp_indirect_lock_t *ll = l; + l = (kmp_indirect_lock_t *)l->lock->pool.next; + KA_TRACE(20, ("__kmp_cleanup_indirect_user_locks: freeing %p from pool\n", + ll)); + __kmp_free(ll->lock); + ll->lock = NULL; + } + __kmp_indirect_lock_pool[k] = NULL; + } + // Clean up the remaining undestroyed locks. + for (i = 0; i < __kmp_i_lock_table.next; i++) { + kmp_indirect_lock_t *l = KMP_GET_I_LOCK(i); + if (l->lock != NULL) { + // Locks not destroyed explicitly need to be destroyed here. + KMP_I_LOCK_FUNC(l, destroy)(l->lock); + KA_TRACE( + 20, + ("__kmp_cleanup_indirect_user_locks: destroy/freeing %p from table\n", + l)); + __kmp_free(l->lock); + } + } + // Free the table + for (i = 0; i < __kmp_i_lock_table.size / KMP_I_LOCK_CHUNK; i++) + __kmp_free(__kmp_i_lock_table.table[i]); + __kmp_free(__kmp_i_lock_table.table); + + __kmp_init_user_locks = FALSE; } enum kmp_lock_kind __kmp_user_lock_kind = lk_default; -int __kmp_num_locks_in_block = 1; // FIXME - tune this value +int __kmp_num_locks_in_block = 1; // FIXME - tune this value #else // KMP_USE_DYNAMIC_LOCK -/* ------------------------------------------------------------------------ */ /* user locks - * * They are implemented as a table of function pointers which are set to the - * lock functions of the appropriate kind, once that has been determined. - */ + * lock functions of the appropriate kind, once that has been determined. */ enum kmp_lock_kind __kmp_user_lock_kind = lk_default; size_t __kmp_base_user_lock_size = 0; size_t __kmp_user_lock_size = 0; -kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ) = NULL; -int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; - -int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; - -int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ) = NULL; -void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ) = NULL; - -int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ) = NULL; -const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ) = NULL; -kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ) = NULL; -void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ) = NULL; - -void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ) -{ - switch ( user_lock_kind ) { - case lk_default: - default: - KMP_ASSERT( 0 ); - - case lk_tas: { - __kmp_base_user_lock_size = sizeof( kmp_base_tas_lock_t ); - __kmp_user_lock_size = sizeof( kmp_tas_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_tas_lock_owner ); - - if ( __kmp_env_consistency_check ) { - 
KMP_BIND_USER_LOCK_WITH_CHECKS(tas); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); - } - else { - KMP_BIND_USER_LOCK(tas); - KMP_BIND_NESTED_USER_LOCK(tas); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_tas_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; - } - break; +kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck) = NULL; +int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; + +int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; +int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; +void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; +void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck) = NULL; +void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; +int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; + +int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; +int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid) = NULL; +void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; +void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck) = NULL; + +int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck) = NULL; +const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck) = NULL; +void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, + const ident_t *loc) = NULL; +kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck) = NULL; +void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, + kmp_lock_flags_t flags) = NULL; + +void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind) { + switch (user_lock_kind) { + case lk_default: + default: + KMP_ASSERT(0); + + case lk_tas: { + __kmp_base_user_lock_size = sizeof(kmp_base_tas_lock_t); + __kmp_user_lock_size = sizeof(kmp_tas_lock_t); + + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_tas_lock_owner); + + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(tas); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(tas); + } else { + KMP_BIND_USER_LOCK(tas); + KMP_BIND_NESTED_USER_LOCK(tas); + } + + __kmp_destroy_user_lock_ = + (void (*)(kmp_user_lock_p))(&__kmp_destroy_tas_lock); + + __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; + + __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; + + __kmp_set_user_lock_location_ = + (void (*)(kmp_user_lock_p, const ident_t *))NULL; + + __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; + + __kmp_set_user_lock_flags_ = + (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; + } break; #if KMP_USE_FUTEX - case lk_futex: { - __kmp_base_user_lock_size = sizeof( kmp_base_futex_lock_t ); - __kmp_user_lock_size = sizeof( kmp_futex_lock_t ); + case lk_futex: { + __kmp_base_user_lock_size = sizeof(kmp_base_futex_lock_t); + __kmp_user_lock_size = sizeof(kmp_futex_lock_t); - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - 
( &__kmp_get_futex_lock_owner ); + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_futex_lock_owner); - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(futex); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); - } - else { - KMP_BIND_USER_LOCK(futex); - KMP_BIND_NESTED_USER_LOCK(futex); - } + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(futex); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(futex); + } else { + KMP_BIND_USER_LOCK(futex); + KMP_BIND_NESTED_USER_LOCK(futex); + } - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_futex_lock ); + __kmp_destroy_user_lock_ = + (void (*)(kmp_user_lock_p))(&__kmp_destroy_futex_lock); - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) NULL; + __kmp_is_user_lock_initialized_ = (int (*)(kmp_user_lock_p))NULL; - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) NULL; + __kmp_get_user_lock_location_ = (const ident_t *(*)(kmp_user_lock_p))NULL; - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) NULL; + __kmp_set_user_lock_location_ = + (void (*)(kmp_user_lock_p, const ident_t *))NULL; - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) NULL; + __kmp_get_user_lock_flags_ = (kmp_lock_flags_t(*)(kmp_user_lock_p))NULL; - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) NULL; - } - break; + __kmp_set_user_lock_flags_ = + (void (*)(kmp_user_lock_p, kmp_lock_flags_t))NULL; + } break; #endif // KMP_USE_FUTEX - case lk_ticket: { - __kmp_base_user_lock_size = sizeof( kmp_base_ticket_lock_t ); - __kmp_user_lock_size = sizeof( kmp_ticket_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); - } - else { - KMP_BIND_USER_LOCK(ticket); - KMP_BIND_NESTED_USER_LOCK(ticket); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_ticket_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_ticket_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_ticket_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_ticket_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_ticket_lock_flags ); - } - break; + case lk_ticket: { + __kmp_base_user_lock_size = sizeof(kmp_base_ticket_lock_t); + __kmp_user_lock_size = sizeof(kmp_ticket_lock_t); - case lk_queuing: { - __kmp_base_user_lock_size = sizeof( kmp_base_queuing_lock_t ); - __kmp_user_lock_size = sizeof( kmp_queuing_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); - } - else { - KMP_BIND_USER_LOCK(queuing); - KMP_BIND_NESTED_USER_LOCK(queuing); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_queuing_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( 
kmp_user_lock_p ) ) - ( &__kmp_is_queuing_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_queuing_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_queuing_lock_flags ); - } - break; + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_owner); + + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(ticket); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(ticket); + } else { + KMP_BIND_USER_LOCK(ticket); + KMP_BIND_NESTED_USER_LOCK(ticket); + } + + __kmp_destroy_user_lock_ = + (void (*)(kmp_user_lock_p))(&__kmp_destroy_ticket_lock); + + __kmp_is_user_lock_initialized_ = + (int (*)(kmp_user_lock_p))(&__kmp_is_ticket_lock_initialized); + + __kmp_get_user_lock_location_ = + (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_location); + + __kmp_set_user_lock_location_ = (void (*)( + kmp_user_lock_p, const ident_t *))(&__kmp_set_ticket_lock_location); + + __kmp_get_user_lock_flags_ = + (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_ticket_lock_flags); + + __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( + &__kmp_set_ticket_lock_flags); + } break; + + case lk_queuing: { + __kmp_base_user_lock_size = sizeof(kmp_base_queuing_lock_t); + __kmp_user_lock_size = sizeof(kmp_queuing_lock_t); + + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); + + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(queuing); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(queuing); + } else { + KMP_BIND_USER_LOCK(queuing); + KMP_BIND_NESTED_USER_LOCK(queuing); + } + + __kmp_destroy_user_lock_ = + (void (*)(kmp_user_lock_p))(&__kmp_destroy_queuing_lock); + + __kmp_is_user_lock_initialized_ = + (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); + + __kmp_get_user_lock_location_ = + (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); + + __kmp_set_user_lock_location_ = (void (*)( + kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); + + __kmp_get_user_lock_flags_ = + (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); + + __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( + &__kmp_set_queuing_lock_flags); + } break; #if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: { - __kmp_base_user_lock_size = sizeof( kmp_base_adaptive_lock_t ); - __kmp_user_lock_size = sizeof( kmp_adaptive_lock_t ); + case lk_adaptive: { + __kmp_base_user_lock_size = sizeof(kmp_base_adaptive_lock_t); + __kmp_user_lock_size = sizeof(kmp_adaptive_lock_t); - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_owner ); + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_owner); - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); - } - else { - KMP_BIND_USER_LOCK(adaptive); - } + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(adaptive); + } else { + KMP_BIND_USER_LOCK(adaptive); + } - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_adaptive_lock ); + __kmp_destroy_user_lock_ = + (void 
(*)(kmp_user_lock_p))(&__kmp_destroy_adaptive_lock); - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_queuing_lock_initialized ); + __kmp_is_user_lock_initialized_ = + (int (*)(kmp_user_lock_p))(&__kmp_is_queuing_lock_initialized); - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_location ); + __kmp_get_user_lock_location_ = + (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_location); - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_queuing_lock_location ); + __kmp_set_user_lock_location_ = (void (*)( + kmp_user_lock_p, const ident_t *))(&__kmp_set_queuing_lock_location); - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_queuing_lock_flags ); + __kmp_get_user_lock_flags_ = + (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_queuing_lock_flags); - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_queuing_lock_flags ); + __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( + &__kmp_set_queuing_lock_flags); - } - break; + } break; #endif // KMP_USE_ADAPTIVE_LOCKS - case lk_drdpa: { - __kmp_base_user_lock_size = sizeof( kmp_base_drdpa_lock_t ); - __kmp_user_lock_size = sizeof( kmp_drdpa_lock_t ); - - __kmp_get_user_lock_owner_ = - ( kmp_int32 ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_owner ); - - if ( __kmp_env_consistency_check ) { - KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); - KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); - } - else { - KMP_BIND_USER_LOCK(drdpa); - KMP_BIND_NESTED_USER_LOCK(drdpa); - } - - __kmp_destroy_user_lock_ = - ( void ( * )( kmp_user_lock_p ) ) - ( &__kmp_destroy_drdpa_lock ); - - __kmp_is_user_lock_initialized_ = - ( int ( * )( kmp_user_lock_p ) ) - ( &__kmp_is_drdpa_lock_initialized ); - - __kmp_get_user_lock_location_ = - ( const ident_t * ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_location ); - - __kmp_set_user_lock_location_ = - ( void ( * )( kmp_user_lock_p, const ident_t * ) ) - ( &__kmp_set_drdpa_lock_location ); - - __kmp_get_user_lock_flags_ = - ( kmp_lock_flags_t ( * )( kmp_user_lock_p ) ) - ( &__kmp_get_drdpa_lock_flags ); - - __kmp_set_user_lock_flags_ = - ( void ( * )( kmp_user_lock_p, kmp_lock_flags_t ) ) - ( &__kmp_set_drdpa_lock_flags ); - } - break; + case lk_drdpa: { + __kmp_base_user_lock_size = sizeof(kmp_base_drdpa_lock_t); + __kmp_user_lock_size = sizeof(kmp_drdpa_lock_t); + + __kmp_get_user_lock_owner_ = + (kmp_int32(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_owner); + + if (__kmp_env_consistency_check) { + KMP_BIND_USER_LOCK_WITH_CHECKS(drdpa); + KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(drdpa); + } else { + KMP_BIND_USER_LOCK(drdpa); + KMP_BIND_NESTED_USER_LOCK(drdpa); } -} + __kmp_destroy_user_lock_ = + (void (*)(kmp_user_lock_p))(&__kmp_destroy_drdpa_lock); + + __kmp_is_user_lock_initialized_ = + (int (*)(kmp_user_lock_p))(&__kmp_is_drdpa_lock_initialized); + + __kmp_get_user_lock_location_ = + (const ident_t *(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_location); + + __kmp_set_user_lock_location_ = (void (*)( + kmp_user_lock_p, const ident_t *))(&__kmp_set_drdpa_lock_location); + + __kmp_get_user_lock_flags_ = + (kmp_lock_flags_t(*)(kmp_user_lock_p))(&__kmp_get_drdpa_lock_flags); + + __kmp_set_user_lock_flags_ = (void (*)(kmp_user_lock_p, kmp_lock_flags_t))( + &__kmp_set_drdpa_lock_flags); + } break; + } +} // 
---------------------------------------------------------------------------- // User lock table & lock allocation -kmp_lock_table_t __kmp_user_lock_table = { 1, 0, NULL }; +kmp_lock_table_t __kmp_user_lock_table = {1, 0, NULL}; kmp_user_lock_p __kmp_lock_pool = NULL; // Lock block-allocation support. -kmp_block_of_locks* __kmp_lock_blocks = NULL; -int __kmp_num_locks_in_block = 1; // FIXME - tune this value - -static kmp_lock_index_t -__kmp_lock_table_insert( kmp_user_lock_p lck ) -{ - // Assume that kmp_global_lock is held upon entry/exit. - kmp_lock_index_t index; - if ( __kmp_user_lock_table.used >= __kmp_user_lock_table.allocated ) { - kmp_lock_index_t size; - kmp_user_lock_p *table; - // Reallocate lock table. - if ( __kmp_user_lock_table.allocated == 0 ) { - size = 1024; - } - else { - size = __kmp_user_lock_table.allocated * 2; - } - table = (kmp_user_lock_p *)__kmp_allocate( sizeof( kmp_user_lock_p ) * size ); - KMP_MEMCPY( table + 1, __kmp_user_lock_table.table + 1, sizeof( kmp_user_lock_p ) * ( __kmp_user_lock_table.used - 1 ) ); - table[ 0 ] = (kmp_user_lock_p)__kmp_user_lock_table.table; - // We cannot free the previous table now, since it may be in use by other - // threads. So save the pointer to the previous table in in the first element of the - // new table. All the tables will be organized into a list, and could be freed when - // library shutting down. - __kmp_user_lock_table.table = table; - __kmp_user_lock_table.allocated = size; - } - KMP_DEBUG_ASSERT( __kmp_user_lock_table.used < __kmp_user_lock_table.allocated ); - index = __kmp_user_lock_table.used; - __kmp_user_lock_table.table[ index ] = lck; - ++ __kmp_user_lock_table.used; - return index; -} - -static kmp_user_lock_p -__kmp_lock_block_allocate() -{ - // Assume that kmp_global_lock is held upon entry/exit. - static int last_index = 0; - if ( ( last_index >= __kmp_num_locks_in_block ) - || ( __kmp_lock_blocks == NULL ) ) { - // Restart the index. - last_index = 0; - // Need to allocate a new block. - KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); - size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; - char* buffer = (char*)__kmp_allocate( space_for_locks + sizeof( kmp_block_of_locks ) ); - // Set up the new block. - kmp_block_of_locks *new_block = (kmp_block_of_locks *)(& buffer[space_for_locks]); - new_block->next_block = __kmp_lock_blocks; - new_block->locks = (void *)buffer; - // Publish the new block. - KMP_MB(); - __kmp_lock_blocks = new_block; - } - kmp_user_lock_p ret = (kmp_user_lock_p)(& ( ( (char *)( __kmp_lock_blocks->locks ) ) - [ last_index * __kmp_user_lock_size ] ) ); - last_index++; - return ret; +kmp_block_of_locks *__kmp_lock_blocks = NULL; +int __kmp_num_locks_in_block = 1; // FIXME - tune this value + +static kmp_lock_index_t __kmp_lock_table_insert(kmp_user_lock_p lck) { + // Assume that kmp_global_lock is held upon entry/exit. + kmp_lock_index_t index; + if (__kmp_user_lock_table.used >= __kmp_user_lock_table.allocated) { + kmp_lock_index_t size; + kmp_user_lock_p *table; + // Reallocate lock table. + if (__kmp_user_lock_table.allocated == 0) { + size = 1024; + } else { + size = __kmp_user_lock_table.allocated * 2; + } + table = (kmp_user_lock_p *)__kmp_allocate(sizeof(kmp_user_lock_p) * size); + KMP_MEMCPY(table + 1, __kmp_user_lock_table.table + 1, + sizeof(kmp_user_lock_p) * (__kmp_user_lock_table.used - 1)); + table[0] = (kmp_user_lock_p)__kmp_user_lock_table.table; + // We cannot free the previous table now, since it may be in use by other + // threads. 
So save the pointer to the previous table in in the first + // element of the new table. All the tables will be organized into a list, + // and could be freed when library shutting down. + __kmp_user_lock_table.table = table; + __kmp_user_lock_table.allocated = size; + } + KMP_DEBUG_ASSERT(__kmp_user_lock_table.used < + __kmp_user_lock_table.allocated); + index = __kmp_user_lock_table.used; + __kmp_user_lock_table.table[index] = lck; + ++__kmp_user_lock_table.used; + return index; +} + +static kmp_user_lock_p __kmp_lock_block_allocate() { + // Assume that kmp_global_lock is held upon entry/exit. + static int last_index = 0; + if ((last_index >= __kmp_num_locks_in_block) || (__kmp_lock_blocks == NULL)) { + // Restart the index. + last_index = 0; + // Need to allocate a new block. + KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); + size_t space_for_locks = __kmp_user_lock_size * __kmp_num_locks_in_block; + char *buffer = + (char *)__kmp_allocate(space_for_locks + sizeof(kmp_block_of_locks)); + // Set up the new block. + kmp_block_of_locks *new_block = + (kmp_block_of_locks *)(&buffer[space_for_locks]); + new_block->next_block = __kmp_lock_blocks; + new_block->locks = (void *)buffer; + // Publish the new block. + KMP_MB(); + __kmp_lock_blocks = new_block; + } + kmp_user_lock_p ret = (kmp_user_lock_p)(&( + ((char *)(__kmp_lock_blocks->locks))[last_index * __kmp_user_lock_size])); + last_index++; + return ret; } -// // Get memory for a lock. It may be freshly allocated memory or reused memory // from lock pool. -// -kmp_user_lock_p -__kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, - kmp_lock_flags_t flags ) -{ - kmp_user_lock_p lck; - kmp_lock_index_t index; - KMP_DEBUG_ASSERT( user_lock ); - - __kmp_acquire_lock( &__kmp_global_lock, gtid ); - - if ( __kmp_lock_pool == NULL ) { - // Lock pool is empty. Allocate new memory. - - // ANNOTATION: Found no good way to express the syncronisation - // between allocation and usage, so ignore the allocation - ANNOTATE_IGNORE_WRITES_BEGIN(); - if ( __kmp_num_locks_in_block <= 1 ) { // Tune this cutoff point. - lck = (kmp_user_lock_p) __kmp_allocate( __kmp_user_lock_size ); - } - else { - lck = __kmp_lock_block_allocate(); - } - ANNOTATE_IGNORE_WRITES_END(); - - // Insert lock in the table so that it can be freed in __kmp_cleanup, - // and debugger has info on all allocated locks. - index = __kmp_lock_table_insert( lck ); - } - else { - // Pick up lock from pool. - lck = __kmp_lock_pool; - index = __kmp_lock_pool->pool.index; - __kmp_lock_pool = __kmp_lock_pool->pool.next; +kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, kmp_int32 gtid, + kmp_lock_flags_t flags) { + kmp_user_lock_p lck; + kmp_lock_index_t index; + KMP_DEBUG_ASSERT(user_lock); + + __kmp_acquire_lock(&__kmp_global_lock, gtid); + + if (__kmp_lock_pool == NULL) { + // Lock pool is empty. Allocate new memory. + + // ANNOTATION: Found no good way to express the syncronisation + // between allocation and usage, so ignore the allocation + ANNOTATE_IGNORE_WRITES_BEGIN(); + if (__kmp_num_locks_in_block <= 1) { // Tune this cutoff point. + lck = (kmp_user_lock_p)__kmp_allocate(__kmp_user_lock_size); + } else { + lck = __kmp_lock_block_allocate(); } + ANNOTATE_IGNORE_WRITES_END(); - // - // We could potentially differentiate between nested and regular locks - // here, and do the lock table lookup for regular locks only. 
- // - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - * ( (kmp_lock_index_t *) user_lock ) = index; - } - else { - * ( (kmp_user_lock_p *) user_lock ) = lck; - } + // Insert lock in the table so that it can be freed in __kmp_cleanup, + // and debugger has info on all allocated locks. + index = __kmp_lock_table_insert(lck); + } else { + // Pick up lock from pool. + lck = __kmp_lock_pool; + index = __kmp_lock_pool->pool.index; + __kmp_lock_pool = __kmp_lock_pool->pool.next; + } - // mark the lock if it is critical section lock. - __kmp_set_user_lock_flags( lck, flags ); + // We could potentially differentiate between nested and regular locks + // here, and do the lock table lookup for regular locks only. + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + *((kmp_lock_index_t *)user_lock) = index; + } else { + *((kmp_user_lock_p *)user_lock) = lck; + } - __kmp_release_lock( & __kmp_global_lock, gtid ); // AC: TODO: move this line upper + // mark the lock if it is critical section lock. + __kmp_set_user_lock_flags(lck, flags); - return lck; -} + __kmp_release_lock(&__kmp_global_lock, gtid); // AC: TODO move this line upper -// Put lock's memory to pool for reusing. -void -__kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( user_lock != NULL ); - KMP_DEBUG_ASSERT( lck != NULL ); - - __kmp_acquire_lock( & __kmp_global_lock, gtid ); - - lck->pool.next = __kmp_lock_pool; - __kmp_lock_pool = lck; - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - kmp_lock_index_t index = * ( (kmp_lock_index_t *) user_lock ); - KMP_DEBUG_ASSERT( 0 < index && index <= __kmp_user_lock_table.used ); - lck->pool.index = index; - } - - __kmp_release_lock( & __kmp_global_lock, gtid ); + return lck; } -kmp_user_lock_p -__kmp_lookup_user_lock( void **user_lock, char const *func ) -{ - kmp_user_lock_p lck = NULL; +// Put lock's memory to pool for reusing. +void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, + kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(user_lock != NULL); + KMP_DEBUG_ASSERT(lck != NULL); - if ( __kmp_env_consistency_check ) { - if ( user_lock == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } + __kmp_acquire_lock(&__kmp_global_lock, gtid); - if ( OMP_LOCK_T_SIZE < sizeof(void *) ) { - kmp_lock_index_t index = *( (kmp_lock_index_t *)user_lock ); - if ( __kmp_env_consistency_check ) { - if ( ! ( 0 < index && index < __kmp_user_lock_table.used ) ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - KMP_DEBUG_ASSERT( 0 < index && index < __kmp_user_lock_table.used ); - KMP_DEBUG_ASSERT( __kmp_user_lock_size > 0 ); - lck = __kmp_user_lock_table.table[index]; - } - else { - lck = *( (kmp_user_lock_p *)user_lock ); - } + lck->pool.next = __kmp_lock_pool; + __kmp_lock_pool = lck; + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); + KMP_DEBUG_ASSERT(0 < index && index <= __kmp_user_lock_table.used); + lck->pool.index = index; + } - if ( __kmp_env_consistency_check ) { - if ( lck == NULL ) { - KMP_FATAL( LockIsUninitialized, func ); - } - } - - return lck; + __kmp_release_lock(&__kmp_global_lock, gtid); } -void -__kmp_cleanup_user_locks( void ) -{ - // - // Reset lock pool. Do not worry about lock in the pool -- we will free - // them when iterating through lock table (it includes all the locks, - // dead or alive). 
- // - __kmp_lock_pool = NULL; +kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, char const *func) { + kmp_user_lock_p lck = NULL; -#define IS_CRITICAL(lck) \ - ( ( __kmp_get_user_lock_flags_ != NULL ) && \ - ( ( *__kmp_get_user_lock_flags_ )( lck ) & kmp_lf_critical_section ) ) + if (__kmp_env_consistency_check) { + if (user_lock == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + } - // - // Loop through lock table, free all locks. - // - // Do not free item [0], it is reserved for lock tables list. - // - // FIXME - we are iterating through a list of (pointers to) objects of - // type union kmp_user_lock, but we have no way of knowing whether the - // base type is currently "pool" or whatever the global user lock type - // is. - // - // We are relying on the fact that for all of the user lock types - // (except "tas"), the first field in the lock struct is the "initialized" - // field, which is set to the address of the lock object itself when - // the lock is initialized. When the union is of type "pool", the - // first field is a pointer to the next object in the free list, which - // will not be the same address as the object itself. - // - // This means that the check ( *__kmp_is_user_lock_initialized_ )( lck ) - // will fail for "pool" objects on the free list. This must happen as - // the "location" field of real user locks overlaps the "index" field - // of "pool" objects. - // - // It would be better to run through the free list, and remove all "pool" - // objects from the lock table before executing this loop. However, - // "pool" objects do not always have their index field set (only on - // lin_32e), and I don't want to search the lock table for the address - // of every "pool" object on the free list. - // - while ( __kmp_user_lock_table.used > 1 ) { - const ident *loc; - - // - // reduce __kmp_user_lock_table.used before freeing the lock, - // so that state of locks is consistent - // - kmp_user_lock_p lck = __kmp_user_lock_table.table[ - --__kmp_user_lock_table.used ]; - - if ( ( __kmp_is_user_lock_initialized_ != NULL ) && - ( *__kmp_is_user_lock_initialized_ )( lck ) ) { - // - // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is - // initialized AND it is NOT a critical section (user is not - // responsible for destroying criticals) AND we know source - // location to report. - // - if ( __kmp_env_consistency_check && ( ! IS_CRITICAL( lck ) ) && - ( ( loc = __kmp_get_user_lock_location( lck ) ) != NULL ) && - ( loc->psource != NULL ) ) { - kmp_str_loc_t str_loc = __kmp_str_loc_init( loc->psource, 0 ); - KMP_WARNING( CnsLockNotDestroyed, str_loc.file, str_loc.line ); - __kmp_str_loc_free( &str_loc); - } + if (OMP_LOCK_T_SIZE < sizeof(void *)) { + kmp_lock_index_t index = *((kmp_lock_index_t *)user_lock); + if (__kmp_env_consistency_check) { + if (!(0 < index && index < __kmp_user_lock_table.used)) { + KMP_FATAL(LockIsUninitialized, func); + } + } + KMP_DEBUG_ASSERT(0 < index && index < __kmp_user_lock_table.used); + KMP_DEBUG_ASSERT(__kmp_user_lock_size > 0); + lck = __kmp_user_lock_table.table[index]; + } else { + lck = *((kmp_user_lock_p *)user_lock); + } + + if (__kmp_env_consistency_check) { + if (lck == NULL) { + KMP_FATAL(LockIsUninitialized, func); + } + } + + return lck; +} + +void __kmp_cleanup_user_locks(void) { + // Reset lock pool. Don't worry about lock in the pool--we will free them when + // iterating through lock table (it includes all the locks, dead or alive). 
+ __kmp_lock_pool = NULL; + +#define IS_CRITICAL(lck) \ + ((__kmp_get_user_lock_flags_ != NULL) && \ + ((*__kmp_get_user_lock_flags_)(lck)&kmp_lf_critical_section)) + + // Loop through lock table, free all locks. + // Do not free item [0], it is reserved for lock tables list. + // + // FIXME - we are iterating through a list of (pointers to) objects of type + // union kmp_user_lock, but we have no way of knowing whether the base type is + // currently "pool" or whatever the global user lock type is. + // + // We are relying on the fact that for all of the user lock types + // (except "tas"), the first field in the lock struct is the "initialized" + // field, which is set to the address of the lock object itself when + // the lock is initialized. When the union is of type "pool", the + // first field is a pointer to the next object in the free list, which + // will not be the same address as the object itself. + // + // This means that the check (*__kmp_is_user_lock_initialized_)(lck) will fail + // for "pool" objects on the free list. This must happen as the "location" + // field of real user locks overlaps the "index" field of "pool" objects. + // + // It would be better to run through the free list, and remove all "pool" + // objects from the lock table before executing this loop. However, + // "pool" objects do not always have their index field set (only on + // lin_32e), and I don't want to search the lock table for the address + // of every "pool" object on the free list. + while (__kmp_user_lock_table.used > 1) { + const ident *loc; + + // reduce __kmp_user_lock_table.used before freeing the lock, + // so that state of locks is consistent + kmp_user_lock_p lck = + __kmp_user_lock_table.table[--__kmp_user_lock_table.used]; + + if ((__kmp_is_user_lock_initialized_ != NULL) && + (*__kmp_is_user_lock_initialized_)(lck)) { + // Issue a warning if: KMP_CONSISTENCY_CHECK AND lock is initialized AND + // it is NOT a critical section (user is not responsible for destroying + // criticals) AND we know source location to report. + if (__kmp_env_consistency_check && (!IS_CRITICAL(lck)) && + ((loc = __kmp_get_user_lock_location(lck)) != NULL) && + (loc->psource != NULL)) { + kmp_str_loc_t str_loc = __kmp_str_loc_init(loc->psource, 0); + KMP_WARNING(CnsLockNotDestroyed, str_loc.file, str_loc.line); + __kmp_str_loc_free(&str_loc); + } #ifdef KMP_DEBUG - if ( IS_CRITICAL( lck ) ) { - KA_TRACE( 20, ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", lck, *(void**)lck ) ); - } - else { - KA_TRACE( 20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, *(void**)lck ) ); - } + if (IS_CRITICAL(lck)) { + KA_TRACE( + 20, + ("__kmp_cleanup_user_locks: free critical section lock %p (%p)\n", + lck, *(void **)lck)); + } else { + KA_TRACE(20, ("__kmp_cleanup_user_locks: free lock %p (%p)\n", lck, + *(void **)lck)); + } #endif // KMP_DEBUG - // - // Cleanup internal lock dynamic resources - // (for drdpa locks particularly). - // - __kmp_destroy_user_lock( lck ); - } - - // - // Free the lock if block allocation of locks is not used. - // - if ( __kmp_lock_blocks == NULL ) { - __kmp_free( lck ); - } + // Cleanup internal lock dynamic resources (for drdpa locks particularly). + __kmp_destroy_user_lock(lck); } -#undef IS_CRITICAL - - // - // delete lock table(s). 
- // - kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; - __kmp_user_lock_table.table = NULL; - __kmp_user_lock_table.allocated = 0; - - while ( table_ptr != NULL ) { - // - // In the first element we saved the pointer to the previous - // (smaller) lock table. - // - kmp_user_lock_p *next = (kmp_user_lock_p *)( table_ptr[ 0 ] ); - __kmp_free( table_ptr ); - table_ptr = next; + // Free the lock if block allocation of locks is not used. + if (__kmp_lock_blocks == NULL) { + __kmp_free(lck); } + } - // - // Free buffers allocated for blocks of locks. - // - kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; - __kmp_lock_blocks = NULL; - - while ( block_ptr != NULL ) { - kmp_block_of_locks_t *next = block_ptr->next_block; - __kmp_free( block_ptr->locks ); - // - // *block_ptr itself was allocated at the end of the locks vector. - // - block_ptr = next; - } +#undef IS_CRITICAL - TCW_4(__kmp_init_user_locks, FALSE); + // delete lock table(s). + kmp_user_lock_p *table_ptr = __kmp_user_lock_table.table; + __kmp_user_lock_table.table = NULL; + __kmp_user_lock_table.allocated = 0; + + while (table_ptr != NULL) { + // In the first element we saved the pointer to the previous + // (smaller) lock table. + kmp_user_lock_p *next = (kmp_user_lock_p *)(table_ptr[0]); + __kmp_free(table_ptr); + table_ptr = next; + } + + // Free buffers allocated for blocks of locks. + kmp_block_of_locks_t *block_ptr = __kmp_lock_blocks; + __kmp_lock_blocks = NULL; + + while (block_ptr != NULL) { + kmp_block_of_locks_t *next = block_ptr->next_block; + __kmp_free(block_ptr->locks); + // *block_ptr itself was allocated at the end of the locks vector. + block_ptr = next; + } + + TCW_4(__kmp_init_user_locks, FALSE); } #endif // KMP_USE_DYNAMIC_LOCK diff --git a/openmp/runtime/src/kmp_lock.h b/openmp/runtime/src/kmp_lock.h index 2f4f65f..c24647c 100644 --- a/openmp/runtime/src/kmp_lock.h +++ b/openmp/runtime/src/kmp_lock.h @@ -16,11 +16,11 @@ #ifndef KMP_LOCK_H #define KMP_LOCK_H -#include // CHAR_BIT -#include // offsetof +#include // CHAR_BIT +#include // offsetof -#include "kmp_os.h" #include "kmp_debug.h" +#include "kmp_os.h" #ifdef __cplusplus #include @@ -32,7 +32,8 @@ extern "C" { // Have to copy these definitions from kmp.h because kmp.h cannot be included // due to circular dependencies. Will undef these at end of file. -#define KMP_PAD(type, sz) (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) +#define KMP_PAD(type, sz) \ + (sizeof(type) + (sz - ((sizeof(type) - 1) % (sz)) - 1)) #define KMP_GTID_DNE (-2) // Forward declaration of ident and ident_t @@ -43,7 +44,6 @@ typedef struct ident ident_t; // End of copied code. // ---------------------------------------------------------------------------- -// // We need to know the size of the area we can assume that the compiler(s) // allocated for obects of type omp_lock_t and omp_nest_lock_t. The Intel // compiler always allocates a pointer-sized area, as does visual studio. @@ -52,77 +52,60 @@ typedef struct ident ident_t; // intel archs. It allocates at least 8 bytes for nested lock (more on // recent versions), but we are bounded by the pointer-sized chunks that // the Intel compiler allocates. 
-// #if KMP_OS_LINUX && defined(KMP_GOMP_COMPAT) -# define OMP_LOCK_T_SIZE sizeof(int) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#define OMP_LOCK_T_SIZE sizeof(int) +#define OMP_NEST_LOCK_T_SIZE sizeof(void *) #else -# define OMP_LOCK_T_SIZE sizeof(void *) -# define OMP_NEST_LOCK_T_SIZE sizeof(void *) +#define OMP_LOCK_T_SIZE sizeof(void *) +#define OMP_NEST_LOCK_T_SIZE sizeof(void *) #endif -// // The Intel compiler allocates a 32-byte chunk for a critical section. // Both gcc and visual studio only allocate enough space for a pointer. // Sometimes we know that the space was allocated by the Intel compiler. -// -#define OMP_CRITICAL_SIZE sizeof(void *) -#define INTEL_CRITICAL_SIZE 32 +#define OMP_CRITICAL_SIZE sizeof(void *) +#define INTEL_CRITICAL_SIZE 32 -// // lock flags -// typedef kmp_uint32 kmp_lock_flags_t; #define kmp_lf_critical_section 1 -// // When a lock table is used, the indices are of kmp_lock_index_t -// typedef kmp_uint32 kmp_lock_index_t; -// // When memory allocated for locks are on the lock pool (free list), // it is treated as structs of this type. -// struct kmp_lock_pool { - union kmp_user_lock *next; - kmp_lock_index_t index; + union kmp_user_lock *next; + kmp_lock_index_t index; }; typedef struct kmp_lock_pool kmp_lock_pool_t; - -extern void __kmp_validate_locks( void ); - +extern void __kmp_validate_locks(void); // ---------------------------------------------------------------------------- -// // There are 5 lock implementations: -// // 1. Test and set locks. -// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core architecture) +// 2. futex locks (Linux* OS on x86 and Intel(R) Many Integrated Core +// architecture) // 3. Ticket (Lamport bakery) locks. // 4. Queuing locks (with separate spin fields). // 5. DRPA (Dynamically Reconfigurable Distributed Polling Area) locks // // and 3 lock purposes: -// -// 1. Bootstrap locks -- Used for a few locks available at library startup-shutdown time. +// 1. Bootstrap locks -- Used for a few locks available at library +// startup-shutdown time. // These do not require non-negative global thread ID's. // 2. Internal RTL locks -- Used everywhere else in the RTL // 3. User locks (includes critical sections) -// // ---------------------------------------------------------------------------- - // ============================================================================ // Lock implementations. -// ============================================================================ - - -// ---------------------------------------------------------------------------- +// // Test and set locks. // // Non-nested test and set locks differ from the other lock kinds (except @@ -133,52 +116,53 @@ extern void __kmp_validate_locks( void ); // bytes, so we have to use a lock table for nested locks, and avoid accessing // the depth_locked field for non-nested locks. // -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. +// Information normally available to the tools, such as lock location, lock +// usage (normal lock vs. critical section), etc. is not available with test and +// set locks. 
// ---------------------------------------------------------------------------- struct kmp_base_tas_lock { - volatile kmp_int32 poll; // 0 => unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only + volatile kmp_int32 poll; // 0 => unlocked; locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only }; typedef struct kmp_base_tas_lock kmp_base_tas_lock_t; union kmp_tas_lock { - kmp_base_tas_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding + kmp_base_tas_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment; no cache line padding }; typedef union kmp_tas_lock kmp_tas_lock_t; -// // Static initializer for test and set lock variables. Usage: // kmp_tas_lock_t xlock = KMP_TAS_LOCK_INITIALIZER( xlock ); -// -#define KMP_TAS_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_tas_lock( kmp_tas_lock_t *lck ); - -extern int __kmp_acquire_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_tas_lock( kmp_tas_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_tas_lock( kmp_tas_lock_t *lck ); -extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); - -#define KMP_LOCK_RELEASED 1 -#define KMP_LOCK_STILL_HELD 0 +#define KMP_TAS_LOCK_INITIALIZER(lock) \ + { \ + { 0, 0 } \ + } + +extern int __kmp_acquire_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_tas_lock(kmp_tas_lock_t *lck); +extern void __kmp_destroy_tas_lock(kmp_tas_lock_t *lck); + +extern int __kmp_acquire_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_nested_tas_lock(kmp_tas_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_nested_tas_lock(kmp_tas_lock_t *lck); +extern void __kmp_destroy_nested_tas_lock(kmp_tas_lock_t *lck); + +#define KMP_LOCK_RELEASED 1 +#define KMP_LOCK_STILL_HELD 0 #define KMP_LOCK_ACQUIRED_FIRST 1 -#define KMP_LOCK_ACQUIRED_NEXT 0 +#define KMP_LOCK_ACQUIRED_NEXT 0 -#define KMP_USE_FUTEX (KMP_OS_LINUX && !KMP_OS_CNK && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) +#define KMP_USE_FUTEX \ + (KMP_OS_LINUX && !KMP_OS_CNK && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)) #if KMP_USE_FUTEX @@ -188,82 +172,86 @@ extern void __kmp_destroy_nested_tas_lock( kmp_tas_lock_t *lck ); // Like non-nested test and set lock, non-nested futex locks use the memory // allocated by the compiler for the lock, rather than a pointer to it. // -// Information normally available to the tools, such as lock location, -// lock usage (normal lock vs. critical section), etc. is not available with -// test and set locks. With non-nested futex locks, the lock owner is not -// even available. 
+// Information normally available to the tools, such as lock location, lock +// usage (normal lock vs. critical section), etc. is not available with test and +// set locks. With non-nested futex locks, the lock owner is not even available. // ---------------------------------------------------------------------------- struct kmp_base_futex_lock { - volatile kmp_int32 poll; // 0 => unlocked - // 2*(gtid+1) of owning thread, 0 if unlocked - // locked: (gtid+1) of owning thread - kmp_int32 depth_locked; // depth locked, for nested locks only + volatile kmp_int32 poll; // 0 => unlocked + // 2*(gtid+1) of owning thread, 0 if unlocked + // locked: (gtid+1) of owning thread + kmp_int32 depth_locked; // depth locked, for nested locks only }; typedef struct kmp_base_futex_lock kmp_base_futex_lock_t; union kmp_futex_lock { - kmp_base_futex_lock_t lk; - kmp_lock_pool_t pool; // make certain struct is large enough - double lk_align; // use worst case alignment - // no cache line padding + kmp_base_futex_lock_t lk; + kmp_lock_pool_t pool; // make certain struct is large enough + double lk_align; // use worst case alignment + // no cache line padding }; typedef union kmp_futex_lock kmp_futex_lock_t; -// // Static initializer for futex lock variables. Usage: // kmp_futex_lock_t xlock = KMP_FUTEX_LOCK_INITIALIZER( xlock ); -// -#define KMP_FUTEX_LOCK_INITIALIZER( lock ) { { 0, 0 } } - -extern int __kmp_acquire_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_futex_lock( kmp_futex_lock_t *lck ); - -extern int __kmp_acquire_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_futex_lock( kmp_futex_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_futex_lock( kmp_futex_lock_t *lck ); -extern void __kmp_destroy_nested_futex_lock( kmp_futex_lock_t *lck ); +#define KMP_FUTEX_LOCK_INITIALIZER(lock) \ + { \ + { 0, 0 } \ + } + +extern int __kmp_acquire_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_futex_lock(kmp_futex_lock_t *lck); +extern void __kmp_destroy_futex_lock(kmp_futex_lock_t *lck); + +extern int __kmp_acquire_nested_futex_lock(kmp_futex_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_futex_lock(kmp_futex_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_nested_futex_lock(kmp_futex_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_futex_lock(kmp_futex_lock_t *lck); +extern void __kmp_destroy_nested_futex_lock(kmp_futex_lock_t *lck); #endif // KMP_USE_FUTEX - // ---------------------------------------------------------------------------- // Ticket locks. -// ---------------------------------------------------------------------------- #ifdef __cplusplus #ifdef _MSC_VER -// MSVC won't allow use of std::atomic<> in a union since it has non-trivial copy constructor. +// MSVC won't allow use of std::atomic<> in a union since it has non-trivial +// copy constructor. struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! 
- std::atomic_bool initialized; - volatile union kmp_ticket_lock *self; // points to the lock union - ident_t const * location; // Source code location of omp_init_lock(). - std::atomic_uint next_ticket; // ticket number to give to next thread which acquires - std::atomic_uint now_serving; // ticket number for thread which holds the lock - std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked - std::atomic_int depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock + // `initialized' must be the first entry in the lock data structure! + std::atomic_bool initialized; + volatile union kmp_ticket_lock *self; // points to the lock union + ident_t const *location; // Source code location of omp_init_lock(). + std::atomic_uint + next_ticket; // ticket number to give to next thread which acquires + std::atomic_uint now_serving; // ticket number for thread which holds the lock + std::atomic_int owner_id; // (gtid+1) of owning thread, 0 if unlocked + std::atomic_int depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock }; #else struct kmp_base_ticket_lock { - // `initialized' must be the first entry in the lock data structure! - std::atomic initialized; - volatile union kmp_ticket_lock *self; // points to the lock union - ident_t const * location; // Source code location of omp_init_lock(). - std::atomic next_ticket; // ticket number to give to next thread which acquires - std::atomic now_serving; // ticket number for thread which holds the lock - std::atomic owner_id; // (gtid+1) of owning thread, 0 if unlocked - std::atomic depth_locked; // depth locked, for nested locks only - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock + // `initialized' must be the first entry in the lock data structure! + std::atomic initialized; + volatile union kmp_ticket_lock *self; // points to the lock union + ident_t const *location; // Source code location of omp_init_lock(). + std::atomic + next_ticket; // ticket number to give to next thread which acquires + std::atomic + now_serving; // ticket number for thread which holds the lock + std::atomic owner_id; // (gtid+1) of owning thread, 0 if unlocked + std::atomic depth_locked; // depth locked, for nested locks only + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock }; #endif @@ -276,44 +264,46 @@ struct kmp_base_ticket_lock; typedef struct kmp_base_ticket_lock kmp_base_ticket_lock_t; union KMP_ALIGN_CACHE kmp_ticket_lock { - kmp_base_ticket_lock_t lk; // This field must be first to allow static initializing. - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_ticket_lock_t, CACHE_LINE ) ]; + kmp_base_ticket_lock_t + lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[KMP_PAD(kmp_base_ticket_lock_t, CACHE_LINE)]; }; typedef union kmp_ticket_lock kmp_ticket_lock_t; -// // Static initializer for simple ticket lock variables. Usage: // kmp_ticket_lock_t xlock = KMP_TICKET_LOCK_INITIALIZER( xlock ); // Note the macro argument. It is important to make var properly initialized. 
-// -#define KMP_TICKET_LOCK_INITIALIZER( lock ) { { ATOMIC_VAR_INIT(true), \ - &(lock), \ - NULL, \ - ATOMIC_VAR_INIT(0U), \ - ATOMIC_VAR_INIT(0U), \ - ATOMIC_VAR_INIT(0), \ - ATOMIC_VAR_INIT(-1) } } - -extern int __kmp_acquire_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_ticket_lock_with_cheks( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_ticket_lock( kmp_ticket_lock_t *lck ); - -extern int __kmp_acquire_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_ticket_lock( kmp_ticket_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_ticket_lock( kmp_ticket_lock_t *lck ); -extern void __kmp_destroy_nested_ticket_lock( kmp_ticket_lock_t *lck ); - +#define KMP_TICKET_LOCK_INITIALIZER(lock) \ + { \ + { \ + ATOMIC_VAR_INIT(true) \ + , &(lock), NULL, ATOMIC_VAR_INIT(0U), ATOMIC_VAR_INIT(0U), \ + ATOMIC_VAR_INIT(0), ATOMIC_VAR_INIT(-1) \ + } \ + } + +extern int __kmp_acquire_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_ticket_lock_with_cheks(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_ticket_lock(kmp_ticket_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_ticket_lock(kmp_ticket_lock_t *lck); +extern void __kmp_destroy_ticket_lock(kmp_ticket_lock_t *lck); + +extern int __kmp_acquire_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_nested_ticket_lock(kmp_ticket_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_ticket_lock(kmp_ticket_lock_t *lck); +extern void __kmp_destroy_nested_ticket_lock(kmp_ticket_lock_t *lck); // ---------------------------------------------------------------------------- // Queuing locks. -// ---------------------------------------------------------------------------- #if KMP_USE_ADAPTIVE_LOCKS @@ -324,17 +314,17 @@ typedef struct kmp_adaptive_lock_info kmp_adaptive_lock_info_t; #if KMP_DEBUG_ADAPTIVE_LOCKS struct kmp_adaptive_lock_statistics { - /* So we can get stats from locks that haven't been destroyed. */ - kmp_adaptive_lock_info_t * next; - kmp_adaptive_lock_info_t * prev; - - /* Other statistics */ - kmp_uint32 successfulSpeculations; - kmp_uint32 hardFailedSpeculations; - kmp_uint32 softFailedSpeculations; - kmp_uint32 nonSpeculativeAcquires; - kmp_uint32 nonSpeculativeAcquireAttempts; - kmp_uint32 lemmingYields; + /* So we can get stats from locks that haven't been destroyed. */ + kmp_adaptive_lock_info_t *next; + kmp_adaptive_lock_info_t *prev; + + /* Other statistics */ + kmp_uint32 successfulSpeculations; + kmp_uint32 hardFailedSpeculations; + kmp_uint32 softFailedSpeculations; + kmp_uint32 nonSpeculativeAcquires; + kmp_uint32 nonSpeculativeAcquireAttempts; + kmp_uint32 lemmingYields; }; typedef struct kmp_adaptive_lock_statistics kmp_adaptive_lock_statistics_t; @@ -344,188 +334,182 @@ extern void __kmp_init_speculative_stats(); #endif // KMP_DEBUG_ADAPTIVE_LOCKS -struct kmp_adaptive_lock_info -{ - /* Values used for adaptivity. 
- * Although these are accessed from multiple threads we don't access them atomically, - * because if we miss updates it probably doesn't matter much. (It just affects our - * decision about whether to try speculation on the lock). - */ - kmp_uint32 volatile badness; - kmp_uint32 volatile acquire_attempts; - /* Parameters of the lock. */ - kmp_uint32 max_badness; - kmp_uint32 max_soft_retries; +struct kmp_adaptive_lock_info { + /* Values used for adaptivity. + Although these are accessed from multiple threads we don't access them + atomically, because if we miss updates it probably doesn't matter much. (It + just affects our decision about whether to try speculation on the lock). */ + kmp_uint32 volatile badness; + kmp_uint32 volatile acquire_attempts; + /* Parameters of the lock. */ + kmp_uint32 max_badness; + kmp_uint32 max_soft_retries; #if KMP_DEBUG_ADAPTIVE_LOCKS - kmp_adaptive_lock_statistics_t volatile stats; + kmp_adaptive_lock_statistics_t volatile stats; #endif }; #endif // KMP_USE_ADAPTIVE_LOCKS - struct kmp_base_queuing_lock { - // `initialized' must be the first entry in the lock data structure! - volatile union kmp_queuing_lock *initialized; // Points to the lock union if in initialized state. - - ident_t const * location; // Source code location of omp_init_lock(). - - KMP_ALIGN( 8 ) // tail_id must be 8-byte aligned! - - volatile kmp_int32 tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty - // Must be no padding here since head/tail used in 8-byte CAS - volatile kmp_int32 head_id; // (gtid+1) of thread at head of wait queue, 0 if empty - // Decl order assumes little endian - // bakery-style lock - volatile kmp_uint32 next_ticket; // ticket number to give to next thread which acquires - volatile kmp_uint32 now_serving; // ticket number for thread which holds the lock - volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked, for nested locks only - - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock + // `initialized' must be the first entry in the lock data structure! + volatile union kmp_queuing_lock + *initialized; // Points to the lock union if in initialized state. + + ident_t const *location; // Source code location of omp_init_lock(). + + KMP_ALIGN(8) // tail_id must be 8-byte aligned! + + volatile kmp_int32 + tail_id; // (gtid+1) of thread at tail of wait queue, 0 if empty + // Must be no padding here since head/tail used in 8-byte CAS + volatile kmp_int32 + head_id; // (gtid+1) of thread at head of wait queue, 0 if empty + // Decl order assumes little endian + // bakery-style lock + volatile kmp_uint32 + next_ticket; // ticket number to give to next thread which acquires + volatile kmp_uint32 + now_serving; // ticket number for thread which holds the lock + volatile kmp_int32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked, for nested locks only + + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock }; typedef struct kmp_base_queuing_lock kmp_base_queuing_lock_t; -KMP_BUILD_ASSERT( offsetof( kmp_base_queuing_lock_t, tail_id ) % 8 == 0 ); +KMP_BUILD_ASSERT(offsetof(kmp_base_queuing_lock_t, tail_id) % 8 == 0); union KMP_ALIGN_CACHE kmp_queuing_lock { - kmp_base_queuing_lock_t lk; // This field must be first to allow static initializing. 
- kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_queuing_lock_t, CACHE_LINE ) ]; + kmp_base_queuing_lock_t + lk; // This field must be first to allow static initializing. + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[KMP_PAD(kmp_base_queuing_lock_t, CACHE_LINE)]; }; typedef union kmp_queuing_lock kmp_queuing_lock_t; -extern int __kmp_acquire_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_queuing_lock( kmp_queuing_lock_t *lck ); - -extern int __kmp_acquire_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_queuing_lock( kmp_queuing_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_queuing_lock( kmp_queuing_lock_t *lck ); -extern void __kmp_destroy_nested_queuing_lock( kmp_queuing_lock_t *lck ); +extern int __kmp_acquire_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_queuing_lock(kmp_queuing_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_queuing_lock(kmp_queuing_lock_t *lck); +extern void __kmp_destroy_queuing_lock(kmp_queuing_lock_t *lck); + +extern int __kmp_acquire_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_release_nested_queuing_lock(kmp_queuing_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_queuing_lock(kmp_queuing_lock_t *lck); +extern void __kmp_destroy_nested_queuing_lock(kmp_queuing_lock_t *lck); #if KMP_USE_ADAPTIVE_LOCKS // ---------------------------------------------------------------------------- // Adaptive locks. -// ---------------------------------------------------------------------------- struct kmp_base_adaptive_lock { - kmp_base_queuing_lock qlk; - KMP_ALIGN(CACHE_LINE) - kmp_adaptive_lock_info_t adaptive; // Information for the speculative adaptive lock + kmp_base_queuing_lock qlk; + KMP_ALIGN(CACHE_LINE) + kmp_adaptive_lock_info_t + adaptive; // Information for the speculative adaptive lock }; typedef struct kmp_base_adaptive_lock kmp_base_adaptive_lock_t; union KMP_ALIGN_CACHE kmp_adaptive_lock { - kmp_base_adaptive_lock_t lk; - kmp_lock_pool_t pool; - double lk_align; - char lk_pad[ KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE) ]; + kmp_base_adaptive_lock_t lk; + kmp_lock_pool_t pool; + double lk_align; + char lk_pad[KMP_PAD(kmp_base_adaptive_lock_t, CACHE_LINE)]; }; typedef union kmp_adaptive_lock kmp_adaptive_lock_t; -# define GET_QLK_PTR(l) ((kmp_queuing_lock_t *) & (l)->lk.qlk) +#define GET_QLK_PTR(l) ((kmp_queuing_lock_t *)&(l)->lk.qlk) #endif // KMP_USE_ADAPTIVE_LOCKS // ---------------------------------------------------------------------------- // DRDPA ticket locks. -// ---------------------------------------------------------------------------- - struct kmp_base_drdpa_lock { - // - // All of the fields on the first cache line are only written when - // initializing or reconfiguring the lock. 
These are relatively rare - // operations, so data from the first cache line will usually stay - // resident in the cache of each thread trying to acquire the lock. - // - // initialized must be the first entry in the lock data structure! - // - KMP_ALIGN_CACHE - - volatile union kmp_drdpa_lock * initialized; // points to the lock union if in initialized state - ident_t const * location; // Source code location of omp_init_lock(). - volatile struct kmp_lock_poll { - kmp_uint64 poll; - } * volatile polls; - volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op - kmp_uint64 cleanup_ticket; // thread with cleanup ticket - volatile struct kmp_lock_poll * old_polls; // will deallocate old_polls - kmp_uint32 num_polls; // must be power of 2 - - // - // next_ticket it needs to exist in a separate cache line, as it is - // invalidated every time a thread takes a new ticket. - // - KMP_ALIGN_CACHE - - volatile kmp_uint64 next_ticket; - - // - // now_serving is used to store our ticket value while we hold the lock. - // It has a slightly different meaning in the DRDPA ticket locks (where - // it is written by the acquiring thread) than it does in the simple - // ticket locks (where it is written by the releasing thread). - // - // Since now_serving is only read an written in the critical section, - // it is non-volatile, but it needs to exist on a separate cache line, - // as it is invalidated at every lock acquire. - // - // Likewise, the vars used for nested locks (owner_id and depth_locked) - // are only written by the thread owning the lock, so they are put in - // this cache line. owner_id is read by other threads, so it must be - // declared volatile. - // - KMP_ALIGN_CACHE - - kmp_uint64 now_serving; // doesn't have to be volatile - volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked - kmp_int32 depth_locked; // depth locked - kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock + // All of the fields on the first cache line are only written when + // initializing or reconfiguring the lock. These are relatively rare + // operations, so data from the first cache line will usually stay resident in + // the cache of each thread trying to acquire the lock. + // + // initialized must be the first entry in the lock data structure! + KMP_ALIGN_CACHE + + volatile union kmp_drdpa_lock + *initialized; // points to the lock union if in initialized state + ident_t const *location; // Source code location of omp_init_lock(). + volatile struct kmp_lock_poll { kmp_uint64 poll; } * volatile polls; + volatile kmp_uint64 mask; // is 2**num_polls-1 for mod op + kmp_uint64 cleanup_ticket; // thread with cleanup ticket + volatile struct kmp_lock_poll *old_polls; // will deallocate old_polls + kmp_uint32 num_polls; // must be power of 2 + + // next_ticket it needs to exist in a separate cache line, as it is + // invalidated every time a thread takes a new ticket. + KMP_ALIGN_CACHE + + volatile kmp_uint64 next_ticket; + + // now_serving is used to store our ticket value while we hold the lock. It + // has a slightly different meaning in the DRDPA ticket locks (where it is + // written by the acquiring thread) than it does in the simple ticket locks + // (where it is written by the releasing thread). + // + // Since now_serving is only read an written in the critical section, + // it is non-volatile, but it needs to exist on a separate cache line, + // as it is invalidated at every lock acquire. 
+ // + // Likewise, the vars used for nested locks (owner_id and depth_locked) are + // only written by the thread owning the lock, so they are put in this cache + // line. owner_id is read by other threads, so it must be declared volatile. + KMP_ALIGN_CACHE + kmp_uint64 now_serving; // doesn't have to be volatile + volatile kmp_uint32 owner_id; // (gtid+1) of owning thread, 0 if unlocked + kmp_int32 depth_locked; // depth locked + kmp_lock_flags_t flags; // lock specifics, e.g. critical section lock }; typedef struct kmp_base_drdpa_lock kmp_base_drdpa_lock_t; union KMP_ALIGN_CACHE kmp_drdpa_lock { - kmp_base_drdpa_lock_t lk; // This field must be first to allow static initializing. */ - kmp_lock_pool_t pool; - double lk_align; // use worst case alignment - char lk_pad[ KMP_PAD( kmp_base_drdpa_lock_t, CACHE_LINE ) ]; + kmp_base_drdpa_lock_t + lk; // This field must be first to allow static initializing. */ + kmp_lock_pool_t pool; + double lk_align; // use worst case alignment + char lk_pad[KMP_PAD(kmp_base_drdpa_lock_t, CACHE_LINE)]; }; typedef union kmp_drdpa_lock kmp_drdpa_lock_t; -extern int __kmp_acquire_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_drdpa_lock( kmp_drdpa_lock_t *lck ); - -extern int __kmp_acquire_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_test_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern int __kmp_release_nested_drdpa_lock( kmp_drdpa_lock_t *lck, kmp_int32 gtid ); -extern void __kmp_init_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); -extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); +extern int __kmp_acquire_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); +extern int __kmp_test_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); +extern void __kmp_init_drdpa_lock(kmp_drdpa_lock_t *lck); +extern void __kmp_destroy_drdpa_lock(kmp_drdpa_lock_t *lck); +extern int __kmp_acquire_nested_drdpa_lock(kmp_drdpa_lock_t *lck, + kmp_int32 gtid); +extern int __kmp_test_nested_drdpa_lock(kmp_drdpa_lock_t *lck, kmp_int32 gtid); +extern int __kmp_release_nested_drdpa_lock(kmp_drdpa_lock_t *lck, + kmp_int32 gtid); +extern void __kmp_init_nested_drdpa_lock(kmp_drdpa_lock_t *lck); +extern void __kmp_destroy_nested_drdpa_lock(kmp_drdpa_lock_t *lck); // ============================================================================ // Lock purposes. // ============================================================================ - -// ---------------------------------------------------------------------------- // Bootstrap locks. -// ---------------------------------------------------------------------------- - +// // Bootstrap locks -- very few locks used at library initialization time. // Bootstrap locks are currently implemented as ticket locks. 
// They could also be implemented as test and set lock, but cannot be @@ -534,111 +518,80 @@ extern void __kmp_destroy_nested_drdpa_lock( kmp_drdpa_lock_t *lck ); typedef kmp_ticket_lock_t kmp_bootstrap_lock_t; -#define KMP_BOOTSTRAP_LOCK_INITIALIZER( lock ) KMP_TICKET_LOCK_INITIALIZER( (lock) ) +#define KMP_BOOTSTRAP_LOCK_INITIALIZER(lock) KMP_TICKET_LOCK_INITIALIZER((lock)) -static inline int -__kmp_acquire_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_acquire_ticket_lock( lck, KMP_GTID_DNE ); +static inline int __kmp_acquire_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + return __kmp_acquire_ticket_lock(lck, KMP_GTID_DNE); } -static inline int -__kmp_test_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - return __kmp_test_ticket_lock( lck, KMP_GTID_DNE ); +static inline int __kmp_test_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + return __kmp_test_ticket_lock(lck, KMP_GTID_DNE); } -static inline void -__kmp_release_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_release_ticket_lock( lck, KMP_GTID_DNE ); +static inline void __kmp_release_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_release_ticket_lock(lck, KMP_GTID_DNE); } -static inline void -__kmp_init_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); +static inline void __kmp_init_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_init_ticket_lock(lck); } -static inline void -__kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); +static inline void __kmp_destroy_bootstrap_lock(kmp_bootstrap_lock_t *lck) { + __kmp_destroy_ticket_lock(lck); } - -// ---------------------------------------------------------------------------- // Internal RTL locks. -// ---------------------------------------------------------------------------- - // // Internal RTL locks are also implemented as ticket locks, for now. // // FIXME - We should go through and figure out which lock kind works best for // each internal lock, and use the type declaration and function calls for // that explicit lock kind (and get rid of this section). -// typedef kmp_ticket_lock_t kmp_lock_t; -static inline int -__kmp_acquire_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_acquire_ticket_lock( lck, gtid ); +static inline int __kmp_acquire_lock(kmp_lock_t *lck, kmp_int32 gtid) { + return __kmp_acquire_ticket_lock(lck, gtid); } -static inline int -__kmp_test_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - return __kmp_test_ticket_lock( lck, gtid ); +static inline int __kmp_test_lock(kmp_lock_t *lck, kmp_int32 gtid) { + return __kmp_test_ticket_lock(lck, gtid); } -static inline void -__kmp_release_lock( kmp_lock_t *lck, kmp_int32 gtid ) -{ - __kmp_release_ticket_lock( lck, gtid ); +static inline void __kmp_release_lock(kmp_lock_t *lck, kmp_int32 gtid) { + __kmp_release_ticket_lock(lck, gtid); } -static inline void -__kmp_init_lock( kmp_lock_t *lck ) -{ - __kmp_init_ticket_lock( lck ); +static inline void __kmp_init_lock(kmp_lock_t *lck) { + __kmp_init_ticket_lock(lck); } -static inline void -__kmp_destroy_lock( kmp_lock_t *lck ) -{ - __kmp_destroy_ticket_lock( lck ); +static inline void __kmp_destroy_lock(kmp_lock_t *lck) { + __kmp_destroy_ticket_lock(lck); } - -// ---------------------------------------------------------------------------- // User locks. -// ---------------------------------------------------------------------------- - -// -// Do not allocate objects of type union kmp_user_lock!!! -// This will waste space unless __kmp_user_lock_kind == lk_drdpa. 
-// Instead, check the value of __kmp_user_lock_kind and allocate objects of -// the type of the appropriate union member, and cast their addresses to -// kmp_user_lock_p. // +// Do not allocate objects of type union kmp_user_lock!!! This will waste space +// unless __kmp_user_lock_kind == lk_drdpa. Instead, check the value of +// __kmp_user_lock_kind and allocate objects of the type of the appropriate +// union member, and cast their addresses to kmp_user_lock_p. enum kmp_lock_kind { - lk_default = 0, - lk_tas, + lk_default = 0, + lk_tas, #if KMP_USE_FUTEX - lk_futex, + lk_futex, #endif #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - lk_hle, - lk_rtm, + lk_hle, + lk_rtm, #endif - lk_ticket, - lk_queuing, - lk_drdpa, + lk_ticket, + lk_queuing, + lk_drdpa, #if KMP_USE_ADAPTIVE_LOCKS - lk_adaptive + lk_adaptive #endif // KMP_USE_ADAPTIVE_LOCKS }; @@ -647,279 +600,276 @@ typedef enum kmp_lock_kind kmp_lock_kind_t; extern kmp_lock_kind_t __kmp_user_lock_kind; union kmp_user_lock { - kmp_tas_lock_t tas; + kmp_tas_lock_t tas; #if KMP_USE_FUTEX - kmp_futex_lock_t futex; + kmp_futex_lock_t futex; #endif - kmp_ticket_lock_t ticket; - kmp_queuing_lock_t queuing; - kmp_drdpa_lock_t drdpa; + kmp_ticket_lock_t ticket; + kmp_queuing_lock_t queuing; + kmp_drdpa_lock_t drdpa; #if KMP_USE_ADAPTIVE_LOCKS - kmp_adaptive_lock_t adaptive; + kmp_adaptive_lock_t adaptive; #endif // KMP_USE_ADAPTIVE_LOCKS - kmp_lock_pool_t pool; + kmp_lock_pool_t pool; }; typedef union kmp_user_lock *kmp_user_lock_p; -#if ! KMP_USE_DYNAMIC_LOCK +#if !KMP_USE_DYNAMIC_LOCK extern size_t __kmp_base_user_lock_size; extern size_t __kmp_user_lock_size; -extern kmp_int32 ( *__kmp_get_user_lock_owner_ )( kmp_user_lock_p lck ); +extern kmp_int32 (*__kmp_get_user_lock_owner_)(kmp_user_lock_p lck); -static inline kmp_int32 -__kmp_get_user_lock_owner( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_get_user_lock_owner_ != NULL ); - return ( *__kmp_get_user_lock_owner_ )( lck ); +static inline kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_get_user_lock_owner_ != NULL); + return (*__kmp_get_user_lock_owner_)(lck); } -extern int ( *__kmp_acquire_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#define __kmp_acquire_user_lock_with_checks(lck,gtid) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked != -1 ) { \ - KMP_FATAL( LockNestableUsedAsSimple, func ); \ - } \ - if ( ( gtid >= 0 ) && ( lck->tas.lk.poll - 1 == gtid ) ) { \ - KMP_FATAL( LockIsAlreadyOwned, func ); \ - } \ - } \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); \ - ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); \ - } +extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#define __kmp_acquire_user_lock_with_checks(lck, gtid) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if (__kmp_env_consistency_check) { \ + char const *const func = "omp_set_lock"; \ + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && \ + lck->tas.lk.depth_locked != -1) { \ + KMP_FATAL(LockNestableUsedAsSimple, func); \ + } \ + if ((gtid >= 0) && (lck->tas.lk.poll - 1 == gtid)) { \ + KMP_FATAL(LockIsAlreadyOwned, func); \ + } \ + } \ + if ((lck->tas.lk.poll != 0) || \ + (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, gtid + 1))) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(lck); \ + KMP_INIT_YIELD(spins); \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + while ( \ + (lck->tas.lk.poll != 0) || \ + (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, gtid + 1))) { \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + } \ + } \ + KMP_FSYNC_ACQUIRED(lck); \ + } else { \ + KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); \ + (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); \ + } #else -static inline int -__kmp_acquire_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_user_lock_with_checks_ != NULL ); - return ( *__kmp_acquire_user_lock_with_checks_ )( lck, gtid ); +static inline int __kmp_acquire_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_acquire_user_lock_with_checks_ != NULL); + return (*__kmp_acquire_user_lock_with_checks_)(lck, gtid); } #endif -extern int ( *__kmp_test_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); - -#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - -#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ -extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked != -1 ) { - KMP_FATAL( LockNestableUsedAsSimple, func ); - } - } - return ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - } else { - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); +extern int (*__kmp_test_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); + +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + +#include "kmp_i18n.h" /* AC: KMP_FATAL definition */ +extern int __kmp_env_consistency_check; /* AC: copy from kmp.h here */ +static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + if (__kmp_user_lock_kind == 
lk_tas) { + if (__kmp_env_consistency_check) { + char const *const func = "omp_test_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_LOCK_T_SIZE) && + lck->tas.lk.depth_locked != -1) { + KMP_FATAL(LockNestableUsedAsSimple, func); + } } + return ((lck->tas.lk.poll == 0) && + KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, gtid + 1)); + } else { + KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); + return (*__kmp_test_user_lock_with_checks_)(lck, gtid); + } } #else -static inline int -__kmp_test_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_user_lock_with_checks_ )( lck, gtid ); +static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); + return (*__kmp_test_user_lock_with_checks_)(lck, gtid); } #endif -extern int ( *__kmp_release_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); +extern int (*__kmp_release_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); -static inline void -__kmp_release_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_user_lock_with_checks_ != NULL ); - ( *__kmp_release_user_lock_with_checks_ ) ( lck, gtid ); +static inline void __kmp_release_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_release_user_lock_with_checks_ != NULL); + (*__kmp_release_user_lock_with_checks_)(lck, gtid); } -extern void ( *__kmp_init_user_lock_with_checks_ )( kmp_user_lock_p lck ); +extern void (*__kmp_init_user_lock_with_checks_)(kmp_user_lock_p lck); -static inline void -__kmp_init_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_user_lock_with_checks_ != NULL ); - ( *__kmp_init_user_lock_with_checks_ )( lck ); +static inline void __kmp_init_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_init_user_lock_with_checks_ != NULL); + (*__kmp_init_user_lock_with_checks_)(lck); } -// // We need a non-checking version of destroy lock for when the RTL is // doing the cleanup as it can't always tell if the lock is nested or not. 
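// --- Editor's illustrative sketch (not part of this patch) -----------------
// A minimal model of why a raw, non-checking destroy pointer is kept next to
// the checking one: during shutdown the runtime frees locks whose nesting
// state it can no longer tell, so it goes through the raw entry instead of
// the validating wrapper. All demo_* names below are hypothetical.
#include <cassert>

struct demo_lock_t {
  int depth_locked; // -1 for a simple lock, >= 0 for a nested lock
};

static void demo_destroy_raw(demo_lock_t *lck) {
  lck->depth_locked = 0; // no validation; safe for simple and nested locks
}

static void demo_destroy_with_checks(demo_lock_t *lck) {
  // User-facing path: refuses to destroy a nested lock that is still held.
  assert(lck->depth_locked <= 0 && "destroying a held nested lock");
  demo_destroy_raw(lck);
}

// Function-pointer slots, mirroring the __kmp_destroy_user_lock_* pair here.
static void (*demo_destroy_)(demo_lock_t *) = demo_destroy_raw;
static void (*demo_destroy_with_checks_)(demo_lock_t *) =
    demo_destroy_with_checks;

static void demo_cleanup(demo_lock_t *locks, int n) {
  for (int i = 0; i < n; ++i)
    (*demo_destroy_)(&locks[i]); // cleanup: nesting unknown, use raw destroy
}

int main() {
  demo_lock_t locks[2] = {{-1}, {0}};
  (*demo_destroy_with_checks_)(&locks[0]); // user path on a simple lock
  demo_cleanup(locks, 2);                  // runtime cleanup path
}
// ----------------------------------------------------------------------------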
-// -extern void ( *__kmp_destroy_user_lock_ )( kmp_user_lock_p lck ); +extern void (*__kmp_destroy_user_lock_)(kmp_user_lock_p lck); -static inline void -__kmp_destroy_user_lock( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_ != NULL ); - ( *__kmp_destroy_user_lock_ )( lck ); +static inline void __kmp_destroy_user_lock(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_ != NULL); + (*__kmp_destroy_user_lock_)(lck); } -extern void ( *__kmp_destroy_user_lock_with_checks_ )( kmp_user_lock_p lck ); +extern void (*__kmp_destroy_user_lock_with_checks_)(kmp_user_lock_p lck); -static inline void -__kmp_destroy_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_user_lock_with_checks_ )( lck ); +static inline void __kmp_destroy_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_destroy_user_lock_with_checks_ != NULL); + (*__kmp_destroy_user_lock_with_checks_)(lck); } -extern int ( *__kmp_acquire_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); +extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -#define __kmp_acquire_nested_user_lock_with_checks(lck,gtid,depth) \ - if (__kmp_user_lock_kind == lk_tas) { \ - if ( __kmp_env_consistency_check ) { \ - char const * const func = "omp_set_nest_lock"; \ - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) \ - && lck->tas.lk.depth_locked == -1 ) { \ - KMP_FATAL( LockSimpleUsedAsNestable, func ); \ - } \ - } \ - if ( lck->tas.lk.poll - 1 == gtid ) { \ - lck->tas.lk.depth_locked += 1; \ - *depth = KMP_LOCK_ACQUIRED_NEXT; \ - } else { \ - if ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - kmp_uint32 spins; \ - KMP_FSYNC_PREPARE( lck ); \ - KMP_INIT_YIELD( spins ); \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - while ( ( lck->tas.lk.poll != 0 ) || \ - ( ! KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ) ) { \ - if ( TCR_4(__kmp_nth) > (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc) ) { \ - KMP_YIELD( TRUE ); \ - } else { \ - KMP_YIELD_SPIN( spins ); \ - } \ - } \ - } \ - lck->tas.lk.depth_locked = 1; \ - *depth = KMP_LOCK_ACQUIRED_FIRST; \ - } \ - KMP_FSYNC_ACQUIRED( lck ); \ - } else { \ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); \ - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); \ - } +#define __kmp_acquire_nested_user_lock_with_checks(lck, gtid, depth) \ + if (__kmp_user_lock_kind == lk_tas) { \ + if (__kmp_env_consistency_check) { \ + char const *const func = "omp_set_nest_lock"; \ + if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && \ + lck->tas.lk.depth_locked == -1) { \ + KMP_FATAL(LockSimpleUsedAsNestable, func); \ + } \ + } \ + if (lck->tas.lk.poll - 1 == gtid) { \ + lck->tas.lk.depth_locked += 1; \ + *depth = KMP_LOCK_ACQUIRED_NEXT; \ + } else { \ + if ((lck->tas.lk.poll != 0) || \ + (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, gtid + 1))) { \ + kmp_uint32 spins; \ + KMP_FSYNC_PREPARE(lck); \ + KMP_INIT_YIELD(spins); \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + while ((lck->tas.lk.poll != 0) || \ + (!KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, \ + gtid + 1))) { \ + if (TCR_4(__kmp_nth) > \ + (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \ + KMP_YIELD(TRUE); \ + } else { \ + KMP_YIELD_SPIN(spins); \ + } \ + } \ + } \ + lck->tas.lk.depth_locked = 1; \ + *depth = KMP_LOCK_ACQUIRED_FIRST; \ + } \ + KMP_FSYNC_ACQUIRED(lck); \ + } else { \ + KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); \ + *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); \ + } #else static inline void -__kmp_acquire_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid, int* depth ) -{ - KMP_DEBUG_ASSERT( __kmp_acquire_nested_user_lock_with_checks_ != NULL ); - *depth = ( *__kmp_acquire_nested_user_lock_with_checks_ )( lck, gtid ); +__kmp_acquire_nested_user_lock_with_checks(kmp_user_lock_p lck, kmp_int32 gtid, + int *depth) { + KMP_DEBUG_ASSERT(__kmp_acquire_nested_user_lock_with_checks_ != NULL); + *depth = (*__kmp_acquire_nested_user_lock_with_checks_)(lck, gtid); } #endif -extern int ( *__kmp_test_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); +extern int (*__kmp_test_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); #if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64) -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - if ( __kmp_user_lock_kind == lk_tas ) { - int retval; - if ( __kmp_env_consistency_check ) { - char const * const func = "omp_test_nest_lock"; - if ( ( sizeof ( kmp_tas_lock_t ) <= OMP_NEST_LOCK_T_SIZE ) - && lck->tas.lk.depth_locked == -1 ) { - KMP_FATAL( LockSimpleUsedAsNestable, func ); - } - } - KMP_DEBUG_ASSERT( gtid >= 0 ); - if ( lck->tas.lk.poll - 1 == gtid ) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ - return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ - } - retval = ( ( lck->tas.lk.poll == 0 ) && - KMP_COMPARE_AND_STORE_ACQ32( &(lck->tas.lk.poll), 0, gtid + 1 ) ); - if ( retval ) { - KMP_MB(); - lck->tas.lk.depth_locked = 1; - } - return retval; - } else { - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); +static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + if (__kmp_user_lock_kind == lk_tas) { + int retval; + if (__kmp_env_consistency_check) { + char const *const func = "omp_test_nest_lock"; + if ((sizeof(kmp_tas_lock_t) <= OMP_NEST_LOCK_T_SIZE) && + lck->tas.lk.depth_locked == -1) { + KMP_FATAL(LockSimpleUsedAsNestable, func); + } + } + KMP_DEBUG_ASSERT(gtid >= 0); + if (lck->tas.lk.poll - 1 == + gtid) { /* __kmp_get_tas_lock_owner( lck ) == gtid */ + return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ } + retval = ((lck->tas.lk.poll == 0) && + KMP_COMPARE_AND_STORE_ACQ32(&(lck->tas.lk.poll), 0, gtid + 1)); + if (retval) { + KMP_MB(); + lck->tas.lk.depth_locked = 1; + } + return retval; + } else { + KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL); + return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid); + } } #else -static inline int -__kmp_test_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_test_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_test_nested_user_lock_with_checks_ )( lck, gtid ); +static inline int 
__kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_test_nested_user_lock_with_checks_ != NULL); + return (*__kmp_test_nested_user_lock_with_checks_)(lck, gtid); } #endif -extern int ( *__kmp_release_nested_user_lock_with_checks_ )( kmp_user_lock_p lck, kmp_int32 gtid ); +extern int (*__kmp_release_nested_user_lock_with_checks_)(kmp_user_lock_p lck, + kmp_int32 gtid); static inline int -__kmp_release_nested_user_lock_with_checks( kmp_user_lock_p lck, kmp_int32 gtid ) -{ - KMP_DEBUG_ASSERT( __kmp_release_nested_user_lock_with_checks_ != NULL ); - return ( *__kmp_release_nested_user_lock_with_checks_ )( lck, gtid ); +__kmp_release_nested_user_lock_with_checks(kmp_user_lock_p lck, + kmp_int32 gtid) { + KMP_DEBUG_ASSERT(__kmp_release_nested_user_lock_with_checks_ != NULL); + return (*__kmp_release_nested_user_lock_with_checks_)(lck, gtid); } -extern void ( *__kmp_init_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); +extern void (*__kmp_init_nested_user_lock_with_checks_)(kmp_user_lock_p lck); -static inline void __kmp_init_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_init_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_init_nested_user_lock_with_checks_ )( lck ); +static inline void +__kmp_init_nested_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_init_nested_user_lock_with_checks_ != NULL); + (*__kmp_init_nested_user_lock_with_checks_)(lck); } -extern void ( *__kmp_destroy_nested_user_lock_with_checks_ )( kmp_user_lock_p lck ); +extern void (*__kmp_destroy_nested_user_lock_with_checks_)(kmp_user_lock_p lck); static inline void -__kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) -{ - KMP_DEBUG_ASSERT( __kmp_destroy_nested_user_lock_with_checks_ != NULL ); - ( *__kmp_destroy_nested_user_lock_with_checks_ )( lck ); +__kmp_destroy_nested_user_lock_with_checks(kmp_user_lock_p lck) { + KMP_DEBUG_ASSERT(__kmp_destroy_nested_user_lock_with_checks_ != NULL); + (*__kmp_destroy_nested_user_lock_with_checks_)(lck); } -// // user lock functions which do not necessarily exist for all lock kinds. // // The "set" functions usually have wrapper routines that check for a NULL set @@ -932,103 +882,96 @@ __kmp_destroy_nested_user_lock_with_checks( kmp_user_lock_p lck ) // In other cases, the calling code really should differentiate between an // unimplemented function and one that is implemented but returning NULL / // invalied value. If this is the case, no get function wrapper exists. 
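// --- Editor's illustrative sketch (not part of this patch) -----------------
// The wrapper convention described above, in miniature: optional vtable
// entries are plain function pointers that may be NULL. Where a harmless
// default exists, a wrapper hides the NULL check; otherwise no wrapper is
// provided and the caller must distinguish "unimplemented" from "implemented
// but returned NULL". All demo_* names are hypothetical.
#include <cstdio>

struct demo_lock_t {
  const char *location; // source location recorded at init, may be unused
};

// Optional entries; a lock kind that does not support them leaves these NULL.
static const char *(*demo_get_location_)(demo_lock_t *) = nullptr;
static void (*demo_set_location_)(demo_lock_t *, const char *) = nullptr;

// Getter wrapper: a NULL entry just means "no location information".
static const char *demo_get_location(demo_lock_t *lck) {
  return demo_get_location_ != nullptr ? (*demo_get_location_)(lck) : nullptr;
}

// Setter wrapper: silently ignores the call when the entry is unimplemented.
static void demo_set_location(demo_lock_t *lck, const char *loc) {
  if (demo_set_location_ != nullptr)
    (*demo_set_location_)(lck, loc);
}

int main() {
  demo_lock_t lck = {nullptr};
  demo_set_location(&lck, "foo.c:42"); // no-op here: entry is NULL
  const char *loc = demo_get_location(&lck);
  std::printf("location: %s\n", loc ? loc : "<none>");
}
// ----------------------------------------------------------------------------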
-// -extern int ( *__kmp_is_user_lock_initialized_ )( kmp_user_lock_p lck ); +extern int (*__kmp_is_user_lock_initialized_)(kmp_user_lock_p lck); // no set function; fields set durining local allocation -extern const ident_t * ( *__kmp_get_user_lock_location_ )( kmp_user_lock_p lck ); +extern const ident_t *(*__kmp_get_user_lock_location_)(kmp_user_lock_p lck); -static inline const ident_t * -__kmp_get_user_lock_location( kmp_user_lock_p lck ) -{ - if ( __kmp_get_user_lock_location_ != NULL ) { - return ( *__kmp_get_user_lock_location_ )( lck ); - } - else { - return NULL; - } +static inline const ident_t *__kmp_get_user_lock_location(kmp_user_lock_p lck) { + if (__kmp_get_user_lock_location_ != NULL) { + return (*__kmp_get_user_lock_location_)(lck); + } else { + return NULL; + } } -extern void ( *__kmp_set_user_lock_location_ )( kmp_user_lock_p lck, const ident_t *loc ); +extern void (*__kmp_set_user_lock_location_)(kmp_user_lock_p lck, + const ident_t *loc); -static inline void -__kmp_set_user_lock_location( kmp_user_lock_p lck, const ident_t *loc ) -{ - if ( __kmp_set_user_lock_location_ != NULL ) { - ( *__kmp_set_user_lock_location_ )( lck, loc ); - } +static inline void __kmp_set_user_lock_location(kmp_user_lock_p lck, + const ident_t *loc) { + if (__kmp_set_user_lock_location_ != NULL) { + (*__kmp_set_user_lock_location_)(lck, loc); + } } -extern kmp_lock_flags_t ( *__kmp_get_user_lock_flags_ )( kmp_user_lock_p lck ); +extern kmp_lock_flags_t (*__kmp_get_user_lock_flags_)(kmp_user_lock_p lck); -extern void ( *__kmp_set_user_lock_flags_ )( kmp_user_lock_p lck, kmp_lock_flags_t flags ); +extern void (*__kmp_set_user_lock_flags_)(kmp_user_lock_p lck, + kmp_lock_flags_t flags); -static inline void -__kmp_set_user_lock_flags( kmp_user_lock_p lck, kmp_lock_flags_t flags ) -{ - if ( __kmp_set_user_lock_flags_ != NULL ) { - ( *__kmp_set_user_lock_flags_ )( lck, flags ); - } +static inline void __kmp_set_user_lock_flags(kmp_user_lock_p lck, + kmp_lock_flags_t flags) { + if (__kmp_set_user_lock_flags_ != NULL) { + (*__kmp_set_user_lock_flags_)(lck, flags); + } } -// // The fuction which sets up all of the vtbl pointers for kmp_user_lock_t. -// -extern void __kmp_set_user_lock_vptrs( kmp_lock_kind_t user_lock_kind ); +extern void __kmp_set_user_lock_vptrs(kmp_lock_kind_t user_lock_kind); -// // Macros for binding user lock functions. 
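// --- Editor's illustrative sketch (not part of this patch) -----------------
// A scaled-down version of the token-pasting pattern used by the KMP_BIND_*
// macros defined just below: ##kind## splices a concrete lock kind's
// functions into the generic user-lock function-pointer slots, casting each
// to the common signature. All demo_* names are hypothetical.
#include <cstdio>

struct demo_lock_t { int poll; };

static int demo_acquire_ticket_lock(demo_lock_t *lck, int gtid) {
  lck->poll = gtid + 1; // pretend we took the lock
  return 0;
}
static int demo_release_ticket_lock(demo_lock_t *lck, int /*gtid*/) {
  lck->poll = 0;
  return 0;
}

// Generic slots that the runtime dispatches through at run time.
static int (*demo_acquire_user_lock_)(demo_lock_t *, int) = nullptr;
static int (*demo_release_user_lock_)(demo_lock_t *, int) = nullptr;

// Simplified analogue of KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock).
#define DEMO_BIND_USER_LOCK(kind)                                              \
  {                                                                            \
    demo_acquire_user_lock_ =                                                  \
        (int (*)(demo_lock_t *, int))demo_acquire_##kind##_lock;               \
    demo_release_user_lock_ =                                                  \
        (int (*)(demo_lock_t *, int))demo_release_##kind##_lock;               \
  }

int main() {
  DEMO_BIND_USER_LOCK(ticket); // expands into assignments of ticket functions
  demo_lock_t lck = {0};
  (*demo_acquire_user_lock_)(&lck, 3);
  std::printf("poll after acquire: %d\n", lck.poll); // prints 4
  (*demo_release_user_lock_)(&lck, 3);
}
// ----------------------------------------------------------------------------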
-// -#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) { \ - __kmp_acquire##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_acquire##nest##kind##_##suffix; \ - __kmp_release##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_release##nest##kind##_##suffix; \ - __kmp_test##nest##user_lock_with_checks_ = ( int (*)( kmp_user_lock_p, kmp_int32 ) ) \ - __kmp_test##nest##kind##_##suffix; \ - __kmp_init##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_init##nest##kind##_##suffix; \ - __kmp_destroy##nest##user_lock_with_checks_ = ( void (*)( kmp_user_lock_p ) ) \ - __kmp_destroy##nest##kind##_##suffix; \ -} - -#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) -#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) -#define KMP_BIND_NESTED_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) -#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) +#define KMP_BIND_USER_LOCK_TEMPLATE(nest, kind, suffix) \ + { \ + __kmp_acquire##nest##user_lock_with_checks_ = (int (*)( \ + kmp_user_lock_p, kmp_int32))__kmp_acquire##nest##kind##_##suffix; \ + __kmp_release##nest##user_lock_with_checks_ = (int (*)( \ + kmp_user_lock_p, kmp_int32))__kmp_release##nest##kind##_##suffix; \ + __kmp_test##nest##user_lock_with_checks_ = (int (*)( \ + kmp_user_lock_p, kmp_int32))__kmp_test##nest##kind##_##suffix; \ + __kmp_init##nest##user_lock_with_checks_ = \ + (void (*)(kmp_user_lock_p))__kmp_init##nest##kind##_##suffix; \ + __kmp_destroy##nest##user_lock_with_checks_ = \ + (void (*)(kmp_user_lock_p))__kmp_destroy##nest##kind##_##suffix; \ + } + +#define KMP_BIND_USER_LOCK(kind) KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock) +#define KMP_BIND_USER_LOCK_WITH_CHECKS(kind) \ + KMP_BIND_USER_LOCK_TEMPLATE(_, kind, lock_with_checks) +#define KMP_BIND_NESTED_USER_LOCK(kind) \ + KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock) +#define KMP_BIND_NESTED_USER_LOCK_WITH_CHECKS(kind) \ + KMP_BIND_USER_LOCK_TEMPLATE(_nested_, kind, lock_with_checks) -// ---------------------------------------------------------------------------- // User lock table & lock allocation -// ---------------------------------------------------------------------------- - -/* - On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory for lock variable, which - is not enough to store a pointer, so we have to use lock indexes instead of pointers and - maintain lock table to map indexes to pointers. - - - Note: The first element of the table is not a pointer to lock! It is a pointer to previously - allocated table (or NULL if it is the first table). - - Usage: - - if ( OMP_LOCK_T_SIZE < sizeof( ) ) { // or OMP_NEST_LOCK_T_SIZE - Lock table is fully utilized. User locks are indexes, so table is - used on user lock operation. - Note: it may be the case (lin_32) that we don't need to use a lock - table for regular locks, but do need the table for nested locks. - } - else { - Lock table initialized but not actually used. - } +/* On 64-bit Linux* OS (and OS X*) GNU compiler allocates only 4 bytems memory + for lock variable, which is not enough to store a pointer, so we have to use + lock indexes instead of pointers and maintain lock table to map indexes to + pointers. + + + Note: The first element of the table is not a pointer to lock! 
It is a + pointer to previously allocated table (or NULL if it is the first table). + + Usage: + + if ( OMP_LOCK_T_SIZE < sizeof( ) ) { // or OMP_NEST_LOCK_T_SIZE + Lock table is fully utilized. User locks are indexes, so table is used on + user lock operation. + Note: it may be the case (lin_32) that we don't need to use a lock + table for regular locks, but do need the table for nested locks. + } + else { + Lock table initialized but not actually used. + } */ struct kmp_lock_table { - kmp_lock_index_t used; // Number of used elements - kmp_lock_index_t allocated; // Number of allocated elements - kmp_user_lock_p * table; // Lock table. + kmp_lock_index_t used; // Number of used elements + kmp_lock_index_t allocated; // Number of allocated elements + kmp_user_lock_p *table; // Lock table. }; typedef struct kmp_lock_table kmp_lock_table_t; @@ -1037,8 +980,8 @@ extern kmp_lock_table_t __kmp_user_lock_table; extern kmp_user_lock_p __kmp_lock_pool; struct kmp_block_of_locks { - struct kmp_block_of_locks * next_block; - void * locks; + struct kmp_block_of_locks *next_block; + void *locks; }; typedef struct kmp_block_of_locks kmp_block_of_locks_t; @@ -1046,21 +989,25 @@ typedef struct kmp_block_of_locks kmp_block_of_locks_t; extern kmp_block_of_locks_t *__kmp_lock_blocks; extern int __kmp_num_locks_in_block; -extern kmp_user_lock_p __kmp_user_lock_allocate( void **user_lock, kmp_int32 gtid, kmp_lock_flags_t flags ); -extern void __kmp_user_lock_free( void **user_lock, kmp_int32 gtid, kmp_user_lock_p lck ); -extern kmp_user_lock_p __kmp_lookup_user_lock( void **user_lock, char const *func ); +extern kmp_user_lock_p __kmp_user_lock_allocate(void **user_lock, + kmp_int32 gtid, + kmp_lock_flags_t flags); +extern void __kmp_user_lock_free(void **user_lock, kmp_int32 gtid, + kmp_user_lock_p lck); +extern kmp_user_lock_p __kmp_lookup_user_lock(void **user_lock, + char const *func); extern void __kmp_cleanup_user_locks(); -#define KMP_CHECK_USER_LOCK_INIT() \ - { \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); \ - if ( ! TCR_4( __kmp_init_user_locks ) ) { \ - TCW_4( __kmp_init_user_locks, TRUE ); \ - } \ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); \ - } \ - } +#define KMP_CHECK_USER_LOCK_INIT() \ + { \ + if (!TCR_4(__kmp_init_user_locks)) { \ + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); \ + if (!TCR_4(__kmp_init_user_locks)) { \ + TCW_4(__kmp_init_user_locks, TRUE); \ + } \ + __kmp_release_bootstrap_lock(&__kmp_initz_lock); \ + } \ + } #endif // KMP_USE_DYNAMIC_LOCK @@ -1068,168 +1015,187 @@ extern void __kmp_cleanup_user_locks(); #undef KMP_GTID_DNE #if KMP_USE_DYNAMIC_LOCK - +// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without +// breaking the current compatibility. Essential functionality of this new code +// is dynamic dispatch, but it also implements (or enables implementation of) +// hinted user lock and critical section which will be part of OMP 4.5 soon. // -// KMP_USE_DYNAMIC_LOCK enables dynamic dispatch of lock functions without breaking the current -// compatibility. Essential functionality of this new code is dynamic dispatch, but it also -// implements (or enables implementation of) hinted user lock and critical section which will be -// part of OMP 4.5 soon. -// -// Lock type can be decided at creation time (i.e., lock initialization), and subsequent lock -// function call on the created lock object requires type extraction and call through jump table -// using the extracted type. 
This type information is stored in two different ways depending on -// the size of the lock object, and we differentiate lock types by this size requirement - direct -// and indirect locks. +// Lock type can be decided at creation time (i.e., lock initialization), and +// subsequent lock function call on the created lock object requires type +// extraction and call through jump table using the extracted type. This type +// information is stored in two different ways depending on the size of the lock +// object, and we differentiate lock types by this size requirement - direct and +// indirect locks. // // Direct locks: -// A direct lock object fits into the space created by the compiler for an omp_lock_t object, and -// TAS/Futex lock falls into this category. We use low one byte of the lock object as the storage -// for the lock type, and appropriate bit operation is required to access the data meaningful to -// the lock algorithms. Also, to differentiate direct lock from indirect lock, 1 is written to LSB -// of the lock object. The newly introduced "hle" lock is also a direct lock. +// A direct lock object fits into the space created by the compiler for an +// omp_lock_t object, and TAS/Futex lock falls into this category. We use low +// one byte of the lock object as the storage for the lock type, and appropriate +// bit operation is required to access the data meaningful to the lock +// algorithms. Also, to differentiate direct lock from indirect lock, 1 is +// written to LSB of the lock object. The newly introduced "hle" lock is also a +// direct lock. // // Indirect locks: -// An indirect lock object requires more space than the compiler-generated space, and it should be -// allocated from heap. Depending on the size of the compiler-generated space for the lock (i.e., -// size of omp_lock_t), this omp_lock_t object stores either the address of the heap-allocated -// indirect lock (void * fits in the object) or an index to the indirect lock table entry that -// holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this category, and the newly -// introduced "rtm" lock is also an indirect lock which was implemented on top of the Queuing lock. -// When the omp_lock_t object holds an index (not lock address), 0 is written to LSB to -// differentiate the lock from a direct lock, and the remaining part is the actual index to the +// An indirect lock object requires more space than the compiler-generated +// space, and it should be allocated from heap. Depending on the size of the +// compiler-generated space for the lock (i.e., size of omp_lock_t), this +// omp_lock_t object stores either the address of the heap-allocated indirect +// lock (void * fits in the object) or an index to the indirect lock table entry +// that holds the address. Ticket/Queuing/DRDPA/Adaptive lock falls into this +// category, and the newly introduced "rtm" lock is also an indirect lock which +// was implemented on top of the Queuing lock. When the omp_lock_t object holds +// an index (not lock address), 0 is written to LSB to differentiate the lock +// from a direct lock, and the remaining part is the actual index to the // indirect lock table. 
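// --- Editor's illustrative sketch (not part of this patch) -----------------
// Walks through the tagging scheme described above: a lock word whose LSB is
// 1 is a direct lock carrying its (odd) tag in the low bits, while a word
// whose LSB is 0 holds an indirect-lock table index (or a pointer) in the
// remaining bits. The helper is hypothetical and standalone; it is not how
// the runtime's KMP_EXTRACT_* macros are actually spelled.
#include <cstdint>
#include <cstdio>

constexpr unsigned kDemoLockShift = 8; // low bits reserved for the direct tag

static void demo_describe_lock_word(std::uintptr_t word) {
  if (word & 1u) {
    // Direct lock: the tag lives in the low byte, with the LSB set to 1.
    unsigned tag = static_cast<unsigned>(word & ((1u << kDemoLockShift) - 1));
    std::printf("direct lock, tag %u\n", tag);
  } else {
    // Indirect lock: LSB is 0; the remaining bits index the indirect table.
    unsigned long long index = static_cast<unsigned long long>(word >> 1);
    std::printf("indirect lock, table index %llu\n", index);
  }
}

int main() {
  demo_describe_lock_word((2u << 1) | 1u); // a direct lock, tag value 5
  demo_describe_lock_word(7u << 1);        // an indirect lock, table index 7
}
// ----------------------------------------------------------------------------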
-// #include // for uintptr_t // Shortcuts -#define KMP_USE_INLINED_TAS (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 +#define KMP_USE_INLINED_TAS \ + (KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)) && 1 #define KMP_USE_INLINED_FUTEX KMP_USE_FUTEX && 0 // List of lock definitions; all nested locks are indirect locks. // hle lock is xchg lock prefixed with XACQUIRE/XRELEASE. // All nested locks are indirect lock types. #if KMP_USE_TSX -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# endif // KMP_USE_FUTEX -# define KMP_LAST_D_LOCK lockseq_hle +#if KMP_USE_FUTEX +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) m(hle, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) #else -# if KMP_USE_FUTEX -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_futex, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_futex -# else -# define KMP_FOREACH_D_LOCK(m, a) m(tas, a) -# define KMP_FOREACH_I_LOCK(m, a) m(ticket, a) m(queuing, a) m(drdpa, a) \ - m(nested_tas, a) m(nested_ticket, a) \ - m(nested_queuing, a) m(nested_drdpa, a) -# define KMP_LAST_D_LOCK lockseq_tas -# endif // KMP_USE_FUTEX +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(hle, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(adaptive, a) m(drdpa, a) m(rtm, a) \ + m(nested_tas, a) m(nested_ticket, a) m(nested_queuing, a) \ + m(nested_drdpa, a) +#endif // KMP_USE_FUTEX +#define KMP_LAST_D_LOCK lockseq_hle +#else +#if KMP_USE_FUTEX +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) m(futex, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_futex, a) \ + m(nested_ticket, a) m(nested_queuing, a) m(nested_drdpa, a) +#define KMP_LAST_D_LOCK lockseq_futex +#else +#define KMP_FOREACH_D_LOCK(m, a) m(tas, a) +#define KMP_FOREACH_I_LOCK(m, a) \ + m(ticket, a) m(queuing, a) m(drdpa, a) m(nested_tas, a) m(nested_ticket, a) \ + m(nested_queuing, a) m(nested_drdpa, a) +#define KMP_LAST_D_LOCK lockseq_tas +#endif // KMP_USE_FUTEX #endif // KMP_USE_TSX // Information used in dynamic dispatch -#define KMP_LOCK_SHIFT 8 // number of low bits to be used as tag for direct locks +#define KMP_LOCK_SHIFT \ + 8 // number of low bits to be used as tag for direct locks #define KMP_FIRST_D_LOCK lockseq_tas #define KMP_FIRST_I_LOCK lockseq_ticket -#define KMP_LAST_I_LOCK lockseq_nested_drdpa -#define KMP_NUM_I_LOCKS (locktag_nested_drdpa+1) // number of indirect lock types +#define KMP_LAST_I_LOCK lockseq_nested_drdpa +#define KMP_NUM_I_LOCKS \ + (locktag_nested_drdpa + 1) // number of indirect lock types // Base type for dynamic locks. typedef kmp_uint32 kmp_dyna_lock_t; -// Lock sequence that enumerates all lock kinds. 
-// Always make this enumeration consistent with kmp_lockseq_t in the include directory. +// Lock sequence that enumerates all lock kinds. Always make this enumeration +// consistent with kmp_lockseq_t in the include directory. typedef enum { - lockseq_indirect = 0, -#define expand_seq(l,a) lockseq_##l, - KMP_FOREACH_D_LOCK(expand_seq, 0) - KMP_FOREACH_I_LOCK(expand_seq, 0) + lockseq_indirect = 0, +#define expand_seq(l, a) lockseq_##l, + KMP_FOREACH_D_LOCK(expand_seq, 0) KMP_FOREACH_I_LOCK(expand_seq, 0) #undef expand_seq } kmp_dyna_lockseq_t; // Enumerates indirect lock tags. typedef enum { -#define expand_tag(l,a) locktag_##l, - KMP_FOREACH_I_LOCK(expand_tag, 0) +#define expand_tag(l, a) locktag_##l, + KMP_FOREACH_I_LOCK(expand_tag, 0) #undef expand_tag } kmp_indirect_locktag_t; // Utility macros that extract information from lock sequences. -#define KMP_IS_D_LOCK(seq) ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) -#define KMP_IS_I_LOCK(seq) ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) -#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq) - KMP_FIRST_I_LOCK) -#define KMP_GET_D_TAG(seq) ((seq)<<1 | 1) +#define KMP_IS_D_LOCK(seq) \ + ((seq) >= KMP_FIRST_D_LOCK && (seq) <= KMP_LAST_D_LOCK) +#define KMP_IS_I_LOCK(seq) \ + ((seq) >= KMP_FIRST_I_LOCK && (seq) <= KMP_LAST_I_LOCK) +#define KMP_GET_I_TAG(seq) (kmp_indirect_locktag_t)((seq)-KMP_FIRST_I_LOCK) +#define KMP_GET_D_TAG(seq) ((seq) << 1 | 1) // Enumerates direct lock tags starting from indirect tag. typedef enum { -#define expand_tag(l,a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), - KMP_FOREACH_D_LOCK(expand_tag, 0) +#define expand_tag(l, a) locktag_##l = KMP_GET_D_TAG(lockseq_##l), + KMP_FOREACH_D_LOCK(expand_tag, 0) #undef expand_tag } kmp_direct_locktag_t; // Indirect lock type typedef struct { - kmp_user_lock_p lock; - kmp_indirect_locktag_t type; + kmp_user_lock_p lock; + kmp_indirect_locktag_t type; } kmp_indirect_lock_t; -// Function tables for direct locks. Set/unset/test differentiate functions with/without consistency checking. +// Function tables for direct locks. Set/unset/test differentiate functions +// with/without consistency checking. extern void (*__kmp_direct_init[])(kmp_dyna_lock_t *, kmp_dyna_lockseq_t); extern void (*__kmp_direct_destroy[])(kmp_dyna_lock_t *); extern void (*(*__kmp_direct_set))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); -extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_unset))(kmp_dyna_lock_t *, kmp_int32); +extern int (*(*__kmp_direct_test))(kmp_dyna_lock_t *, kmp_int32); -// Function tables for indirect locks. Set/unset/test differentiate functions with/withuot consistency checking. +// Function tables for indirect locks. Set/unset/test differentiate functions +// with/withuot consistency checking. extern void (*__kmp_indirect_init[])(kmp_user_lock_p); extern void (*__kmp_indirect_destroy[])(kmp_user_lock_p); extern void (*(*__kmp_indirect_set))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); -extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_unset))(kmp_user_lock_p, kmp_int32); +extern int (*(*__kmp_indirect_test))(kmp_user_lock_p, kmp_int32); // Extracts direct lock tag from a user lock pointer -#define KMP_EXTRACT_D_TAG(l) (*((kmp_dyna_lock_t *)(l)) & ((1<> 1) -// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t *) and op (operation type). 
+// Returns function pointer to the direct lock function with l (kmp_dyna_lock_t +// *) and op (operation type). #define KMP_D_LOCK_FUNC(l, op) __kmp_direct_##op[KMP_EXTRACT_D_TAG(l)] -// Returns function pointer to the indirect lock function with l (kmp_indirect_lock_t *) and op (operation type). -#define KMP_I_LOCK_FUNC(l, op) __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type] +// Returns function pointer to the indirect lock function with l +// (kmp_indirect_lock_t *) and op (operation type). +#define KMP_I_LOCK_FUNC(l, op) \ + __kmp_indirect_##op[((kmp_indirect_lock_t *)(l))->type] // Initializes a direct lock with the given lock pointer and lock sequence. -#define KMP_INIT_D_LOCK(l, seq) __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) +#define KMP_INIT_D_LOCK(l, seq) \ + __kmp_direct_init[KMP_GET_D_TAG(seq)]((kmp_dyna_lock_t *)l, seq) // Initializes an indirect lock with the given lock pointer and lock sequence. -#define KMP_INIT_I_LOCK(l, seq) __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq) +#define KMP_INIT_I_LOCK(l, seq) \ + __kmp_direct_init[0]((kmp_dyna_lock_t *)(l), seq) // Returns "free" lock value for the given lock type. -#define KMP_LOCK_FREE(type) (locktag_##type) +#define KMP_LOCK_FREE(type) (locktag_##type) // Returns "busy" lock value for the given lock teyp. -#define KMP_LOCK_BUSY(v, type) ((v)<>KMP_LOCK_SHIFT) +#define KMP_LOCK_STRIP(v) ((v) >> KMP_LOCK_SHIFT) -// Initializes global states and data structures for managing dynamic user locks. +// Initializes global states and data structures for managing dynamic user +// locks. extern void __kmp_init_dynamic_user_locks(); // Allocates and returns an indirect lock with the given indirect lock tag. -extern kmp_indirect_lock_t * __kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); +extern kmp_indirect_lock_t * +__kmp_allocate_indirect_lock(void **, kmp_int32, kmp_indirect_locktag_t); // Cleans up global states and data structures for managing dynamic user locks. extern void __kmp_cleanup_indirect_user_locks(); @@ -1238,72 +1204,82 @@ extern void __kmp_cleanup_indirect_user_locks(); extern kmp_dyna_lockseq_t __kmp_user_lock_seq; // Jump table for "set lock location", available only for indirect locks. -extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, const ident_t *); -#define KMP_SET_I_LOCK_LOCATION(lck, loc) { \ - if (__kmp_indirect_set_location[(lck)->type] != NULL) \ - __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ -} +extern void (*__kmp_indirect_set_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p, + const ident_t *); +#define KMP_SET_I_LOCK_LOCATION(lck, loc) \ + { \ + if (__kmp_indirect_set_location[(lck)->type] != NULL) \ + __kmp_indirect_set_location[(lck)->type]((lck)->lock, loc); \ + } // Jump table for "set lock flags", available only for indirect locks. -extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, kmp_lock_flags_t); -#define KMP_SET_I_LOCK_FLAGS(lck, flag) { \ - if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ - __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ -} +extern void (*__kmp_indirect_set_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p, + kmp_lock_flags_t); +#define KMP_SET_I_LOCK_FLAGS(lck, flag) \ + { \ + if (__kmp_indirect_set_flags[(lck)->type] != NULL) \ + __kmp_indirect_set_flags[(lck)->type]((lck)->lock, flag); \ + } // Jump table for "get lock location", available only for indirect locks. 
-extern const ident_t * (*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_LOCATION(lck) ( __kmp_indirect_get_location[(lck)->type] != NULL \ - ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ - : NULL ) +extern const ident_t *(*__kmp_indirect_get_location[KMP_NUM_I_LOCKS])( + kmp_user_lock_p); +#define KMP_GET_I_LOCK_LOCATION(lck) \ + (__kmp_indirect_get_location[(lck)->type] != NULL \ + ? __kmp_indirect_get_location[(lck)->type]((lck)->lock) \ + : NULL) // Jump table for "get lock flags", available only for indirect locks. -extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])(kmp_user_lock_p); -#define KMP_GET_I_LOCK_FLAGS(lck) ( __kmp_indirect_get_flags[(lck)->type] != NULL \ - ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ - : NULL ) +extern kmp_lock_flags_t (*__kmp_indirect_get_flags[KMP_NUM_I_LOCKS])( + kmp_user_lock_p); +#define KMP_GET_I_LOCK_FLAGS(lck) \ + (__kmp_indirect_get_flags[(lck)->type] != NULL \ + ? __kmp_indirect_get_flags[(lck)->type]((lck)->lock) \ + : NULL) -#define KMP_I_LOCK_CHUNK 1024 // number of kmp_indirect_lock_t objects to be allocated together +#define KMP_I_LOCK_CHUNK \ + 1024 // number of kmp_indirect_lock_t objects to be allocated together // Lock table for indirect locks. typedef struct kmp_indirect_lock_table { - kmp_indirect_lock_t **table; // blocks of indirect locks allocated - kmp_lock_index_t size; // size of the indirect lock table - kmp_lock_index_t next; // index to the next lock to be allocated + kmp_indirect_lock_t **table; // blocks of indirect locks allocated + kmp_lock_index_t size; // size of the indirect lock table + kmp_lock_index_t next; // index to the next lock to be allocated } kmp_indirect_lock_table_t; extern kmp_indirect_lock_table_t __kmp_i_lock_table; // Returns the indirect lock associated with the given index. -#define KMP_GET_I_LOCK(index) (*(__kmp_i_lock_table.table + (index)/KMP_I_LOCK_CHUNK) + (index)%KMP_I_LOCK_CHUNK) +#define KMP_GET_I_LOCK(index) \ + (*(__kmp_i_lock_table.table + (index) / KMP_I_LOCK_CHUNK) + \ + (index) % KMP_I_LOCK_CHUNK) // Number of locks in a lock block, which is fixed to "1" now. -// TODO: No lock block implementation now. If we do support, we need to manage lock block data -// structure for each indirect lock type. +// TODO: No lock block implementation now. If we do support, we need to manage +// lock block data structure for each indirect lock type. extern int __kmp_num_locks_in_block; // Fast lock table lookup without consistency checking -#define KMP_LOOKUP_I_LOCK(l) ( (OMP_LOCK_T_SIZE < sizeof(void *)) \ - ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ - : *((kmp_indirect_lock_t **)(l)) ) +#define KMP_LOOKUP_I_LOCK(l) \ + ((OMP_LOCK_T_SIZE < sizeof(void *)) ? KMP_GET_I_LOCK(KMP_EXTRACT_I_INDEX(l)) \ + : *((kmp_indirect_lock_t **)(l))) // Used once in kmp_error.cpp -extern kmp_int32 -__kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); +extern kmp_int32 __kmp_get_user_lock_owner(kmp_user_lock_p, kmp_uint32); #else // KMP_USE_DYNAMIC_LOCK -# define KMP_LOCK_BUSY(v, type) (v) -# define KMP_LOCK_FREE(type) 0 -# define KMP_LOCK_STRIP(v) (v) +#define KMP_LOCK_BUSY(v, type) (v) +#define KMP_LOCK_FREE(type) 0 +#define KMP_LOCK_STRIP(v) (v) #endif // KMP_USE_DYNAMIC_LOCK // data structure for using backoff within spin locks. 
typedef struct { - kmp_uint32 step; // current step - kmp_uint32 max_backoff; // upper bound of outer delay loop - kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent) + kmp_uint32 step; // current step + kmp_uint32 max_backoff; // upper bound of outer delay loop + kmp_uint32 min_tick; // size of inner delay loop in ticks (machine-dependent) } kmp_backoff_t; // Runtime's default backoff parameters @@ -1317,4 +1293,3 @@ extern void __kmp_spin_backoff(kmp_backoff_t *); #endif // __cplusplus #endif /* KMP_LOCK_H */ - diff --git a/openmp/runtime/src/kmp_omp.h b/openmp/runtime/src/kmp_omp.h index 6a76023..9684fd3 100644 --- a/openmp/runtime/src/kmp_omp.h +++ b/openmp/runtime/src/kmp_omp.h @@ -16,216 +16,224 @@ /* THIS FILE SHOULD NOT BE MODIFIED IN IDB INTERFACE LIBRARY CODE - * It should instead be modified in the OpenMP runtime and copied - * to the interface library code. This way we can minimize the - * problems that this is sure to cause having two copies of the - * same file. - * - * files live in libomp and libomp_db/src/include - */ + It should instead be modified in the OpenMP runtime and copied to the + interface library code. This way we can minimize the problems that this is + sure to cause having two copies of the same file. + + Files live in libomp and libomp_db/src/include */ /* CHANGE THIS WHEN STRUCTURES BELOW CHANGE - * Before we release this to a customer, please don't change this value. After it is released and - * stable, then any new updates to the structures or data structure traversal algorithms need to - * change this value. - */ + Before we release this to a customer, please don't change this value. After + it is released and stable, then any new updates to the structures or data + structure traversal algorithms need to change this value. */ #define KMP_OMP_VERSION 9 typedef struct { - kmp_int32 offset; - kmp_int32 size; + kmp_int32 offset; + kmp_int32 size; } offset_and_size_t; typedef struct { - kmp_uint64 addr; - kmp_int32 size; - kmp_int32 padding; + kmp_uint64 addr; + kmp_int32 size; + kmp_int32 padding; } addr_and_size_t; typedef struct { - kmp_uint64 flags; // Flags for future extensions. - kmp_uint64 file; // Pointer to name of source file where the parallel region is. - kmp_uint64 func; // Pointer to name of routine where the parallel region is. - kmp_int32 begin; // Beginning of source line range. - kmp_int32 end; // End of source line range. - kmp_int32 num_threads; // Specified number of threads. + kmp_uint64 flags; // Flags for future extensions. + kmp_uint64 + file; // Pointer to name of source file where the parallel region is. + kmp_uint64 func; // Pointer to name of routine where the parallel region is. + kmp_int32 begin; // Beginning of source line range. + kmp_int32 end; // End of source line range. + kmp_int32 num_threads; // Specified number of threads. } kmp_omp_nthr_item_t; typedef struct { - kmp_int32 num; // Number of items in the arrray. - kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t. + kmp_int32 num; // Number of items in the arrray. + kmp_uint64 array; // Address of array of kmp_omp_num_threads_item_t. } kmp_omp_nthr_info_t; - /* This structure is known to the idb interface library */ typedef struct { - /* Change this only if you make a fundamental data structure change here */ - kmp_int32 lib_version; - - /* sanity check. 
Only should be checked if versions are identical - * This is also used for backward compatibility to get the runtime - * structure size if it the runtime is older than the interface */ - kmp_int32 sizeof_this_structure; - - /* OpenMP RTL version info. */ - addr_and_size_t major; - addr_and_size_t minor; - addr_and_size_t build; - addr_and_size_t openmp_version; - addr_and_size_t banner; - - /* Various globals. */ - addr_and_size_t threads; // Pointer to __kmp_threads. - addr_and_size_t roots; // Pointer to __kmp_root. - addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. - addr_and_size_t monitor; // Pointer to __kmp_monitor. -#if ! KMP_USE_DYNAMIC_LOCK - addr_and_size_t lock_table; // Pointer to __kmp_lock_table. + /* Change this only if you make a fundamental data structure change here */ + kmp_int32 lib_version; + + /* sanity check. Only should be checked if versions are identical + * This is also used for backward compatibility to get the runtime + * structure size if it the runtime is older than the interface */ + kmp_int32 sizeof_this_structure; + + /* OpenMP RTL version info. */ + addr_and_size_t major; + addr_and_size_t minor; + addr_and_size_t build; + addr_and_size_t openmp_version; + addr_and_size_t banner; + + /* Various globals. */ + addr_and_size_t threads; // Pointer to __kmp_threads. + addr_and_size_t roots; // Pointer to __kmp_root. + addr_and_size_t capacity; // Pointer to __kmp_threads_capacity. + addr_and_size_t monitor; // Pointer to __kmp_monitor. +#if !KMP_USE_DYNAMIC_LOCK + addr_and_size_t lock_table; // Pointer to __kmp_lock_table. #endif - addr_and_size_t func_microtask; - addr_and_size_t func_fork; - addr_and_size_t func_fork_teams; - addr_and_size_t team_counter; - addr_and_size_t task_counter; - addr_and_size_t nthr_info; - kmp_int32 address_width; - kmp_int32 indexed_locks; - kmp_int32 last_barrier; // The end in enum barrier_type - kmp_int32 deque_size; // TASK_DEQUE_SIZE - - /* thread structure information. */ - kmp_int32 th_sizeof_struct; - offset_and_size_t th_info; // descriptor for thread - offset_and_size_t th_team; // team for this thread - offset_and_size_t th_root; // root for this thread - offset_and_size_t th_serial_team; // serial team under this thread - offset_and_size_t th_ident; // location for this thread (if available) - offset_and_size_t th_spin_here; // is thread waiting for lock (if available) - offset_and_size_t th_next_waiting; // next thread waiting for lock (if available) - offset_and_size_t th_task_team; // task team struct - offset_and_size_t th_current_task; // innermost task being executed - offset_and_size_t th_task_state; // alternating 0/1 for task team identification - offset_and_size_t th_bar; - offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it arrives to the barrier + addr_and_size_t func_microtask; + addr_and_size_t func_fork; + addr_and_size_t func_fork_teams; + addr_and_size_t team_counter; + addr_and_size_t task_counter; + addr_and_size_t nthr_info; + kmp_int32 address_width; + kmp_int32 indexed_locks; + kmp_int32 last_barrier; // The end in enum barrier_type + kmp_int32 deque_size; // TASK_DEQUE_SIZE + + /* thread structure information. 
*/ + kmp_int32 th_sizeof_struct; + offset_and_size_t th_info; // descriptor for thread + offset_and_size_t th_team; // team for this thread + offset_and_size_t th_root; // root for this thread + offset_and_size_t th_serial_team; // serial team under this thread + offset_and_size_t th_ident; // location for this thread (if available) + offset_and_size_t th_spin_here; // is thread waiting for lock (if available) + offset_and_size_t + th_next_waiting; // next thread waiting for lock (if available) + offset_and_size_t th_task_team; // task team struct + offset_and_size_t th_current_task; // innermost task being executed + offset_and_size_t + th_task_state; // alternating 0/1 for task team identification + offset_and_size_t th_bar; + offset_and_size_t th_b_worker_arrived; // the worker increases it by 1 when it +// arrives to the barrier #if OMP_40_ENABLED - /* teams information */ - offset_and_size_t th_teams_microtask;// entry address for teams construct - offset_and_size_t th_teams_level; // initial level of teams construct - offset_and_size_t th_teams_nteams; // number of teams in a league - offset_and_size_t th_teams_nth; // number of threads in each team of the league + /* teams information */ + offset_and_size_t th_teams_microtask; // entry address for teams construct + offset_and_size_t th_teams_level; // initial level of teams construct + offset_and_size_t th_teams_nteams; // number of teams in a league + offset_and_size_t + th_teams_nth; // number of threads in each team of the league #endif - /* kmp_desc structure (for info field above) */ - kmp_int32 ds_sizeof_struct; - offset_and_size_t ds_tid; // team thread id - offset_and_size_t ds_gtid; // global thread id - offset_and_size_t ds_thread; // native thread id - - /* team structure information */ - kmp_int32 t_sizeof_struct; - offset_and_size_t t_master_tid; // tid of master in parent team - offset_and_size_t t_ident; // location of parallel region - offset_and_size_t t_parent; // parent team - offset_and_size_t t_nproc; // # team threads - offset_and_size_t t_threads; // array of threads - offset_and_size_t t_serialized; // # levels of serialized teams - offset_and_size_t t_id; // unique team id - offset_and_size_t t_pkfn; - offset_and_size_t t_task_team; // task team structure - offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task + /* kmp_desc structure (for info field above) */ + kmp_int32 ds_sizeof_struct; + offset_and_size_t ds_tid; // team thread id + offset_and_size_t ds_gtid; // global thread id + offset_and_size_t ds_thread; // native thread id + + /* team structure information */ + kmp_int32 t_sizeof_struct; + offset_and_size_t t_master_tid; // tid of master in parent team + offset_and_size_t t_ident; // location of parallel region + offset_and_size_t t_parent; // parent team + offset_and_size_t t_nproc; // # team threads + offset_and_size_t t_threads; // array of threads + offset_and_size_t t_serialized; // # levels of serialized teams + offset_and_size_t t_id; // unique team id + offset_and_size_t t_pkfn; + offset_and_size_t t_task_team; // task team structure + offset_and_size_t t_implicit_task; // taskdata for the thread's implicit task #if OMP_40_ENABLED - offset_and_size_t t_cancel_request; + offset_and_size_t t_cancel_request; #endif - offset_and_size_t t_bar; - offset_and_size_t t_b_master_arrived; // increased by 1 when master arrives to a barrier - offset_and_size_t t_b_team_arrived; // increased by one when all the threads arrived - - /* root structure information */ - kmp_int32 
r_sizeof_struct; - offset_and_size_t r_root_team; // team at root - offset_and_size_t r_hot_team; // hot team for this root - offset_and_size_t r_uber_thread; // root thread - offset_and_size_t r_root_id; // unique root id (if available) - - /* ident structure information */ - kmp_int32 id_sizeof_struct; - offset_and_size_t id_psource; /* address of string ";file;func;line1;line2;;". */ - offset_and_size_t id_flags; - - /* lock structure information */ - kmp_int32 lk_sizeof_struct; - offset_and_size_t lk_initialized; - offset_and_size_t lk_location; - offset_and_size_t lk_tail_id; - offset_and_size_t lk_head_id; - offset_and_size_t lk_next_ticket; - offset_and_size_t lk_now_serving; - offset_and_size_t lk_owner_id; - offset_and_size_t lk_depth_locked; - offset_and_size_t lk_lock_flags; - -#if ! KMP_USE_DYNAMIC_LOCK - /* lock_table_t */ - kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. */ - offset_and_size_t lt_used; - offset_and_size_t lt_allocated; - offset_and_size_t lt_table; + offset_and_size_t t_bar; + offset_and_size_t + t_b_master_arrived; // increased by 1 when master arrives to a barrier + offset_and_size_t + t_b_team_arrived; // increased by one when all the threads arrived + + /* root structure information */ + kmp_int32 r_sizeof_struct; + offset_and_size_t r_root_team; // team at root + offset_and_size_t r_hot_team; // hot team for this root + offset_and_size_t r_uber_thread; // root thread + offset_and_size_t r_root_id; // unique root id (if available) + + /* ident structure information */ + kmp_int32 id_sizeof_struct; + offset_and_size_t + id_psource; /* address of string ";file;func;line1;line2;;". */ + offset_and_size_t id_flags; + + /* lock structure information */ + kmp_int32 lk_sizeof_struct; + offset_and_size_t lk_initialized; + offset_and_size_t lk_location; + offset_and_size_t lk_tail_id; + offset_and_size_t lk_head_id; + offset_and_size_t lk_next_ticket; + offset_and_size_t lk_now_serving; + offset_and_size_t lk_owner_id; + offset_and_size_t lk_depth_locked; + offset_and_size_t lk_lock_flags; + +#if !KMP_USE_DYNAMIC_LOCK + /* lock_table_t */ + kmp_int32 lt_size_of_struct; /* Size and layout of kmp_lock_table_t. 
*/ + offset_and_size_t lt_used; + offset_and_size_t lt_allocated; + offset_and_size_t lt_table; #endif - /* task_team_t */ - kmp_int32 tt_sizeof_struct; - offset_and_size_t tt_threads_data; - offset_and_size_t tt_found_tasks; - offset_and_size_t tt_nproc; - offset_and_size_t tt_unfinished_threads; - offset_and_size_t tt_active; - - /* kmp_taskdata_t */ - kmp_int32 td_sizeof_struct; - offset_and_size_t td_task_id; // task id - offset_and_size_t td_flags; // task flags - offset_and_size_t td_team; // team for this task - offset_and_size_t td_parent; // parent task - offset_and_size_t td_level; // task testing level - offset_and_size_t td_ident; // task identifier - offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) not yet deallocated - offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete - - /* Taskwait */ - offset_and_size_t td_taskwait_ident; - offset_and_size_t td_taskwait_counter; - offset_and_size_t td_taskwait_thread; // gtid + 1 of thread encountered taskwait + /* task_team_t */ + kmp_int32 tt_sizeof_struct; + offset_and_size_t tt_threads_data; + offset_and_size_t tt_found_tasks; + offset_and_size_t tt_nproc; + offset_and_size_t tt_unfinished_threads; + offset_and_size_t tt_active; + + /* kmp_taskdata_t */ + kmp_int32 td_sizeof_struct; + offset_and_size_t td_task_id; // task id + offset_and_size_t td_flags; // task flags + offset_and_size_t td_team; // team for this task + offset_and_size_t td_parent; // parent task + offset_and_size_t td_level; // task testing level + offset_and_size_t td_ident; // task identifier + offset_and_size_t td_allocated_child_tasks; // child tasks (+ current task) + // not yet deallocated + offset_and_size_t td_incomplete_child_tasks; // child tasks not yet complete + + /* Taskwait */ + offset_and_size_t td_taskwait_ident; + offset_and_size_t td_taskwait_counter; + offset_and_size_t + td_taskwait_thread; // gtid + 1 of thread encountered taskwait #if OMP_40_ENABLED - /* Taskgroup */ - offset_and_size_t td_taskgroup; // pointer to the current taskgroup - offset_and_size_t td_task_count; // number of allocated and not yet complete tasks - offset_and_size_t td_cancel; // request for cancellation of this taskgroup - - /* Task dependency */ - offset_and_size_t td_depnode; // pointer to graph node if the task has dependencies - offset_and_size_t dn_node; - offset_and_size_t dn_next; - offset_and_size_t dn_successors; - offset_and_size_t dn_task; - offset_and_size_t dn_npredecessors; - offset_and_size_t dn_nrefs; + /* Taskgroup */ + offset_and_size_t td_taskgroup; // pointer to the current taskgroup + offset_and_size_t + td_task_count; // number of allocated and not yet complete tasks + offset_and_size_t td_cancel; // request for cancellation of this taskgroup + + /* Task dependency */ + offset_and_size_t + td_depnode; // pointer to graph node if the task has dependencies + offset_and_size_t dn_node; + offset_and_size_t dn_next; + offset_and_size_t dn_successors; + offset_and_size_t dn_task; + offset_and_size_t dn_npredecessors; + offset_and_size_t dn_nrefs; #endif - offset_and_size_t dn_routine; - - /* kmp_thread_data_t */ - kmp_int32 hd_sizeof_struct; - offset_and_size_t hd_deque; - offset_and_size_t hd_deque_size; - offset_and_size_t hd_deque_head; - offset_and_size_t hd_deque_tail; - offset_and_size_t hd_deque_ntasks; - offset_and_size_t hd_deque_last_stolen; - - // The last field of stable version. 
- kmp_uint64 last_field; + offset_and_size_t dn_routine; + + /* kmp_thread_data_t */ + kmp_int32 hd_sizeof_struct; + offset_and_size_t hd_deque; + offset_and_size_t hd_deque_size; + offset_and_size_t hd_deque_head; + offset_and_size_t hd_deque_tail; + offset_and_size_t hd_deque_ntasks; + offset_and_size_t hd_deque_last_stolen; + + // The last field of stable version. + kmp_uint64 last_field; } kmp_omp_struct_info_t; diff --git a/openmp/runtime/src/kmp_os.h b/openmp/runtime/src/kmp_os.h index d15978e..9ddd271 100644 --- a/openmp/runtime/src/kmp_os.h +++ b/openmp/runtime/src/kmp_os.h @@ -19,26 +19,26 @@ #include "kmp_config.h" #include -#define KMP_FTN_PLAIN 1 -#define KMP_FTN_APPEND 2 -#define KMP_FTN_UPPER 3 +#define KMP_FTN_PLAIN 1 +#define KMP_FTN_APPEND 2 +#define KMP_FTN_UPPER 3 /* #define KMP_FTN_PREPEND 4 #define KMP_FTN_UAPPEND 5 */ -#define KMP_PTR_SKIP (sizeof(void*)) +#define KMP_PTR_SKIP (sizeof(void *)) /* -------------------------- Compiler variations ------------------------ */ -#define KMP_OFF 0 -#define KMP_ON 1 +#define KMP_OFF 0 +#define KMP_ON 1 -#define KMP_MEM_CONS_VOLATILE 0 -#define KMP_MEM_CONS_FENCE 1 +#define KMP_MEM_CONS_VOLATILE 0 +#define KMP_MEM_CONS_FENCE 1 #ifndef KMP_MEM_CONS_MODEL -# define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE +#define KMP_MEM_CONS_MODEL KMP_MEM_CONS_VOLATILE #endif /* ------------------------- Compiler recognition ---------------------- */ @@ -47,202 +47,197 @@ #define KMP_COMPILER_CLANG 0 #define KMP_COMPILER_MSVC 0 -#if defined( __INTEL_COMPILER ) -# undef KMP_COMPILER_ICC -# define KMP_COMPILER_ICC 1 -#elif defined( __clang__ ) -# undef KMP_COMPILER_CLANG -# define KMP_COMPILER_CLANG 1 -#elif defined( __GNUC__ ) -# undef KMP_COMPILER_GCC -# define KMP_COMPILER_GCC 1 -#elif defined( _MSC_VER ) -# undef KMP_COMPILER_MSVC -# define KMP_COMPILER_MSVC 1 +#if defined(__INTEL_COMPILER) +#undef KMP_COMPILER_ICC +#define KMP_COMPILER_ICC 1 +#elif defined(__clang__) +#undef KMP_COMPILER_CLANG +#define KMP_COMPILER_CLANG 1 +#elif defined(__GNUC__) +#undef KMP_COMPILER_GCC +#define KMP_COMPILER_GCC 1 +#elif defined(_MSC_VER) +#undef KMP_COMPILER_MSVC +#define KMP_COMPILER_MSVC 1 #else -# error Unknown compiler +#error Unknown compiler #endif #if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64 -# define KMP_AFFINITY_SUPPORTED 1 -# if KMP_OS_WINDOWS && KMP_ARCH_X86_64 -# define KMP_GROUP_AFFINITY 1 -# else -# define KMP_GROUP_AFFINITY 0 -# endif +#define KMP_AFFINITY_SUPPORTED 1 +#if KMP_OS_WINDOWS && KMP_ARCH_X86_64 +#define KMP_GROUP_AFFINITY 1 #else -# define KMP_AFFINITY_SUPPORTED 0 -# define KMP_GROUP_AFFINITY 0 +#define KMP_GROUP_AFFINITY 0 +#endif +#else +#define KMP_AFFINITY_SUPPORTED 0 +#define KMP_GROUP_AFFINITY 0 #endif /* Check for quad-precision extension. 
*/ #define KMP_HAVE_QUAD 0 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -# if KMP_COMPILER_ICC - /* _Quad is already defined for icc */ -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# elif KMP_COMPILER_CLANG - /* Clang doesn't support a software-implemented - 128-bit extended precision type yet */ - typedef long double _Quad; -# elif KMP_COMPILER_GCC - typedef __float128 _Quad; -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# elif KMP_COMPILER_MSVC - typedef long double _Quad; -# endif +#if KMP_COMPILER_ICC +/* _Quad is already defined for icc */ +#undef KMP_HAVE_QUAD +#define KMP_HAVE_QUAD 1 +#elif KMP_COMPILER_CLANG +/* Clang doesn't support a software-implemented + 128-bit extended precision type yet */ +typedef long double _Quad; +#elif KMP_COMPILER_GCC +typedef __float128 _Quad; +#undef KMP_HAVE_QUAD +#define KMP_HAVE_QUAD 1 +#elif KMP_COMPILER_MSVC +typedef long double _Quad; +#endif #else -# if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC - typedef long double _Quad; -# undef KMP_HAVE_QUAD -# define KMP_HAVE_QUAD 1 -# endif +#if __LDBL_MAX_EXP__ >= 16384 && KMP_COMPILER_GCC +typedef long double _Quad; +#undef KMP_HAVE_QUAD +#define KMP_HAVE_QUAD 1 +#endif #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #if KMP_OS_WINDOWS - typedef char kmp_int8; - typedef unsigned char kmp_uint8; - typedef short kmp_int16; - typedef unsigned short kmp_uint16; - typedef int kmp_int32; - typedef unsigned int kmp_uint32; -# define KMP_INT32_SPEC "d" -# define KMP_UINT32_SPEC "u" -# ifndef KMP_STRUCT64 - typedef __int64 kmp_int64; - typedef unsigned __int64 kmp_uint64; - #define KMP_INT64_SPEC "I64d" - #define KMP_UINT64_SPEC "I64u" -# else - struct kmp_struct64 { - kmp_int32 a,b; - }; - typedef struct kmp_struct64 kmp_int64; - typedef struct kmp_struct64 kmp_uint64; - /* Not sure what to use for KMP_[U]INT64_SPEC here */ -# endif -# if KMP_ARCH_X86_64 -# define KMP_INTPTR 1 - typedef __int64 kmp_intptr_t; - typedef unsigned __int64 kmp_uintptr_t; -# define KMP_INTPTR_SPEC "I64d" -# define KMP_UINTPTR_SPEC "I64u" -# endif +typedef char kmp_int8; +typedef unsigned char kmp_uint8; +typedef short kmp_int16; +typedef unsigned short kmp_uint16; +typedef int kmp_int32; +typedef unsigned int kmp_uint32; +#define KMP_INT32_SPEC "d" +#define KMP_UINT32_SPEC "u" +#ifndef KMP_STRUCT64 +typedef __int64 kmp_int64; +typedef unsigned __int64 kmp_uint64; +#define KMP_INT64_SPEC "I64d" +#define KMP_UINT64_SPEC "I64u" +#else +struct kmp_struct64 { + kmp_int32 a, b; +}; +typedef struct kmp_struct64 kmp_int64; +typedef struct kmp_struct64 kmp_uint64; +/* Not sure what to use for KMP_[U]INT64_SPEC here */ +#endif +#if KMP_ARCH_X86_64 +#define KMP_INTPTR 1 +typedef __int64 kmp_intptr_t; +typedef unsigned __int64 kmp_uintptr_t; +#define KMP_INTPTR_SPEC "I64d" +#define KMP_UINTPTR_SPEC "I64u" +#endif #endif /* KMP_OS_WINDOWS */ #if KMP_OS_UNIX - typedef char kmp_int8; - typedef unsigned char kmp_uint8; - typedef short kmp_int16; - typedef unsigned short kmp_uint16; - typedef int kmp_int32; - typedef unsigned int kmp_uint32; - typedef long long kmp_int64; - typedef unsigned long long kmp_uint64; -# define KMP_INT32_SPEC "d" -# define KMP_UINT32_SPEC "u" -# define KMP_INT64_SPEC "lld" -# define KMP_UINT64_SPEC "llu" +typedef char kmp_int8; +typedef unsigned char kmp_uint8; +typedef short kmp_int16; +typedef unsigned short kmp_uint16; +typedef int kmp_int32; +typedef unsigned int kmp_uint32; +typedef long long kmp_int64; +typedef unsigned long long kmp_uint64; +#define KMP_INT32_SPEC "d" +#define KMP_UINT32_SPEC "u" +#define 
KMP_INT64_SPEC "lld" +#define KMP_UINT64_SPEC "llu" #endif /* KMP_OS_UNIX */ #if KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS -# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC +#define KMP_SIZE_T_SPEC KMP_UINT32_SPEC #elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 -# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC +#define KMP_SIZE_T_SPEC KMP_UINT64_SPEC #else -# error "Can't determine size_t printf format specifier." +#error "Can't determine size_t printf format specifier." #endif #if KMP_ARCH_X86 -# define KMP_SIZE_T_MAX (0xFFFFFFFF) +#define KMP_SIZE_T_MAX (0xFFFFFFFF) #else -# define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) +#define KMP_SIZE_T_MAX (0xFFFFFFFFFFFFFFFF) #endif -typedef size_t kmp_size_t; -typedef float kmp_real32; -typedef double kmp_real64; +typedef size_t kmp_size_t; +typedef float kmp_real32; +typedef double kmp_real64; #ifndef KMP_INTPTR -# define KMP_INTPTR 1 - typedef long kmp_intptr_t; - typedef unsigned long kmp_uintptr_t; -# define KMP_INTPTR_SPEC "ld" -# define KMP_UINTPTR_SPEC "lu" +#define KMP_INTPTR 1 +typedef long kmp_intptr_t; +typedef unsigned long kmp_uintptr_t; +#define KMP_INTPTR_SPEC "ld" +#define KMP_UINTPTR_SPEC "lu" #endif #ifdef BUILD_I8 - typedef kmp_int64 kmp_int; - typedef kmp_uint64 kmp_uint; +typedef kmp_int64 kmp_int; +typedef kmp_uint64 kmp_uint; #else - typedef kmp_int32 kmp_int; - typedef kmp_uint32 kmp_uint; +typedef kmp_int32 kmp_int; +typedef kmp_uint32 kmp_uint; #endif /* BUILD_I8 */ -#define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) -#define KMP_INT_MIN ((kmp_int32)0x80000000) +#define KMP_INT_MAX ((kmp_int32)0x7FFFFFFF) +#define KMP_INT_MIN ((kmp_int32)0x80000000) #ifdef __cplusplus - //------------------------------------------------------------------------- - // template for debug prints specification ( d, u, lld, llu ), and to obtain - // signed/unsigned flavors of a type - template< typename T > - struct traits_t { }; - // int - template<> - struct traits_t< signed int > { - typedef signed int signed_t; - typedef unsigned int unsigned_t; - typedef double floating_t; - static char const * spec; - static const signed_t max_value = 0x7fffffff; - static const signed_t min_value = 0x80000000; - static const int type_size = sizeof(signed_t); - }; - // unsigned int - template<> - struct traits_t< unsigned int > { - typedef signed int signed_t; - typedef unsigned int unsigned_t; - typedef double floating_t; - static char const * spec; - static const unsigned_t max_value = 0xffffffff; - static const unsigned_t min_value = 0x00000000; - static const int type_size = sizeof(unsigned_t); - }; - // long long - template<> - struct traits_t< signed long long > { - typedef signed long long signed_t; - typedef unsigned long long unsigned_t; - typedef long double floating_t; - static char const * spec; - static const signed_t max_value = 0x7fffffffffffffffLL; - static const signed_t min_value = 0x8000000000000000LL; - static const int type_size = sizeof(signed_t); - }; - // unsigned long long - template<> - struct traits_t< unsigned long long > { - typedef signed long long signed_t; - typedef unsigned long long unsigned_t; - typedef long double floating_t; - static char const * spec; - static const unsigned_t max_value = 0xffffffffffffffffLL; - static const unsigned_t min_value = 0x0000000000000000LL; - static const int type_size = sizeof(unsigned_t); - }; - //------------------------------------------------------------------------- +//------------------------------------------------------------------------- +// template for debug prints 
specification ( d, u, lld, llu ), and to obtain +// signed/unsigned flavors of a type +template struct traits_t {}; +// int +template <> struct traits_t { + typedef signed int signed_t; + typedef unsigned int unsigned_t; + typedef double floating_t; + static char const *spec; + static const signed_t max_value = 0x7fffffff; + static const signed_t min_value = 0x80000000; + static const int type_size = sizeof(signed_t); +}; +// unsigned int +template <> struct traits_t { + typedef signed int signed_t; + typedef unsigned int unsigned_t; + typedef double floating_t; + static char const *spec; + static const unsigned_t max_value = 0xffffffff; + static const unsigned_t min_value = 0x00000000; + static const int type_size = sizeof(unsigned_t); +}; +// long long +template <> struct traits_t { + typedef signed long long signed_t; + typedef unsigned long long unsigned_t; + typedef long double floating_t; + static char const *spec; + static const signed_t max_value = 0x7fffffffffffffffLL; + static const signed_t min_value = 0x8000000000000000LL; + static const int type_size = sizeof(signed_t); +}; +// unsigned long long +template <> struct traits_t { + typedef signed long long signed_t; + typedef unsigned long long unsigned_t; + typedef long double floating_t; + static char const *spec; + static const unsigned_t max_value = 0xffffffffffffffffLL; + static const unsigned_t min_value = 0x0000000000000000LL; + static const int type_size = sizeof(unsigned_t); +}; +//------------------------------------------------------------------------- #endif // __cplusplus -#define KMP_EXPORT extern /* export declaration in guide libraries */ +#define KMP_EXPORT extern /* export declaration in guide libraries */ #if __GNUC__ >= 4 - #define __forceinline __inline +#define __forceinline __inline #endif -#define PAGE_SIZE (0x4000) +#define PAGE_SIZE (0x4000) #if KMP_OS_LINUX #define KMP_GET_PAGE_SIZE() getpagesize() @@ -252,11 +247,12 @@ typedef double kmp_real64; #define KMP_GET_PAGE_SIZE() PAGE_SIZE #endif -#define PAGE_ALIGNED(_addr) ( ! ((size_t) _addr & \ - (size_t)(KMP_GET_PAGE_SIZE() - 1))) -#define ALIGN_TO_PAGE(x) (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1))) +#define PAGE_ALIGNED(_addr) \ + (!((size_t)_addr & (size_t)(KMP_GET_PAGE_SIZE() - 1))) +#define ALIGN_TO_PAGE(x) \ + (void *)(((size_t)(x)) & ~((size_t)(KMP_GET_PAGE_SIZE() - 1))) -/* ---------------------- Support for cache alignment, padding, etc. -----------------*/ +/* ---------- Support for cache alignment, padding, etc. ----------------*/ #ifdef __cplusplus extern "C" { @@ -266,42 +262,39 @@ extern "C" { /* Define the default size of the cache line */ #ifndef CACHE_LINE - #define CACHE_LINE 128 /* cache line size in bytes */ +#define CACHE_LINE 128 /* cache line size in bytes */ #else - #if ( CACHE_LINE < 64 ) && ! defined( KMP_OS_DARWIN ) - // 2006-02-13: This produces too many warnings on OS X*. Disable it for a while... - #warning CACHE_LINE is too small. - #endif +#if (CACHE_LINE < 64) && !defined(KMP_OS_DARWIN) +// 2006-02-13: This produces too many warnings on OS X*. Disable for now +#warning CACHE_LINE is too small. +#endif #endif /* CACHE_LINE */ -#define KMP_CACHE_PREFETCH(ADDR) /* nothing */ +#define KMP_CACHE_PREFETCH(ADDR) /* nothing */ /* Temporary note: if performance testing of this passes, we can remove all references to KMP_DO_ALIGN and replace with KMP_ALIGN. 
*/ #if KMP_OS_UNIX && defined(__GNUC__) -# define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes))) -# define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE))) -# define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE))) -# define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) +#define KMP_DO_ALIGN(bytes) __attribute__((aligned(bytes))) +#define KMP_ALIGN_CACHE __attribute__((aligned(CACHE_LINE))) +#define KMP_ALIGN_CACHE_INTERNODE __attribute__((aligned(INTERNODE_CACHE_LINE))) +#define KMP_ALIGN(bytes) __attribute__((aligned(bytes))) #else -# define KMP_DO_ALIGN(bytes) __declspec( align(bytes) ) -# define KMP_ALIGN_CACHE __declspec( align(CACHE_LINE) ) -# define KMP_ALIGN_CACHE_INTERNODE __declspec( align(INTERNODE_CACHE_LINE) ) -# define KMP_ALIGN(bytes) __declspec( align(bytes) ) +#define KMP_DO_ALIGN(bytes) __declspec(align(bytes)) +#define KMP_ALIGN_CACHE __declspec(align(CACHE_LINE)) +#define KMP_ALIGN_CACHE_INTERNODE __declspec(align(INTERNODE_CACHE_LINE)) +#define KMP_ALIGN(bytes) __declspec(align(bytes)) #endif /* General purpose fence types for memory operations */ enum kmp_mem_fence_type { - kmp_no_fence, /* No memory fence */ - kmp_acquire_fence, /* Acquire (read) memory fence */ - kmp_release_fence, /* Release (write) memory fence */ - kmp_full_fence /* Full (read+write) memory fence */ + kmp_no_fence, /* No memory fence */ + kmp_acquire_fence, /* Acquire (read) memory fence */ + kmp_release_fence, /* Release (write) memory fence */ + kmp_full_fence /* Full (read+write) memory fence */ }; - -// // Synchronization primitives -// #if KMP_ASM_INTRINS && KMP_OS_WINDOWS @@ -312,292 +305,379 @@ enum kmp_mem_fence_type { #pragma intrinsic(InterlockedExchange) #pragma intrinsic(InterlockedExchange64) -// // Using InterlockedIncrement / InterlockedDecrement causes a library loading // ordering problem, so we use InterlockedExchangeAdd instead. 
-// -# define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) -# define KMP_TEST_THEN_INC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 1 ) -# define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), 4 ) -# define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) InterlockedExchangeAdd( (volatile long *)(p), -1 ) -# define KMP_TEST_THEN_ADD32(p, v) InterlockedExchangeAdd( (volatile long *)(p), (v) ) - -extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) InterlockedCompareExchange( (volatile long *)(p),(long)(sv),(long)(cv) ) - -# define KMP_XCHG_FIXED32(p, v) InterlockedExchange( (volatile long *)(p), (long)(v) ) -# define KMP_XCHG_FIXED64(p, v) InterlockedExchange64( (volatile kmp_int64 *)(p), (kmp_int64)(v) ) - -inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) -{ - kmp_int32 tmp = InterlockedExchange( (volatile long *)p, *(long *)&v); - return *(kmp_real32*)&tmp; +#define KMP_TEST_THEN_INC32(p) InterlockedExchangeAdd((volatile long *)(p), 1) +#define KMP_TEST_THEN_INC_ACQ32(p) \ + InterlockedExchangeAdd((volatile long *)(p), 1) +#define KMP_TEST_THEN_ADD4_32(p) InterlockedExchangeAdd((volatile long *)(p), 4) +#define KMP_TEST_THEN_ADD4_ACQ32(p) \ + InterlockedExchangeAdd((volatile long *)(p), 4) +#define KMP_TEST_THEN_DEC32(p) InterlockedExchangeAdd((volatile long *)(p), -1) +#define KMP_TEST_THEN_DEC_ACQ32(p) \ + InterlockedExchangeAdd((volatile long *)(p), -1) +#define KMP_TEST_THEN_ADD32(p, v) \ + InterlockedExchangeAdd((volatile long *)(p), (v)) + +extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); +#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ + InterlockedCompareExchange((volatile long *)(p), (long)(sv), (long)(cv)) + +#define KMP_XCHG_FIXED32(p, v) \ + InterlockedExchange((volatile long *)(p), (long)(v)) +#define KMP_XCHG_FIXED64(p, v) \ + InterlockedExchange64((volatile kmp_int64 *)(p), (kmp_int64)(v)) + +inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { + kmp_int32 tmp = InterlockedExchange((volatile long *)p, *(long *)&v); + return *(kmp_real32 *)&tmp; } -// // Routines that we still need to implement in assembly. 
-// -extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); - -extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); - -extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); -extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); -extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); -# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) - -//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8( (p), (v) ) -# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) -//# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) -# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) -//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) -//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) -//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) -//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) -//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), 
(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) - -# if KMP_ARCH_X86 -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) -# else /* 64 bit pointers */ -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) -# endif /* KMP_ARCH_X86 */ - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) -//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) - -# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); -# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); -//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); -//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); -//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); -# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); +extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 v); + +extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, + kmp_int8 sv); +extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, + kmp_int16 sv); +extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, + kmp_int32 sv); +extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, + kmp_int64 sv); +extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, + kmp_int8 sv); +extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, + kmp_int16 cv, kmp_int16 sv); +extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, + kmp_int32 cv, kmp_int32 sv); +extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, + kmp_int64 cv, kmp_int64 sv); + +extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); +extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); +extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); +#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) + +//# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 +//) +#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) +#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) +//# define KMP_TEST_THEN_INC_ACQ32(p) 
__kmp_test_then_add32( (p), 1 +//) +#define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) +#define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) +//# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 +//) +//# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 +//) +#define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) +#define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) +//# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 +//) +//# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 +//) +#define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) +#define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) +//# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), +//(v) ) +#define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) + +#define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) +#define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) +#define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) +#define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) + +#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ + __kmp_compare_and_store8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ + __kmp_compare_and_store8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ + __kmp_compare_and_store16((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ + __kmp_compare_and_store16((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ + __kmp_compare_and_store32((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ + __kmp_compare_and_store32((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ + __kmp_compare_and_store64((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ + __kmp_compare_and_store64((p), (cv), (sv)) + +#if KMP_ARCH_X86 +#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) +#else /* 64 bit pointers */ +#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) +#endif /* KMP_ARCH_X86 */ +#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ + __kmp_compare_and_store_ret8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ + __kmp_compare_and_store_ret16((p), (cv), (sv)) +//# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( +//(p), (cv), (sv) ) +#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ + __kmp_compare_and_store_ret64((p), (cv), (sv)) + +#define KMP_XCHG_FIXED8(p, v) \ + __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); +#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); +//# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) +//); +//# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) +//); +//# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); +#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); #elif (KMP_ASM_INTRINS && KMP_OS_UNIX) || !(KMP_ARCH_X86 || KMP_ARCH_X86_64) -# define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add( (kmp_int8 *)(p), (v) ) +#define KMP_TEST_THEN_ADD8(p, v) __sync_fetch_and_add((kmp_int8 *)(p), (v)) /* cast p to correct type so that proper intrinsic will be used */ -# define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or( (kmp_int8 *)(p), (v) ) -# define 
KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and( (kmp_int8 *)(p), (v) ) -# define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add( (kmp_int32 *)(p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add( (kmp_int64 *)(p), 4LL ) -# define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub( (kmp_int32 *)(p), 1 ) -# define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub( (kmp_int64 *)(p), 1LL ) -# define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add( (kmp_int64 *)(p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and( (kmp_int32 *)(p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or( (kmp_int64 *)(p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and( (kmp_int64 *)(p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) -# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __sync_bool_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __sync_bool_compare_and_swap( (volatile void **)(p),(void *)(cv),(void *)(sv) ) - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint8 *)(p),(kmp_uint8)(cv),(kmp_uint8)(sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint16 *)(p),(kmp_uint16)(cv),(kmp_uint16)(sv) ) -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint32 *)(p),(kmp_uint32)(cv),(kmp_uint32)(sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __sync_val_compare_and_swap( (volatile kmp_uint64 *)(p),(kmp_uint64)(cv),(kmp_uint64)(sv) ) - -#define KMP_XCHG_FIXED8(p, v) __sync_lock_test_and_set( (volatile kmp_uint8 *)(p), (kmp_uint8)(v) ) -#define KMP_XCHG_FIXED16(p, v) __sync_lock_test_and_set( (volatile kmp_uint16 *)(p), (kmp_uint16)(v) ) -#define KMP_XCHG_FIXED32(p, v) __sync_lock_test_and_set( (volatile kmp_uint32 
*)(p), (kmp_uint32)(v) ) -#define KMP_XCHG_FIXED64(p, v) __sync_lock_test_and_set( (volatile kmp_uint64 *)(p), (kmp_uint64)(v) ) - -extern kmp_int8 __kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int8 __kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 v ); -inline kmp_real32 KMP_XCHG_REAL32( volatile kmp_real32 *p, kmp_real32 v) -{ - kmp_int32 tmp = __sync_lock_test_and_set( (kmp_int32*)p, *(kmp_int32*)&v); - return *(kmp_real32*)&tmp; +#define KMP_TEST_THEN_INC32(p) __sync_fetch_and_add((kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_OR8(p, v) __sync_fetch_and_or((kmp_int8 *)(p), (v)) +#define KMP_TEST_THEN_AND8(p, v) __sync_fetch_and_and((kmp_int8 *)(p), (v)) +#define KMP_TEST_THEN_INC_ACQ32(p) __sync_fetch_and_add((kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_INC64(p) __sync_fetch_and_add((kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_INC_ACQ64(p) __sync_fetch_and_add((kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_ADD4_32(p) __sync_fetch_and_add((kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_ACQ32(p) __sync_fetch_and_add((kmp_int32 *)(p), 4) +#define KMP_TEST_THEN_ADD4_64(p) __sync_fetch_and_add((kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_ADD4_ACQ64(p) __sync_fetch_and_add((kmp_int64 *)(p), 4LL) +#define KMP_TEST_THEN_DEC32(p) __sync_fetch_and_sub((kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_DEC_ACQ32(p) __sync_fetch_and_sub((kmp_int32 *)(p), 1) +#define KMP_TEST_THEN_DEC64(p) __sync_fetch_and_sub((kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_DEC_ACQ64(p) __sync_fetch_and_sub((kmp_int64 *)(p), 1LL) +#define KMP_TEST_THEN_ADD32(p, v) __sync_fetch_and_add((kmp_int32 *)(p), (v)) +#define KMP_TEST_THEN_ADD64(p, v) __sync_fetch_and_add((kmp_int64 *)(p), (v)) + +#define KMP_TEST_THEN_OR32(p, v) __sync_fetch_and_or((kmp_int32 *)(p), (v)) +#define KMP_TEST_THEN_AND32(p, v) __sync_fetch_and_and((kmp_int32 *)(p), (v)) +#define KMP_TEST_THEN_OR64(p, v) __sync_fetch_and_or((kmp_int64 *)(p), (v)) +#define KMP_TEST_THEN_AND64(p, v) __sync_fetch_and_and((kmp_int64 *)(p), (v)) + +#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ + (kmp_uint8)(sv)) +#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ + (kmp_uint8)(sv)) +#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ + (kmp_uint16)(sv)) +#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ + (kmp_uint16)(sv)) +#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ + (kmp_uint32)(sv)) +#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ + (kmp_uint32)(sv)) +#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ + (kmp_uint64)(sv)) +#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ + (kmp_uint64)(sv)) +#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ + __sync_bool_compare_and_swap((volatile void **)(p), (void *)(cv), \ + (void *)(sv)) + +#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ + __sync_val_compare_and_swap((volatile kmp_uint8 *)(p), (kmp_uint8)(cv), \ + 
(kmp_uint8)(sv)) +#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ + __sync_val_compare_and_swap((volatile kmp_uint16 *)(p), (kmp_uint16)(cv), \ + (kmp_uint16)(sv)) +#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ + __sync_val_compare_and_swap((volatile kmp_uint32 *)(p), (kmp_uint32)(cv), \ + (kmp_uint32)(sv)) +#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ + __sync_val_compare_and_swap((volatile kmp_uint64 *)(p), (kmp_uint64)(cv), \ + (kmp_uint64)(sv)) + +#define KMP_XCHG_FIXED8(p, v) \ + __sync_lock_test_and_set((volatile kmp_uint8 *)(p), (kmp_uint8)(v)) +#define KMP_XCHG_FIXED16(p, v) \ + __sync_lock_test_and_set((volatile kmp_uint16 *)(p), (kmp_uint16)(v)) +#define KMP_XCHG_FIXED32(p, v) \ + __sync_lock_test_and_set((volatile kmp_uint32 *)(p), (kmp_uint32)(v)) +#define KMP_XCHG_FIXED64(p, v) \ + __sync_lock_test_and_set((volatile kmp_uint64 *)(p), (kmp_uint64)(v)) + +extern kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 v); +inline kmp_real32 KMP_XCHG_REAL32(volatile kmp_real32 *p, kmp_real32 v) { + kmp_int32 tmp = __sync_lock_test_and_set((kmp_int32 *)p, *(kmp_int32 *)&v); + return *(kmp_real32 *)&tmp; } -inline kmp_real64 KMP_XCHG_REAL64( volatile kmp_real64 *p, kmp_real64 v) -{ - kmp_int64 tmp = __sync_lock_test_and_set( (kmp_int64*)p, *(kmp_int64*)&v); - return *(kmp_real64*)&tmp; +inline kmp_real64 KMP_XCHG_REAL64(volatile kmp_real64 *p, kmp_real64 v) { + kmp_int64 tmp = __sync_lock_test_and_set((kmp_int64 *)p, *(kmp_int64 *)&v); + return *(kmp_real64 *)&tmp; } #else -extern kmp_int32 __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int32 __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_int64 __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 v ); - -extern kmp_int8 __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int32 __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -extern kmp_int8 __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -extern kmp_int16 __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -extern kmp_int32 __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -extern kmp_int64 __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); - -extern kmp_int8 __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 v ); -extern kmp_int16 __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 v ); -extern kmp_int32 __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 v ); -extern kmp_int64 __kmp_xchg_fixed64( volatile kmp_int64 *p, kmp_int64 v ); -extern kmp_real32 __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 v ); -# define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8( (p), (v) ) -extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); - -# define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_OR8(p, v) 
__kmp_test_then_or8( (p), (v) ) -# define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8( (p), (v) ) -# define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32( (p), 1 ) -# define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64( (p), 1LL ) -# define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64( (p), 1LL ) -# define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32( (p), 4 ) -# define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64( (p), 4LL ) -# define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32( (p), -1 ) -# define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64( (p), -1LL ) -# define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32( (p), (v) ) -# define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64( (p), (v) ) - -# define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32( (p), (v) ) -# define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32( (p), (v) ) -# define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64( (p), (v) ) -# define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64( (p), (v) ) - -# define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) __kmp_compare_and_store8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) __kmp_compare_and_store16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) __kmp_compare_and_store32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) __kmp_compare_and_store64( (p), (cv), (sv) ) - -# if KMP_ARCH_X86 -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store32( (volatile kmp_int32*)(p), (kmp_int32)(cv), (kmp_int32)(sv) ) -# else /* 64 bit pointers */ -# define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) __kmp_compare_and_store64( (volatile kmp_int64*)(p), (kmp_int64)(cv), (kmp_int64)(sv) ) -# endif /* KMP_ARCH_X86 */ - -# define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) __kmp_compare_and_store_ret8( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) __kmp_compare_and_store_ret16( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) __kmp_compare_and_store_ret32( (p), (cv), (sv) ) -# define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) __kmp_compare_and_store_ret64( (p), (cv), (sv) ) - -# define KMP_XCHG_FIXED8(p, v) __kmp_xchg_fixed8( (volatile kmp_int8*)(p), (kmp_int8)(v) ); -# define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16( (p), (v) ); -# define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32( (p), (v) ); -# define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64( (p), (v) ); -# define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32( (p), (v) ); -# define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64( (p), (v) ); +extern kmp_int32 __kmp_test_then_add32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, 
kmp_int64 v); +extern kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 v); + +extern kmp_int8 __kmp_compare_and_store8(volatile kmp_int8 *p, kmp_int8 cv, + kmp_int8 sv); +extern kmp_int16 __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, + kmp_int16 sv); +extern kmp_int32 __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, + kmp_int32 sv); +extern kmp_int32 __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, + kmp_int64 sv); +extern kmp_int8 __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, + kmp_int8 sv); +extern kmp_int16 __kmp_compare_and_store_ret16(volatile kmp_int16 *p, + kmp_int16 cv, kmp_int16 sv); +extern kmp_int32 __kmp_compare_and_store_ret32(volatile kmp_int32 *p, + kmp_int32 cv, kmp_int32 sv); +extern kmp_int64 __kmp_compare_and_store_ret64(volatile kmp_int64 *p, + kmp_int64 cv, kmp_int64 sv); + +extern kmp_int8 __kmp_xchg_fixed8(volatile kmp_int8 *p, kmp_int8 v); +extern kmp_int16 __kmp_xchg_fixed16(volatile kmp_int16 *p, kmp_int16 v); +extern kmp_int32 __kmp_xchg_fixed32(volatile kmp_int32 *p, kmp_int32 v); +extern kmp_int64 __kmp_xchg_fixed64(volatile kmp_int64 *p, kmp_int64 v); +extern kmp_real32 __kmp_xchg_real32(volatile kmp_real32 *p, kmp_real32 v); +#define KMP_TEST_THEN_ADD8(p, v) __kmp_test_then_add8((p), (v)) +extern kmp_real64 __kmp_xchg_real64(volatile kmp_real64 *p, kmp_real64 v); + +#define KMP_TEST_THEN_INC32(p) __kmp_test_then_add32((p), 1) +#define KMP_TEST_THEN_OR8(p, v) __kmp_test_then_or8((p), (v)) +#define KMP_TEST_THEN_AND8(p, v) __kmp_test_then_and8((p), (v)) +#define KMP_TEST_THEN_INC_ACQ32(p) __kmp_test_then_add32((p), 1) +#define KMP_TEST_THEN_INC64(p) __kmp_test_then_add64((p), 1LL) +#define KMP_TEST_THEN_INC_ACQ64(p) __kmp_test_then_add64((p), 1LL) +#define KMP_TEST_THEN_ADD4_32(p) __kmp_test_then_add32((p), 4) +#define KMP_TEST_THEN_ADD4_ACQ32(p) __kmp_test_then_add32((p), 4) +#define KMP_TEST_THEN_ADD4_64(p) __kmp_test_then_add64((p), 4LL) +#define KMP_TEST_THEN_ADD4_ACQ64(p) __kmp_test_then_add64((p), 4LL) +#define KMP_TEST_THEN_DEC32(p) __kmp_test_then_add32((p), -1) +#define KMP_TEST_THEN_DEC_ACQ32(p) __kmp_test_then_add32((p), -1) +#define KMP_TEST_THEN_DEC64(p) __kmp_test_then_add64((p), -1LL) +#define KMP_TEST_THEN_DEC_ACQ64(p) __kmp_test_then_add64((p), -1LL) +#define KMP_TEST_THEN_ADD32(p, v) __kmp_test_then_add32((p), (v)) +#define KMP_TEST_THEN_ADD64(p, v) __kmp_test_then_add64((p), (v)) + +#define KMP_TEST_THEN_OR32(p, v) __kmp_test_then_or32((p), (v)) +#define KMP_TEST_THEN_AND32(p, v) __kmp_test_then_and32((p), (v)) +#define KMP_TEST_THEN_OR64(p, v) __kmp_test_then_or64((p), (v)) +#define KMP_TEST_THEN_AND64(p, v) __kmp_test_then_and64((p), (v)) + +#define KMP_COMPARE_AND_STORE_ACQ8(p, cv, sv) \ + __kmp_compare_and_store8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL8(p, cv, sv) \ + __kmp_compare_and_store8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ16(p, cv, sv) \ + __kmp_compare_and_store16((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL16(p, cv, sv) \ + __kmp_compare_and_store16((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ32(p, cv, sv) \ + __kmp_compare_and_store32((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL32(p, cv, sv) \ + __kmp_compare_and_store32((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_ACQ64(p, cv, sv) \ + __kmp_compare_and_store64((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_REL64(p, cv, sv) \ + __kmp_compare_and_store64((p), (cv), (sv)) -#endif /* KMP_ASM_INTRINS */ +#if KMP_ARCH_X86 +#define 
KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ + __kmp_compare_and_store32((volatile kmp_int32 *)(p), (kmp_int32)(cv), \ + (kmp_int32)(sv)) +#else /* 64 bit pointers */ +#define KMP_COMPARE_AND_STORE_PTR(p, cv, sv) \ + __kmp_compare_and_store64((volatile kmp_int64 *)(p), (kmp_int64)(cv), \ + (kmp_int64)(sv)) +#endif /* KMP_ARCH_X86 */ +#define KMP_COMPARE_AND_STORE_RET8(p, cv, sv) \ + __kmp_compare_and_store_ret8((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_RET16(p, cv, sv) \ + __kmp_compare_and_store_ret16((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_RET32(p, cv, sv) \ + __kmp_compare_and_store_ret32((p), (cv), (sv)) +#define KMP_COMPARE_AND_STORE_RET64(p, cv, sv) \ + __kmp_compare_and_store_ret64((p), (cv), (sv)) + +#define KMP_XCHG_FIXED8(p, v) \ + __kmp_xchg_fixed8((volatile kmp_int8 *)(p), (kmp_int8)(v)); +#define KMP_XCHG_FIXED16(p, v) __kmp_xchg_fixed16((p), (v)); +#define KMP_XCHG_FIXED32(p, v) __kmp_xchg_fixed32((p), (v)); +#define KMP_XCHG_FIXED64(p, v) __kmp_xchg_fixed64((p), (v)); +#define KMP_XCHG_REAL32(p, v) __kmp_xchg_real32((p), (v)); +#define KMP_XCHG_REAL64(p, v) __kmp_xchg_real64((p), (v)); + +#endif /* KMP_ASM_INTRINS */ /* ------------- relaxed consistency memory model stuff ------------------ */ #if KMP_OS_WINDOWS -# ifdef __ABSOFT_WIN -# define KMP_MB() asm ("nop") -# define KMP_IMB() asm ("nop") -# else -# define KMP_MB() /* _asm{ nop } */ -# define KMP_IMB() /* _asm{ nop } */ -# endif +#ifdef __ABSOFT_WIN +#define KMP_MB() asm("nop") +#define KMP_IMB() asm("nop") +#else +#define KMP_MB() /* _asm{ nop } */ +#define KMP_IMB() /* _asm{ nop } */ +#endif #endif /* KMP_OS_WINDOWS */ -#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 -# define KMP_MB() __sync_synchronize() +#if KMP_ARCH_PPC64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || \ + KMP_ARCH_MIPS64 +#define KMP_MB() __sync_synchronize() #endif #ifndef KMP_MB -# define KMP_MB() /* nothing to do */ +#define KMP_MB() /* nothing to do */ #endif #ifndef KMP_IMB -# define KMP_IMB() /* nothing to do */ +#define KMP_IMB() /* nothing to do */ #endif #ifndef KMP_ST_REL32 -# define KMP_ST_REL32(A,D) ( *(A) = (D) ) +#define KMP_ST_REL32(A, D) (*(A) = (D)) #endif #ifndef KMP_ST_REL64 -# define KMP_ST_REL64(A,D) ( *(A) = (D) ) +#define KMP_ST_REL64(A, D) (*(A) = (D)) #endif #ifndef KMP_LD_ACQ32 -# define KMP_LD_ACQ32(A) ( *(A) ) +#define KMP_LD_ACQ32(A) (*(A)) #endif #ifndef KMP_LD_ACQ64 -# define KMP_LD_ACQ64(A) ( *(A) ) +#define KMP_LD_ACQ64(A) (*(A)) #endif -#define TCR_1(a) (a) -#define TCW_1(a,b) (a) = (b) +#define TCR_1(a) (a) +#define TCW_1(a, b) (a) = (b) /* ------------------------------------------------------------------------ */ -// // FIXME - maybe this should this be // // #define TCR_4(a) (*(volatile kmp_int32 *)(&a)) @@ -608,76 +688,77 @@ extern kmp_real64 __kmp_xchg_real64( volatile kmp_real64 *p, kmp_real64 v ); // // I'm fairly certain this is the correct thing to do, but I'm afraid // of performance regressions. 
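Illustration (not from the patch): the KMP_TEST_THEN_* and KMP_COMPARE_AND_STORE_* wrappers above are thin veneers over fetch-and-op and compare-and-swap primitives; the runtime's __kmp_* entry points are only declared extern here. The sketch below shows the usage pattern they support, using GCC-style __sync builtins purely as stand-ins:

// Stand-alone sketch: a fetch-and-add plus a CAS retry loop, in the style
// the KMP_TEST_THEN_* / KMP_COMPARE_AND_STORE_* macros are used.
#include <stdio.h>

static volatile int counter = 0;

static int demo_test_then_add(volatile int *p, int v) {
  // returns the previous value, in the spirit of the KMP_TEST_THEN_ADD32 pattern
  return __sync_fetch_and_add(p, v);
}

static int demo_compare_and_store(volatile int *p, int cv, int sv) {
  // returns nonzero on success, in the spirit of KMP_COMPARE_AND_STORE_ACQ32
  return __sync_bool_compare_and_swap(p, cv, sv);
}

int main(void) {
  int old = demo_test_then_add(&counter, 1); // KMP_TEST_THEN_INC32 pattern
  int cur;
  do { // retry until no other thread changed the value underneath us
    cur = counter;
  } while (!demo_compare_and_store(&counter, cur, cur + 4)); // ADD4_32 pattern
  printf("old=%d now=%d\n", old, counter);
  return 0;
}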
-// -#define TCR_4(a) (a) -#define TCW_4(a,b) (a) = (b) -#define TCI_4(a) (++(a)) -#define TCD_4(a) (--(a)) -#define TCR_8(a) (a) -#define TCW_8(a,b) (a) = (b) -#define TCI_8(a) (++(a)) -#define TCD_8(a) (--(a)) -#define TCR_SYNC_4(a) (a) -#define TCW_SYNC_4(a,b) (a) = (b) -#define TCX_SYNC_4(a,b,c) KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), (kmp_int32)(b), (kmp_int32)(c)) -#define TCR_SYNC_8(a) (a) -#define TCW_SYNC_8(a,b) (a) = (b) -#define TCX_SYNC_8(a,b,c) KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), (kmp_int64)(b), (kmp_int64)(c)) +#define TCR_4(a) (a) +#define TCW_4(a, b) (a) = (b) +#define TCI_4(a) (++(a)) +#define TCD_4(a) (--(a)) +#define TCR_8(a) (a) +#define TCW_8(a, b) (a) = (b) +#define TCI_8(a) (++(a)) +#define TCD_8(a) (--(a)) +#define TCR_SYNC_4(a) (a) +#define TCW_SYNC_4(a, b) (a) = (b) +#define TCX_SYNC_4(a, b, c) \ + KMP_COMPARE_AND_STORE_REL32((volatile kmp_int32 *)(volatile void *)&(a), \ + (kmp_int32)(b), (kmp_int32)(c)) +#define TCR_SYNC_8(a) (a) +#define TCW_SYNC_8(a, b) (a) = (b) +#define TCX_SYNC_8(a, b, c) \ + KMP_COMPARE_AND_STORE_REL64((volatile kmp_int64 *)(volatile void *)&(a), \ + (kmp_int64)(b), (kmp_int64)(c)) #if KMP_ARCH_X86 || KMP_ARCH_MIPS // What about ARM? - #define TCR_PTR(a) ((void *)TCR_4(a)) - #define TCW_PTR(a,b) TCW_4((a),(b)) - #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) - #define TCW_SYNC_PTR(a,b) TCW_SYNC_4((a),(b)) - #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_4((a),(b),(c))) +#define TCR_PTR(a) ((void *)TCR_4(a)) +#define TCW_PTR(a, b) TCW_4((a), (b)) +#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_4(a)) +#define TCW_SYNC_PTR(a, b) TCW_SYNC_4((a), (b)) +#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_4((a), (b), (c))) #else /* 64 bit pointers */ - #define TCR_PTR(a) ((void *)TCR_8(a)) - #define TCW_PTR(a,b) TCW_8((a),(b)) - #define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) - #define TCW_SYNC_PTR(a,b) TCW_SYNC_8((a),(b)) - #define TCX_SYNC_PTR(a,b,c) ((void *)TCX_SYNC_8((a),(b),(c))) +#define TCR_PTR(a) ((void *)TCR_8(a)) +#define TCW_PTR(a, b) TCW_8((a), (b)) +#define TCR_SYNC_PTR(a) ((void *)TCR_SYNC_8(a)) +#define TCW_SYNC_PTR(a, b) TCW_SYNC_8((a), (b)) +#define TCX_SYNC_PTR(a, b, c) ((void *)TCX_SYNC_8((a), (b), (c))) #endif /* KMP_ARCH_X86 */ -/* - * If these FTN_{TRUE,FALSE} values change, may need to - * change several places where they are used to check that - * language is Fortran, not C. - */ +/* If these FTN_{TRUE,FALSE} values change, may need to change several places + where they are used to check that language is Fortran, not C. */ #ifndef FTN_TRUE -# define FTN_TRUE TRUE +#define FTN_TRUE TRUE #endif #ifndef FTN_FALSE -# define FTN_FALSE FALSE +#define FTN_FALSE FALSE #endif -typedef void (*microtask_t)( int *gtid, int *npr, ... 
); +typedef void (*microtask_t)(int *gtid, int *npr, ...); #ifdef USE_VOLATILE_CAST -# define VOLATILE_CAST(x) (volatile x) +#define VOLATILE_CAST(x) (volatile x) #else -# define VOLATILE_CAST(x) (x) +#define VOLATILE_CAST(x) (x) #endif -#define KMP_WAIT_YIELD __kmp_wait_yield_4 -#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr -#define KMP_EQ __kmp_eq_4 -#define KMP_NEQ __kmp_neq_4 -#define KMP_LT __kmp_lt_4 -#define KMP_GE __kmp_ge_4 -#define KMP_LE __kmp_le_4 +#define KMP_WAIT_YIELD __kmp_wait_yield_4 +#define KMP_WAIT_YIELD_PTR __kmp_wait_yield_4_ptr +#define KMP_EQ __kmp_eq_4 +#define KMP_NEQ __kmp_neq_4 +#define KMP_LT __kmp_lt_4 +#define KMP_GE __kmp_ge_4 +#define KMP_LE __kmp_le_4 -/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */ +/* Workaround for Intel(R) 64 code gen bug when taking address of static array + * (Intel(R) 64 Tracker #138) */ #if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX -# define STATIC_EFI2_WORKAROUND +#define STATIC_EFI2_WORKAROUND #else -# define STATIC_EFI2_WORKAROUND static +#define STATIC_EFI2_WORKAROUND static #endif // Support of BGET usage @@ -688,38 +769,39 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... ); // Switches for OSS builds #ifndef USE_SYSFS_INFO -# define USE_SYSFS_INFO 0 +#define USE_SYSFS_INFO 0 #endif #ifndef USE_CMPXCHG_FIX -# define USE_CMPXCHG_FIX 1 +#define USE_CMPXCHG_FIX 1 #endif // Enable dynamic user lock #if OMP_45_ENABLED -# define KMP_USE_DYNAMIC_LOCK 1 +#define KMP_USE_DYNAMIC_LOCK 1 #endif // Enable TSX if dynamic user lock is turned on #if KMP_USE_DYNAMIC_LOCK // Visual studio can't handle the asm sections in this code -# define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC -# ifdef KMP_USE_ADAPTIVE_LOCKS -# undef KMP_USE_ADAPTIVE_LOCKS -# endif -# define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX +#define KMP_USE_TSX (KMP_ARCH_X86 || KMP_ARCH_X86_64) && !KMP_COMPILER_MSVC +#ifdef KMP_USE_ADAPTIVE_LOCKS +#undef KMP_USE_ADAPTIVE_LOCKS +#endif +#define KMP_USE_ADAPTIVE_LOCKS KMP_USE_TSX #endif // Enable tick time conversion of ticks to seconds #if KMP_STATS_ENABLED -# define KMP_HAVE_TICK_TIME (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) +#define KMP_HAVE_TICK_TIME \ + (KMP_OS_LINUX && (KMP_MIC || KMP_ARCH_X86 || KMP_ARCH_X86_64)) #endif // Warning levels enum kmp_warnings_level { - kmp_warnings_off = 0, /* No warnings */ - kmp_warnings_low, /* Minimal warnings (default) */ - kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ - kmp_warnings_verbose /* reserved */ + kmp_warnings_off = 0, /* No warnings */ + kmp_warnings_low, /* Minimal warnings (default) */ + kmp_warnings_explicit = 6, /* Explicitly set to ON - more warnings */ + kmp_warnings_verbose /* reserved */ }; #ifdef __cplusplus @@ -729,4 +811,3 @@ enum kmp_warnings_level { #endif /* KMP_OS_H */ // Safe C API #include "kmp_safe_c_api.h" - diff --git a/openmp/runtime/src/kmp_platform.h b/openmp/runtime/src/kmp_platform.h index 2f43cf8..95cebf9 100644 --- a/openmp/runtime/src/kmp_platform.h +++ b/openmp/runtime/src/kmp_platform.h @@ -2,6 +2,7 @@ * kmp_platform.h -- header for determining operating system and architecture */ + //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -11,171 +12,175 @@ // //===----------------------------------------------------------------------===// + #ifndef KMP_PLATFORM_H #define KMP_PLATFORM_H /* ---------------------- Operating system 
recognition ------------------- */ -#define KMP_OS_LINUX 0 -#define KMP_OS_FREEBSD 0 -#define KMP_OS_NETBSD 0 -#define KMP_OS_DARWIN 0 -#define KMP_OS_WINDOWS 0 -#define KMP_OS_CNK 0 -#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */ - +#define KMP_OS_LINUX 0 +#define KMP_OS_FREEBSD 0 +#define KMP_OS_NETBSD 0 +#define KMP_OS_DARWIN 0 +#define KMP_OS_WINDOWS 0 +#define KMP_OS_CNK 0 +#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */ #ifdef _WIN32 -# undef KMP_OS_WINDOWS -# define KMP_OS_WINDOWS 1 +#undef KMP_OS_WINDOWS +#define KMP_OS_WINDOWS 1 #endif -#if ( defined __APPLE__ && defined __MACH__ ) -# undef KMP_OS_DARWIN -# define KMP_OS_DARWIN 1 +#if (defined __APPLE__ && defined __MACH__) +#undef KMP_OS_DARWIN +#define KMP_OS_DARWIN 1 #endif // in some ppc64 linux installations, only the second condition is met -#if ( defined __linux ) -# undef KMP_OS_LINUX -# define KMP_OS_LINUX 1 -#elif ( defined __linux__) -# undef KMP_OS_LINUX -# define KMP_OS_LINUX 1 +#if (defined __linux) +#undef KMP_OS_LINUX +#define KMP_OS_LINUX 1 +#elif (defined __linux__) +#undef KMP_OS_LINUX +#define KMP_OS_LINUX 1 #else #endif -#if ( defined __FreeBSD__ ) -# undef KMP_OS_FREEBSD -# define KMP_OS_FREEBSD 1 +#if (defined __FreeBSD__) +#undef KMP_OS_FREEBSD +#define KMP_OS_FREEBSD 1 #endif -#if ( defined __NetBSD__ ) -# undef KMP_OS_NETBSD -# define KMP_OS_NETBSD 1 +#if (defined __NetBSD__) +#undef KMP_OS_NETBSD +#define KMP_OS_NETBSD 1 #endif -#if ( defined __bgq__ ) -# undef KMP_OS_CNK -# define KMP_OS_CNK 1 +#if (defined __bgq__) +#undef KMP_OS_CNK +#define KMP_OS_CNK 1 #endif -#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS) -# error Unknown OS +#if (1 != \ + KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + \ + KMP_OS_WINDOWS) +#error Unknown OS #endif #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN -# undef KMP_OS_UNIX -# define KMP_OS_UNIX 1 +#undef KMP_OS_UNIX +#define KMP_OS_UNIX 1 #endif /* ---------------------- Architecture recognition ------------------- */ -#define KMP_ARCH_X86 0 -#define KMP_ARCH_X86_64 0 -#define KMP_ARCH_AARCH64 0 -#define KMP_ARCH_PPC64_BE 0 -#define KMP_ARCH_PPC64_LE 0 +#define KMP_ARCH_X86 0 +#define KMP_ARCH_X86_64 0 +#define KMP_ARCH_AARCH64 0 +#define KMP_ARCH_PPC64_BE 0 +#define KMP_ARCH_PPC64_LE 0 #define KMP_ARCH_PPC64 (KMP_ARCH_PPC64_LE || KMP_ARCH_PPC64_BE) -#define KMP_ARCH_MIPS 0 -#define KMP_ARCH_MIPS64 0 +#define KMP_ARCH_MIPS 0 +#define KMP_ARCH_MIPS64 0 #if KMP_OS_WINDOWS -# if defined _M_AMD64 -# undef KMP_ARCH_X86_64 -# define KMP_ARCH_X86_64 1 -# else -# undef KMP_ARCH_X86 -# define KMP_ARCH_X86 1 -# endif +#if defined _M_AMD64 +#undef KMP_ARCH_X86_64 +#define KMP_ARCH_X86_64 1 +#else +#undef KMP_ARCH_X86 +#define KMP_ARCH_X86 1 +#endif #endif #if KMP_OS_UNIX -# if defined __x86_64 -# undef KMP_ARCH_X86_64 -# define KMP_ARCH_X86_64 1 -# elif defined __i386 -# undef KMP_ARCH_X86 -# define KMP_ARCH_X86 1 -# elif defined __powerpc64__ -# if defined __LITTLE_ENDIAN__ -# undef KMP_ARCH_PPC64_LE -# define KMP_ARCH_PPC64_LE 1 -# else -# undef KMP_ARCH_PPC64_BE -# define KMP_ARCH_PPC64_BE 1 -# endif -# elif defined __aarch64__ -# undef KMP_ARCH_AARCH64 -# define KMP_ARCH_AARCH64 1 -# elif defined __mips__ -# if defined __mips64 -# undef KMP_ARCH_MIPS64 -# define KMP_ARCH_MIPS64 1 -# else -# undef KMP_ARCH_MIPS -# define KMP_ARCH_MIPS 1 -# endif -# endif -#endif - -#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ +#if defined 
__x86_64 +#undef KMP_ARCH_X86_64 +#define KMP_ARCH_X86_64 1 +#elif defined __i386 +#undef KMP_ARCH_X86 +#define KMP_ARCH_X86 1 +#elif defined __powerpc64__ +#if defined __LITTLE_ENDIAN__ +#undef KMP_ARCH_PPC64_LE +#define KMP_ARCH_PPC64_LE 1 +#else +#undef KMP_ARCH_PPC64_BE +#define KMP_ARCH_PPC64_BE 1 +#endif +#elif defined __aarch64__ +#undef KMP_ARCH_AARCH64 +#define KMP_ARCH_AARCH64 1 +#elif defined __mips__ +#if defined __mips64 +#undef KMP_ARCH_MIPS64 +#define KMP_ARCH_MIPS64 1 +#else +#undef KMP_ARCH_MIPS +#define KMP_ARCH_MIPS 1 +#endif +#endif +#endif + +#if defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7R__) || \ defined(__ARM_ARCH_7A__) -# define KMP_ARCH_ARMV7 1 +#define KMP_ARCH_ARMV7 1 #endif -#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \ - defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ - defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ +#if defined(KMP_ARCH_ARMV7) || defined(__ARM_ARCH_6__) || \ + defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || \ + defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6T2__) || \ defined(__ARM_ARCH_6ZK__) -# define KMP_ARCH_ARMV6 1 +#define KMP_ARCH_ARMV6 1 #endif -#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ - defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ +#if defined(KMP_ARCH_ARMV6) || defined(__ARM_ARCH_5T__) || \ + defined(__ARM_ARCH_5E__) || defined(__ARM_ARCH_5TE__) || \ defined(__ARM_ARCH_5TEJ__) -# define KMP_ARCH_ARMV5 1 +#define KMP_ARCH_ARMV5 1 #endif -#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ +#if defined(KMP_ARCH_ARMV5) || defined(__ARM_ARCH_4__) || \ defined(__ARM_ARCH_4T__) -# define KMP_ARCH_ARMV4 1 +#define KMP_ARCH_ARMV4 1 #endif -#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ +#if defined(KMP_ARCH_ARMV4) || defined(__ARM_ARCH_3__) || \ defined(__ARM_ARCH_3M__) -# define KMP_ARCH_ARMV3 1 +#define KMP_ARCH_ARMV3 1 #endif -#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) -# define KMP_ARCH_ARMV2 1 +#if defined(KMP_ARCH_ARMV3) || defined(__ARM_ARCH_2__) +#define KMP_ARCH_ARMV2 1 #endif #if defined(KMP_ARCH_ARMV2) -# define KMP_ARCH_ARM 1 +#define KMP_ARCH_ARM 1 #endif #if defined(__MIC__) || defined(__MIC2__) -# define KMP_MIC 1 -# if __MIC2__ || __KNC__ -# define KMP_MIC1 0 -# define KMP_MIC2 1 -# else -# define KMP_MIC1 1 -# define KMP_MIC2 0 -# endif +#define KMP_MIC 1 +#if __MIC2__ || __KNC__ +#define KMP_MIC1 0 +#define KMP_MIC2 1 +#else +#define KMP_MIC1 1 +#define KMP_MIC2 0 +#endif #else -# define KMP_MIC 0 -# define KMP_MIC1 0 -# define KMP_MIC2 0 +#define KMP_MIC 0 +#define KMP_MIC1 0 +#define KMP_MIC2 0 #endif /* Specify 32 bit architectures here */ #define KMP_32_BIT_ARCH (KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_MIPS) // TODO: Fixme - This is clever, but really fugly -#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) -# error Unknown or unsupported architecture +#if (1 != \ + KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64 + \ + KMP_ARCH_AARCH64 + KMP_ARCH_MIPS + KMP_ARCH_MIPS64) +#error Unknown or unsupported architecture #endif #endif // KMP_PLATFORM_H diff --git a/openmp/runtime/src/kmp_runtime.cpp b/openmp/runtime/src/kmp_runtime.cpp index 5d5f5de..2e386ef 100644 --- a/openmp/runtime/src/kmp_runtime.cpp +++ b/openmp/runtime/src/kmp_runtime.cpp @@ -14,18 +14,18 @@ #include "kmp.h" +#include "kmp_affinity.h" #include "kmp_atomic.h" -#include "kmp_wrapper_getpid.h" #include "kmp_environment.h" -#include "kmp_itt.h" 
-#include "kmp_str.h" -#include "kmp_settings.h" +#include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_io.h" -#include "kmp_error.h" +#include "kmp_itt.h" +#include "kmp_settings.h" #include "kmp_stats.h" +#include "kmp_str.h" #include "kmp_wait_release.h" -#include "kmp_affinity.h" +#include "kmp_wrapper_getpid.h" #if OMPT_SUPPORT #include "ompt-specific.h" @@ -41,494 +41,499 @@ #include "tsan_annotations.h" #if defined(KMP_GOMP_COMPAT) -char const __kmp_version_alt_comp[] = KMP_VERSION_PREFIX "alternative compiler support: yes"; +char const __kmp_version_alt_comp[] = + KMP_VERSION_PREFIX "alternative compiler support: yes"; #endif /* defined(KMP_GOMP_COMPAT) */ char const __kmp_version_omp_api[] = KMP_VERSION_PREFIX "API version: " #if OMP_50_ENABLED - "5.0 (201611)"; + "5.0 (201611)"; #elif OMP_45_ENABLED - "4.5 (201511)"; + "4.5 (201511)"; #elif OMP_40_ENABLED - "4.0 (201307)"; + "4.0 (201307)"; #else - "3.1 (201107)"; + "3.1 (201107)"; #endif #ifdef KMP_DEBUG -char const __kmp_version_lock[] = KMP_VERSION_PREFIX "lock type: run time selectable"; +char const __kmp_version_lock[] = + KMP_VERSION_PREFIX "lock type: run time selectable"; #endif /* KMP_DEBUG */ -#define KMP_MIN( x, y ) ( (x) < (y) ? (x) : (y) ) +#define KMP_MIN(x, y) ((x) < (y) ? (x) : (y)) /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ kmp_info_t __kmp_monitor; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* Forward declarations */ -void __kmp_cleanup( void ); +void __kmp_cleanup(void); -static void __kmp_initialize_info( kmp_info_t *, kmp_team_t *, int tid, int gtid ); -static void __kmp_initialize_team( kmp_team_t * team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t * loc ); +static void __kmp_initialize_info(kmp_info_t *, kmp_team_t *, int tid, + int gtid); +static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, + kmp_internal_control_t *new_icvs, + ident_t *loc); #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED -static void __kmp_partition_places( kmp_team_t *team, int update_master_only=0 ); +static void __kmp_partition_places(kmp_team_t *team, + int update_master_only = 0); #endif -static void __kmp_do_serial_initialize( void ); -void __kmp_fork_barrier( int gtid, int tid ); -void __kmp_join_barrier( int gtid ); -void __kmp_setup_icv_copy( kmp_team_t *team, int new_nproc, kmp_internal_control_t * new_icvs, ident_t *loc ); +static void __kmp_do_serial_initialize(void); +void __kmp_fork_barrier(int gtid, int tid); +void __kmp_join_barrier(int gtid); +void __kmp_setup_icv_copy(kmp_team_t *team, int new_nproc, + kmp_internal_control_t *new_icvs, ident_t *loc); #ifdef USE_LOAD_BALANCE -static int __kmp_load_balance_nproc( kmp_root_t * root, int set_nproc ); +static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc); #endif static int __kmp_expand_threads(int nWish, int nNeed); #if KMP_OS_WINDOWS -static int __kmp_unregister_root_other_thread( int gtid ); +static int __kmp_unregister_root_other_thread(int gtid); #endif -static void __kmp_unregister_library( void ); // called by __kmp_internal_end() -static void __kmp_reap_thread( kmp_info_t * thread, int is_root ); +static void __kmp_unregister_library(void); // called by __kmp_internal_end() +static void __kmp_reap_thread(kmp_info_t *thread, int is_root); static kmp_info_t *__kmp_thread_pool_insert_pt = NULL; 
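Illustration (not from the patch): __kmp_get_global_thread_id below identifies the calling thread by testing whether the address of a local variable falls inside some registered thread's stack window; stacks grow down, so a thread owns the range (stack_base - stack_size, stack_base]. The containment test in isolation, with hypothetical names:

#include <stddef.h>

// Hypothetical, simplified per-thread stack bookkeeping.
typedef struct {
  char *stack_base;  // highest address of the thread's stack
  size_t stack_size; // usable bytes below stack_base
} demo_stack_desc;

// Returns 1 if addr (e.g. the address of a local) lies on the described stack.
static int demo_on_this_stack(const demo_stack_desc *d, const char *addr) {
  if (addr > d->stack_base)
    return 0; // above the base: cannot be this stack
  return (size_t)(d->stack_base - addr) <= d->stack_size;
}

The runtime applies the same test to every registered thread and, for uber threads, widens the recorded window instead of failing.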
-/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* Calculate the identifier of the current thread */ -/* fast (and somewhat portable) way to get unique */ -/* identifier of executing thread. */ -/* returns KMP_GTID_DNE if we haven't been assigned a gtid */ - -int -__kmp_get_global_thread_id( ) -{ - int i; - kmp_info_t **other_threads; - size_t stack_data; - char *stack_addr; - size_t stack_size; - char *stack_base; - - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", - __kmp_nth, __kmp_all_nth )); - - /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to a - parallel region, made it return KMP_GTID_DNE to force serial_initialize by - caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee - __kmp_init_gtid for this to work. */ - - if ( !TCR_4(__kmp_init_gtid) ) return KMP_GTID_DNE; +/* fast (and somewhat portable) way to get unique identifier of executing + thread. Returns KMP_GTID_DNE if we haven't been assigned a gtid. */ + +int __kmp_get_global_thread_id() { + int i; + kmp_info_t **other_threads; + size_t stack_data; + char *stack_addr; + size_t stack_size; + char *stack_base; + + KA_TRACE( + 1000, + ("*** __kmp_get_global_thread_id: entering, nproc=%d all_nproc=%d\n", + __kmp_nth, __kmp_all_nth)); + + /* JPH - to handle the case where __kmpc_end(0) is called immediately prior to + a parallel region, made it return KMP_GTID_DNE to force serial_initialize + by caller. Had to handle KMP_GTID_DNE at all call-sites, or else guarantee + __kmp_init_gtid for this to work. */ + + if (!TCR_4(__kmp_init_gtid)) + return KMP_GTID_DNE; #ifdef KMP_TDATA_GTID - if ( TCR_4(__kmp_gtid_mode) >= 3) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using TDATA\n" )); - return __kmp_gtid; - } -#endif - if ( TCR_4(__kmp_gtid_mode) >= 2) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using keyed TLS\n" )); - return __kmp_gtid_get_specific(); - } - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: using internal alg.\n" )); - - stack_addr = (char*) & stack_data; - other_threads = __kmp_threads; - - /* - ATT: The code below is a source of potential bugs due to unsynchronized access to - __kmp_threads array. For example: - 1. Current thread loads other_threads[i] to thr and checks it, it is non-NULL. - 2. Current thread is suspended by OS. - 3. Another thread unregisters and finishes (debug versions of free() may fill memory - with something like 0xEF). - 4. Current thread is resumed. - 5. Current thread reads junk from *thr. - TODO: Fix it. - --ln - */ - - for( i = 0 ; i < __kmp_threads_capacity ; i++ ) { - - kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); - if( !thr ) continue; - - stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); - stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); - - /* stack grows down -- search through all of the active threads */ - - if( stack_addr <= stack_base ) { - size_t stack_diff = stack_base - stack_addr; - - if( stack_diff <= stack_size ) { - /* The only way we can be closer than the allocated */ - /* stack size is if we are running on this thread. */ - KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == i ); - return i; - } - } - } - - /* get specific to try and determine our gtid */ - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id: internal alg. 
failed to find " - "thread, using TLS\n" )); - i = __kmp_gtid_get_specific(); - - /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ - - /* if we havn't been assigned a gtid, then return code */ - if( i<0 ) return i; - - /* dynamically updated stack window for uber threads to avoid get_specific call */ - if( ! TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow) ) { - KMP_FATAL( StackOverflow, i ); - } - - stack_base = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; - if( stack_addr > stack_base ) { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, - other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - stack_base); - } else { - TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, stack_base - stack_addr); - } - - /* Reprint stack bounds for ubermaster since they have been refined */ - if ( __kmp_storage_map ) { - char *stack_end = (char *) other_threads[i]->th.th_info.ds.ds_stackbase; - char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; - __kmp_print_storage_map_gtid( i, stack_beg, stack_end, - other_threads[i]->th.th_info.ds.ds_stacksize, - "th_%d stack (refinement)", i ); - } + if (TCR_4(__kmp_gtid_mode) >= 3) { + KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using TDATA\n")); + return __kmp_gtid; + } +#endif + if (TCR_4(__kmp_gtid_mode) >= 2) { + KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using keyed TLS\n")); + return __kmp_gtid_get_specific(); + } + KA_TRACE(1000, ("*** __kmp_get_global_thread_id: using internal alg.\n")); + + stack_addr = (char *)&stack_data; + other_threads = __kmp_threads; + + /* ATT: The code below is a source of potential bugs due to unsynchronized + access to __kmp_threads array. For example: + 1. Current thread loads other_threads[i] to thr and checks it, it is + non-NULL. + 2. Current thread is suspended by OS. + 3. Another thread unregisters and finishes (debug versions of free() + may fill memory with something like 0xEF). + 4. Current thread is resumed. + 5. Current thread reads junk from *thr. + TODO: Fix it. --ln */ + + for (i = 0; i < __kmp_threads_capacity; i++) { + + kmp_info_t *thr = (kmp_info_t *)TCR_SYNC_PTR(other_threads[i]); + if (!thr) + continue; + + stack_size = (size_t)TCR_PTR(thr->th.th_info.ds.ds_stacksize); + stack_base = (char *)TCR_PTR(thr->th.th_info.ds.ds_stackbase); + + /* stack grows down -- search through all of the active threads */ + + if (stack_addr <= stack_base) { + size_t stack_diff = stack_base - stack_addr; + + if (stack_diff <= stack_size) { + /* The only way we can be closer than the allocated */ + /* stack size is if we are running on this thread. */ + KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == i); + return i; + } + } + } + + /* get specific to try and determine our gtid */ + KA_TRACE(1000, + ("*** __kmp_get_global_thread_id: internal alg. 
failed to find " + "thread, using TLS\n")); + i = __kmp_gtid_get_specific(); + + /*fprintf( stderr, "=== %d\n", i ); */ /* GROO */ + + /* if we havn't been assigned a gtid, then return code */ + if (i < 0) return i; + + /* dynamically updated stack window for uber threads to avoid get_specific + call */ + if (!TCR_4(other_threads[i]->th.th_info.ds.ds_stackgrow)) { + KMP_FATAL(StackOverflow, i); + } + + stack_base = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; + if (stack_addr > stack_base) { + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stackbase, stack_addr); + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, + other_threads[i]->th.th_info.ds.ds_stacksize + stack_addr - + stack_base); + } else { + TCW_PTR(other_threads[i]->th.th_info.ds.ds_stacksize, + stack_base - stack_addr); + } + + /* Reprint stack bounds for ubermaster since they have been refined */ + if (__kmp_storage_map) { + char *stack_end = (char *)other_threads[i]->th.th_info.ds.ds_stackbase; + char *stack_beg = stack_end - other_threads[i]->th.th_info.ds.ds_stacksize; + __kmp_print_storage_map_gtid(i, stack_beg, stack_end, + other_threads[i]->th.th_info.ds.ds_stacksize, + "th_%d stack (refinement)", i); + } + return i; } -int -__kmp_get_global_thread_id_reg( ) -{ - int gtid; +int __kmp_get_global_thread_id_reg() { + int gtid; - if ( !__kmp_init_serial ) { - gtid = KMP_GTID_DNE; - } else + if (!__kmp_init_serial) { + gtid = KMP_GTID_DNE; + } else #ifdef KMP_TDATA_GTID - if ( TCR_4(__kmp_gtid_mode) >= 3 ) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using TDATA\n" )); - gtid = __kmp_gtid; - } else -#endif - if ( TCR_4(__kmp_gtid_mode) >= 2 ) { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using keyed TLS\n" )); - gtid = __kmp_gtid_get_specific(); + if (TCR_4(__kmp_gtid_mode) >= 3) { + KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using TDATA\n")); + gtid = __kmp_gtid; + } else +#endif + if (TCR_4(__kmp_gtid_mode) >= 2) { + KA_TRACE(1000, ("*** __kmp_get_global_thread_id_reg: using keyed TLS\n")); + gtid = __kmp_gtid_get_specific(); + } else { + KA_TRACE(1000, + ("*** __kmp_get_global_thread_id_reg: using internal alg.\n")); + gtid = __kmp_get_global_thread_id(); + } + + /* we must be a new uber master sibling thread */ + if (gtid == KMP_GTID_DNE) { + KA_TRACE(10, + ("__kmp_get_global_thread_id_reg: Encountered new root thread. " + "Registering a new gtid.\n")); + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); + if (!__kmp_init_serial) { + __kmp_do_serial_initialize(); + gtid = __kmp_gtid_get_specific(); } else { - KA_TRACE( 1000, ( "*** __kmp_get_global_thread_id_reg: using internal alg.\n" )); - gtid = __kmp_get_global_thread_id(); - } - - /* we must be a new uber master sibling thread */ - if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ( "__kmp_get_global_thread_id_reg: Encountered new root thread. 
" - "Registering a new gtid.\n" )); - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if( !__kmp_init_serial ) { - __kmp_do_serial_initialize(); - gtid = __kmp_gtid_get_specific(); - } else { - gtid = __kmp_register_root(FALSE); - } - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ + gtid = __kmp_register_root(FALSE); } + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + /*__kmp_printf( "+++ %d\n", gtid ); */ /* GROO */ + } - KMP_DEBUG_ASSERT( gtid >=0 ); + KMP_DEBUG_ASSERT(gtid >= 0); - return gtid; + return gtid; } /* caller must hold forkjoin_lock */ -void -__kmp_check_stack_overlap( kmp_info_t *th ) -{ - int f; - char *stack_beg = NULL; - char *stack_end = NULL; - int gtid; - - KA_TRACE(10,("__kmp_check_stack_overlap: called\n")); - if ( __kmp_storage_map ) { - stack_end = (char *) th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - - gtid = __kmp_gtid_from_thread( th ); - - if (gtid == KMP_GTID_MONITOR) { - __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%s stack (%s)", "mon", - ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); - } else { - __kmp_print_storage_map_gtid( gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, - "th_%d stack (%s)", gtid, - ( th->th.th_info.ds.ds_stackgrow ) ? "initial" : "actual" ); - } - } - - /* No point in checking ubermaster threads since they use refinement and cannot overlap */ - gtid = __kmp_gtid_from_thread( th ); - if ( __kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) - { - KA_TRACE(10,("__kmp_check_stack_overlap: performing extensive checking\n")); - if ( stack_beg == NULL ) { - stack_end = (char *) th->th.th_info.ds.ds_stackbase; - stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; - } +void __kmp_check_stack_overlap(kmp_info_t *th) { + int f; + char *stack_beg = NULL; + char *stack_end = NULL; + int gtid; + + KA_TRACE(10, ("__kmp_check_stack_overlap: called\n")); + if (__kmp_storage_map) { + stack_end = (char *)th->th.th_info.ds.ds_stackbase; + stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; + + gtid = __kmp_gtid_from_thread(th); + + if (gtid == KMP_GTID_MONITOR) { + __kmp_print_storage_map_gtid( + gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, + "th_%s stack (%s)", "mon", + (th->th.th_info.ds.ds_stackgrow) ? "initial" : "actual"); + } else { + __kmp_print_storage_map_gtid( + gtid, stack_beg, stack_end, th->th.th_info.ds.ds_stacksize, + "th_%d stack (%s)", gtid, + (th->th.th_info.ds.ds_stackgrow) ? 
"initial" : "actual"); + } + } + + /* No point in checking ubermaster threads since they use refinement and + * cannot overlap */ + gtid = __kmp_gtid_from_thread(th); + if (__kmp_env_checks == TRUE && !KMP_UBER_GTID(gtid)) { + KA_TRACE(10, + ("__kmp_check_stack_overlap: performing extensive checking\n")); + if (stack_beg == NULL) { + stack_end = (char *)th->th.th_info.ds.ds_stackbase; + stack_beg = stack_end - th->th.th_info.ds.ds_stacksize; + } + + for (f = 0; f < __kmp_threads_capacity; f++) { + kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); + + if (f_th && f_th != th) { + char *other_stack_end = + (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); + char *other_stack_beg = + other_stack_end - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); + if ((stack_beg > other_stack_beg && stack_beg < other_stack_end) || + (stack_end > other_stack_beg && stack_end < other_stack_end)) { + + /* Print the other stack values before the abort */ + if (__kmp_storage_map) + __kmp_print_storage_map_gtid( + -1, other_stack_beg, other_stack_end, + (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), + "th_%d stack (overlapped)", __kmp_gtid_from_thread(f_th)); - for( f=0 ; f < __kmp_threads_capacity ; f++ ) { - kmp_info_t *f_th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[f]); - - if( f_th && f_th != th ) { - char *other_stack_end = (char *)TCR_PTR(f_th->th.th_info.ds.ds_stackbase); - char *other_stack_beg = other_stack_end - - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize); - if((stack_beg > other_stack_beg && stack_beg < other_stack_end) || - (stack_end > other_stack_beg && stack_end < other_stack_end)) { - - /* Print the other stack values before the abort */ - if ( __kmp_storage_map ) - __kmp_print_storage_map_gtid( -1, other_stack_beg, other_stack_end, - (size_t)TCR_PTR(f_th->th.th_info.ds.ds_stacksize), - "th_%d stack (overlapped)", - __kmp_gtid_from_thread( f_th ) ); - - __kmp_msg( kmp_ms_fatal, KMP_MSG( StackOverlap ), KMP_HNT( ChangeStackLimit ), __kmp_msg_null ); - } - } + __kmp_msg(kmp_ms_fatal, KMP_MSG(StackOverlap), + KMP_HNT(ChangeStackLimit), __kmp_msg_null); } + } } - KA_TRACE(10,("__kmp_check_stack_overlap: returning\n")); + } + KA_TRACE(10, ("__kmp_check_stack_overlap: returning\n")); } - -/* ------------------------------------------------------------------------ */ - /* ------------------------------------------------------------------------ */ -void -__kmp_infinite_loop( void ) -{ - static int done = FALSE; +void __kmp_infinite_loop(void) { + static int done = FALSE; - while (! done) { - KMP_YIELD( 1 ); - } + while (!done) { + KMP_YIELD(1); + } } -#define MAX_MESSAGE 512 +#define MAX_MESSAGE 512 -void -__kmp_print_storage_map_gtid( int gtid, void *p1, void *p2, size_t size, char const *format, ...) { - char buffer[MAX_MESSAGE]; - va_list ap; +void __kmp_print_storage_map_gtid(int gtid, void *p1, void *p2, size_t size, + char const *format, ...) 
{ + char buffer[MAX_MESSAGE]; + va_list ap; - va_start( ap, format); - KMP_SNPRINTF( buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, p2, (unsigned long) size, format ); - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, buffer, ap ); + va_start(ap, format); + KMP_SNPRINTF(buffer, sizeof(buffer), "OMP storage map: %p %p%8lu %s\n", p1, + p2, (unsigned long)size, format); + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_vprintf(kmp_err, buffer, ap); #if KMP_PRINT_DATA_PLACEMENT - int node; - if(gtid >= 0) { - if(p1 <= p2 && (char*)p2 - (char*)p1 == size) { - if( __kmp_storage_map_verbose ) { - node = __kmp_get_host_node(p1); - if(node < 0) /* doesn't work, so don't try this next time */ - __kmp_storage_map_verbose = FALSE; - else { - char *last; - int lastNode; - int localProc = __kmp_get_cpu_from_gtid(gtid); - - const int page_size = KMP_GET_PAGE_SIZE(); - - p1 = (void *)( (size_t)p1 & ~((size_t)page_size - 1) ); - p2 = (void *)( ((size_t) p2 - 1) & ~((size_t)page_size - 1) ); - if(localProc >= 0) - __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, localProc>>1); - else - __kmp_printf_no_lock(" GTID %d\n", gtid); -# if KMP_USE_PRCTL -/* The more elaborate format is disabled for now because of the prctl hanging bug. */ - do { - last = p1; - lastNode = node; - /* This loop collates adjacent pages with the same host node. */ - do { - (char*)p1 += page_size; - } while(p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); - __kmp_printf_no_lock(" %p-%p memNode %d\n", last, - (char*)p1 - 1, lastNode); - } while(p1 <= p2); -# else - __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, - (char*)p1 + (page_size - 1), __kmp_get_host_node(p1)); - if(p1 < p2) { - __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, - (char*)p2 + (page_size - 1), __kmp_get_host_node(p2)); - } -# endif - } - } - } else - __kmp_printf_no_lock(" %s\n", KMP_I18N_STR( StorageMapWarning ) ); - } + int node; + if (gtid >= 0) { + if (p1 <= p2 && (char *)p2 - (char *)p1 == size) { + if (__kmp_storage_map_verbose) { + node = __kmp_get_host_node(p1); + if (node < 0) /* doesn't work, so don't try this next time */ + __kmp_storage_map_verbose = FALSE; + else { + char *last; + int lastNode; + int localProc = __kmp_get_cpu_from_gtid(gtid); + + const int page_size = KMP_GET_PAGE_SIZE(); + + p1 = (void *)((size_t)p1 & ~((size_t)page_size - 1)); + p2 = (void *)(((size_t)p2 - 1) & ~((size_t)page_size - 1)); + if (localProc >= 0) + __kmp_printf_no_lock(" GTID %d localNode %d\n", gtid, + localProc >> 1); + else + __kmp_printf_no_lock(" GTID %d\n", gtid); +#if KMP_USE_PRCTL + /* The more elaborate format is disabled for now because of the prctl + * hanging bug. */ + do { + last = p1; + lastNode = node; + /* This loop collates adjacent pages with the same host node. 
*/ + do { + (char *)p1 += page_size; + } while (p1 <= p2 && (node = __kmp_get_host_node(p1)) == lastNode); + __kmp_printf_no_lock(" %p-%p memNode %d\n", last, (char *)p1 - 1, + lastNode); + } while (p1 <= p2); +#else + __kmp_printf_no_lock(" %p-%p memNode %d\n", p1, + (char *)p1 + (page_size - 1), + __kmp_get_host_node(p1)); + if (p1 < p2) { + __kmp_printf_no_lock(" %p-%p memNode %d\n", p2, + (char *)p2 + (page_size - 1), + __kmp_get_host_node(p2)); + } +#endif + } + } + } else + __kmp_printf_no_lock(" %s\n", KMP_I18N_STR(StorageMapWarning)); + } #endif /* KMP_PRINT_DATA_PLACEMENT */ - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); } -void -__kmp_warn( char const * format, ... ) -{ - char buffer[MAX_MESSAGE]; - va_list ap; +void __kmp_warn(char const *format, ...) { + char buffer[MAX_MESSAGE]; + va_list ap; - if ( __kmp_generate_warnings == kmp_warnings_off ) { - return; - } + if (__kmp_generate_warnings == kmp_warnings_off) { + return; + } - va_start( ap, format ); + va_start(ap, format); - KMP_SNPRINTF( buffer, sizeof(buffer) , "OMP warning: %s\n", format ); - __kmp_acquire_bootstrap_lock( & __kmp_stdio_lock ); - __kmp_vprintf( kmp_err, buffer, ap ); - __kmp_release_bootstrap_lock( & __kmp_stdio_lock ); + KMP_SNPRINTF(buffer, sizeof(buffer), "OMP warning: %s\n", format); + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_vprintf(kmp_err, buffer, ap); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); - va_end( ap ); + va_end(ap); } -void -__kmp_abort_process() -{ - - // Later threads may stall here, but that's ok because abort() will kill them. - __kmp_acquire_bootstrap_lock( & __kmp_exit_lock ); - - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer(); - }; // if - - if ( KMP_OS_WINDOWS ) { - // Let other threads know of abnormal termination and prevent deadlock - // if abort happened during library initialization or shutdown - __kmp_global.g.g_abort = SIGABRT; - - /* - On Windows* OS by default abort() causes pop-up error box, which stalls nightly testing. - Unfortunately, we cannot reliably suppress pop-up error boxes. _set_abort_behavior() - works well, but this function is not available in VS7 (this is not problem for DLL, but - it is a problem for static OpenMP RTL). SetErrorMode (and so, timelimit utility) does - not help, at least in some versions of MS C RTL. - - It seems following sequence is the only way to simulate abort() and avoid pop-up error - box. - */ - raise( SIGABRT ); - _exit( 3 ); // Just in case, if signal ignored, exit anyway. - } else { - abort(); - }; // if - - __kmp_infinite_loop(); - __kmp_release_bootstrap_lock( & __kmp_exit_lock ); +void __kmp_abort_process() { + // Later threads may stall here, but that's ok because abort() will kill them. + __kmp_acquire_bootstrap_lock(&__kmp_exit_lock); + + if (__kmp_debug_buf) { + __kmp_dump_debug_buffer(); + }; // if + + if (KMP_OS_WINDOWS) { + // Let other threads know of abnormal termination and prevent deadlock + // if abort happened during library initialization or shutdown + __kmp_global.g.g_abort = SIGABRT; + + /* On Windows* OS by default abort() causes pop-up error box, which stalls + nightly testing. Unfortunately, we cannot reliably suppress pop-up error + boxes. _set_abort_behavior() works well, but this function is not + available in VS7 (this is not problem for DLL, but it is a problem for + static OpenMP RTL). SetErrorMode (and so, timelimit utility) does not + help, at least in some versions of MS C RTL. 
+ + It seems following sequence is the only way to simulate abort() and + avoid pop-up error box. */ + raise(SIGABRT); + _exit(3); // Just in case, if signal ignored, exit anyway. + } else { + abort(); + }; // if + + __kmp_infinite_loop(); + __kmp_release_bootstrap_lock(&__kmp_exit_lock); } // __kmp_abort_process -void -__kmp_abort_thread( void ) -{ - // TODO: Eliminate g_abort global variable and this function. - // In case of abort just call abort(), it will kill all the threads. - __kmp_infinite_loop(); +void __kmp_abort_thread(void) { + // TODO: Eliminate g_abort global variable and this function. + // In case of abort just call abort(), it will kill all the threads. + __kmp_infinite_loop(); } // __kmp_abort_thread -/* ------------------------------------------------------------------------ */ - -/* - * Print out the storage map for the major kmp_info_t thread data structures - * that are allocated together. - */ +/* Print out the storage map for the major kmp_info_t thread data structures + that are allocated together. */ -static void -__kmp_print_thread_storage_map( kmp_info_t *thr, int gtid ) -{ - __kmp_print_storage_map_gtid( gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", gtid ); +static void __kmp_print_thread_storage_map(kmp_info_t *thr, int gtid) { + __kmp_print_storage_map_gtid(gtid, thr, thr + 1, sizeof(kmp_info_t), "th_%d", + gtid); - __kmp_print_storage_map_gtid( gtid, &thr->th.th_info, &thr->th.th_team, sizeof(kmp_desc_t), - "th_%d.th_info", gtid ); + __kmp_print_storage_map_gtid(gtid, &thr->th.th_info, &thr->th.th_team, + sizeof(kmp_desc_t), "th_%d.th_info", gtid); - __kmp_print_storage_map_gtid( gtid, &thr->th.th_local, &thr->th.th_pri_head, sizeof(kmp_local_t), - "th_%d.th_local", gtid ); + __kmp_print_storage_map_gtid(gtid, &thr->th.th_local, &thr->th.th_pri_head, + sizeof(kmp_local_t), "th_%d.th_local", gtid); - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], - sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid ); + __kmp_print_storage_map_gtid( + gtid, &thr->th.th_bar[0], &thr->th.th_bar[bs_last_barrier], + sizeof(kmp_balign_t) * bs_last_barrier, "th_%d.th_bar", gtid); - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_plain_barrier], - &thr->th.th_bar[bs_plain_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[plain]", gtid); + __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_plain_barrier], + &thr->th.th_bar[bs_plain_barrier + 1], + sizeof(kmp_balign_t), "th_%d.th_bar[plain]", + gtid); - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_forkjoin_barrier], - &thr->th.th_bar[bs_forkjoin_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", gtid); + __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_forkjoin_barrier], + &thr->th.th_bar[bs_forkjoin_barrier + 1], + sizeof(kmp_balign_t), "th_%d.th_bar[forkjoin]", + gtid); - #if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid( gtid, &thr->th.th_bar[bs_reduction_barrier], - &thr->th.th_bar[bs_reduction_barrier+1], - sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", gtid); - #endif // KMP_FAST_REDUCTION_BARRIER +#if KMP_FAST_REDUCTION_BARRIER + __kmp_print_storage_map_gtid(gtid, &thr->th.th_bar[bs_reduction_barrier], + &thr->th.th_bar[bs_reduction_barrier + 1], + sizeof(kmp_balign_t), "th_%d.th_bar[reduction]", + gtid); +#endif // KMP_FAST_REDUCTION_BARRIER } -/* - * Print out the storage map for the major kmp_team_t team data structures - * that are allocated together. 
- */ - -static void -__kmp_print_team_storage_map( const char *header, kmp_team_t *team, int team_id, int num_thr ) -{ - int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; - __kmp_print_storage_map_gtid( -1, team, team + 1, sizeof(kmp_team_t), "%s_%d", - header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[0], &team->t.t_bar[bs_last_barrier], - sizeof(kmp_balign_team_t) * bs_last_barrier, "%s_%d.t_bar", header, team_id ); - +/* Print out the storage map for the major kmp_team_t team data structures + that are allocated together. */ - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_plain_barrier], &team->t.t_bar[bs_plain_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", header, team_id ); +static void __kmp_print_team_storage_map(const char *header, kmp_team_t *team, + int team_id, int num_thr) { + int num_disp_buff = team->t.t_max_nproc > 1 ? __kmp_dispatch_num_buffers : 2; + __kmp_print_storage_map_gtid(-1, team, team + 1, sizeof(kmp_team_t), "%s_%d", + header, team_id); - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_forkjoin_barrier], &team->t.t_bar[bs_forkjoin_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[forkjoin]", header, team_id ); + __kmp_print_storage_map_gtid(-1, &team->t.t_bar[0], + &team->t.t_bar[bs_last_barrier], + sizeof(kmp_balign_team_t) * bs_last_barrier, + "%s_%d.t_bar", header, team_id); - #if KMP_FAST_REDUCTION_BARRIER - __kmp_print_storage_map_gtid( -1, &team->t.t_bar[bs_reduction_barrier], &team->t.t_bar[bs_reduction_barrier+1], - sizeof(kmp_balign_team_t), "%s_%d.t_bar[reduction]", header, team_id ); - #endif // KMP_FAST_REDUCTION_BARRIER + __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_plain_barrier], + &team->t.t_bar[bs_plain_barrier + 1], + sizeof(kmp_balign_team_t), "%s_%d.t_bar[plain]", + header, team_id); - __kmp_print_storage_map_gtid( -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], - sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id ); + __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_forkjoin_barrier], + &team->t.t_bar[bs_forkjoin_barrier + 1], + sizeof(kmp_balign_team_t), + "%s_%d.t_bar[forkjoin]", header, team_id); - __kmp_print_storage_map_gtid( -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], - sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id ); - - __kmp_print_storage_map_gtid( -1, &team->t.t_disp_buffer[0], &team->t.t_disp_buffer[num_disp_buff], - sizeof(dispatch_shared_info_t) * num_disp_buff, "%s_%d.t_disp_buffer", - header, team_id ); - - - __kmp_print_storage_map_gtid( -1, &team->t.t_taskq, &team->t.t_copypriv_data, - sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, team_id ); +#if KMP_FAST_REDUCTION_BARRIER + __kmp_print_storage_map_gtid(-1, &team->t.t_bar[bs_reduction_barrier], + &team->t.t_bar[bs_reduction_barrier + 1], + sizeof(kmp_balign_team_t), + "%s_%d.t_bar[reduction]", header, team_id); +#endif // KMP_FAST_REDUCTION_BARRIER + + __kmp_print_storage_map_gtid( + -1, &team->t.t_dispatch[0], &team->t.t_dispatch[num_thr], + sizeof(kmp_disp_t) * num_thr, "%s_%d.t_dispatch", header, team_id); + + __kmp_print_storage_map_gtid( + -1, &team->t.t_threads[0], &team->t.t_threads[num_thr], + sizeof(kmp_info_t *) * num_thr, "%s_%d.t_threads", header, team_id); + + __kmp_print_storage_map_gtid(-1, &team->t.t_disp_buffer[0], + &team->t.t_disp_buffer[num_disp_buff], + sizeof(dispatch_shared_info_t) * num_disp_buff, + "%s_%d.t_disp_buffer", header, team_id); + + __kmp_print_storage_map_gtid(-1, &team->t.t_taskq, 
&team->t.t_copypriv_data, + sizeof(kmp_taskq_t), "%s_%d.t_taskq", header, + team_id); } static void __kmp_init_allocator() {} @@ -537,915 +542,866 @@ static void __kmp_fini_allocator() {} /* ------------------------------------------------------------------------ */ #ifdef KMP_DYNAMIC_LIB -# if KMP_OS_WINDOWS +#if KMP_OS_WINDOWS -static void -__kmp_reset_lock( kmp_bootstrap_lock_t* lck ) { - // TODO: Change to __kmp_break_bootstrap_lock(). - __kmp_init_bootstrap_lock( lck ); // make the lock released +static void __kmp_reset_lock(kmp_bootstrap_lock_t *lck) { + // TODO: Change to __kmp_break_bootstrap_lock(). + __kmp_init_bootstrap_lock(lck); // make the lock released } -static void -__kmp_reset_locks_on_process_detach( int gtid_req ) { - int i; - int thread_count; - - // PROCESS_DETACH is expected to be called by a thread - // that executes ProcessExit() or FreeLibrary(). - // OS terminates other threads (except the one calling ProcessExit or FreeLibrary). - // So, it might be safe to access the __kmp_threads[] without taking the forkjoin_lock. - // However, in fact, some threads can be still alive here, although being about to be terminated. - // The threads in the array with ds_thread==0 are most suspicious. - // Actually, it can be not safe to access the __kmp_threads[]. - - // TODO: does it make sense to check __kmp_roots[] ? - - // Let's check that there are no other alive threads registered with the OMP lib. - while( 1 ) { - thread_count = 0; - for( i = 0; i < __kmp_threads_capacity; ++i ) { - if( !__kmp_threads ) continue; - kmp_info_t* th = __kmp_threads[ i ]; - if( th == NULL ) continue; - int gtid = th->th.th_info.ds.ds_gtid; - if( gtid == gtid_req ) continue; - if( gtid < 0 ) continue; - DWORD exit_val; - int alive = __kmp_is_thread_alive( th, &exit_val ); - if( alive ) { - ++thread_count; - } - } - if( thread_count == 0 ) break; // success - } +static void __kmp_reset_locks_on_process_detach(int gtid_req) { + int i; + int thread_count; + + // PROCESS_DETACH is expected to be called by a thread that executes + // ProcessExit() or FreeLibrary(). OS terminates other threads (except the one + // calling ProcessExit or FreeLibrary). So, it might be safe to access the + // __kmp_threads[] without taking the forkjoin_lock. However, in fact, some + // threads can be still alive here, although being about to be terminated. The + // threads in the array with ds_thread==0 are most suspicious. Actually, it + // can be not safe to access the __kmp_threads[]. + + // TODO: does it make sense to check __kmp_roots[] ? + + // Let's check that there are no other alive threads registered with the OMP + // lib. + while (1) { + thread_count = 0; + for (i = 0; i < __kmp_threads_capacity; ++i) { + if (!__kmp_threads) + continue; + kmp_info_t *th = __kmp_threads[i]; + if (th == NULL) + continue; + int gtid = th->th.th_info.ds.ds_gtid; + if (gtid == gtid_req) + continue; + if (gtid < 0) + continue; + DWORD exit_val; + int alive = __kmp_is_thread_alive(th, &exit_val); + if (alive) { + ++thread_count; + } + } + if (thread_count == 0) + break; // success + } + + // Assume that I'm alone. Now it might be safe to check and reset locks. + // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. + __kmp_reset_lock(&__kmp_forkjoin_lock); +#ifdef KMP_DEBUG + __kmp_reset_lock(&__kmp_stdio_lock); +#endif // KMP_DEBUG +} - // Assume that I'm alone. 
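Illustration (not from the patch): the detach path above spins until no other registered thread is still alive before it resets the bootstrap locks; the probe it relies on, __kmp_is_thread_alive(), is defined elsewhere in the runtime. On Windows such a liveness check typically reduces to querying the thread's exit code, e.g. (hypothetical helper name, valid thread handle assumed):

#include <windows.h>

// Returns 1 while the thread behind h has not terminated, 0 otherwise.
// A still-running thread reports the sentinel exit code STILL_ACTIVE.
static int demo_thread_is_alive(HANDLE h) {
  DWORD exit_code = 0;
  if (!GetExitCodeThread(h, &exit_code))
    return 0; // query failed; treat as not alive
  return exit_code == STILL_ACTIVE;
}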
+BOOL WINAPI DllMain(HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved) { + //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - // Now it might be probably safe to check and reset locks. - // __kmp_forkjoin_lock and __kmp_stdio_lock are expected to be reset. - __kmp_reset_lock( &__kmp_forkjoin_lock ); - #ifdef KMP_DEBUG - __kmp_reset_lock( &__kmp_stdio_lock ); - #endif // KMP_DEBUG -} + switch (fdwReason) { -BOOL WINAPI -DllMain( HINSTANCE hInstDLL, DWORD fdwReason, LPVOID lpReserved ) { - //__kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - switch( fdwReason ) { - - case DLL_PROCESS_ATTACH: - KA_TRACE( 10, ("DllMain: PROCESS_ATTACH\n" )); - - return TRUE; - - case DLL_PROCESS_DETACH: - KA_TRACE( 10, ("DllMain: PROCESS_DETACH T#%d\n", - __kmp_gtid_get_specific() )); - - if( lpReserved != NULL ) - { - // lpReserved is used for telling the difference: - // lpReserved == NULL when FreeLibrary() was called, - // lpReserved != NULL when the process terminates. - // When FreeLibrary() is called, worker threads remain alive. - // So they will release the forkjoin lock by themselves. - // When the process terminates, worker threads disappear triggering - // the problem of unreleased forkjoin lock as described below. - - // A worker thread can take the forkjoin lock. - // The problem comes up if that worker thread becomes dead - // before it releases the forkjoin lock. - // The forkjoin lock remains taken, while the thread - // executing DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below - // will try to take the forkjoin lock and will always fail, - // so that the application will never finish [normally]. - // This scenario is possible if __kmpc_end() has not been executed. - // It looks like it's not a corner case, but common cases: - // - the main function was compiled by an alternative compiler; - // - the main function was compiled by icl but without /Qopenmp (application with plugins); - // - application terminates by calling C exit(), Fortran CALL EXIT() or Fortran STOP. - // - alive foreign thread prevented __kmpc_end from doing cleanup. - - // This is a hack to work around the problem. - // TODO: !!! to figure out something better. - __kmp_reset_locks_on_process_detach( __kmp_gtid_get_specific() ); - } + case DLL_PROCESS_ATTACH: + KA_TRACE(10, ("DllMain: PROCESS_ATTACH\n")); - __kmp_internal_end_library( __kmp_gtid_get_specific() ); + return TRUE; - return TRUE; + case DLL_PROCESS_DETACH: + KA_TRACE(10, ("DllMain: PROCESS_DETACH T#%d\n", __kmp_gtid_get_specific())); + + if (lpReserved != NULL) { + // lpReserved is used for telling the difference: + // lpReserved == NULL when FreeLibrary() was called, + // lpReserved != NULL when the process terminates. + // When FreeLibrary() is called, worker threads remain alive. So they will + // release the forkjoin lock by themselves. When the process terminates, + // worker threads disappear triggering the problem of unreleased forkjoin + // lock as described below. + + // A worker thread can take the forkjoin lock. The problem comes up if + // that worker thread becomes dead before it releases the forkjoin lock. + // The forkjoin lock remains taken, while the thread executing + // DllMain()->PROCESS_DETACH->__kmp_internal_end_library() below will try + // to take the forkjoin lock and will always fail, so that the application + // will never finish [normally]. This scenario is possible if + // __kmpc_end() has not been executed. 
It looks like it's not a corner + // case, but common cases: + // - the main function was compiled by an alternative compiler; + // - the main function was compiled by icl but without /Qopenmp + // (application with plugins); + // - application terminates by calling C exit(), Fortran CALL EXIT() or + // Fortran STOP. + // - alive foreign thread prevented __kmpc_end from doing cleanup. + // + // This is a hack to work around the problem. + // TODO: !!! figure out something better. + __kmp_reset_locks_on_process_detach(__kmp_gtid_get_specific()); + } + + __kmp_internal_end_library(__kmp_gtid_get_specific()); - case DLL_THREAD_ATTACH: - KA_TRACE( 10, ("DllMain: THREAD_ATTACH\n" )); + return TRUE; - /* if we wanted to register new siblings all the time here call - * __kmp_get_gtid(); */ - return TRUE; + case DLL_THREAD_ATTACH: + KA_TRACE(10, ("DllMain: THREAD_ATTACH\n")); - case DLL_THREAD_DETACH: - KA_TRACE( 10, ("DllMain: THREAD_DETACH T#%d\n", - __kmp_gtid_get_specific() )); + /* if we want to register new siblings all the time here call + * __kmp_get_gtid(); */ + return TRUE; - __kmp_internal_end_thread( __kmp_gtid_get_specific() ); - return TRUE; - } + case DLL_THREAD_DETACH: + KA_TRACE(10, ("DllMain: THREAD_DETACH T#%d\n", __kmp_gtid_get_specific())); + __kmp_internal_end_thread(__kmp_gtid_get_specific()); return TRUE; + } + + return TRUE; } -# endif /* KMP_OS_WINDOWS */ +#endif /* KMP_OS_WINDOWS */ #endif /* KMP_DYNAMIC_LIB */ - -/* ------------------------------------------------------------------------ */ - /* Change the library type to "status" and return the old type */ /* called from within initialization routines where __kmp_initz_lock is held */ -int -__kmp_change_library( int status ) -{ - int old_status; +int __kmp_change_library(int status) { + int old_status; - old_status = __kmp_yield_init & 1; // check whether KMP_LIBRARY=throughput (even init count) + old_status = __kmp_yield_init & + 1; // check whether KMP_LIBRARY=throughput (even init count) - if (status) { - __kmp_yield_init |= 1; // throughput => turnaround (odd init count) - } - else { - __kmp_yield_init &= ~1; // turnaround => throughput (even init count) - } + if (status) { + __kmp_yield_init |= 1; // throughput => turnaround (odd init count) + } else { + __kmp_yield_init &= ~1; // turnaround => throughput (even init count) + } - return old_status; // return previous setting of whether KMP_LIBRARY=throughput + return old_status; // return previous setting of whether + // KMP_LIBRARY=throughput } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* __kmp_parallel_deo -- - * Wait until it's our turn. - */ -void -__kmp_parallel_deo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; +/* __kmp_parallel_deo -- Wait until it's our turn. 
*/ +void __kmp_parallel_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + int gtid = *gtid_ref; #ifdef BUILD_PARALLEL_ORDERED - kmp_team_t *team = __kmp_team_from_gtid( gtid ); + kmp_team_t *team = __kmp_team_from_gtid(gtid); #endif /* BUILD_PARALLEL_ORDERED */ - if( __kmp_env_consistency_check ) { - if( __kmp_threads[gtid]->th.th_root->r.r_active ) + if (__kmp_env_consistency_check) { + if (__kmp_threads[gtid]->th.th_root->r.r_active) #if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL, 0 ); + __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL, 0); #else - __kmp_push_sync( gtid, ct_ordered_in_parallel, loc_ref, NULL ); + __kmp_push_sync(gtid, ct_ordered_in_parallel, loc_ref, NULL); #endif - } + } #ifdef BUILD_PARALLEL_ORDERED - if( !team->t.t_serialized ) { - KMP_MB(); - KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid( gtid ), KMP_EQ, NULL); - KMP_MB(); - } + if (!team->t.t_serialized) { + KMP_MB(); + KMP_WAIT_YIELD(&team->t.t_ordered.dt.t_value, __kmp_tid_from_gtid(gtid), + KMP_EQ, NULL); + KMP_MB(); + } #endif /* BUILD_PARALLEL_ORDERED */ } -/* __kmp_parallel_dxo -- - * Signal the next task. - */ - -void -__kmp_parallel_dxo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; +/* __kmp_parallel_dxo -- Signal the next task. */ +void __kmp_parallel_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + int gtid = *gtid_ref; #ifdef BUILD_PARALLEL_ORDERED - int tid = __kmp_tid_from_gtid( gtid ); - kmp_team_t *team = __kmp_team_from_gtid( gtid ); + int tid = __kmp_tid_from_gtid(gtid); + kmp_team_t *team = __kmp_team_from_gtid(gtid); #endif /* BUILD_PARALLEL_ORDERED */ - if( __kmp_env_consistency_check ) { - if( __kmp_threads[gtid]->th.th_root->r.r_active ) - __kmp_pop_sync( gtid, ct_ordered_in_parallel, loc_ref ); - } + if (__kmp_env_consistency_check) { + if (__kmp_threads[gtid]->th.th_root->r.r_active) + __kmp_pop_sync(gtid, ct_ordered_in_parallel, loc_ref); + } #ifdef BUILD_PARALLEL_ORDERED - if ( ! team->t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. */ + if (!team->t.t_serialized) { + KMP_MB(); /* Flush all pending memory write invalidates. */ - /* use the tid of the next thread in this team */ - /* TODO repleace with general release procedure */ - team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc ); + /* use the tid of the next thread in this team */ + /* TODO replace with general release procedure */ + team->t.t_ordered.dt.t_value = ((tid + 1) % team->t.t_nproc); #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { - /* accept blame for "ordered" waiting */ - kmp_info_t *this_thread = __kmp_threads[gtid]; - ompt_callbacks.ompt_callback(ompt_event_release_ordered)( - this_thread->th.ompt_thread_info.wait_id); - } + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_release_ordered)) { + /* accept blame for "ordered" waiting */ + kmp_info_t *this_thread = __kmp_threads[gtid]; + ompt_callbacks.ompt_callback(ompt_event_release_ordered)( + this_thread->th.ompt_thread_info.wait_id); + } #endif - KMP_MB(); /* Flush all pending memory write invalidates. */ - } + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + } #endif /* BUILD_PARALLEL_ORDERED */ } /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* The BARRIER for a SINGLE process section is always explicit */ -int -__kmp_enter_single( int gtid, ident_t *id_ref, int push_ws ) -{ - int status; - kmp_info_t *th; - kmp_team_t *team; +int __kmp_enter_single(int gtid, ident_t *id_ref, int push_ws) { + int status; + kmp_info_t *th; + kmp_team_t *team; - if( ! TCR_4(__kmp_init_parallel) ) - __kmp_parallel_initialize(); + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); - th = __kmp_threads[ gtid ]; - team = th->th.th_team; - status = 0; + th = __kmp_threads[gtid]; + team = th->th.th_team; + status = 0; - th->th.th_ident = id_ref; + th->th.th_ident = id_ref; - if ( team->t.t_serialized ) { - status = 1; - } else { - kmp_int32 old_this = th->th.th_local.this_construct; - - ++th->th.th_local.this_construct; - /* try to set team count to thread count--success means thread got the - single block - */ - /* TODO: Should this be acquire or release? */ - if (team->t.t_construct == old_this) { - status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, - th->th.th_local.this_construct); - } + if (team->t.t_serialized) { + status = 1; + } else { + kmp_int32 old_this = th->th.th_local.this_construct; + + ++th->th.th_local.this_construct; + /* try to set team count to thread count--success means thread got the + single block */ + /* TODO: Should this be acquire or release? */ + if (team->t.t_construct == old_this) { + status = KMP_COMPARE_AND_STORE_ACQ32(&team->t.t_construct, old_this, + th->th.th_local.this_construct); + } #if USE_ITT_BUILD - if ( __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && KMP_MASTER_GTID(gtid) && + if (__itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && + KMP_MASTER_GTID(gtid) && #if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && + th->th.th_teams_microtask == NULL && #endif - team->t.t_active_level == 1 ) - { // Only report metadata by master of active team at level 1 - __kmp_itt_metadata_single( id_ref ); - } -#endif /* USE_ITT_BUILD */ + team->t.t_active_level == + 1) { // Only report metadata by master of active team at level 1 + __kmp_itt_metadata_single(id_ref); } +#endif /* USE_ITT_BUILD */ + } - if( __kmp_env_consistency_check ) { - if (status && push_ws) { - __kmp_push_workshare( gtid, ct_psingle, id_ref ); - } else { - __kmp_check_workshare( gtid, ct_psingle, id_ref ); - } + if (__kmp_env_consistency_check) { + if (status && push_ws) { + __kmp_push_workshare(gtid, ct_psingle, id_ref); + } else { + __kmp_check_workshare(gtid, ct_psingle, id_ref); } + } #if USE_ITT_BUILD - if ( status ) { - __kmp_itt_single_start( gtid ); - } + if (status) { + __kmp_itt_single_start(gtid); + } #endif /* USE_ITT_BUILD */ - return status; + return status; } -void -__kmp_exit_single( int gtid ) -{ +void __kmp_exit_single(int gtid) { #if USE_ITT_BUILD - __kmp_itt_single_end( gtid ); + __kmp_itt_single_end(gtid); #endif /* USE_ITT_BUILD */ - if( __kmp_env_consistency_check ) - __kmp_pop_workshare( gtid, ct_psingle, NULL ); + if (__kmp_env_consistency_check) + __kmp_pop_workshare(gtid, ct_psingle, NULL); } - -/* - * determine if we can go parallel or must use a serialized parallel region and +/* determine if we can go parallel 
or must use a serialized parallel region and * how many threads we can use * set_nproc is the number of threads requested for the team * returns 0 if we should serialize or only use one thread, * otherwise the number of threads to use - * The forkjoin lock is held by the caller. - */ -static int -__kmp_reserve_threads( kmp_root_t *root, kmp_team_t *parent_team, - int master_tid, int set_nthreads + * The forkjoin lock is held by the caller. */ +static int __kmp_reserve_threads(kmp_root_t *root, kmp_team_t *parent_team, + int master_tid, int set_nthreads #if OMP_40_ENABLED - , int enter_teams + , + int enter_teams #endif /* OMP_40_ENABLED */ -) -{ - int capacity; - int new_nthreads; - KMP_DEBUG_ASSERT( __kmp_init_serial ); - KMP_DEBUG_ASSERT( root && parent_team ); - - // - // If dyn-var is set, dynamically adjust the number of desired threads, - // according to the method specified by dynamic_mode. - // - new_nthreads = set_nthreads; - if ( ! get__dynamic_2( parent_team, master_tid ) ) { - ; - } + ) { + int capacity; + int new_nthreads; + KMP_DEBUG_ASSERT(__kmp_init_serial); + KMP_DEBUG_ASSERT(root && parent_team); + + // If dyn-var is set, dynamically adjust the number of desired threads, + // according to the method specified by dynamic_mode. + new_nthreads = set_nthreads; + if (!get__dynamic_2(parent_team, master_tid)) { + ; + } #ifdef USE_LOAD_BALANCE - else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { - new_nthreads = __kmp_load_balance_nproc( root, set_nthreads ); - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d load balance reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - } + else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { + new_nthreads = __kmp_load_balance_nproc(root, set_nthreads); + if (new_nthreads == 1) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " + "reservation to 1 thread\n", + master_tid)); + return 1; + } + if (new_nthreads < set_nthreads) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d load balance reduced " + "reservation to %d threads\n", + master_tid, new_nthreads)); + } + } #endif /* USE_LOAD_BALANCE */ - else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { - new_nthreads = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 
1 - : root->r.r_hot_team->t.t_nproc); - if ( new_nthreads <= 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d thread limit reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - else { - new_nthreads = set_nthreads; - } - } - else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { - if ( set_nthreads > 2 ) { - new_nthreads = __kmp_get_random( parent_team->t.t_threads[master_tid] ); - new_nthreads = ( new_nthreads % set_nthreads ) + 1; - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - if ( new_nthreads < set_nthreads ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d dynamic random reduced reservation to %d threads\n", - master_tid, new_nthreads )); - } - } - } - else { - KMP_ASSERT( 0 ); - } - - // - // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. - // - if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) > __kmp_max_nth ) { - int tl_nthreads = __kmp_max_nth - __kmp_nth + ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ); - if ( tl_nthreads <= 0 ) { - tl_nthreads = 1; - } - - // - // If dyn-var is false, emit a 1-time warning. - // - if ( ! get__dynamic_2( parent_team, master_tid ) - && ( ! __kmp_reserve_warn ) ) { - __kmp_reserve_warn = 1; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, tl_nthreads ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - if ( tl_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to 1 thread\n", - master_tid )); - return 1; - } - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced reservation to %d threads\n", - master_tid, tl_nthreads )); - new_nthreads = tl_nthreads; - } - - // - // Check if the threads array is large enough, or needs expanding. - // - // See comment in __kmp_register_root() about the adjustment if - // __kmp_threads[0] == NULL. - // - capacity = __kmp_threads_capacity; - if ( TCR_PTR(__kmp_threads[0]) == NULL ) { - --capacity; - } - if ( __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) > capacity ) { - // - // Expand the threads array. - // - int slotsRequired = __kmp_nth + new_nthreads - ( root->r.r_active ? 1 : - root->r.r_hot_team->t.t_nproc ) - capacity; - int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); - if ( slotsAdded < slotsRequired ) { - // - // The threads array was not expanded enough. - // - new_nthreads -= ( slotsRequired - slotsAdded ); - KMP_ASSERT( new_nthreads >= 1 ); - - // - // If dyn-var is false, emit a 1-time warning. - // - if ( ! get__dynamic_2( parent_team, master_tid ) - && ( ! __kmp_reserve_warn ) ) { - __kmp_reserve_warn = 1; - if ( __kmp_tp_cached ) { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), - KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), - KMP_HNT( PossibleSystemLimitOnThreads ), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, set_nthreads, new_nthreads ), - KMP_HNT( SystemLimitOnThreads ), - __kmp_msg_null - ); - } - } + else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { + new_nthreads = __kmp_avail_proc - __kmp_nth + + (root->r.r_active ? 
1 : root->r.r_hot_team->t.t_nproc); + if (new_nthreads <= 1) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " + "reservation to 1 thread\n", + master_tid)); + return 1; + } + if (new_nthreads < set_nthreads) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d thread limit reduced " + "reservation to %d threads\n", + master_tid, new_nthreads)); + } else { + new_nthreads = set_nthreads; + } + } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { + if (set_nthreads > 2) { + new_nthreads = __kmp_get_random(parent_team->t.t_threads[master_tid]); + new_nthreads = (new_nthreads % set_nthreads) + 1; + if (new_nthreads == 1) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " + "reservation to 1 thread\n", + master_tid)); + return 1; + } + if (new_nthreads < set_nthreads) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d dynamic random reduced " + "reservation to %d threads\n", + master_tid, new_nthreads)); + } + } + } else { + KMP_ASSERT(0); + } + + // Respect KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT. + if (__kmp_nth + new_nthreads - + (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > + __kmp_max_nth) { + int tl_nthreads = __kmp_max_nth - __kmp_nth + + (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); + if (tl_nthreads <= 0) { + tl_nthreads = 1; + } + + // If dyn-var is false, emit a 1-time warning. + if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { + __kmp_reserve_warn = 1; + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, set_nthreads, tl_nthreads), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } + if (tl_nthreads == 1) { + KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced " + "reservation to 1 thread\n", + master_tid)); + return 1; + } + KC_TRACE(10, ("__kmp_reserve_threads: T#%d KMP_ALL_THREADS reduced " + "reservation to %d threads\n", + master_tid, tl_nthreads)); + new_nthreads = tl_nthreads; + } + + // Check if the threads array is large enough, or needs expanding. + // + // See comment in __kmp_register_root() about the adjustment if + // __kmp_threads[0] == NULL. + capacity = __kmp_threads_capacity; + if (TCR_PTR(__kmp_threads[0]) == NULL) { + --capacity; + } + if (__kmp_nth + new_nthreads - + (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) > + capacity) { + // Expand the threads array. + int slotsRequired = __kmp_nth + new_nthreads - + (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc) - + capacity; + int slotsAdded = __kmp_expand_threads(slotsRequired, slotsRequired); + if (slotsAdded < slotsRequired) { + // The threads array was not expanded enough. + new_nthreads -= (slotsRequired - slotsAdded); + KMP_ASSERT(new_nthreads >= 1); + + // If dyn-var is false, emit a 1-time warning. 
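As an aside on the reservation logic in this hunk: __kmp_reserve_threads caps the requested team size against the process-wide thread limit, never drops below one thread, and warns only once when dynamic adjustment is disabled. A minimal standalone sketch of that clamp-and-warn-once pattern, with illustrative names only (the limit, flag, and message below are stand-ins, not the runtime's __kmp_* globals):

#include <algorithm>
#include <atomic>
#include <cstdio>

static std::atomic<bool> g_warned_once{false}; // stand-in for __kmp_reserve_warn

// Clamp a requested team size against a global thread limit, keep at least
// one thread, and emit a single warning when dynamic adjustment is off.
static int clamp_team_size(int requested, int threads_in_use, int master_slots,
                           int thread_limit, bool dynamic_adjust) {
  int available = thread_limit - threads_in_use + master_slots;
  int granted = std::min(requested, std::max(1, available));
  if (granted < requested && !dynamic_adjust && !g_warned_once.exchange(true))
    std::fprintf(stderr, "cannot form a team of %d threads, using %d\n",
                 requested, granted);
  return granted;
}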
+ if (!get__dynamic_2(parent_team, master_tid) && (!__kmp_reserve_warn)) { + __kmp_reserve_warn = 1; + if (__kmp_tp_cached) { + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), + KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), + KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); + } else { + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, set_nthreads, new_nthreads), + KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); } + } } + } - if ( new_nthreads == 1 ) { - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d serializing team after reclaiming dead roots and rechecking; requested %d threads\n", - __kmp_get_gtid(), set_nthreads ) ); - return 1; - } + if (new_nthreads == 1) { + KC_TRACE(10, + ("__kmp_reserve_threads: T#%d serializing team after reclaiming " + "dead roots and rechecking; requested %d threads\n", + __kmp_get_gtid(), set_nthreads)); + return 1; + } - KC_TRACE( 10, ( "__kmp_reserve_threads: T#%d allocating %d threads; requested %d threads\n", - __kmp_get_gtid(), new_nthreads, set_nthreads )); - return new_nthreads; + KC_TRACE(10, ("__kmp_reserve_threads: T#%d allocating %d threads; requested " + "%d threads\n", + __kmp_get_gtid(), new_nthreads, set_nthreads)); + return new_nthreads; } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* allocate threads from the thread pool and assign them to the new team */ -/* we are assured that there are enough threads available, because we - * checked on that earlier within critical section forkjoin */ - -static void -__kmp_fork_team_threads( kmp_root_t *root, kmp_team_t *team, - kmp_info_t *master_th, int master_gtid ) -{ - int i; - int use_hot_team; - - KA_TRACE( 10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc ) ); - KMP_DEBUG_ASSERT( master_gtid == __kmp_get_gtid() ); - KMP_MB(); - - /* first, let's setup the master thread */ - master_th->th.th_info.ds.ds_tid = 0; - master_th->th.th_team = team; - master_th->th.th_team_nproc = team->t.t_nproc; - master_th->th.th_team_master = master_th; - master_th->th.th_team_serialized = FALSE; - master_th->th.th_dispatch = & team->t.t_dispatch[ 0 ]; - - /* make sure we are not the optimized hot team */ +/* Allocate threads from the thread pool and assign them to the new team. We are + assured that there are enough threads available, because we checked on that + earlier within critical section forkjoin */ +static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, + kmp_info_t *master_th, int master_gtid) { + int i; + int use_hot_team; + + KA_TRACE(10, ("__kmp_fork_team_threads: new_nprocs = %d\n", team->t.t_nproc)); + KMP_DEBUG_ASSERT(master_gtid == __kmp_get_gtid()); + KMP_MB(); + + /* first, let's setup the master thread */ + master_th->th.th_info.ds.ds_tid = 0; + master_th->th.th_team = team; + master_th->th.th_team_nproc = team->t.t_nproc; + master_th->th.th_team_master = master_th; + master_th->th.th_team_serialized = FALSE; + master_th->th.th_dispatch = &team->t.t_dispatch[0]; + +/* make sure we are not the optimized hot team */ #if KMP_NESTED_HOT_TEAMS - use_hot_team = 0; - kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; - if( hot_teams ) { // hot teams array is not allocated if KMP_HOT_TEAMS_MAX_LEVEL=0 - int level = team->t.t_active_level - 1; // index in array of hot teams - if( master_th->th.th_teams_microtask ) { // are we inside the teams? 
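A brief gloss on the nested hot-teams bookkeeping in this function: the master keeps an array of cached teams indexed by nesting level and reuses an entry instead of re-forking worker threads when one already exists for that level. A hedged sketch of that lookup-or-remember step with placeholder types (not the runtime's kmp_hot_team_ptr_t machinery):

#include <vector>

struct Team { int nproc = 0; };   // illustrative stand-in for kmp_team_t

struct HotTeamSlot {
  Team *team = nullptr;           // cached team for this nesting level
  int nth = 0;                    // its thread count
};

// Return the cached team for `level` if one exists; otherwise remember
// `fresh` as the new hot team. Levels beyond the cache stay cold.
static Team *reuse_or_cache(std::vector<HotTeamSlot> &hot, int level,
                            Team *fresh) {
  if (level < 0 || level >= (int)hot.size())
    return fresh;                 // not cached: caller allocates threads normally
  if (hot[level].team)
    return hot[level].team;       // hot team already allocated, ready to use
  hot[level].team = fresh;        // remember the new hot team
  hot[level].nth = fresh->nproc;
  return fresh;
}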
- if( master_th->th.th_teams_size.nteams > 1 ) { - ++level; // level was not increased in teams construct for team_of_masters - } - if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master_th->th.th_teams_level == team->t.t_level ) { - ++level; // level was not increased in teams construct for team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - if( level < __kmp_hot_teams_max_level ) { - if( hot_teams[level].hot_team ) { - // hot team has already been allocated for given level - KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); - use_hot_team = 1; // the team is ready to use - } else { - use_hot_team = 0; // AC: threads are not allocated yet - hot_teams[level].hot_team = team; // remember new hot team - hot_teams[level].hot_team_nth = team->t.t_nproc; - } - } else { - use_hot_team = 0; - } + use_hot_team = 0; + kmp_hot_team_ptr_t *hot_teams = master_th->th.th_hot_teams; + if (hot_teams) { // hot teams array is not allocated if + // KMP_HOT_TEAMS_MAX_LEVEL=0 + int level = team->t.t_active_level - 1; // index in array of hot teams + if (master_th->th.th_teams_microtask) { // are we inside the teams? + if (master_th->th.th_teams_size.nteams > 1) { + ++level; // level was not increased in teams construct for + // team_of_masters + } + if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && + master_th->th.th_teams_level == team->t.t_level) { + ++level; // level was not increased in teams construct for + // team_of_workers before the parallel + } // team->t.t_level will be increased inside parallel + } + if (level < __kmp_hot_teams_max_level) { + if (hot_teams[level].hot_team) { + // hot team has already been allocated for given level + KMP_DEBUG_ASSERT(hot_teams[level].hot_team == team); + use_hot_team = 1; // the team is ready to use + } else { + use_hot_team = 0; // AC: threads are not allocated yet + hot_teams[level].hot_team = team; // remember new hot team + hot_teams[level].hot_team_nth = team->t.t_nproc; + } + } else { + use_hot_team = 0; } + } #else - use_hot_team = team == root->r.r_hot_team; -#endif - if ( !use_hot_team ) { - - /* install the master thread */ - team->t.t_threads[ 0 ] = master_th; - __kmp_initialize_info( master_th, team, 0, master_gtid ); - - /* now, install the worker threads */ - for ( i=1 ; i < team->t.t_nproc ; i++ ) { - - /* fork or reallocate a new thread and install it in team */ - kmp_info_t *thr = __kmp_allocate_thread( root, team, i ); - team->t.t_threads[ i ] = thr; - KMP_DEBUG_ASSERT( thr ); - KMP_DEBUG_ASSERT( thr->th.th_team == team ); - /* align team and thread arrived states */ - KA_TRACE( 20, ("__kmp_fork_team_threads: T#%d(%d:%d) init arrived T#%d(%d:%d) join =%llu, plain=%llu\n", - __kmp_gtid_from_tid( 0, team ), team->t.t_id, 0, - __kmp_gtid_from_tid( i, team ), team->t.t_id, i, - team->t.t_bar[ bs_forkjoin_barrier ].b_arrived, - team->t.t_bar[ bs_plain_barrier ].b_arrived ) ); + use_hot_team = team == root->r.r_hot_team; +#endif + if (!use_hot_team) { + + /* install the master thread */ + team->t.t_threads[0] = master_th; + __kmp_initialize_info(master_th, team, 0, master_gtid); + + /* now, install the worker threads */ + for (i = 1; i < team->t.t_nproc; i++) { + + /* fork or reallocate a new thread and install it in team */ + kmp_info_t *thr = __kmp_allocate_thread(root, team, i); + team->t.t_threads[i] = thr; + KMP_DEBUG_ASSERT(thr); + KMP_DEBUG_ASSERT(thr->th.th_team == team); + /* align team and thread arrived states */ + KA_TRACE(20, ("__kmp_fork_team_threads: T#%d(%d:%d) init 
arrived " + "T#%d(%d:%d) join =%llu, plain=%llu\n", + __kmp_gtid_from_tid(0, team), team->t.t_id, 0, + __kmp_gtid_from_tid(i, team), team->t.t_id, i, + team->t.t_bar[bs_forkjoin_barrier].b_arrived, + team->t.t_bar[bs_plain_barrier].b_arrived)); #if OMP_40_ENABLED - thr->th.th_teams_microtask = master_th->th.th_teams_microtask; - thr->th.th_teams_level = master_th->th.th_teams_level; - thr->th.th_teams_size = master_th->th.th_teams_size; -#endif - { // Initialize threads' barrier data. - int b; - kmp_balign_t * balign = team->t.t_threads[ i ]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + thr->th.th_teams_microtask = master_th->th.th_teams_microtask; + thr->th.th_teams_level = master_th->th.th_teams_level; + thr->th.th_teams_size = master_th->th.th_teams_size; +#endif + { // Initialize threads' barrier data. + int b; + kmp_balign_t *balign = team->t.t_threads[i]->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); #if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; #endif - }; // for b - } - } + }; // for b + } + } #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); + __kmp_partition_places(team); #endif + } - } - - KMP_MB(); + KMP_MB(); } #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -// // Propagate any changes to the floating point control registers out to the team -// We try to avoid unnecessary writes to the relevant cache line in the team structure, -// so we don't make changes unless they are needed. -// -inline static void -propagateFPControl(kmp_team_t * team) -{ - if ( __kmp_inherit_fp_control ) { - kmp_int16 x87_fpu_control_word; - kmp_uint32 mxcsr; - - // Get master values of FPU control flags (both X87 and vector) - __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); - __kmp_store_mxcsr( &mxcsr ); - mxcsr &= KMP_X86_MXCSR_MASK; - - // There is no point looking at t_fp_control_saved here. - // If it is TRUE, we still have to update the values if they are different from those we now have. - // If it is FALSE we didn't save anything yet, but our objective is the same. We have to ensure - // that the values in the team are the same as those we have. - // So, this code achieves what we need whether or not t_fp_control_saved is true. - // By checking whether the value needs updating we avoid unnecessary writes that would put the - // cache-line into a written state, causing all threads in the team to have to read it again. - KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); - KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); - // Although we don't use this value, other code in the runtime wants to know whether it should restore them. - // So we must ensure it is correct. - KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); - } - else { - // Similarly here. Don't write to this cache-line in the team structure unless we have to. - KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); - } +// We try to avoid unnecessary writes to the relevant cache line in the team +// structure, so we don't make changes unless they are needed. 
+inline static void propagateFPControl(kmp_team_t *team) { + if (__kmp_inherit_fp_control) { + kmp_int16 x87_fpu_control_word; + kmp_uint32 mxcsr; + + // Get master values of FPU control flags (both X87 and vector) + __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); + __kmp_store_mxcsr(&mxcsr); + mxcsr &= KMP_X86_MXCSR_MASK; + +// There is no point looking at t_fp_control_saved here. +// If it is TRUE, we still have to update the values if they are different from +// those we now have. +// If it is FALSE we didn't save anything yet, but our objective is the same. We +// have to ensure that the values in the team are the same as those we have. +// So, this code achieves what we need whether or not t_fp_control_saved is +// true. By checking whether the value needs updating we avoid unnecessary +// writes that would put the cache-line into a written state, causing all +// threads in the team to have to read it again. + KMP_CHECK_UPDATE(team->t.t_x87_fpu_control_word, x87_fpu_control_word); + KMP_CHECK_UPDATE(team->t.t_mxcsr, mxcsr); + // Although we don't use this value, other code in the runtime wants to know + // whether it should restore them. So we must ensure it is correct. + KMP_CHECK_UPDATE(team->t.t_fp_control_saved, TRUE); + } else { + // Similarly here. Don't write to this cache-line in the team structure + // unless we have to. + KMP_CHECK_UPDATE(team->t.t_fp_control_saved, FALSE); + } } -// Do the opposite, setting the hardware registers to the updated values from the team. -inline static void -updateHWFPControl(kmp_team_t * team) -{ - if ( __kmp_inherit_fp_control && team->t.t_fp_control_saved ) { - // - // Only reset the fp control regs if they have been changed in the team. - // the parallel region that we are exiting. - // - kmp_int16 x87_fpu_control_word; - kmp_uint32 mxcsr; - __kmp_store_x87_fpu_control_word( &x87_fpu_control_word ); - __kmp_store_mxcsr( &mxcsr ); - mxcsr &= KMP_X86_MXCSR_MASK; - - if ( team->t.t_x87_fpu_control_word != x87_fpu_control_word ) { - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &team->t.t_x87_fpu_control_word ); - } +// Do the opposite, setting the hardware registers to the updated values from +// the team. +inline static void updateHWFPControl(kmp_team_t *team) { + if (__kmp_inherit_fp_control && team->t.t_fp_control_saved) { + // Only reset the fp control regs if they have been changed in the team. + // the parallel region that we are exiting. + kmp_int16 x87_fpu_control_word; + kmp_uint32 mxcsr; + __kmp_store_x87_fpu_control_word(&x87_fpu_control_word); + __kmp_store_mxcsr(&mxcsr); + mxcsr &= KMP_X86_MXCSR_MASK; - if ( team->t.t_mxcsr != mxcsr ) { - __kmp_load_mxcsr( &team->t.t_mxcsr ); - } + if (team->t.t_x87_fpu_control_word != x87_fpu_control_word) { + __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word(&team->t.t_x87_fpu_control_word); + } + + if (team->t.t_mxcsr != mxcsr) { + __kmp_load_mxcsr(&team->t.t_mxcsr); } + } } #else -# define propagateFPControl(x) ((void)0) -# define updateHWFPControl(x) ((void)0) +#define propagateFPControl(x) ((void)0) +#define updateHWFPControl(x) ((void)0) #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -static void -__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ); // forward declaration - -/* - * Run a parallel region that has been serialized, so runs only in a team of the single master thread. 
- */ -void -__kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) -{ - kmp_info_t *this_thr; - kmp_team_t *serial_team; +static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, + int realloc); // forward declaration + +/* Run a parallel region that has been serialized, so runs only in a team of the + single master thread. */ +void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { + kmp_info_t *this_thr; + kmp_team_t *serial_team; + + KC_TRACE(10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid)); + + /* Skip all this code for autopar serialized loops since it results in + unacceptable overhead */ + if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR)) + return; + + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); + + this_thr = __kmp_threads[global_tid]; + serial_team = this_thr->th.th_serial_team; + + /* utilize the serialized team held by this thread */ + KMP_DEBUG_ASSERT(serial_team); + KMP_MB(); + + if (__kmp_tasking_mode != tskm_immediate_exec) { + KMP_DEBUG_ASSERT( + this_thr->th.th_task_team == + this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); + KMP_DEBUG_ASSERT(serial_team->t.t_task_team[this_thr->th.th_task_state] == + NULL); + KA_TRACE(20, ("__kmpc_serialized_parallel: T#%d pushing task_team %p / " + "team %p, new task_team = NULL\n", + global_tid, this_thr->th.th_task_team, this_thr->th.th_team)); + this_thr->th.th_task_team = NULL; + } - KC_TRACE( 10, ("__kmpc_serialized_parallel: called by T#%d\n", global_tid ) ); +#if OMP_40_ENABLED + kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; + if (this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { + proc_bind = proc_bind_false; + } else if (proc_bind == proc_bind_default) { + // No proc_bind clause was specified, so use the current value + // of proc-bind-var for this parallel region. + proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; + } + // Reset for next parallel region + this_thr->th.th_set_proc_bind = proc_bind_default; +#endif /* OMP_40_ENABLED */ - /* Skip all this code for autopar serialized loops since it results in - unacceptable overhead */ - if( loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR ) ) - return; + if (this_thr->th.th_team != serial_team) { + // Nested level will be an index in the nested nthreads array + int level = this_thr->th.th_team->t.t_level; - if( ! 
TCR_4( __kmp_init_parallel ) ) - __kmp_parallel_initialize(); + if (serial_team->t.t_serialized) { + /* this serial team was already used + TODO increase performance by making this locks more specific */ + kmp_team_t *new_team; - this_thr = __kmp_threads[ global_tid ]; - serial_team = this_thr->th.th_serial_team; + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - /* utilize the serialized team held by this thread */ - KMP_DEBUG_ASSERT( serial_team ); - KMP_MB(); +#if OMPT_SUPPORT + ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); +#endif - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT(this_thr->th.th_task_team == this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state]); - KMP_DEBUG_ASSERT( serial_team->t.t_task_team[this_thr->th.th_task_state] == NULL ); - KA_TRACE( 20, ( "__kmpc_serialized_parallel: T#%d pushing task_team %p / team %p, new task_team = NULL\n", - global_tid, this_thr->th.th_task_team, this_thr->th.th_team ) ); - this_thr->th.th_task_team = NULL; + new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, +#if OMPT_SUPPORT + ompt_parallel_id, +#endif +#if OMP_40_ENABLED + proc_bind, +#endif + &this_thr->th.th_current_task->td_icvs, + 0 USE_NESTED_HOT_ARG(NULL)); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); + KMP_ASSERT(new_team); + + /* setup new serialized team and install it */ + new_team->t.t_threads[0] = this_thr; + new_team->t.t_parent = this_thr->th.th_team; + serial_team = new_team; + this_thr->th.th_serial_team = serial_team; + + KF_TRACE( + 10, + ("__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", + global_tid, serial_team)); + + /* TODO the above breaks the requirement that if we run out of resources, + then we can still guarantee that serialized teams are ok, since we may + need to allocate a new one */ + } else { + KF_TRACE( + 10, + ("__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", + global_tid, serial_team)); + } + + /* we have to initialize this serial team */ + KMP_DEBUG_ASSERT(serial_team->t.t_threads); + KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); + KMP_DEBUG_ASSERT(this_thr->th.th_team != serial_team); + serial_team->t.t_ident = loc; + serial_team->t.t_serialized = 1; + serial_team->t.t_nproc = 1; + serial_team->t.t_parent = this_thr->th.th_team; + serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; + this_thr->th.th_team = serial_team; + serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; + + KF_TRACE(10, ("__kmpc_serialized_parallel: T#d curtask=%p\n", global_tid, + this_thr->th.th_current_task)); + KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 1); + this_thr->th.th_current_task->td_flags.executing = 0; + + __kmp_push_current_task_to_thread(this_thr, serial_team, 0); + + /* TODO: GEH: do ICVs work for nested serialized teams? Don't we need an + implicit task for each serialized task represented by + team->t.t_serialized? 
*/ + copy_icvs(&this_thr->th.th_current_task->td_icvs, + &this_thr->th.th_current_task->td_parent->td_icvs); + + // Thread value exists in the nested nthreads array for the next nested + // level + if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { + this_thr->th.th_current_task->td_icvs.nproc = + __kmp_nested_nth.nth[level + 1]; } #if OMP_40_ENABLED - kmp_proc_bind_t proc_bind = this_thr->th.th_set_proc_bind; - if ( this_thr->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { - proc_bind = proc_bind_false; - } - else if ( proc_bind == proc_bind_default ) { - // - // No proc_bind clause was specified, so use the current value - // of proc-bind-var for this parallel region. - // - proc_bind = this_thr->th.th_current_task->td_icvs.proc_bind; + if (__kmp_nested_proc_bind.used && + (level + 1 < __kmp_nested_proc_bind.used)) { + this_thr->th.th_current_task->td_icvs.proc_bind = + __kmp_nested_proc_bind.bind_types[level + 1]; } - // - // Reset for next parallel region - // - this_thr->th.th_set_proc_bind = proc_bind_default; #endif /* OMP_40_ENABLED */ - if( this_thr->th.th_team != serial_team ) { - // Nested level will be an index in the nested nthreads array - int level = this_thr->th.th_team->t.t_level; +#if USE_DEBUGGER + serial_team->t.t_pkfn = (microtask_t)(~0); // For the debugger. +#endif + this_thr->th.th_info.ds.ds_tid = 0; - if( serial_team->t.t_serialized ) { - /* this serial team was already used - * TODO increase performance by making this locks more specific */ - kmp_team_t *new_team; + /* set thread cache values */ + this_thr->th.th_team_nproc = 1; + this_thr->th.th_team_master = this_thr; + this_thr->th.th_team_serialized = 1; - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; + serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; -#if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); -#endif - - new_team = __kmp_allocate_team(this_thr->th.th_root, 1, 1, -#if OMPT_SUPPORT - ompt_parallel_id, -#endif -#if OMP_40_ENABLED - proc_bind, -#endif - & this_thr->th.th_current_task->td_icvs, - 0 USE_NESTED_HOT_ARG(NULL) ); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - KMP_ASSERT( new_team ); - - /* setup new serialized team and install it */ - new_team->t.t_threads[0] = this_thr; - new_team->t.t_parent = this_thr->th.th_team; - serial_team = new_team; - this_thr->th.th_serial_team = serial_team; - - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d allocated new serial team %p\n", - global_tid, serial_team ) ); - - - /* TODO the above breaks the requirement that if we run out of - * resources, then we can still guarantee that serialized teams - * are ok, since we may need to allocate a new one */ - } else { - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d reusing cached serial team %p\n", - global_tid, serial_team ) ); - } - - /* we have to initialize this serial team */ - KMP_DEBUG_ASSERT( serial_team->t.t_threads ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); - KMP_DEBUG_ASSERT( this_thr->th.th_team != serial_team ); - serial_team->t.t_ident = loc; - serial_team->t.t_serialized = 1; - serial_team->t.t_nproc = 1; - serial_team->t.t_parent = this_thr->th.th_team; - serial_team->t.t_sched = this_thr->th.th_team->t.t_sched; - this_thr->th.th_team = serial_team; - serial_team->t.t_master_tid = this_thr->th.th_info.ds.ds_tid; - - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#d curtask=%p\n", - 
global_tid, this_thr->th.th_current_task ) ); - KMP_ASSERT( this_thr->th.th_current_task->td_flags.executing == 1 ); - this_thr->th.th_current_task->td_flags.executing = 0; - - __kmp_push_current_task_to_thread( this_thr, serial_team, 0 ); - - /* TODO: GEH: do the ICVs work for nested serialized teams? Don't we need an implicit task for - each serialized task represented by team->t.t_serialized? */ - copy_icvs( - & this_thr->th.th_current_task->td_icvs, - & this_thr->th.th_current_task->td_parent->td_icvs ); - - // Thread value exists in the nested nthreads array for the next nested level - if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { - this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; - } - -#if OMP_40_ENABLED - if ( __kmp_nested_proc_bind.used && ( level + 1 < __kmp_nested_proc_bind.used ) ) { - this_thr->th.th_current_task->td_icvs.proc_bind - = __kmp_nested_proc_bind.bind_types[ level + 1 ]; - } -#endif /* OMP_40_ENABLED */ - -#if USE_DEBUGGER - serial_team->t.t_pkfn = (microtask_t)( ~0 ); // For the debugger. -#endif - this_thr->th.th_info.ds.ds_tid = 0; - - /* set thread cache values */ - this_thr->th.th_team_nproc = 1; - this_thr->th.th_team_master = this_thr; - this_thr->th.th_team_serialized = 1; - - serial_team->t.t_level = serial_team->t.t_parent->t.t_level + 1; - serial_team->t.t_active_level = serial_team->t.t_parent->t.t_active_level; - - propagateFPControl (serial_team); + propagateFPControl(serial_team); - /* check if we need to allocate dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - if ( !serial_team->t.t_dispatch->th_disp_buffer ) { - serial_team->t.t_dispatch->th_disp_buffer = (dispatch_private_info_t *) - __kmp_allocate( sizeof( dispatch_private_info_t ) ); - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; + /* check if we need to allocate dispatch buffers stack */ + KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); + if (!serial_team->t.t_dispatch->th_disp_buffer) { + serial_team->t.t_dispatch->th_disp_buffer = + (dispatch_private_info_t *)__kmp_allocate( + sizeof(dispatch_private_info_t)); + } + this_thr->th.th_dispatch = serial_team->t.t_dispatch; #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); - __ompt_team_assign_id(serial_team, ompt_parallel_id); + ompt_parallel_id_t ompt_parallel_id = __ompt_parallel_id_new(global_tid); + __ompt_team_assign_id(serial_team, ompt_parallel_id); #endif - KMP_MB(); - - } else { - /* this serialized team is already being used, - * that's fine, just add another nested level */ - KMP_DEBUG_ASSERT( this_thr->th.th_team == serial_team ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads ); - KMP_DEBUG_ASSERT( serial_team->t.t_threads[0] == this_thr ); - ++ serial_team->t.t_serialized; - this_thr->th.th_team_serialized = serial_team->t.t_serialized; - - // Nested level will be an index in the nested nthreads array - int level = this_thr->th.th_team->t.t_level; - // Thread value exists in the nested nthreads array for the next nested level - if ( __kmp_nested_nth.used && ( level + 1 < __kmp_nested_nth.used ) ) { - this_thr->th.th_current_task->td_icvs.nproc = __kmp_nested_nth.nth[ level + 1 ]; - } - serial_team->t.t_level++; - KF_TRACE( 10, ( "__kmpc_serialized_parallel: T#%d increasing nesting level of serial team %p to %d\n", - global_tid, serial_team, serial_team->t.t_level ) ); + KMP_MB(); - /* allocate/push dispatch buffers stack */ - KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); - { - dispatch_private_info_t 
* disp_buffer = (dispatch_private_info_t *) - __kmp_allocate( sizeof( dispatch_private_info_t ) ); - disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; - serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; - } - this_thr->th.th_dispatch = serial_team->t.t_dispatch; + } else { + /* this serialized team is already being used, + * that's fine, just add another nested level */ + KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team); + KMP_DEBUG_ASSERT(serial_team->t.t_threads); + KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr); + ++serial_team->t.t_serialized; + this_thr->th.th_team_serialized = serial_team->t.t_serialized; - KMP_MB(); + // Nested level will be an index in the nested nthreads array + int level = this_thr->th.th_team->t.t_level; + // Thread value exists in the nested nthreads array for the next nested + // level + if (__kmp_nested_nth.used && (level + 1 < __kmp_nested_nth.used)) { + this_thr->th.th_current_task->td_icvs.nproc = + __kmp_nested_nth.nth[level + 1]; + } + serial_team->t.t_level++; + KF_TRACE(10, ("__kmpc_serialized_parallel: T#%d increasing nesting level " + "of serial team %p to %d\n", + global_tid, serial_team, serial_team->t.t_level)); + + /* allocate/push dispatch buffers stack */ + KMP_DEBUG_ASSERT(serial_team->t.t_dispatch); + { + dispatch_private_info_t *disp_buffer = + (dispatch_private_info_t *)__kmp_allocate( + sizeof(dispatch_private_info_t)); + disp_buffer->next = serial_team->t.t_dispatch->th_disp_buffer; + serial_team->t.t_dispatch->th_disp_buffer = disp_buffer; } + this_thr->th.th_dispatch = serial_team->t.t_dispatch; + + KMP_MB(); + } #if OMP_40_ENABLED - KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); + KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); #endif - if ( __kmp_env_consistency_check ) - __kmp_push_parallel( global_tid, NULL ); - + if (__kmp_env_consistency_check) + __kmp_push_parallel(global_tid, NULL); } /* most of the work for a fork */ /* return true if we really went parallel, false if serialized */ -int -__kmp_fork_call( - ident_t * loc, - int gtid, - enum fork_context_e call_context, // Intel, GNU, ... - kmp_int32 argc, +int __kmp_fork_call(ident_t *loc, int gtid, + enum fork_context_e call_context, // Intel, GNU, ... 
+ kmp_int32 argc, #if OMPT_SUPPORT - void *unwrapped_task, + void *unwrapped_task, #endif - microtask_t microtask, - launch_t invoker, + microtask_t microtask, launch_t invoker, /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - va_list * ap + va_list *ap #else - va_list ap -#endif - ) -{ - void **argv; - int i; - int master_tid; - int master_this_cons; - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int nthreads; - int master_active; - int master_set_numthreads; - int level; + va_list ap +#endif + ) { + void **argv; + int i; + int master_tid; + int master_this_cons; + kmp_team_t *team; + kmp_team_t *parent_team; + kmp_info_t *master_th; + kmp_root_t *root; + int nthreads; + int master_active; + int master_set_numthreads; + int level; #if OMP_40_ENABLED - int active_level; - int teams_level; + int active_level; + int teams_level; #endif #if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t **p_hot_teams; + kmp_hot_team_ptr_t **p_hot_teams; #endif - { // KMP_TIME_BLOCK + { // KMP_TIME_BLOCK KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_fork_call); KMP_COUNT_VALUE(OMP_PARALLEL_args, argc); - KA_TRACE( 20, ("__kmp_fork_call: enter T#%d\n", gtid )); - if ( __kmp_stkpadding > 0 && __kmp_root[gtid] != NULL ) { - /* Some systems prefer the stack for the root thread(s) to start with */ - /* some gap from the parent stack to prevent false sharing. */ - void *dummy = KMP_ALLOCA(__kmp_stkpadding); - /* These 2 lines below are so this does not get optimized out */ - if ( __kmp_stkpadding > KMP_MAX_STKPADDING ) - __kmp_stkpadding += (short)((kmp_int64)dummy); + KA_TRACE(20, ("__kmp_fork_call: enter T#%d\n", gtid)); + if (__kmp_stkpadding > 0 && __kmp_root[gtid] != NULL) { + /* Some systems prefer the stack for the root thread(s) to start with */ + /* some gap from the parent stack to prevent false sharing. */ + void *dummy = KMP_ALLOCA(__kmp_stkpadding); + /* These 2 lines below are so this does not get optimized out */ + if (__kmp_stkpadding > KMP_MAX_STKPADDING) + __kmp_stkpadding += (short)((kmp_int64)dummy); } /* initialize if needed */ - KMP_DEBUG_ASSERT( __kmp_init_serial ); // AC: potentially unsafe, not in sync with shutdown - if( ! 
TCR_4(__kmp_init_parallel) ) - __kmp_parallel_initialize(); + KMP_DEBUG_ASSERT( + __kmp_init_serial); // AC: potentially unsafe, not in sync with shutdown + if (!TCR_4(__kmp_init_parallel)) + __kmp_parallel_initialize(); /* setup current data */ - master_th = __kmp_threads[ gtid ]; // AC: potentially unsafe, not in sync with shutdown - parent_team = master_th->th.th_team; - master_tid = master_th->th.th_info.ds.ds_tid; + master_th = __kmp_threads[gtid]; // AC: potentially unsafe, not in sync with + // shutdown + parent_team = master_th->th.th_team; + master_tid = master_th->th.th_info.ds.ds_tid; master_this_cons = master_th->th.th_local.this_construct; - root = master_th->th.th_root; + root = master_th->th.th_root; master_active = root->r.r_active; master_set_numthreads = master_th->th.th_set_nproc; @@ -1457,511 +1413,548 @@ __kmp_fork_call( ompt_parallel_id_t my_parallel_id; if (ompt_enabled) { - ompt_parallel_id = __ompt_parallel_id_new(gtid); - ompt_task_id = __ompt_get_task_id_internal(0); - ompt_frame = __ompt_get_task_frame_internal(0); + ompt_parallel_id = __ompt_parallel_id_new(gtid); + ompt_task_id = __ompt_get_task_id_internal(0); + ompt_frame = __ompt_get_task_frame_internal(0); } #endif // Nested level will be an index in the nested nthreads array - level = parent_team->t.t_level; - active_level = parent_team->t.t_active_level; // is used to launch non-serial teams even if nested is not allowed + level = parent_team->t.t_level; + // used to launch non-serial teams even if nested is not allowed + active_level = parent_team->t.t_active_level; #if OMP_40_ENABLED - teams_level = master_th->th.th_teams_level; // needed to check nesting inside the teams + teams_level = + master_th->th + .th_teams_level; // needed to check nesting inside the teams #endif #if KMP_NESTED_HOT_TEAMS - p_hot_teams = &master_th->th.th_hot_teams; - if( *p_hot_teams == NULL && __kmp_hot_teams_max_level > 0 ) { - *p_hot_teams = (kmp_hot_team_ptr_t*)__kmp_allocate( - sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); - (*p_hot_teams)[0].hot_team = root->r.r_hot_team; - (*p_hot_teams)[0].hot_team_nth = 1; // it is either actual or not needed (when active_level > 0) + p_hot_teams = &master_th->th.th_hot_teams; + if (*p_hot_teams == NULL && __kmp_hot_teams_max_level > 0) { + *p_hot_teams = (kmp_hot_team_ptr_t *)__kmp_allocate( + sizeof(kmp_hot_team_ptr_t) * __kmp_hot_teams_max_level); + (*p_hot_teams)[0].hot_team = root->r.r_hot_team; + (*p_hot_teams)[0].hot_team_nth = + 1; // it is either actual or not needed (when active_level > 0) } #endif #if OMPT_SUPPORT if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_parallel_begin)) { - int team_size = master_set_numthreads; + int team_size = master_set_numthreads; - ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( - ompt_task_id, ompt_frame, ompt_parallel_id, - team_size, unwrapped_task, OMPT_INVOKER(call_context)); + ompt_callbacks.ompt_callback(ompt_event_parallel_begin)( + ompt_task_id, ompt_frame, ompt_parallel_id, team_size, unwrapped_task, + OMPT_INVOKER(call_context)); } #endif master_th->th.th_ident = loc; #if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask && - ap && microtask != (microtask_t)__kmp_teams_master && level == teams_level ) { - // AC: This is start of parallel that is nested inside teams construct. - // The team is actual (hot), all workers are ready at the fork barrier. - // No lock needed to initialize the team a bit, then free workers. 
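The lines that follow copy the caller's variadic arguments into the team's argv array before the microtask is invoked. A hedged standalone sketch of that marshalling step; the names are illustrative, and the real code additionally switches between va_list and va_list * per platform, as the surrounding #if blocks show:

#include <cstdarg>

// Pull `argc` shared-variable pointers out of a va_list and store them in the
// team's argument vector, mirroring the reverse-counting loop in this hunk.
static void fill_team_argv(void **argv, int argc, va_list ap) {
  for (int i = argc - 1; i >= 0; --i)
    *argv++ = va_arg(ap, void *);
}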
- parent_team->t.t_ident = loc; - __kmp_alloc_argv_entries( argc, parent_team, TRUE ); - parent_team->t.t_argc = argc; - argv = (void**)parent_team->t.t_argv; - for( i=argc-1; i >= 0; --i ) + if (master_th->th.th_teams_microtask && ap && + microtask != (microtask_t)__kmp_teams_master && level == teams_level) { + // AC: This is start of parallel that is nested inside teams construct. + // The team is actual (hot), all workers are ready at the fork barrier. + // No lock needed to initialize the team a bit, then free workers. + parent_team->t.t_ident = loc; + __kmp_alloc_argv_entries(argc, parent_team, TRUE); + parent_team->t.t_argc = argc; + argv = (void **)parent_team->t.t_argv; + for (i = argc - 1; i >= 0; --i) /* TODO: revert workaround for Intel(R) 64 tracker #96 */ #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); + *argv++ = va_arg(*ap, void *); #else - *argv++ = va_arg( ap, void * ); + *argv++ = va_arg(ap, void *); #endif - /* Increment our nested depth levels, but not increase the serialization */ - if ( parent_team == master_th->th.th_serial_team ) { - // AC: we are in serialized parallel - __kmpc_serialized_parallel(loc, gtid); - KMP_DEBUG_ASSERT( parent_team->t.t_serialized > 1 ); - parent_team->t.t_serialized--; // AC: need this in order enquiry functions - // work correctly, will restore at join time - + // Increment our nested depth levels, but not increase the serialization + if (parent_team == master_th->th.th_serial_team) { + // AC: we are in serialized parallel + __kmpc_serialized_parallel(loc, gtid); + KMP_DEBUG_ASSERT(parent_team->t.t_serialized > 1); + // AC: need this in order enquiry functions work + // correctly, will restore at join time + parent_team->t.t_serialized--; #if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; + void *dummy; + void **exit_runtime_p; - ompt_lw_taskteam_t lw_taskteam; + ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, unwrapped_task, + ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = + &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + __ompt_lw_taskteam_link(&lw_taskteam, master_th); #if OMPT_TRACE - /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = parent_team->t.ompt_team_info.parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } + /* OMPT implicit task begin */ + my_task_id = lw_taskteam.ompt_task_info.task_id; + my_parallel_id = parent_team->t.ompt_team_info.parallel_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + my_parallel_id, my_task_id); + } +#endif + + /* OMPT state */ + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } #endif - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - 
KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + __kmp_invoke_microtask(microtask, gtid, 0, argc, parent_team->t.t_argv #if OMPT_SUPPORT - , exit_runtime_p + , + exit_runtime_p #endif - ); - } + ); + } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { + *exit_runtime_p = NULL; + if (ompt_enabled) { #if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); - } + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + ompt_parallel_id, ompt_task_id); + } - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; #endif - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - return TRUE; + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; } +#endif + return TRUE; + } - parent_team->t.t_pkfn = microtask; + parent_team->t.t_pkfn = microtask; #if OMPT_SUPPORT - parent_team->t.ompt_team_info.microtask = unwrapped_task; -#endif - parent_team->t.t_invoke = invoker; - KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); - parent_team->t.t_active_level ++; - parent_team->t.t_level ++; - - /* Change number of threads in the team if requested */ - if ( master_set_numthreads ) { // The parallel has num_threads clause - if ( master_set_numthreads < master_th->th.th_teams_size.nth ) { - // AC: only can reduce the number of threads dynamically, cannot increase - kmp_info_t **other_threads = parent_team->t.t_threads; - parent_team->t.t_nproc = master_set_numthreads; - for ( i = 0; i < master_set_numthreads; ++i ) { - other_threads[i]->th.th_team_nproc = master_set_numthreads; - } - // Keep extra threads hot in the team for possible next parallels - } - master_th->th.th_set_nproc = 0; - } + parent_team->t.ompt_team_info.microtask = unwrapped_task; +#endif + parent_team->t.t_invoke = invoker; + KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); + parent_team->t.t_active_level++; + parent_team->t.t_level++; + + /* Change number of threads in the team if requested */ + if (master_set_numthreads) { // The parallel has num_threads clause + if (master_set_numthreads < master_th->th.th_teams_size.nth) { + // AC: only can reduce number of threads dynamically, can't increase + kmp_info_t **other_threads = parent_team->t.t_threads; + parent_team->t.t_nproc = master_set_numthreads; + for (i = 0; i < master_set_numthreads; ++i) { + other_threads[i]->th.th_team_nproc = master_set_numthreads; + } + // Keep extra threads hot in the team for possible next parallels + } + 
master_th->th.th_set_nproc = 0; + } #if USE_DEBUGGER - if ( __kmp_debugging ) { // Let debugger override number of threads. - int nth = __kmp_omp_num_threads( loc ); - if ( nth > 0 ) { // 0 means debugger does not want to change number of threads. - master_set_numthreads = nth; + if (__kmp_debugging) { // Let debugger override number of threads. + int nth = __kmp_omp_num_threads(loc); + if (nth > + 0) { // 0 means debugger does not want to change number of threads. + master_set_numthreads = nth; }; // if - }; // if + }; // if #endif - KF_TRACE( 10, ( "__kmp_fork_call: before internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); - __kmp_internal_fork( loc, gtid, parent_team ); - KF_TRACE( 10, ( "__kmp_fork_call: after internal fork: root=%p, team=%p, master_th=%p, gtid=%d\n", root, parent_team, master_th, gtid ) ); + KF_TRACE(10, ("__kmp_fork_call: before internal fork: root=%p, team=%p, " + "master_th=%p, gtid=%d\n", + root, parent_team, master_th, gtid)); + __kmp_internal_fork(loc, gtid, parent_team); + KF_TRACE(10, ("__kmp_fork_call: after internal fork: root=%p, team=%p, " + "master_th=%p, gtid=%d\n", + root, parent_team, master_th, gtid)); - /* Invoke microtask for MASTER thread */ - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", - gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); + /* Invoke microtask for MASTER thread */ + KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, + parent_team->t.t_id, parent_team->t.t_pkfn)); - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - if (! parent_team->t.t_invoke( gtid )) { - KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); - } + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + if (!parent_team->t.t_invoke(gtid)) { + KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); } - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", - gtid, parent_team->t.t_id, parent_team->t.t_pkfn ) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ + } + KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, + parent_team->t.t_id, parent_team->t.t_pkfn)); + KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); + KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); - return TRUE; + return TRUE; } // Parallel closely nested in teams construct #endif /* OMP_40_ENABLED */ #if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); + if (__kmp_tasking_mode != tskm_immediate_exec) { + KMP_DEBUG_ASSERT(master_th->th.th_task_team == + parent_team->t.t_task_team[master_th->th.th_task_state]); } #endif - if ( parent_team->t.t_active_level >= master_th->th.th_current_task->td_icvs.max_active_levels ) { - nthreads = 1; + if (parent_team->t.t_active_level >= + master_th->th.th_current_task->td_icvs.max_active_levels) { + nthreads = 1; } else { #if OMP_40_ENABLED - int enter_teams = ((ap==NULL && active_level==0)||(ap && teams_level>0 && teams_level==level)); -#endif - nthreads = master_set_numthreads ? - master_set_numthreads : get__nproc_2( parent_team, master_tid ); // TODO: get nproc directly from current task - - // Check if we need to take forkjoin lock? (no need for serialized parallel out of teams construct). 
- // This code moved here from __kmp_reserve_threads() to speedup nested serialized parallels. - if (nthreads > 1) { - if ( ( !get__nested(master_th) && (root->r.r_in_parallel + int enter_teams = ((ap == NULL && active_level == 0) || + (ap && teams_level > 0 && teams_level == level)); +#endif + nthreads = + master_set_numthreads + ? master_set_numthreads + : get__nproc_2( + parent_team, + master_tid); // TODO: get nproc directly from current task + + // Check if we need to take forkjoin lock? (no need for serialized + // parallel out of teams construct). This code moved here from + // __kmp_reserve_threads() to speedup nested serialized parallels. + if (nthreads > 1) { + if ((!get__nested(master_th) && (root->r.r_in_parallel #if OMP_40_ENABLED - && !enter_teams + && !enter_teams #endif /* OMP_40_ENABLED */ - ) ) || ( __kmp_library == library_serial ) ) { - KC_TRACE( 10, ( "__kmp_fork_call: T#%d serializing team; requested %d threads\n", - gtid, nthreads )); - nthreads = 1; - } - } - if ( nthreads > 1 ) { - /* determine how many new threads we can use */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - nthreads = __kmp_reserve_threads(root, parent_team, master_tid, nthreads + )) || + (__kmp_library == library_serial)) { + KC_TRACE( + 10, + ("__kmp_fork_call: T#%d serializing team; requested %d threads\n", + gtid, nthreads)); + nthreads = 1; + } + } + if (nthreads > 1) { + /* determine how many new threads we can use */ + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); + + nthreads = __kmp_reserve_threads( + root, parent_team, master_tid, nthreads #if OMP_40_ENABLED -/* AC: If we execute teams from parallel region (on host), then teams should be created - but each can only have 1 thread if nesting is disabled. If teams called from serial region, - then teams and their threads should be created regardless of the nesting setting. */ - , enter_teams + /* AC: If we execute teams from parallel region (on host), then + teams should be created but each can only have 1 thread if + nesting is disabled. If teams called from serial region, then + teams and their threads should be created regardless of the + nesting setting. */ + , + enter_teams #endif /* OMP_40_ENABLED */ - ); - if ( nthreads == 1 ) { - // Free lock for single thread execution here; - // for multi-thread execution it will be freed later - // after team of threads created and initialized - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - } + ); + if (nthreads == 1) { + // Free lock for single thread execution here; for multi-thread + // execution it will be freed later after team of threads created + // and initialized + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); } + } } - KMP_DEBUG_ASSERT( nthreads > 0 ); + KMP_DEBUG_ASSERT(nthreads > 0); - /* If we temporarily changed the set number of threads then restore it now */ + // If we temporarily changed the set number of threads then restore it now master_th->th.th_set_nproc = 0; /* create a serialized parallel region? */ - if ( nthreads == 1 ) { - /* josh todo: hypothetical question: what do we do for OS X*? */ -#if KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) - void * args[ argc ]; + if (nthreads == 1) { +/* josh todo: hypothetical question: what do we do for OS X*? 
*/ +#if KMP_OS_LINUX && \ + (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) + void *args[argc]; #else - void * * args = (void**) KMP_ALLOCA( argc * sizeof( void * ) ); -#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) */ + void **args = (void **)KMP_ALLOCA(argc * sizeof(void *)); +#endif /* KMP_OS_LINUX && ( KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || \ + KMP_ARCH_AARCH64) */ - KA_TRACE( 20, ("__kmp_fork_call: T#%d serializing parallel region\n", gtid )); + KA_TRACE(20, + ("__kmp_fork_call: T#%d serializing parallel region\n", gtid)); - __kmpc_serialized_parallel(loc, gtid); + __kmpc_serialized_parallel(loc, gtid); - if ( call_context == fork_context_intel ) { - /* TODO this sucks, use the compiler itself to pass args! :) */ - master_th->th.th_serial_team->t.t_ident = loc; + if (call_context == fork_context_intel) { + /* TODO this sucks, use the compiler itself to pass args! :) */ + master_th->th.th_serial_team->t.t_ident = loc; #if OMP_40_ENABLED - if ( !ap ) { - // revert change made in __kmpc_serialized_parallel() - master_th->th.th_serial_team->t.t_level--; - // Get args from parent team for teams construct + if (!ap) { + // revert change made in __kmpc_serialized_parallel() + master_th->th.th_serial_team->t.t_level--; +// Get args from parent team for teams construct #if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; + void *dummy; + void **exit_runtime_p; - ompt_lw_taskteam_t lw_taskteam; + ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + unwrapped_task, ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = + &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + __ompt_lw_taskteam_link(&lw_taskteam, master_th); #if OMPT_TRACE - my_task_id = lw_taskteam.ompt_task_info.task_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - ompt_parallel_id, my_task_id); - } + my_task_id = lw_taskteam.ompt_task_info.task_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + ompt_parallel_id, my_task_id); + } #endif - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } + /* OMPT state */ + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } #endif - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask( microtask, gtid, 0, argc, parent_team->t.t_argv + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + __kmp_invoke_microtask(microtask, gtid, 0, argc, + parent_team->t.t_argv #if OMPT_SUPPORT - , exit_runtime_p + , + exit_runtime_p #endif - ); - } + ); + } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; + *exit_runtime_p = NULL; + if (ompt_enabled) { + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; #if OMPT_TRACE - 
if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - ompt_parallel_id, ompt_task_id); - } -#endif - - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } -#endif - } else if ( microtask == (microtask_t)__kmp_teams_master ) { - KMP_DEBUG_ASSERT( master_th->th.th_team == master_th->th.th_serial_team ); - team = master_th->th.th_team; - //team->t.t_pkfn = microtask; - team->t.t_invoke = invoker; - __kmp_alloc_argv_entries( argc, team, TRUE ); - team->t.t_argc = argc; - argv = (void**) team->t.t_argv; - if ( ap ) { - for( i=argc-1; i >= 0; --i ) + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + ompt_parallel_id, ompt_task_id); + } +#endif + + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; + + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + } + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } +#endif + } else if (microtask == (microtask_t)__kmp_teams_master) { + KMP_DEBUG_ASSERT(master_th->th.th_team == + master_th->th.th_serial_team); + team = master_th->th.th_team; + // team->t.t_pkfn = microtask; + team->t.t_invoke = invoker; + __kmp_alloc_argv_entries(argc, team, TRUE); + team->t.t_argc = argc; + argv = (void **)team->t.t_argv; + if (ap) { + for (i = argc - 1; i >= 0; --i) // TODO: revert workaround for Intel(R) 64 tracker #96 -# if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * ); -# else - *argv++ = va_arg( ap, void * ); -# endif - } else { - for( i=0; i < argc; ++i ) - // Get args from parent team for teams construct - argv[i] = parent_team->t.t_argv[i]; - } - // AC: revert change made in __kmpc_serialized_parallel() - // because initial code in teams should have level=0 - team->t.t_level--; - // AC: call special invoker for outer "parallel" of the teams construct - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - invoker(gtid); - } - } else { +#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX + *argv++ = va_arg(*ap, void *); +#else + *argv++ = va_arg(ap, void *); +#endif + } else { + for (i = 0; i < argc; ++i) + // Get args from parent team for teams construct + argv[i] = parent_team->t.t_argv[i]; + } + // AC: revert change made in __kmpc_serialized_parallel() + // because initial code in teams should have level=0 + team->t.t_level--; + // AC: call special invoker for outer "parallel" of teams construct + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + invoker(gtid); + } + } else { #endif /* OMP_40_ENABLED */ - argv = args; - for( i=argc-1; i >= 0; --i ) + argv = args; + for (i = argc - 1; i >= 0; --i) // TODO: revert workaround for Intel(R) 64 tracker #96 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - *argv++ = va_arg( *ap, void * 
); + *argv++ = va_arg(*ap, void *); #else - *argv++ = va_arg( ap, void * ); + *argv++ = va_arg(ap, void *); #endif - KMP_MB(); + KMP_MB(); #if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; + void *dummy; + void **exit_runtime_p; - ompt_lw_taskteam_t lw_taskteam; + ompt_lw_taskteam_t lw_taskteam; - if (ompt_enabled) { - __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, - unwrapped_task, ompt_parallel_id); - lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); + if (ompt_enabled) { + __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, + unwrapped_task, ompt_parallel_id); + lw_taskteam.ompt_task_info.task_id = __ompt_task_id_new(gtid); + exit_runtime_p = + &(lw_taskteam.ompt_task_info.frame.exit_runtime_frame); - __ompt_lw_taskteam_link(&lw_taskteam, master_th); + __ompt_lw_taskteam_link(&lw_taskteam, master_th); #if OMPT_TRACE - /* OMPT implicit task begin */ - my_task_id = lw_taskteam.ompt_task_info.task_id; - my_parallel_id = ompt_parallel_id; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif - - /* OMPT state */ - master_th->th.ompt_thread_info.state = ompt_state_work_parallel; - } else { - exit_runtime_p = &dummy; - } -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - __kmp_invoke_microtask( microtask, gtid, 0, argc, args + /* OMPT implicit task begin */ + my_task_id = lw_taskteam.ompt_task_info.task_id; + my_parallel_id = ompt_parallel_id; + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( + my_parallel_id, my_task_id); + } +#endif + + /* OMPT state */ + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; + } else { + exit_runtime_p = &dummy; + } +#endif + + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + __kmp_invoke_microtask(microtask, gtid, 0, argc, args #if OMPT_SUPPORT - , exit_runtime_p + , + exit_runtime_p #endif - ); - } + ); + } #if OMPT_SUPPORT - *exit_runtime_p = NULL; - if (ompt_enabled) { + *exit_runtime_p = NULL; + if (ompt_enabled) { #if OMPT_TRACE - lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; + lw_taskteam.ompt_task_info.frame.exit_runtime_frame = NULL; - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, my_task_id); - } + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + my_parallel_id, my_task_id); + } #endif - __ompt_lw_taskteam_unlink(master_th); - // reset clear the task id only after unlinking the task - lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; + __ompt_lw_taskteam_unlink(master_th); + // reset clear the task id only after unlinking the task + lw_taskteam.ompt_task_info.task_id = ompt_task_id_none; - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - ompt_parallel_id, ompt_task_id, - OMPT_INVOKER(call_context)); - } - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + ompt_parallel_id, ompt_task_id, OMPT_INVOKER(call_context)); + } + 
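// Illustrative standalone sketch, not part of the patch: the guarded-callback
// pattern the OMPT code in this hunk relies on -- look the callback up, invoke
// it only if a tool registered one, then clear per-task state. All names here
// (callbacks_t, on_parallel_end, end_parallel) are hypothetical stand-ins, not
// the OMPT API itself.
#include <cstdint>
#include <cstdio>

using parallel_end_cb_t = void (*)(std::uint64_t parallel_id,
                                   std::uint64_t task_id, int invoker);

struct callbacks_t {
  parallel_end_cb_t on_parallel_end = nullptr; // unset unless a tool attached
};

static callbacks_t g_callbacks;

static void end_parallel(std::uint64_t parallel_id, std::uint64_t task_id,
                         int invoker) {
  if (g_callbacks.on_parallel_end) // fire only if a tool registered a callback
    g_callbacks.on_parallel_end(parallel_id, task_id, invoker);
}

int main() {
  g_callbacks.on_parallel_end = [](std::uint64_t pid, std::uint64_t tid,
                                   int inv) {
    std::printf("parallel %llu ended (task %llu, invoker %d)\n",
                (unsigned long long)pid, (unsigned long long)tid, inv);
  };
  end_parallel(1, 42, 0);
  return 0;
}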
master_th->th.ompt_thread_info.state = ompt_state_overhead; + } #endif #if OMP_40_ENABLED - } -#endif /* OMP_40_ENABLED */ } - else if ( call_context == fork_context_gnu ) { +#endif /* OMP_40_ENABLED */ + } else if (call_context == fork_context_gnu) { #if OMPT_SUPPORT - ompt_lw_taskteam_t *lwt = (ompt_lw_taskteam_t *) - __kmp_allocate(sizeof(ompt_lw_taskteam_t)); - __ompt_lw_taskteam_init(lwt, master_th, gtid, - unwrapped_task, ompt_parallel_id); + ompt_lw_taskteam_t *lwt = + (ompt_lw_taskteam_t *)__kmp_allocate(sizeof(ompt_lw_taskteam_t)); + __ompt_lw_taskteam_init(lwt, master_th, gtid, unwrapped_task, + ompt_parallel_id); - lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); - lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - __ompt_lw_taskteam_link(lwt, master_th); + lwt->ompt_task_info.task_id = __ompt_task_id_new(gtid); + lwt->ompt_task_info.frame.exit_runtime_frame = NULL; + __ompt_lw_taskteam_link(lwt, master_th); #endif - // we were called from GNU native code - KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); - return FALSE; - } - else { - KMP_ASSERT2( call_context < fork_context_last, "__kmp_fork_call: unknown fork_context parameter" ); - } - - - KA_TRACE( 20, ("__kmp_fork_call: T#%d serial exit\n", gtid )); - KMP_MB(); + // we were called from GNU native code + KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); return FALSE; + } + else { + KMP_ASSERT2(call_context < fork_context_last, + "__kmp_fork_call: unknown fork_context parameter"); + } + + KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); + KMP_MB(); + return FALSE; } // GEH: only modify the executing flag in the case when not serialized // serialized case is handled in kmpc_serialized_parallel - KF_TRACE( 10, ( "__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, curtask=%p, curtask_max_aclevel=%d\n", - parent_team->t.t_active_level, master_th, master_th->th.th_current_task, - master_th->th.th_current_task->td_icvs.max_active_levels ) ); - // TODO: GEH - cannot do this assertion because root thread not set up as executing + KF_TRACE(10, ("__kmp_fork_call: parent_team_aclevel=%d, master_th=%p, " + "curtask=%p, curtask_max_aclevel=%d\n", + parent_team->t.t_active_level, master_th, + master_th->th.th_current_task, + master_th->th.th_current_task->td_icvs.max_active_levels)); + // TODO: GEH - cannot do this assertion because root thread not set up as + // executing // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 1 ); master_th->th.th_current_task->td_flags.executing = 0; #if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || level > teams_level ) + if (!master_th->th.th_teams_microtask || level > teams_level) #endif /* OMP_40_ENABLED */ { - /* Increment our nested depth level */ - KMP_TEST_THEN_INC32( (kmp_int32*) &root->r.r_in_parallel ); + /* Increment our nested depth level */ + KMP_TEST_THEN_INC32((kmp_int32 *)&root->r.r_in_parallel); } // See if we need to make a copy of the ICVs. int nthreads_icv = master_th->th.th_current_task->td_icvs.nproc; - if ((level+1 < __kmp_nested_nth.used) && (__kmp_nested_nth.nth[level+1] != nthreads_icv)) { - nthreads_icv = __kmp_nested_nth.nth[level+1]; - } - else { - nthreads_icv = 0; // don't update + if ((level + 1 < __kmp_nested_nth.used) && + (__kmp_nested_nth.nth[level + 1] != nthreads_icv)) { + nthreads_icv = __kmp_nested_nth.nth[level + 1]; + } else { + nthreads_icv = 0; // don't update } #if OMP_40_ENABLED // Figure out the proc_bind_policy for the new team. 
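// Illustrative standalone sketch, not part of the patch: the proc_bind
// resolution performed by the block just below -- proc_bind_false in the ICV
// disables binding outright, an explicit clause on the parallel overrides the
// ICV for this region only, and with no clause the current proc-bind-var is
// inherited. The enum and function names here are simplified stand-ins.
#include <cstdio>

enum proc_bind_t { bind_default, bind_false, bind_true, bind_close };

static proc_bind_t resolve_proc_bind(proc_bind_t clause, proc_bind_t icv) {
  if (icv == bind_false)
    return bind_false; // binding disabled: ignore the clause
  if (clause == bind_default)
    return icv; // no clause on the parallel: inherit proc-bind-var
  return clause; // explicit clause overrides for this region only
}

int main() {
  std::printf("%d\n", (int)resolve_proc_bind(bind_close, bind_true));   // 3
  std::printf("%d\n", (int)resolve_proc_bind(bind_default, bind_true)); // 2
  std::printf("%d\n", (int)resolve_proc_bind(bind_close, bind_false));  // 1
  return 0;
}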
kmp_proc_bind_t proc_bind = master_th->th.th_set_proc_bind; - kmp_proc_bind_t proc_bind_icv = proc_bind_default; // proc_bind_default means don't update - if ( master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false ) { - proc_bind = proc_bind_false; - } - else { - if (proc_bind == proc_bind_default) { - // No proc_bind clause specified; use current proc-bind-var for this parallel region - proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; - } - /* else: The proc_bind policy was specified explicitly on parallel clause. This - overrides proc-bind-var for this parallel region, but does not change proc-bind-var. */ - // Figure the value of proc-bind-var for the child threads. - if ((level+1 < __kmp_nested_proc_bind.used) - && (__kmp_nested_proc_bind.bind_types[level+1] != master_th->th.th_current_task->td_icvs.proc_bind)) { - proc_bind_icv = __kmp_nested_proc_bind.bind_types[level+1]; - } + kmp_proc_bind_t proc_bind_icv = + proc_bind_default; // proc_bind_default means don't update + if (master_th->th.th_current_task->td_icvs.proc_bind == proc_bind_false) { + proc_bind = proc_bind_false; + } else { + if (proc_bind == proc_bind_default) { + // No proc_bind clause specified; use current proc-bind-var for this + // parallel region + proc_bind = master_th->th.th_current_task->td_icvs.proc_bind; + } + /* else: The proc_bind policy was specified explicitly on parallel clause. + This overrides proc-bind-var for this parallel region, but does not + change proc-bind-var. */ + // Figure the value of proc-bind-var for the child threads. + if ((level + 1 < __kmp_nested_proc_bind.used) && + (__kmp_nested_proc_bind.bind_types[level + 1] != + master_th->th.th_current_task->td_icvs.proc_bind)) { + proc_bind_icv = __kmp_nested_proc_bind.bind_types[level + 1]; + } } // Reset for next parallel region @@ -1972,44 +1965,45 @@ __kmp_fork_call( #if OMP_40_ENABLED || (proc_bind_icv != proc_bind_default) #endif /* OMP_40_ENABLED */ - ) { - kmp_internal_control_t new_icvs; - copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); - new_icvs.next = NULL; - if (nthreads_icv > 0) { - new_icvs.nproc = nthreads_icv; - } + ) { + kmp_internal_control_t new_icvs; + copy_icvs(&new_icvs, &master_th->th.th_current_task->td_icvs); + new_icvs.next = NULL; + if (nthreads_icv > 0) { + new_icvs.nproc = nthreads_icv; + } #if OMP_40_ENABLED - if (proc_bind_icv != proc_bind_default) { - new_icvs.proc_bind = proc_bind_icv; - } + if (proc_bind_icv != proc_bind_default) { + new_icvs.proc_bind = proc_bind_icv; + } #endif /* OMP_40_ENABLED */ - /* allocate a new parallel team */ - KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); - team = __kmp_allocate_team(root, nthreads, nthreads, + /* allocate a new parallel team */ + KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); + team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_id, #endif #if OMP_40_ENABLED - proc_bind, + proc_bind, #endif - &new_icvs, argc USE_NESTED_HOT_ARG(master_th) ); + &new_icvs, argc USE_NESTED_HOT_ARG(master_th)); } else { - /* allocate a new parallel team */ - KF_TRACE( 10, ( "__kmp_fork_call: before __kmp_allocate_team\n" ) ); - team = __kmp_allocate_team(root, nthreads, nthreads, + /* allocate a new parallel team */ + KF_TRACE(10, ("__kmp_fork_call: before __kmp_allocate_team\n")); + team = __kmp_allocate_team(root, nthreads, nthreads, #if OMPT_SUPPORT - ompt_parallel_id, + ompt_parallel_id, #endif #if OMP_40_ENABLED - proc_bind, + proc_bind, #endif - 
&master_th->th.th_current_task->td_icvs, argc - USE_NESTED_HOT_ARG(master_th) ); + &master_th->th.th_current_task->td_icvs, + argc USE_NESTED_HOT_ARG(master_th)); } - KF_TRACE( 10, ( "__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team ) ); + KF_TRACE( + 10, ("__kmp_fork_call: after __kmp_allocate_team - team = %p\n", team)); /* setup the new team */ KMP_CHECK_UPDATE(team->t.t_master_tid, master_tid); @@ -2020,27 +2014,29 @@ __kmp_fork_call( #if OMPT_SUPPORT KMP_CHECK_UPDATE_SYNC(team->t.ompt_team_info.microtask, unwrapped_task); #endif - KMP_CHECK_UPDATE(team->t.t_invoke, invoker); /* TODO move this to root, maybe */ - // TODO: parent_team->t.t_level == INT_MAX ??? + KMP_CHECK_UPDATE(team->t.t_invoke, invoker); // TODO move to root, maybe +// TODO: parent_team->t.t_level == INT_MAX ??? #if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || level > teams_level ) { + if (!master_th->th.th_teams_microtask || level > teams_level) { #endif /* OMP_40_ENABLED */ - int new_level = parent_team->t.t_level + 1; - KMP_CHECK_UPDATE(team->t.t_level, new_level); - new_level = parent_team->t.t_active_level + 1; - KMP_CHECK_UPDATE(team->t.t_active_level, new_level); + int new_level = parent_team->t.t_level + 1; + KMP_CHECK_UPDATE(team->t.t_level, new_level); + new_level = parent_team->t.t_active_level + 1; + KMP_CHECK_UPDATE(team->t.t_active_level, new_level); #if OMP_40_ENABLED } else { - // AC: Do not increase parallel level at start of the teams construct - int new_level = parent_team->t.t_level; - KMP_CHECK_UPDATE(team->t.t_level, new_level); - new_level = parent_team->t.t_active_level; - KMP_CHECK_UPDATE(team->t.t_active_level, new_level); + // AC: Do not increase parallel level at start of the teams construct + int new_level = parent_team->t.t_level; + KMP_CHECK_UPDATE(team->t.t_level, new_level); + new_level = parent_team->t.t_active_level; + KMP_CHECK_UPDATE(team->t.t_active_level, new_level); } #endif /* OMP_40_ENABLED */ kmp_r_sched_t new_sched = get__sched_2(parent_team, master_tid); - if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || team->t.t_sched.chunk != new_sched.chunk) - team->t.t_sched = new_sched; // set master's schedule as new run-time schedule + if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || + team->t.t_sched.chunk != new_sched.chunk) + team->t.t_sched = + new_sched; // set master's schedule as new run-time schedule #if OMP_40_ENABLED KMP_CHECK_UPDATE(team->t.t_cancel_request, cancel_noreq); @@ -2049,3174 +2045,3185 @@ __kmp_fork_call( // Update the floating point rounding in the team if required. propagateFPControl(team); - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Set master's task team to team's task team. Unless this is hot team, it should be NULL. + if (__kmp_tasking_mode != tskm_immediate_exec) { + // Set master's task team to team's task team. Unless this is hot team, it + // should be NULL. #if 0 - // Patch out an assertion that trips while the runtime seems to operate correctly. - // Avoiding the preconditions that cause the assertion to trip has been promised as a forthcoming patch. 
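// Illustrative standalone sketch, not part of the patch: the push-with-doubling
// pattern used just below for the master's task_state memo stack -- when the
// stack is full, allocate one twice as large, copy the existing entries, zero
// the rest, then push. The struct and helper names are hypothetical.
#include <cstdio>
#include <cstdlib>
#include <cstring>

struct state_stack_t {
  unsigned char *data;
  unsigned top;
  unsigned size;
};

static void push_state(state_stack_t *s, unsigned char state) {
  if (s->top >= s->size) { // full: double the capacity
    unsigned new_size = 2 * s->size;
    unsigned char *new_data = (unsigned char *)std::calloc(new_size, 1);
    std::memcpy(new_data, s->data, s->size); // keep the existing entries
    std::free(s->data);                      // calloc zero-inits the rest
    s->data = new_data;
    s->size = new_size;
  }
  s->data[s->top++] = state; // store the current state, then bump the top
}

int main() {
  state_stack_t s = {(unsigned char *)std::calloc(4, 1), 0, 4};
  for (int i = 0; i < 10; ++i)
    push_state(&s, (unsigned char)(i & 1));
  std::printf("top=%u size=%u\n", s.top, s.size); // top=10 size=16
  std::free(s.data);
  return 0;
}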
- KMP_DEBUG_ASSERT(master_th->th.th_task_team == parent_team->t.t_task_team[master_th->th.th_task_state]); -#endif - KA_TRACE( 20, ( "__kmp_fork_call: Master T#%d pushing task_team %p / team %p, new task_team %p / team %p\n", - __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, - parent_team, team->t.t_task_team[master_th->th.th_task_state], team ) ); - - if ( active_level || master_th->th.th_task_team ) { - // Take a memo of master's task_state - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - if (master_th->th.th_task_state_top >= master_th->th.th_task_state_stack_sz) { // increase size - kmp_uint32 new_size = 2*master_th->th.th_task_state_stack_sz; - kmp_uint8 *old_stack, *new_stack; - kmp_uint32 i; - new_stack = (kmp_uint8 *)__kmp_allocate(new_size); - for (i=0; ith.th_task_state_stack_sz; ++i) { - new_stack[i] = master_th->th.th_task_state_memo_stack[i]; - } - for (i=master_th->th.th_task_state_stack_sz; ith.th_task_state_memo_stack; - master_th->th.th_task_state_memo_stack = new_stack; - master_th->th.th_task_state_stack_sz = new_size; - __kmp_free(old_stack); - } - // Store master's task_state on stack - master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; - master_th->th.th_task_state_top++; + // Patch out an assertion that trips while the runtime seems to operate + // correctly. Avoiding the preconditions that cause the assertion to trip + // has been promised as a forthcoming patch. + KMP_DEBUG_ASSERT(master_th->th.th_task_team == + parent_team->t.t_task_team[master_th->th.th_task_state]); +#endif + KA_TRACE(20, ("__kmp_fork_call: Master T#%d pushing task_team %p / team " + "%p, new task_team %p / team %p\n", + __kmp_gtid_from_thread(master_th), + master_th->th.th_task_team, parent_team, + team->t.t_task_team[master_th->th.th_task_state], team)); + + if (active_level || master_th->th.th_task_team) { + // Take a memo of master's task_state + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + if (master_th->th.th_task_state_top >= + master_th->th.th_task_state_stack_sz) { // increase size + kmp_uint32 new_size = 2 * master_th->th.th_task_state_stack_sz; + kmp_uint8 *old_stack, *new_stack; + kmp_uint32 i; + new_stack = (kmp_uint8 *)__kmp_allocate(new_size); + for (i = 0; i < master_th->th.th_task_state_stack_sz; ++i) { + new_stack[i] = master_th->th.th_task_state_memo_stack[i]; + } + for (i = master_th->th.th_task_state_stack_sz; i < new_size; + ++i) { // zero-init rest of stack + new_stack[i] = 0; + } + old_stack = master_th->th.th_task_state_memo_stack; + master_th->th.th_task_state_memo_stack = new_stack; + master_th->th.th_task_state_stack_sz = new_size; + __kmp_free(old_stack); + } + // Store master's task_state on stack + master_th->th + .th_task_state_memo_stack[master_th->th.th_task_state_top] = + master_th->th.th_task_state; + master_th->th.th_task_state_top++; #if KMP_NESTED_HOT_TEAMS - if (team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team - master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; - } - else { + if (team == + master_th->th.th_hot_teams[active_level] + .hot_team) { // Restore master's nested state if nested hot team + master_th->th.th_task_state = + master_th->th + .th_task_state_memo_stack[master_th->th.th_task_state_top]; + } else { #endif - master_th->th.th_task_state = 0; + master_th->th.th_task_state = 0; #if KMP_NESTED_HOT_TEAMS - } -#endif } +#endif + } #if 
!KMP_NESTED_HOT_TEAMS - KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || (team == root->r.r_hot_team)); + KMP_DEBUG_ASSERT((master_th->th.th_task_team == NULL) || + (team == root->r.r_hot_team)); #endif } - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", - gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, team->t.t_nproc )); - KMP_DEBUG_ASSERT( team != root->r.r_hot_team || - ( team->t.t_master_tid == 0 && - ( team->t.t_parent == root->r.r_root_team || team->t.t_parent->t.t_serialized ) )); + KA_TRACE( + 20, + ("__kmp_fork_call: T#%d(%d:%d)->(%d:0) created a team of %d threads\n", + gtid, parent_team->t.t_id, team->t.t_master_tid, team->t.t_id, + team->t.t_nproc)); + KMP_DEBUG_ASSERT(team != root->r.r_hot_team || + (team->t.t_master_tid == 0 && + (team->t.t_parent == root->r.r_root_team || + team->t.t_parent->t.t_serialized))); KMP_MB(); /* now, setup the arguments */ - argv = (void**)team->t.t_argv; + argv = (void **)team->t.t_argv; #if OMP_40_ENABLED - if ( ap ) { + if (ap) { #endif /* OMP_40_ENABLED */ - for ( i=argc-1; i >= 0; --i ) { + for (i = argc - 1; i >= 0; --i) { // TODO: revert workaround for Intel(R) 64 tracker #96 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX - void *new_argv = va_arg(*ap, void *); + void *new_argv = va_arg(*ap, void *); #else - void *new_argv = va_arg(ap, void *); + void *new_argv = va_arg(ap, void *); #endif - KMP_CHECK_UPDATE(*argv, new_argv); - argv++; - } + KMP_CHECK_UPDATE(*argv, new_argv); + argv++; + } #if OMP_40_ENABLED } else { - for ( i=0; i < argc; ++i ) { - // Get args from parent team for teams construct - KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); - } + for (i = 0; i < argc; ++i) { + // Get args from parent team for teams construct + KMP_CHECK_UPDATE(argv[i], team->t.t_parent->t.t_argv[i]); + } } #endif /* OMP_40_ENABLED */ /* now actually fork the threads */ KMP_CHECK_UPDATE(team->t.t_master_active, master_active); if (!root->r.r_active) // Only do assignment if it prevents cache ping-pong - root->r.r_active = TRUE; + root->r.r_active = TRUE; - __kmp_fork_team_threads( root, team, master_th, gtid ); - __kmp_setup_icv_copy( team, nthreads, &master_th->th.th_current_task->td_icvs, loc ); + __kmp_fork_team_threads(root, team, master_th, gtid); + __kmp_setup_icv_copy(team, nthreads, + &master_th->th.th_current_task->td_icvs, loc); #if OMPT_SUPPORT master_th->th.ompt_thread_info.state = ompt_state_work_parallel; #endif - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if USE_ITT_BUILD - if ( team->t.t_active_level == 1 // only report frames at level 1 -# if OMP_40_ENABLED + if (team->t.t_active_level == 1 // only report frames at level 1 +#if OMP_40_ENABLED && !master_th->th.th_teams_microtask // not in teams construct -# endif /* OMP_40_ENABLED */ - ) { +#endif /* OMP_40_ENABLED */ + ) { #if USE_ITT_NOTIFY - if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && - ( __kmp_forkjoin_frames_mode == 3 || __kmp_forkjoin_frames_mode == 1 ) ) - { - kmp_uint64 tmp_time = 0; - if ( __itt_get_timestamp_ptr ) - tmp_time = __itt_get_timestamp(); - // Internal fork - report frame begin - master_th->th.th_frame_time = tmp_time; - if ( __kmp_forkjoin_frames_mode == 3 ) - team->t.t_region_time = tmp_time; - } else // only one notification scheme (either "submit" or "forking/joined", not both) + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && + (__kmp_forkjoin_frames_mode == 3 || + 
__kmp_forkjoin_frames_mode == 1)) { + kmp_uint64 tmp_time = 0; + if (__itt_get_timestamp_ptr) + tmp_time = __itt_get_timestamp(); + // Internal fork - report frame begin + master_th->th.th_frame_time = tmp_time; + if (__kmp_forkjoin_frames_mode == 3) + team->t.t_region_time = tmp_time; + } else // only one notification scheme (either "submit" or +// "forking/joined", not both) #endif /* USE_ITT_NOTIFY */ - if ( ( __itt_frame_begin_v3_ptr || KMP_ITT_DEBUG ) && - __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode ) - { // Mark start of "parallel" region for VTune. - __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); - } + if ((__itt_frame_begin_v3_ptr || KMP_ITT_DEBUG) && + __kmp_forkjoin_frames && !__kmp_forkjoin_frames_mode) { + // Mark start of "parallel" region for VTune. + __kmp_itt_region_forking(gtid, team->t.t_nproc, 0); + } } #endif /* USE_ITT_BUILD */ /* now go on and do the work */ - KMP_DEBUG_ASSERT( team == __kmp_threads[gtid]->th.th_team ); + KMP_DEBUG_ASSERT(team == __kmp_threads[gtid]->th.th_team); KMP_MB(); - KF_TRACE(10, ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); + KF_TRACE(10, + ("__kmp_internal_fork : root=%p, team=%p, master_th=%p, gtid=%d\n", + root, team, master_th, gtid)); #if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - team->t.t_stack_id = __kmp_itt_stack_caller_create(); // create new stack stitching id before entering fork barrier + if (__itt_stack_caller_create_ptr) { + team->t.t_stack_id = + __kmp_itt_stack_caller_create(); // create new stack stitching id + // before entering fork barrier } #endif /* USE_ITT_BUILD */ #if OMP_40_ENABLED - if ( ap ) // AC: skip __kmp_internal_fork at teams construct, let only master threads execute + if (ap) // AC: skip __kmp_internal_fork at teams construct, let only master +// threads execute #endif /* OMP_40_ENABLED */ { - __kmp_internal_fork( loc, gtid, team ); - KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, master_th=%p, gtid=%d\n", - root, team, master_th, gtid)); + __kmp_internal_fork(loc, gtid, team); + KF_TRACE(10, ("__kmp_internal_fork : after : root=%p, team=%p, " + "master_th=%p, gtid=%d\n", + root, team, master_th, gtid)); } if (call_context == fork_context_gnu) { - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); - return TRUE; + KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); + return TRUE; } /* Invoke microtask for MASTER thread */ - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", - gtid, team->t.t_id, team->t.t_pkfn ) ); - } // END of timer KMP_fork_call block + KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) invoke microtask = %p\n", gtid, + team->t.t_id, team->t.t_pkfn)); + } // END of timer KMP_fork_call block - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - if (! team->t.t_invoke( gtid )) { - KMP_ASSERT2( 0, "cannot invoke microtask for MASTER thread" ); - } + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + if (!team->t.t_invoke(gtid)) { + KMP_ASSERT2(0, "cannot invoke microtask for MASTER thread"); } - KA_TRACE( 20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", - gtid, team->t.t_id, team->t.t_pkfn ) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ + } + KA_TRACE(20, ("__kmp_fork_call: T#%d(%d:0) done microtask = %p\n", gtid, + team->t.t_id, team->t.t_pkfn)); + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - KA_TRACE( 20, ("__kmp_fork_call: parallel exit T#%d\n", gtid )); + KA_TRACE(20, ("__kmp_fork_call: parallel exit T#%d\n", gtid)); #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } + if (ompt_enabled) { + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } #endif - return TRUE; + return TRUE; } #if OMPT_SUPPORT -static inline void -__kmp_join_restore_state( - kmp_info_t *thread, - kmp_team_t *team) -{ - // restore state outside the region - thread->th.ompt_thread_info.state = ((team->t.t_serialized) ? - ompt_state_work_serial : ompt_state_work_parallel); +static inline void __kmp_join_restore_state(kmp_info_t *thread, + kmp_team_t *team) { + // restore state outside the region + thread->th.ompt_thread_info.state = + ((team->t.t_serialized) ? ompt_state_work_serial + : ompt_state_work_parallel); } -static inline void -__kmp_join_ompt( - kmp_info_t *thread, - kmp_team_t *team, - ompt_parallel_id_t parallel_id, - fork_context_e fork_context) -{ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { - ompt_callbacks.ompt_callback(ompt_event_parallel_end)( - parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); - } - - task_info->frame.reenter_runtime_frame = NULL; - __kmp_join_restore_state(thread,team); +static inline void __kmp_join_ompt(kmp_info_t *thread, kmp_team_t *team, + ompt_parallel_id_t parallel_id, + fork_context_e fork_context) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + if (ompt_callbacks.ompt_callback(ompt_event_parallel_end)) { + ompt_callbacks.ompt_callback(ompt_event_parallel_end)( + parallel_id, task_info->task_id, OMPT_INVOKER(fork_context)); + } + + task_info->frame.reenter_runtime_frame = NULL; + __kmp_join_restore_state(thread, team); } #endif -void -__kmp_join_call(ident_t *loc, int gtid +void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT - , enum fork_context_e fork_context + , + enum fork_context_e fork_context #endif #if OMP_40_ENABLED - , int exit_teams + , + int exit_teams #endif /* OMP_40_ENABLED */ -) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); - kmp_team_t *team; - kmp_team_t *parent_team; - kmp_info_t *master_th; - kmp_root_t *root; - int master_active; - int i; - - KA_TRACE( 20, ("__kmp_join_call: enter T#%d\n", gtid )); + ) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_join_call); + kmp_team_t *team; + kmp_team_t *parent_team; + kmp_info_t *master_th; + kmp_root_t *root; + int master_active; + int i; - /* setup current data */ - master_th = __kmp_threads[ gtid ]; - root = master_th->th.th_root; - team = master_th->th.th_team; - parent_team = team->t.t_parent; + KA_TRACE(20, ("__kmp_join_call: enter T#%d\n", gtid)); - master_th->th.th_ident = loc; + /* setup current data */ + master_th = __kmp_threads[gtid]; + root = master_th->th.th_root; + team = master_th->th.th_team; + parent_team = team->t.t_parent; + + master_th->th.th_ident = loc; #if OMPT_SUPPORT - if (ompt_enabled) { - master_th->th.ompt_thread_info.state = ompt_state_overhead; - } + if (ompt_enabled) { + master_th->th.ompt_thread_info.state = ompt_state_overhead; + } #endif #if KMP_DEBUG - if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) { - KA_TRACE( 20, ( "__kmp_join_call: T#%d, old team = %p old task_team = %p, th_task_team = %p\n", - __kmp_gtid_from_thread( master_th ), team, - team->t.t_task_team[master_th->th.th_task_state], master_th->th.th_task_team) ); - KMP_DEBUG_ASSERT( master_th->th.th_task_team 
== team->t.t_task_team[master_th->th.th_task_state] ); - } -#endif - - if( team->t.t_serialized ) { + if (__kmp_tasking_mode != tskm_immediate_exec && !exit_teams) { + KA_TRACE(20, ("__kmp_join_call: T#%d, old team = %p old task_team = %p, " + "th_task_team = %p\n", + __kmp_gtid_from_thread(master_th), team, + team->t.t_task_team[master_th->th.th_task_state], + master_th->th.th_task_team)); + KMP_DEBUG_ASSERT(master_th->th.th_task_team == + team->t.t_task_team[master_th->th.th_task_state]); + } +#endif + + if (team->t.t_serialized) { #if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask ) { - // We are in teams construct - int level = team->t.t_level; - int tlevel = master_th->th.th_teams_level; - if ( level == tlevel ) { - // AC: we haven't incremented it earlier at start of teams construct, - // so do it here - at the end of teams construct - team->t.t_level++; - } else if ( level == tlevel + 1 ) { - // AC: we are exiting parallel inside teams, need to increment serialization - // in order to restore it in the next call to __kmpc_end_serialized_parallel - team->t.t_serialized++; - } - } + if (master_th->th.th_teams_microtask) { + // We are in teams construct + int level = team->t.t_level; + int tlevel = master_th->th.th_teams_level; + if (level == tlevel) { + // AC: we haven't incremented it earlier at start of teams construct, + // so do it here - at the end of teams construct + team->t.t_level++; + } else if (level == tlevel + 1) { + // AC: we are exiting parallel inside teams, need to increment + // serialization in order to restore it in the next call to + // __kmpc_end_serialized_parallel + team->t.t_serialized++; + } + } #endif /* OMP_40_ENABLED */ - __kmpc_end_serialized_parallel( loc, gtid ); + __kmpc_end_serialized_parallel(loc, gtid); #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_restore_state(master_th, parent_team); - } + if (ompt_enabled) { + __kmp_join_restore_state(master_th, parent_team); + } #endif - return; - } + return; + } - master_active = team->t.t_master_active; + master_active = team->t.t_master_active; #if OMP_40_ENABLED - if (!exit_teams) + if (!exit_teams) #endif /* OMP_40_ENABLED */ - { - // AC: No barrier for internal teams at exit from teams construct. - // But there is barrier for external team (league). - __kmp_internal_join( loc, gtid, team ); - } + { + // AC: No barrier for internal teams at exit from teams construct. + // But there is barrier for external team (league). + __kmp_internal_join(loc, gtid, team); + } #if OMP_40_ENABLED - else { - master_th->th.th_task_state = 0; // AC: no tasking in teams (out of any parallel) - } + else { + master_th->th.th_task_state = + 0; // AC: no tasking in teams (out of any parallel) + } #endif /* OMP_40_ENABLED */ - KMP_MB(); + KMP_MB(); #if OMPT_SUPPORT - ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; + ompt_parallel_id_t parallel_id = team->t.ompt_team_info.parallel_id; #endif #if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_caller_destroy( (__itt_caller)team->t.t_stack_id ); // destroy the stack stitching id after join barrier - } - - // Mark end of "parallel" region for VTune. 
- if ( team->t.t_active_level == 1 -# if OMP_40_ENABLED - && !master_th->th.th_teams_microtask /* not in teams construct */ -# endif /* OMP_40_ENABLED */ - ) { - master_th->th.th_ident = loc; - // only one notification scheme (either "submit" or "forking/joined", not both) - if ( ( __itt_frame_submit_v3_ptr || KMP_ITT_DEBUG ) && __kmp_forkjoin_frames_mode == 3 ) - __kmp_itt_frame_submit( gtid, team->t.t_region_time, master_th->th.th_frame_time, - 0, loc, master_th->th.th_team_nproc, 1 ); - else if ( ( __itt_frame_end_v3_ptr || KMP_ITT_DEBUG ) && - ! __kmp_forkjoin_frames_mode && __kmp_forkjoin_frames ) - __kmp_itt_region_joined( gtid ); - } // active_level == 1 + if (__itt_stack_caller_create_ptr) { + __kmp_itt_stack_caller_destroy( + (__itt_caller)team->t + .t_stack_id); // destroy the stack stitching id after join barrier + } + + // Mark end of "parallel" region for VTune. + if (team->t.t_active_level == 1 +#if OMP_40_ENABLED + && !master_th->th.th_teams_microtask /* not in teams construct */ +#endif /* OMP_40_ENABLED */ + ) { + master_th->th.th_ident = loc; + // only one notification scheme (either "submit" or "forking/joined", not + // both) + if ((__itt_frame_submit_v3_ptr || KMP_ITT_DEBUG) && + __kmp_forkjoin_frames_mode == 3) + __kmp_itt_frame_submit(gtid, team->t.t_region_time, + master_th->th.th_frame_time, 0, loc, + master_th->th.th_team_nproc, 1); + else if ((__itt_frame_end_v3_ptr || KMP_ITT_DEBUG) && + !__kmp_forkjoin_frames_mode && __kmp_forkjoin_frames) + __kmp_itt_region_joined(gtid); + } // active_level == 1 #endif /* USE_ITT_BUILD */ #if OMP_40_ENABLED - if ( master_th->th.th_teams_microtask && - !exit_teams && - team->t.t_pkfn != (microtask_t)__kmp_teams_master && - team->t.t_level == master_th->th.th_teams_level + 1 ) { - // AC: We need to leave the team structure intact at the end - // of parallel inside the teams construct, so that at the next - // parallel same (hot) team works, only adjust nesting levels - - /* Decrement our nested depth level */ - team->t.t_level --; - team->t.t_active_level --; - KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); - - /* Restore number of threads in the team if needed */ - if ( master_th->th.th_team_nproc < master_th->th.th_teams_size.nth ) { - int old_num = master_th->th.th_team_nproc; - int new_num = master_th->th.th_teams_size.nth; - kmp_info_t **other_threads = team->t.t_threads; - team->t.t_nproc = new_num; - for ( i = 0; i < old_num; ++i ) { - other_threads[i]->th.th_team_nproc = new_num; - } - // Adjust states of non-used threads of the team - for ( i = old_num; i < new_num; ++i ) { - // Re-initialize thread's barrier data. 
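// Illustrative standalone sketch, not part of the patch: when a thread is
// re-activated into the team, its per-barrier "arrived" counter is seeded from
// the team's current value, as the loop just below does for the re-added teams
// workers; otherwise the thread would appear to be a full barrier generation
// behind. Types and names here are simplified stand-ins.
#include <cstdio>

enum { NUM_BARRIERS = 2 }; // stand-in for bs_last_barrier

struct team_bar_t { unsigned long long b_arrived; };
struct thread_bar_t { unsigned long long b_arrived; };

struct team_t { team_bar_t bar[NUM_BARRIERS]; };
struct thread_t { thread_bar_t bar[NUM_BARRIERS]; };

static void seed_barrier_state(thread_t *t, const team_t *team) {
  for (int b = 0; b < NUM_BARRIERS; ++b)
    t->bar[b].b_arrived = team->bar[b].b_arrived; // adopt the team's generation
}

int main() {
  team_t team = {{{7}, {7}}};
  thread_t fresh = {{{0}, {0}}};
  seed_barrier_state(&fresh, &team);
  std::printf("%llu %llu\n", fresh.bar[0].b_arrived, fresh.bar[1].b_arrived);
  return 0;
}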
- int b; - kmp_balign_t * balign = other_threads[i]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[ b ].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + if (master_th->th.th_teams_microtask && !exit_teams && + team->t.t_pkfn != (microtask_t)__kmp_teams_master && + team->t.t_level == master_th->th.th_teams_level + 1) { + // AC: We need to leave the team structure intact at the end of parallel + // inside the teams construct, so that at the next parallel same (hot) team + // works, only adjust nesting levels + + /* Decrement our nested depth level */ + team->t.t_level--; + team->t.t_active_level--; + KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); + + /* Restore number of threads in the team if needed */ + if (master_th->th.th_team_nproc < master_th->th.th_teams_size.nth) { + int old_num = master_th->th.th_team_nproc; + int new_num = master_th->th.th_teams_size.nth; + kmp_info_t **other_threads = team->t.t_threads; + team->t.t_nproc = new_num; + for (i = 0; i < old_num; ++i) { + other_threads[i]->th.th_team_nproc = new_num; + } + // Adjust states of non-used threads of the team + for (i = old_num; i < new_num; ++i) { + // Re-initialize thread's barrier data. + int b; + kmp_balign_t *balign = other_threads[i]->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); #if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; #endif - } - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // Synchronize thread's task state - other_threads[i]->th.th_task_state = master_th->th.th_task_state; - } - } } + if (__kmp_tasking_mode != tskm_immediate_exec) { + // Synchronize thread's task state + other_threads[i]->th.th_task_state = master_th->th.th_task_state; + } + } + } #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); - } + if (ompt_enabled) { + __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + } #endif - return; - } + return; + } #endif /* OMP_40_ENABLED */ - /* do cleanup and restore the parent team */ - master_th->th.th_info .ds.ds_tid = team->t.t_master_tid; - master_th->th.th_local.this_construct = team->t.t_master_this_cons; + /* do cleanup and restore the parent team */ + master_th->th.th_info.ds.ds_tid = team->t.t_master_tid; + master_th->th.th_local.this_construct = team->t.t_master_this_cons; - master_th->th.th_dispatch = - & parent_team->t.t_dispatch[ team->t.t_master_tid ]; + master_th->th.th_dispatch = &parent_team->t.t_dispatch[team->t.t_master_tid]; - /* jc: The following lock has instructions with REL and ACQ semantics, - separating the parallel user code called in this parallel region - from the serial user code called after this function returns. - */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + /* jc: The following lock has instructions with REL and ACQ semantics, + separating the parallel user code called in this parallel region + from the serial user code called after this function returns. 
*/ + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); #if OMP_40_ENABLED - if ( !master_th->th.th_teams_microtask || team->t.t_level > master_th->th.th_teams_level ) + if (!master_th->th.th_teams_microtask || + team->t.t_level > master_th->th.th_teams_level) #endif /* OMP_40_ENABLED */ - { - /* Decrement our nested depth level */ - KMP_TEST_THEN_DEC32( (kmp_int32*) &root->r.r_in_parallel ); - } - KMP_DEBUG_ASSERT( root->r.r_in_parallel >= 0 ); + { + /* Decrement our nested depth level */ + KMP_TEST_THEN_DEC32((kmp_int32 *)&root->r.r_in_parallel); + } + KMP_DEBUG_ASSERT(root->r.r_in_parallel >= 0); #if OMPT_SUPPORT && OMPT_TRACE - if(ompt_enabled){ - ompt_task_info_t *task_info = __ompt_get_taskinfo(0); - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - parallel_id, task_info->task_id); - } - task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; + if (ompt_enabled) { + ompt_task_info_t *task_info = __ompt_get_taskinfo(0); + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + parallel_id, task_info->task_id); } + task_info->frame.exit_runtime_frame = NULL; + task_info->task_id = 0; + } #endif - KF_TRACE( 10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", - 0, master_th, team ) ); - __kmp_pop_current_task_from_thread( master_th ); + KF_TRACE(10, ("__kmp_join_call1: T#%d, this_thread=%p team=%p\n", 0, + master_th, team)); + __kmp_pop_current_task_from_thread(master_th); #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED - // - // Restore master thread's partition. - // - master_th->th.th_first_place = team->t.t_first_place; - master_th->th.th_last_place = team->t.t_last_place; + // Restore master thread's partition. + master_th->th.th_first_place = team->t.t_first_place; + master_th->th.th_last_place = team->t.t_last_place; #endif /* OMP_40_ENABLED */ - updateHWFPControl (team); - - if ( root->r.r_active != master_active ) - root->r.r_active = master_active; - - __kmp_free_team( root, team USE_NESTED_HOT_ARG(master_th) ); // this will free worker threads - - /* this race was fun to find. make sure the following is in the critical - * region otherwise assertions may fail occasionally since the old team - * may be reallocated and the hierarchy appears inconsistent. it is - * actually safe to run and won't cause any bugs, but will cause those - * assertion failures. 
it's only one deref&assign so might as well put this - * in the critical region */ - master_th->th.th_team = parent_team; - master_th->th.th_team_nproc = parent_team->t.t_nproc; - master_th->th.th_team_master = parent_team->t.t_threads[0]; - master_th->th.th_team_serialized = parent_team->t.t_serialized; - - /* restore serialized team, if need be */ - if( parent_team->t.t_serialized && - parent_team != master_th->th.th_serial_team && - parent_team != root->r.r_root_team ) { - __kmp_free_team( root, master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL) ); - master_th->th.th_serial_team = parent_team; - } - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - if (master_th->th.th_task_state_top > 0) { // Restore task state from memo stack - KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); - // Remember master's state if we re-use this nested hot team - master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = master_th->th.th_task_state; - --master_th->th.th_task_state_top; // pop - // Now restore state at this level - master_th->th.th_task_state = master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top]; - } - // Copy the task team from the parent team to the master thread - master_th->th.th_task_team = parent_team->t.t_task_team[master_th->th.th_task_state]; - KA_TRACE( 20, ( "__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", - __kmp_gtid_from_thread( master_th ), master_th->th.th_task_team, parent_team ) ); - } - - // TODO: GEH - cannot do this assertion because root thread not set up as executing - // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); - master_th->th.th_current_task->td_flags.executing = 1; - - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + updateHWFPControl(team); + + if (root->r.r_active != master_active) + root->r.r_active = master_active; + + __kmp_free_team(root, team USE_NESTED_HOT_ARG( + master_th)); // this will free worker threads + + /* this race was fun to find. make sure the following is in the critical + region otherwise assertions may fail occasionally since the old team may be + reallocated and the hierarchy appears inconsistent. it is actually safe to + run and won't cause any bugs, but will cause those assertion failures. 
it's + only one deref&assign so might as well put this in the critical region */ + master_th->th.th_team = parent_team; + master_th->th.th_team_nproc = parent_team->t.t_nproc; + master_th->th.th_team_master = parent_team->t.t_threads[0]; + master_th->th.th_team_serialized = parent_team->t.t_serialized; + + /* restore serialized team, if need be */ + if (parent_team->t.t_serialized && + parent_team != master_th->th.th_serial_team && + parent_team != root->r.r_root_team) { + __kmp_free_team(root, + master_th->th.th_serial_team USE_NESTED_HOT_ARG(NULL)); + master_th->th.th_serial_team = parent_team; + } + + if (__kmp_tasking_mode != tskm_immediate_exec) { + if (master_th->th.th_task_state_top > + 0) { // Restore task state from memo stack + KMP_DEBUG_ASSERT(master_th->th.th_task_state_memo_stack); + // Remember master's state if we re-use this nested hot team + master_th->th.th_task_state_memo_stack[master_th->th.th_task_state_top] = + master_th->th.th_task_state; + --master_th->th.th_task_state_top; // pop + // Now restore state at this level + master_th->th.th_task_state = + master_th->th + .th_task_state_memo_stack[master_th->th.th_task_state_top]; + } + // Copy the task team from the parent team to the master thread + master_th->th.th_task_team = + parent_team->t.t_task_team[master_th->th.th_task_state]; + KA_TRACE(20, + ("__kmp_join_call: Master T#%d restoring task_team %p / team %p\n", + __kmp_gtid_from_thread(master_th), master_th->th.th_task_team, + parent_team)); + } + + // TODO: GEH - cannot do this assertion because root thread not set up as + // executing + // KMP_ASSERT( master_th->th.th_current_task->td_flags.executing == 0 ); + master_th->th.th_current_task->td_flags.executing = 1; + + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); #if OMPT_SUPPORT - if (ompt_enabled) { - __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); - } + if (ompt_enabled) { + __kmp_join_ompt(master_th, parent_team, parallel_id, fork_context); + } #endif - KMP_MB(); - KA_TRACE( 20, ("__kmp_join_call: exit T#%d\n", gtid )); + KMP_MB(); + KA_TRACE(20, ("__kmp_join_call: exit T#%d\n", gtid)); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* Check whether we should push an internal control record onto the serial team stack. If so, do it. 
*/ -void -__kmp_save_internal_controls ( kmp_info_t * thread ) -{ +void __kmp_save_internal_controls(kmp_info_t *thread) { - if ( thread->th.th_team != thread->th.th_serial_team ) { - return; - } - if (thread->th.th_team->t.t_serialized > 1) { - int push = 0; + if (thread->th.th_team != thread->th.th_serial_team) { + return; + } + if (thread->th.th_team->t.t_serialized > 1) { + int push = 0; - if (thread->th.th_team->t.t_control_stack_top == NULL) { - push = 1; - } else { - if ( thread->th.th_team->t.t_control_stack_top->serial_nesting_level != - thread->th.th_team->t.t_serialized ) { - push = 1; - } - } - if (push) { /* push a record on the serial team's stack */ - kmp_internal_control_t * control = (kmp_internal_control_t *) __kmp_allocate(sizeof(kmp_internal_control_t)); + if (thread->th.th_team->t.t_control_stack_top == NULL) { + push = 1; + } else { + if (thread->th.th_team->t.t_control_stack_top->serial_nesting_level != + thread->th.th_team->t.t_serialized) { + push = 1; + } + } + if (push) { /* push a record on the serial team's stack */ + kmp_internal_control_t *control = + (kmp_internal_control_t *)__kmp_allocate( + sizeof(kmp_internal_control_t)); - copy_icvs( control, & thread->th.th_current_task->td_icvs ); + copy_icvs(control, &thread->th.th_current_task->td_icvs); - control->serial_nesting_level = thread->th.th_team->t.t_serialized; + control->serial_nesting_level = thread->th.th_team->t.t_serialized; - control->next = thread->th.th_team->t.t_control_stack_top; - thread->th.th_team->t.t_control_stack_top = control; - } + control->next = thread->th.th_team->t.t_control_stack_top; + thread->th.th_team->t.t_control_stack_top = control; } + } } /* Changes set_nproc */ -void -__kmp_set_num_threads( int new_nth, int gtid ) -{ - kmp_info_t *thread; - kmp_root_t *root; +void __kmp_set_num_threads(int new_nth, int gtid) { + kmp_info_t *thread; + kmp_root_t *root; - KF_TRACE( 10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); + KF_TRACE(10, ("__kmp_set_num_threads: new __kmp_nth = %d\n", new_nth)); + KMP_DEBUG_ASSERT(__kmp_init_serial); - if (new_nth < 1) - new_nth = 1; - else if (new_nth > __kmp_max_nth) - new_nth = __kmp_max_nth; + if (new_nth < 1) + new_nth = 1; + else if (new_nth > __kmp_max_nth) + new_nth = __kmp_max_nth; - KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); - thread = __kmp_threads[gtid]; + KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); + thread = __kmp_threads[gtid]; - __kmp_save_internal_controls( thread ); + __kmp_save_internal_controls(thread); - set__nproc( thread, new_nth ); + set__nproc(thread, new_nth); - // - // If this omp_set_num_threads() call will cause the hot team size to be - // reduced (in the absence of a num_threads clause), then reduce it now, - // rather than waiting for the next parallel region. - // - root = thread->th.th_root; - if ( __kmp_init_parallel && ( ! root->r.r_active ) - && ( root->r.r_hot_team->t.t_nproc > new_nth ) + // If this omp_set_num_threads() call will cause the hot team size to be + // reduced (in the absence of a num_threads clause), then reduce it now, + // rather than waiting for the next parallel region. 
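To illustrate the comment above from the user's side: a minimal OpenMP C program that lowers the thread limit between two parallel regions. With this runtime the hot team would typically be trimmed at the omp_set_num_threads() call rather than at the second fork; the exact thread counts remain implementation dependent.

#include <omp.h>
#include <stdio.h>

int main(void) {
#pragma omp parallel
  {
#pragma omp master
    printf("first region:  %d threads\n", omp_get_num_threads());
  }
  // The root is now inactive, so the runtime may release the extra hot-team
  // workers here instead of waiting for the next parallel region.
  omp_set_num_threads(2);
#pragma omp parallel
  {
#pragma omp master
    printf("second region: %d threads\n", omp_get_num_threads());
  }
  return 0;
}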
+ root = thread->th.th_root; + if (__kmp_init_parallel && (!root->r.r_active) && + (root->r.r_hot_team->t.t_nproc > new_nth) #if KMP_NESTED_HOT_TEAMS && __kmp_hot_teams_max_level && !__kmp_hot_teams_mode #endif - ) { - kmp_team_t *hot_team = root->r.r_hot_team; - int f; + ) { + kmp_team_t *hot_team = root->r.r_hot_team; + int f; - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - // Release the extra threads we don't need any more. - for ( f = new_nth; f < hot_team->t.t_nproc; f++ ) { - KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); - if ( __kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should unref task team. - hot_team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread( hot_team->t.t_threads[f] ); - hot_team->t.t_threads[f] = NULL; - } - hot_team->t.t_nproc = new_nth; + // Release the extra threads we don't need any more. + for (f = new_nth; f < hot_team->t.t_nproc; f++) { + KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); + if (__kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should unref + // task team. + hot_team->t.t_threads[f]->th.th_task_team = NULL; + } + __kmp_free_thread(hot_team->t.t_threads[f]); + hot_team->t.t_threads[f] = NULL; + } + hot_team->t.t_nproc = new_nth; #if KMP_NESTED_HOT_TEAMS - if( thread->th.th_hot_teams ) { - KMP_DEBUG_ASSERT( hot_team == thread->th.th_hot_teams[0].hot_team ); - thread->th.th_hot_teams[0].hot_team_nth = new_nth; - } + if (thread->th.th_hot_teams) { + KMP_DEBUG_ASSERT(hot_team == thread->th.th_hot_teams[0].hot_team); + thread->th.th_hot_teams[0].hot_team_nth = new_nth; + } #endif - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - // - // Update the t_nproc field in the threads that are still active. - // - for( f=0 ; f < new_nth; f++ ) { - KMP_DEBUG_ASSERT( hot_team->t.t_threads[f] != NULL ); - hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; - } - // Special flag in case omp_set_num_threads() call - hot_team->t.t_size_changed = -1; + // Update the t_nproc field in the threads that are still active. + for (f = 0; f < new_nth; f++) { + KMP_DEBUG_ASSERT(hot_team->t.t_threads[f] != NULL); + hot_team->t.t_threads[f]->th.th_team_nproc = new_nth; } + // Special flag in case omp_set_num_threads() call + hot_team->t.t_size_changed = -1; + } } /* Changes max_active_levels */ -void -__kmp_set_max_active_levels( int gtid, int max_active_levels ) -{ - kmp_info_t *thread; - - KF_TRACE( 10, ( "__kmp_set_max_active_levels: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // validate max_active_levels - if( max_active_levels < 0 ) { - KMP_WARNING( ActiveLevelsNegative, max_active_levels ); - // We ignore this call if the user has specified a negative value. - // The current setting won't be changed. The last valid setting will be used. - // A warning will be issued (if warnings are allowed as controlled by the KMP_WARNINGS env var). - KF_TRACE( 10, ( "__kmp_set_max_active_levels: the call is ignored: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - return; - } - if( max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT ) { - // it's OK, the max_active_levels is within the valid range: [ 0; KMP_MAX_ACTIVE_LEVELS_LIMIT ] - // We allow a zero value. 
(implementation defined behavior) - } else { - KMP_WARNING( ActiveLevelsExceedLimit, max_active_levels, KMP_MAX_ACTIVE_LEVELS_LIMIT ); - max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; - // Current upper limit is MAX_INT. (implementation defined behavior) - // If the input exceeds the upper limit, we correct the input to be the upper limit. (implementation defined behavior) - // Actually, the flow should never get here until we use MAX_INT limit. - } - KF_TRACE( 10, ( "__kmp_set_max_active_levels: after validation: new max_active_levels for thread %d = (%d)\n", gtid, max_active_levels ) ); - - thread = __kmp_threads[ gtid ]; - - __kmp_save_internal_controls( thread ); - - set__max_active_levels( thread, max_active_levels ); - +void __kmp_set_max_active_levels(int gtid, int max_active_levels) { + kmp_info_t *thread; + + KF_TRACE(10, ("__kmp_set_max_active_levels: new max_active_levels for thread " + "%d = (%d)\n", + gtid, max_active_levels)); + KMP_DEBUG_ASSERT(__kmp_init_serial); + + // validate max_active_levels + if (max_active_levels < 0) { + KMP_WARNING(ActiveLevelsNegative, max_active_levels); + // We ignore this call if the user has specified a negative value. + // The current setting won't be changed. The last valid setting will be + // used. A warning will be issued (if warnings are allowed as controlled by + // the KMP_WARNINGS env var). + KF_TRACE(10, ("__kmp_set_max_active_levels: the call is ignored: new " + "max_active_levels for thread %d = (%d)\n", + gtid, max_active_levels)); + return; + } + if (max_active_levels <= KMP_MAX_ACTIVE_LEVELS_LIMIT) { + // it's OK, the max_active_levels is within the valid range: [ 0; + // KMP_MAX_ACTIVE_LEVELS_LIMIT ] + // We allow a zero value. (implementation defined behavior) + } else { + KMP_WARNING(ActiveLevelsExceedLimit, max_active_levels, + KMP_MAX_ACTIVE_LEVELS_LIMIT); + max_active_levels = KMP_MAX_ACTIVE_LEVELS_LIMIT; + // Current upper limit is MAX_INT. (implementation defined behavior) + // If the input exceeds the upper limit, we correct the input to be the + // upper limit. (implementation defined behavior) + // Actually, the flow should never get here until we use MAX_INT limit. 
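A small user-level sketch of the validation above, using the standard omp_set_max_active_levels()/omp_get_max_active_levels() entry points; whether a warning is printed depends on KMP_WARNINGS, and the clamping limit is implementation defined.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_set_max_active_levels(4);
  printf("%d\n", omp_get_max_active_levels()); // 4
  omp_set_max_active_levels(-1);               // negative value: ignored, may warn
  printf("%d\n", omp_get_max_active_levels()); // still 4
  return 0;
}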
+ } + KF_TRACE(10, ("__kmp_set_max_active_levels: after validation: new " + "max_active_levels for thread %d = (%d)\n", + gtid, max_active_levels)); + + thread = __kmp_threads[gtid]; + + __kmp_save_internal_controls(thread); + + set__max_active_levels(thread, max_active_levels); } /* Gets max_active_levels */ -int -__kmp_get_max_active_levels( int gtid ) -{ - kmp_info_t *thread; - - KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d\n", gtid ) ); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - thread = __kmp_threads[ gtid ]; - KMP_DEBUG_ASSERT( thread->th.th_current_task ); - KF_TRACE( 10, ( "__kmp_get_max_active_levels: thread %d, curtask=%p, curtask_maxaclevel=%d\n", - gtid, thread->th.th_current_task, thread->th.th_current_task->td_icvs.max_active_levels ) ); - return thread->th.th_current_task->td_icvs.max_active_levels; +int __kmp_get_max_active_levels(int gtid) { + kmp_info_t *thread; + + KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d\n", gtid)); + KMP_DEBUG_ASSERT(__kmp_init_serial); + + thread = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(thread->th.th_current_task); + KF_TRACE(10, ("__kmp_get_max_active_levels: thread %d, curtask=%p, " + "curtask_maxaclevel=%d\n", + gtid, thread->th.th_current_task, + thread->th.th_current_task->td_icvs.max_active_levels)); + return thread->th.th_current_task->td_icvs.max_active_levels; } /* Changes def_sched_var ICV values (run-time schedule kind and chunk) */ -void -__kmp_set_schedule( int gtid, kmp_sched_t kind, int chunk ) -{ - kmp_info_t *thread; -// kmp_team_t *team; - - KF_TRACE( 10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", gtid, (int)kind, chunk )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // Check if the kind parameter is valid, correct if needed. - // Valid parameters should fit in one of two intervals - standard or extended: - // , , , , , - // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 - if ( kind <= kmp_sched_lower || kind >= kmp_sched_upper || - ( kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std ) ) - { - // TODO: Hint needs attention in case we change the default schedule. - __kmp_msg( - kmp_ms_warning, - KMP_MSG( ScheduleKindOutOfRange, kind ), - KMP_HNT( DefaultScheduleKindUsed, "static, no chunk" ), - __kmp_msg_null - ); - kind = kmp_sched_default; - chunk = 0; // ignore chunk value in case of bad kind - } - - thread = __kmp_threads[ gtid ]; - - __kmp_save_internal_controls( thread ); - - if ( kind < kmp_sched_upper_std ) { - if ( kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK ) { - // differ static chunked vs. 
unchunked: - // chunk should be invalid to indicate unchunked schedule (which is the default) - thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; - } else { - thread->th.th_current_task->td_icvs.sched.r_sched_type = __kmp_sch_map[ kind - kmp_sched_lower - 1 ]; - } - } else { - // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; - thread->th.th_current_task->td_icvs.sched.r_sched_type = - __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - kmp_sched_lower - 2 ]; - } - if ( kind == kmp_sched_auto ) { - // ignore parameter chunk for schedule auto - thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; +void __kmp_set_schedule(int gtid, kmp_sched_t kind, int chunk) { + kmp_info_t *thread; + // kmp_team_t *team; + + KF_TRACE(10, ("__kmp_set_schedule: new schedule for thread %d = (%d, %d)\n", + gtid, (int)kind, chunk)); + KMP_DEBUG_ASSERT(__kmp_init_serial); + + // Check if the kind parameter is valid, correct if needed. + // Valid parameters should fit in one of two intervals - standard or extended: + // , , , , , + // 2008-01-25: 0, 1 - 4, 5, 100, 101 - 102, 103 + if (kind <= kmp_sched_lower || kind >= kmp_sched_upper || + (kind <= kmp_sched_lower_ext && kind >= kmp_sched_upper_std)) { + // TODO: Hint needs attention in case we change the default schedule. + __kmp_msg(kmp_ms_warning, KMP_MSG(ScheduleKindOutOfRange, kind), + KMP_HNT(DefaultScheduleKindUsed, "static, no chunk"), + __kmp_msg_null); + kind = kmp_sched_default; + chunk = 0; // ignore chunk value in case of bad kind + } + + thread = __kmp_threads[gtid]; + + __kmp_save_internal_controls(thread); + + if (kind < kmp_sched_upper_std) { + if (kind == kmp_sched_static && chunk < KMP_DEFAULT_CHUNK) { + // differ static chunked vs. 
unchunked: chunk should be invalid to + // indicate unchunked schedule (which is the default) + thread->th.th_current_task->td_icvs.sched.r_sched_type = kmp_sch_static; } else { - thread->th.th_current_task->td_icvs.sched.chunk = chunk; - } + thread->th.th_current_task->td_icvs.sched.r_sched_type = + __kmp_sch_map[kind - kmp_sched_lower - 1]; + } + } else { + // __kmp_sch_map[ kind - kmp_sched_lower_ext + kmp_sched_upper_std - + // kmp_sched_lower - 2 ]; + thread->th.th_current_task->td_icvs.sched.r_sched_type = + __kmp_sch_map[kind - kmp_sched_lower_ext + kmp_sched_upper_std - + kmp_sched_lower - 2]; + } + if (kind == kmp_sched_auto) { + // ignore parameter chunk for schedule auto + thread->th.th_current_task->td_icvs.sched.chunk = KMP_DEFAULT_CHUNK; + } else { + thread->th.th_current_task->td_icvs.sched.chunk = chunk; + } } /* Gets def_sched_var ICV values */ -void -__kmp_get_schedule( int gtid, kmp_sched_t * kind, int * chunk ) -{ - kmp_info_t *thread; - enum sched_type th_type; +void __kmp_get_schedule(int gtid, kmp_sched_t *kind, int *chunk) { + kmp_info_t *thread; + enum sched_type th_type; + + KF_TRACE(10, ("__kmp_get_schedule: thread %d\n", gtid)); + KMP_DEBUG_ASSERT(__kmp_init_serial); + + thread = __kmp_threads[gtid]; + + th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; + + switch (th_type) { + case kmp_sch_static: + case kmp_sch_static_greedy: + case kmp_sch_static_balanced: + *kind = kmp_sched_static; + *chunk = 0; // chunk was not set, try to show this fact via zero value + return; + case kmp_sch_static_chunked: + *kind = kmp_sched_static; + break; + case kmp_sch_dynamic_chunked: + *kind = kmp_sched_dynamic; + break; + case kmp_sch_guided_chunked: + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + *kind = kmp_sched_guided; + break; + case kmp_sch_auto: + *kind = kmp_sched_auto; + break; + case kmp_sch_trapezoidal: + *kind = kmp_sched_trapezoidal; + break; +#if KMP_STATIC_STEAL_ENABLED + case kmp_sch_static_steal: + *kind = kmp_sched_static_steal; + break; +#endif + default: + KMP_FATAL(UnknownSchedulingType, th_type); + } - KF_TRACE( 10, ("__kmp_get_schedule: thread %d\n", gtid )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); + *chunk = thread->th.th_current_task->td_icvs.sched.chunk; +} - thread = __kmp_threads[ gtid ]; +int __kmp_get_ancestor_thread_num(int gtid, int level) { - th_type = thread->th.th_current_task->td_icvs.sched.r_sched_type; + int ii, dd; + kmp_team_t *team; + kmp_info_t *thr; - switch ( th_type ) { - case kmp_sch_static: - case kmp_sch_static_greedy: - case kmp_sch_static_balanced: - *kind = kmp_sched_static; - *chunk = 0; // chunk was not set, try to show this fact via zero value - return; - case kmp_sch_static_chunked: - *kind = kmp_sched_static; - break; - case kmp_sch_dynamic_chunked: - *kind = kmp_sched_dynamic; - break; - case kmp_sch_guided_chunked: - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - *kind = kmp_sched_guided; - break; - case kmp_sch_auto: - *kind = kmp_sched_auto; - break; - case kmp_sch_trapezoidal: - *kind = kmp_sched_trapezoidal; - break; -#if KMP_STATIC_STEAL_ENABLED - case kmp_sch_static_steal: - *kind = kmp_sched_static_steal; - break; -#endif - default: - KMP_FATAL( UnknownSchedulingType, th_type ); - } + KF_TRACE(10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level)); + KMP_DEBUG_ASSERT(__kmp_init_serial); + + // validate level + if (level == 0) + return 0; + if (level < 0) + return -1; + thr = __kmp_threads[gtid]; + team = 
thr->th.th_team; + ii = team->t.t_level; + if (level > ii) + return -1; - *chunk = thread->th.th_current_task->td_icvs.sched.chunk; +#if OMP_40_ENABLED + if (thr->th.th_teams_microtask) { + // AC: we are in teams region where multiple nested teams have same level + int tlevel = thr->th.th_teams_level; // the level of the teams construct + if (level <= + tlevel) { // otherwise usual algorithm works (will not touch the teams) + KMP_DEBUG_ASSERT(ii >= tlevel); + // AC: As we need to pass by the teams league, we need to artificially + // increase ii + if (ii == tlevel) { + ii += 2; // three teams have same level + } else { + ii++; // two teams have same level + } + } + } +#endif + + if (ii == level) + return __kmp_tid_from_gtid(gtid); + + dd = team->t.t_serialized; + level++; + while (ii > level) { + for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { + } + if ((team->t.t_serialized) && (!dd)) { + team = team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + dd = team->t.t_serialized; + ii--; + } + } + + return (dd > 1) ? (0) : (team->t.t_master_tid); } -int -__kmp_get_ancestor_thread_num( int gtid, int level ) { +int __kmp_get_team_size(int gtid, int level) { - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; + int ii, dd; + kmp_team_t *team; + kmp_info_t *thr; - KF_TRACE( 10, ("__kmp_get_ancestor_thread_num: thread %d %d\n", gtid, level )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); + KF_TRACE(10, ("__kmp_get_team_size: thread %d %d\n", gtid, level)); + KMP_DEBUG_ASSERT(__kmp_init_serial); - // validate level - if( level == 0 ) return 0; - if( level < 0 ) return -1; - thr = __kmp_threads[ gtid ]; - team = thr->th.th_team; - ii = team->t.t_level; - if( level > ii ) return -1; + // validate level + if (level == 0) + return 1; + if (level < 0) + return -1; + thr = __kmp_threads[gtid]; + team = thr->th.th_team; + ii = team->t.t_level; + if (level > ii) + return -1; #if OMP_40_ENABLED - if( thr->th.th_teams_microtask ) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT( ii >= tlevel ); - // AC: As we need to pass by the teams league, we need to artificially increase ii - if ( ii == tlevel ) { - ii += 2; // three teams have same level - } else { - ii ++; // two teams have same level - } - } + if (thr->th.th_teams_microtask) { + // AC: we are in teams region where multiple nested teams have same level + int tlevel = thr->th.th_teams_level; // the level of the teams construct + if (level <= + tlevel) { // otherwise usual algorithm works (will not touch the teams) + KMP_DEBUG_ASSERT(ii >= tlevel); + // AC: As we need to pass by the teams league, we need to artificially + // increase ii + if (ii == tlevel) { + ii += 2; // three teams have same level + } else { + ii++; // two teams have same level + } } + } #endif - if( ii == level ) return __kmp_tid_from_gtid( gtid ); - - dd = team->t.t_serialized; - level++; - while( ii > level ) - { - for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( ( team->t.t_serialized ) && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - dd = team->t.t_serialized; - ii--; - } + while (ii > level) { + for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { + } + if (team->t.t_serialized && (!dd)) { + team = 
team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + ii--; } + } - return ( dd > 1 ) ? ( 0 ) : ( team->t.t_master_tid ); + return team->t.t_nproc; } -int -__kmp_get_team_size( int gtid, int level ) { - - int ii, dd; - kmp_team_t *team; - kmp_info_t *thr; - - KF_TRACE( 10, ("__kmp_get_team_size: thread %d %d\n", gtid, level )); - KMP_DEBUG_ASSERT( __kmp_init_serial ); - - // validate level - if( level == 0 ) return 1; - if( level < 0 ) return -1; - thr = __kmp_threads[ gtid ]; - team = thr->th.th_team; - ii = team->t.t_level; - if( level > ii ) return -1; - -#if OMP_40_ENABLED - if( thr->th.th_teams_microtask ) { - // AC: we are in teams region where multiple nested teams have same level - int tlevel = thr->th.th_teams_level; // the level of the teams construct - if( level <= tlevel ) { // otherwise usual algorithm works (will not touch the teams) - KMP_DEBUG_ASSERT( ii >= tlevel ); - // AC: As we need to pass by the teams league, we need to artificially increase ii - if ( ii == tlevel ) { - ii += 2; // three teams have same level - } else { - ii ++; // two teams have same level - } - } - } -#endif - - while( ii > level ) - { - for( dd = team->t.t_serialized; ( dd > 0 ) && ( ii > level ); dd--, ii-- ) - { - } - if( team->t.t_serialized && ( !dd ) ) { - team = team->t.t_parent; - continue; - } - if( ii > level ) { - team = team->t.t_parent; - ii--; - } - } - - return team->t.t_nproc; +kmp_r_sched_t __kmp_get_schedule_global() { + // This routine created because pairs (__kmp_sched, __kmp_chunk) and + // (__kmp_static, __kmp_guided) may be changed by kmp_set_defaults + // independently. So one can get the updated schedule here. + + kmp_r_sched_t r_sched; + + // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, + // __kmp_guided. __kmp_sched should keep original value, so that user can set + // KMP_SCHEDULE multiple times, and thus have different run-time schedules in + // different roots (even in OMP 2.5) + if (__kmp_sched == kmp_sch_static) { + r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed + // schedule (balanced or greedy) + } else if (__kmp_sched == kmp_sch_guided_chunked) { + r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed + // schedule (iterative or analytical) + } else { + r_sched.r_sched_type = + __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other + } + + if (__kmp_chunk < KMP_DEFAULT_CHUNK) { // __kmp_chunk may be wrong here (if it + // was not ever set) + r_sched.chunk = KMP_DEFAULT_CHUNK; + } else { + r_sched.chunk = __kmp_chunk; + } + + return r_sched; } -kmp_r_sched_t -__kmp_get_schedule_global() { -// This routine created because pairs (__kmp_sched, __kmp_chunk) and (__kmp_static, __kmp_guided) -// may be changed by kmp_set_defaults independently. So one can get the updated schedule here. 
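The mapping above is what backs the standard runtime-schedule API; a minimal sketch using omp_set_schedule()/omp_get_schedule(). The kind/chunk values noted in the comments are what this code would typically report, not a portable guarantee.

#include <omp.h>
#include <stdio.h>

int main(void) {
  omp_sched_t kind;
  int chunk;

  omp_set_schedule(omp_sched_dynamic, 4); // maps to kmp_sch_dynamic_chunked
  omp_get_schedule(&kind, &chunk);
  printf("kind=%d chunk=%d\n", (int)kind, chunk);

  omp_set_schedule(omp_sched_static, 0); // chunk below default: plain static
  omp_get_schedule(&kind, &chunk);
  printf("kind=%d chunk=%d\n", (int)kind, chunk); // chunk reported as 0
  return 0;
}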
- - kmp_r_sched_t r_sched; - - // create schedule from 4 globals: __kmp_sched, __kmp_chunk, __kmp_static, __kmp_guided - // __kmp_sched should keep original value, so that user can set KMP_SCHEDULE multiple times, - // and thus have different run-time schedules in different roots (even in OMP 2.5) - if ( __kmp_sched == kmp_sch_static ) { - r_sched.r_sched_type = __kmp_static; // replace STATIC with more detailed schedule (balanced or greedy) - } else if ( __kmp_sched == kmp_sch_guided_chunked ) { - r_sched.r_sched_type = __kmp_guided; // replace GUIDED with more detailed schedule (iterative or analytical) - } else { - r_sched.r_sched_type = __kmp_sched; // (STATIC_CHUNKED), or (DYNAMIC_CHUNKED), or other - } - - if ( __kmp_chunk < KMP_DEFAULT_CHUNK ) { // __kmp_chunk may be wrong here (if it was not ever set) - r_sched.chunk = KMP_DEFAULT_CHUNK; +/* Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) + at least argc number of *t_argv entries for the requested team. */ +static void __kmp_alloc_argv_entries(int argc, kmp_team_t *team, int realloc) { + + KMP_DEBUG_ASSERT(team); + if (!realloc || argc > team->t.t_max_argc) { + + KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: needed entries=%d, " + "current entries=%d\n", + team->t.t_id, argc, (realloc) ? team->t.t_max_argc : 0)); + /* if previously allocated heap space for args, free them */ + if (realloc && team->t.t_argv != &team->t.t_inline_argv[0]) + __kmp_free((void *)team->t.t_argv); + + if (argc <= KMP_INLINE_ARGV_ENTRIES) { + /* use unused space in the cache line for arguments */ + team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; + KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: inline allocate %d " + "argv entries\n", + team->t.t_id, team->t.t_max_argc)); + team->t.t_argv = &team->t.t_inline_argv[0]; + if (__kmp_storage_map) { + __kmp_print_storage_map_gtid( + -1, &team->t.t_inline_argv[0], + &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], + (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), "team_%d.t_inline_argv", + team->t.t_id); + } } else { - r_sched.chunk = __kmp_chunk; - } - - return r_sched; + /* allocate space for arguments in the heap */ + team->t.t_max_argc = (argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1)) + ? KMP_MIN_MALLOC_ARGV_ENTRIES + : 2 * argc; + KA_TRACE(100, ("__kmp_alloc_argv_entries: team %d: dynamic allocate %d " + "argv entries\n", + team->t.t_id, team->t.t_max_argc)); + team->t.t_argv = + (void **)__kmp_page_allocate(sizeof(void *) * team->t.t_max_argc); + if (__kmp_storage_map) { + __kmp_print_storage_map_gtid(-1, &team->t.t_argv[0], + &team->t.t_argv[team->t.t_max_argc], + sizeof(void *) * team->t.t_max_argc, + "team_%d.t_argv", team->t.t_id); + } + } + } } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -/* - * Allocate (realloc == FALSE) * or reallocate (realloc == TRUE) - * at least argc number of *t_argv entries for the requested team. - */ -static void -__kmp_alloc_argv_entries( int argc, kmp_team_t *team, int realloc ) -{ - - KMP_DEBUG_ASSERT( team ); - if( !realloc || argc > team->t.t_max_argc ) { - - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: needed entries=%d, current entries=%d\n", - team->t.t_id, argc, ( realloc ) ? 
team->t.t_max_argc : 0 )); - /* if previously allocated heap space for args, free them */ - if ( realloc && team->t.t_argv != &team->t.t_inline_argv[0] ) - __kmp_free( (void *) team->t.t_argv ); - - if ( argc <= KMP_INLINE_ARGV_ENTRIES ) { - /* use unused space in the cache line for arguments */ - team->t.t_max_argc = KMP_INLINE_ARGV_ENTRIES; - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: inline allocate %d argv entries\n", - team->t.t_id, team->t.t_max_argc )); - team->t.t_argv = &team->t.t_inline_argv[0]; - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( -1, &team->t.t_inline_argv[0], - &team->t.t_inline_argv[KMP_INLINE_ARGV_ENTRIES], - (sizeof(void *) * KMP_INLINE_ARGV_ENTRIES), - "team_%d.t_inline_argv", - team->t.t_id ); - } - } else { - /* allocate space for arguments in the heap */ - team->t.t_max_argc = ( argc <= (KMP_MIN_MALLOC_ARGV_ENTRIES >> 1 )) ? - KMP_MIN_MALLOC_ARGV_ENTRIES : 2 * argc; - KA_TRACE( 100, ( "__kmp_alloc_argv_entries: team %d: dynamic allocate %d argv entries\n", - team->t.t_id, team->t.t_max_argc )); - team->t.t_argv = (void**) __kmp_page_allocate( sizeof(void*) * team->t.t_max_argc ); - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( -1, &team->t.t_argv[0], &team->t.t_argv[team->t.t_max_argc], - sizeof(void *) * team->t.t_max_argc, "team_%d.t_argv", - team->t.t_id ); - } - } - } -} - -static void -__kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) -{ - int i; - int num_disp_buff = max_nth > 1 ? __kmp_dispatch_num_buffers : 2; - team->t.t_threads = (kmp_info_t**) __kmp_allocate( sizeof(kmp_info_t*) * max_nth ); - team->t.t_disp_buffer = (dispatch_shared_info_t*) - __kmp_allocate( sizeof(dispatch_shared_info_t) * num_disp_buff ); - team->t.t_dispatch = (kmp_disp_t*) __kmp_allocate( sizeof(kmp_disp_t) * max_nth ); - team->t.t_implicit_task_taskdata = (kmp_taskdata_t*) __kmp_allocate( sizeof(kmp_taskdata_t) * max_nth ); - team->t.t_max_nproc = max_nth; - - /* setup dispatch buffers */ - for(i = 0 ; i < num_disp_buff; ++i) { - team->t.t_disp_buffer[i].buffer_index = i; +static void __kmp_allocate_team_arrays(kmp_team_t *team, int max_nth) { + int i; + int num_disp_buff = max_nth > 1 ? 
__kmp_dispatch_num_buffers : 2; + team->t.t_threads = + (kmp_info_t **)__kmp_allocate(sizeof(kmp_info_t *) * max_nth); + team->t.t_disp_buffer = (dispatch_shared_info_t *)__kmp_allocate( + sizeof(dispatch_shared_info_t) * num_disp_buff); + team->t.t_dispatch = + (kmp_disp_t *)__kmp_allocate(sizeof(kmp_disp_t) * max_nth); + team->t.t_implicit_task_taskdata = + (kmp_taskdata_t *)__kmp_allocate(sizeof(kmp_taskdata_t) * max_nth); + team->t.t_max_nproc = max_nth; + + /* setup dispatch buffers */ + for (i = 0; i < num_disp_buff; ++i) { + team->t.t_disp_buffer[i].buffer_index = i; #if OMP_45_ENABLED - team->t.t_disp_buffer[i].doacross_buf_idx = i; + team->t.t_disp_buffer[i].doacross_buf_idx = i; #endif - } + } } -static void -__kmp_free_team_arrays(kmp_team_t *team) { - /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ - int i; - for ( i = 0; i < team->t.t_max_nproc; ++ i ) { - if ( team->t.t_dispatch[ i ].th_disp_buffer != NULL ) { - __kmp_free( team->t.t_dispatch[ i ].th_disp_buffer ); - team->t.t_dispatch[ i ].th_disp_buffer = NULL; - }; // if - }; // for - __kmp_free(team->t.t_threads); - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - __kmp_free(team->t.t_implicit_task_taskdata); - team->t.t_threads = NULL; - team->t.t_disp_buffer = NULL; - team->t.t_dispatch = NULL; - team->t.t_implicit_task_taskdata = 0; +static void __kmp_free_team_arrays(kmp_team_t *team) { + /* Note: this does not free the threads in t_threads (__kmp_free_threads) */ + int i; + for (i = 0; i < team->t.t_max_nproc; ++i) { + if (team->t.t_dispatch[i].th_disp_buffer != NULL) { + __kmp_free(team->t.t_dispatch[i].th_disp_buffer); + team->t.t_dispatch[i].th_disp_buffer = NULL; + }; // if + }; // for + __kmp_free(team->t.t_threads); + __kmp_free(team->t.t_disp_buffer); + __kmp_free(team->t.t_dispatch); + __kmp_free(team->t.t_implicit_task_taskdata); + team->t.t_threads = NULL; + team->t.t_disp_buffer = NULL; + team->t.t_dispatch = NULL; + team->t.t_implicit_task_taskdata = 0; } -static void -__kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { - kmp_info_t **oldThreads = team->t.t_threads; +static void __kmp_reallocate_team_arrays(kmp_team_t *team, int max_nth) { + kmp_info_t **oldThreads = team->t.t_threads; - __kmp_free(team->t.t_disp_buffer); - __kmp_free(team->t.t_dispatch); - __kmp_free(team->t.t_implicit_task_taskdata); - __kmp_allocate_team_arrays(team, max_nth); + __kmp_free(team->t.t_disp_buffer); + __kmp_free(team->t.t_dispatch); + __kmp_free(team->t.t_implicit_task_taskdata); + __kmp_allocate_team_arrays(team, max_nth); - KMP_MEMCPY(team->t.t_threads, oldThreads, team->t.t_nproc * sizeof (kmp_info_t*)); + KMP_MEMCPY(team->t.t_threads, oldThreads, + team->t.t_nproc * sizeof(kmp_info_t *)); - __kmp_free(oldThreads); + __kmp_free(oldThreads); } -static kmp_internal_control_t -__kmp_get_global_icvs( void ) { +static kmp_internal_control_t __kmp_get_global_icvs(void) { - kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals + kmp_r_sched_t r_sched = + __kmp_get_schedule_global(); // get current state of scheduling globals #if OMP_40_ENABLED - KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.used > 0 ); + KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.used > 0); #endif /* OMP_40_ENABLED */ - kmp_internal_control_t g_icvs = { - 0, //int serial_nesting_level; //corresponds to the value of the th_team_serialized field - (kmp_int8)__kmp_dflt_nested, //int nested; //internal control for nested parallelism (per thread) - 
(kmp_int8)__kmp_global.g.g_dynamic, //internal control for dynamic adjustment of threads (per thread) - (kmp_int8)__kmp_env_blocktime, //int bt_set; //internal control for whether blocktime is explicitly set - __kmp_dflt_blocktime, //int blocktime; //internal control for blocktime + kmp_internal_control_t g_icvs = { + 0, // int serial_nesting_level; //corresponds to value of th_team_serialized + (kmp_int8)__kmp_dflt_nested, // int nested; //internal control + // for nested parallelism (per thread) + (kmp_int8)__kmp_global.g.g_dynamic, // internal control for dynamic + // adjustment of threads (per thread) + (kmp_int8)__kmp_env_blocktime, // int bt_set; //internal control for + // whether blocktime is explicitly set + __kmp_dflt_blocktime, // int blocktime; //internal control for blocktime #if KMP_USE_MONITOR - __kmp_bt_intervals, //int bt_intervals; //internal control for blocktime intervals -#endif - __kmp_dflt_team_nth, //int nproc; //internal control for # of threads for next parallel region (per thread) - // (use a max ub on value if __kmp_parallel_initialize not called yet) - __kmp_dflt_max_active_levels, //int max_active_levels; //internal control for max_active_levels - r_sched, //kmp_r_sched_t sched; //internal control for runtime schedule {sched,chunk} pair + __kmp_bt_intervals, // int bt_intervals; //internal control for blocktime +// intervals +#endif + __kmp_dflt_team_nth, // int nproc; //internal control for # of threads for + // next parallel region (per thread) + // (use a max ub on value if __kmp_parallel_initialize not called yet) + __kmp_dflt_max_active_levels, // int max_active_levels; //internal control + // for max_active_levels + r_sched, // kmp_r_sched_t sched; //internal control for runtime schedule +// {sched,chunk} pair #if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], - __kmp_default_device, + __kmp_nested_proc_bind.bind_types[0], + __kmp_default_device, #endif /* OMP_40_ENABLED */ - NULL //struct kmp_internal_control *next; - }; + NULL // struct kmp_internal_control *next; + }; - return g_icvs; + return g_icvs; } -static kmp_internal_control_t -__kmp_get_x_global_icvs( const kmp_team_t *team ) { +static kmp_internal_control_t __kmp_get_x_global_icvs(const kmp_team_t *team) { - kmp_internal_control_t gx_icvs; - gx_icvs.serial_nesting_level = 0; // probably =team->t.t_serial like in save_inter_controls - copy_icvs( & gx_icvs, & team->t.t_threads[0]->th.th_current_task->td_icvs ); - gx_icvs.next = NULL; + kmp_internal_control_t gx_icvs; + gx_icvs.serial_nesting_level = + 0; // probably =team->t.t_serial like in save_inter_controls + copy_icvs(&gx_icvs, &team->t.t_threads[0]->th.th_current_task->td_icvs); + gx_icvs.next = NULL; - return gx_icvs; + return gx_icvs; } -static void -__kmp_initialize_root( kmp_root_t *root ) -{ - int f; - kmp_team_t *root_team; - kmp_team_t *hot_team; - int hot_team_max_nth; - kmp_r_sched_t r_sched = __kmp_get_schedule_global(); // get current state of scheduling globals - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KMP_DEBUG_ASSERT( root ); - KMP_ASSERT( ! 
root->r.r_begin ); - - /* setup the root state structure */ - __kmp_init_lock( &root->r.r_begin_lock ); - root->r.r_begin = FALSE; - root->r.r_active = FALSE; - root->r.r_in_parallel = 0; - root->r.r_blocktime = __kmp_dflt_blocktime; - root->r.r_nested = __kmp_dflt_nested; - - /* setup the root team for this task */ - /* allocate the root team structure */ - KF_TRACE( 10, ( "__kmp_initialize_root: before root_team\n" ) ); - - root_team = - __kmp_allocate_team( - root, - 1, // new_nproc - 1, // max_nproc +static void __kmp_initialize_root(kmp_root_t *root) { + int f; + kmp_team_t *root_team; + kmp_team_t *hot_team; + int hot_team_max_nth; + kmp_r_sched_t r_sched = + __kmp_get_schedule_global(); // get current state of scheduling globals + kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); + KMP_DEBUG_ASSERT(root); + KMP_ASSERT(!root->r.r_begin); + + /* setup the root state structure */ + __kmp_init_lock(&root->r.r_begin_lock); + root->r.r_begin = FALSE; + root->r.r_active = FALSE; + root->r.r_in_parallel = 0; + root->r.r_blocktime = __kmp_dflt_blocktime; + root->r.r_nested = __kmp_dflt_nested; + + /* setup the root team for this task */ + /* allocate the root team structure */ + KF_TRACE(10, ("__kmp_initialize_root: before root_team\n")); + + root_team = + __kmp_allocate_team(root, + 1, // new_nproc + 1, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + 0, // root parallel id #endif #if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], + __kmp_nested_proc_bind.bind_types[0], #endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); + &r_icvs, + 0 // argc + USE_NESTED_HOT_ARG(NULL) // master thread is unknown + ); #if USE_DEBUGGER - // Non-NULL value should be assigned to make the debugger display the root team. - TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)( ~ 0 )); -#endif - - KF_TRACE( 10, ( "__kmp_initialize_root: after root_team = %p\n", root_team ) ); - - root->r.r_root_team = root_team; - root_team->t.t_control_stack_top = NULL; - - /* initialize root team */ - root_team->t.t_threads[0] = NULL; - root_team->t.t_nproc = 1; - root_team->t.t_serialized = 1; - // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; - root_team->t.t_sched.chunk = r_sched.chunk; - KA_TRACE( 20, ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", - root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - - /* setup the hot team for this task */ - /* allocate the hot team structure */ - KF_TRACE( 10, ( "__kmp_initialize_root: before hot_team\n" ) ); - - hot_team = - __kmp_allocate_team( - root, - 1, // new_nproc - __kmp_dflt_team_nth_ub * 2, // max_nproc + // Non-NULL value should be assigned to make the debugger display the root + // team. 
+ TCW_SYNC_PTR(root_team->t.t_pkfn, (microtask_t)(~0)); +#endif + + KF_TRACE(10, ("__kmp_initialize_root: after root_team = %p\n", root_team)); + + root->r.r_root_team = root_team; + root_team->t.t_control_stack_top = NULL; + + /* initialize root team */ + root_team->t.t_threads[0] = NULL; + root_team->t.t_nproc = 1; + root_team->t.t_serialized = 1; + // TODO???: root_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; + root_team->t.t_sched.r_sched_type = r_sched.r_sched_type; + root_team->t.t_sched.chunk = r_sched.chunk; + KA_TRACE( + 20, + ("__kmp_initialize_root: init root team %d arrived: join=%u, plain=%u\n", + root_team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); + + /* setup the hot team for this task */ + /* allocate the hot team structure */ + KF_TRACE(10, ("__kmp_initialize_root: before hot_team\n")); + + hot_team = + __kmp_allocate_team(root, + 1, // new_nproc + __kmp_dflt_team_nth_ub * 2, // max_nproc #if OMPT_SUPPORT - 0, // root parallel id + 0, // root parallel id #endif #if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0], -#endif - &r_icvs, - 0 // argc - USE_NESTED_HOT_ARG(NULL) // master thread is unknown - ); - KF_TRACE( 10, ( "__kmp_initialize_root: after hot_team = %p\n", hot_team ) ); - - root->r.r_hot_team = hot_team; - root_team->t.t_control_stack_top = NULL; - - /* first-time initialization */ - hot_team->t.t_parent = root_team; - - /* initialize hot team */ - hot_team_max_nth = hot_team->t.t_max_nproc; - for ( f = 0; f < hot_team_max_nth; ++ f ) { - hot_team->t.t_threads[ f ] = NULL; - }; // for - hot_team->t.t_nproc = 1; - // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; - hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; - hot_team->t.t_sched.chunk = r_sched.chunk; - hot_team->t.t_size_changed = 0; + __kmp_nested_proc_bind.bind_types[0], +#endif + &r_icvs, + 0 // argc + USE_NESTED_HOT_ARG(NULL) // master thread is unknown + ); + KF_TRACE(10, ("__kmp_initialize_root: after hot_team = %p\n", hot_team)); + + root->r.r_hot_team = hot_team; + root_team->t.t_control_stack_top = NULL; + + /* first-time initialization */ + hot_team->t.t_parent = root_team; + + /* initialize hot team */ + hot_team_max_nth = hot_team->t.t_max_nproc; + for (f = 0; f < hot_team_max_nth; ++f) { + hot_team->t.t_threads[f] = NULL; + }; // for + hot_team->t.t_nproc = 1; + // TODO???: hot_team->t.t_max_active_levels = __kmp_dflt_max_active_levels; + hot_team->t.t_sched.r_sched_type = r_sched.r_sched_type; + hot_team->t.t_sched.chunk = r_sched.chunk; + hot_team->t.t_size_changed = 0; } #ifdef KMP_DEBUG - typedef struct kmp_team_list_item { - kmp_team_p const * entry; - struct kmp_team_list_item * next; + kmp_team_p const *entry; + struct kmp_team_list_item *next; } kmp_team_list_item_t; -typedef kmp_team_list_item_t * kmp_team_list_t; - +typedef kmp_team_list_item_t *kmp_team_list_t; -static void -__kmp_print_structure_team_accum( // Add team to list of teams. - kmp_team_list_t list, // List of teams. - kmp_team_p const * team // Team to add. -) { - - // List must terminate with item where both entry and next are NULL. - // Team is added to the list only once. - // List is sorted in ascending order by team id. - // Team id is *not* a key. - - kmp_team_list_t l; - - KMP_DEBUG_ASSERT( list != NULL ); - if ( team == NULL ) { - return; - }; // if - - __kmp_print_structure_team_accum( list, team->t.t_parent ); - __kmp_print_structure_team_accum( list, team->t.t_next_pool ); - - // Search list for the team. 
- l = list; - while ( l->next != NULL && l->entry != team ) { - l = l->next; - }; // while - if ( l->next != NULL ) { - return; // Team has been added before, exit. - }; // if - - // Team is not found. Search list again for insertion point. - l = list; - while ( l->next != NULL && l->entry->t.t_id <= team->t.t_id ) { - l = l->next; - }; // while - - // Insert team. - { - kmp_team_list_item_t * item = - (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); - * item = * l; - l->entry = team; - l->next = item; - } +static void __kmp_print_structure_team_accum( // Add team to list of teams. + kmp_team_list_t list, // List of teams. + kmp_team_p const *team // Team to add. + ) { + // List must terminate with item where both entry and next are NULL. + // Team is added to the list only once. + // List is sorted in ascending order by team id. + // Team id is *not* a key. + + kmp_team_list_t l; + + KMP_DEBUG_ASSERT(list != NULL); + if (team == NULL) { + return; + }; // if + + __kmp_print_structure_team_accum(list, team->t.t_parent); + __kmp_print_structure_team_accum(list, team->t.t_next_pool); + + // Search list for the team. + l = list; + while (l->next != NULL && l->entry != team) { + l = l->next; + }; // while + if (l->next != NULL) { + return; // Team has been added before, exit. + }; // if + + // Team is not found. Search list again for insertion point. + l = list; + while (l->next != NULL && l->entry->t.t_id <= team->t.t_id) { + l = l->next; + }; // while + + // Insert team. + { + kmp_team_list_item_t *item = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( + sizeof(kmp_team_list_item_t)); + *item = *l; + l->entry = team; + l->next = item; + } } -static void -__kmp_print_structure_team( - char const * title, - kmp_team_p const * team - -) { - __kmp_printf( "%s", title ); - if ( team != NULL ) { - __kmp_printf( "%2x %p\n", team->t.t_id, team ); - } else { - __kmp_printf( " - (nil)\n" ); - }; // if +static void __kmp_print_structure_team(char const *title, kmp_team_p const *team + + ) { + __kmp_printf("%s", title); + if (team != NULL) { + __kmp_printf("%2x %p\n", team->t.t_id, team); + } else { + __kmp_printf(" - (nil)\n"); + }; // if } -static void -__kmp_print_structure_thread( - char const * title, - kmp_info_p const * thread - -) { - __kmp_printf( "%s", title ); - if ( thread != NULL ) { - __kmp_printf( "%2d %p\n", thread->th.th_info.ds.ds_gtid, thread ); - } else { - __kmp_printf( " - (nil)\n" ); - }; // if +static void __kmp_print_structure_thread(char const *title, + kmp_info_p const *thread) { + __kmp_printf("%s", title); + if (thread != NULL) { + __kmp_printf("%2d %p\n", thread->th.th_info.ds.ds_gtid, thread); + } else { + __kmp_printf(" - (nil)\n"); + }; // if } -void -__kmp_print_structure( - void -) { +void __kmp_print_structure(void) { - kmp_team_list_t list; + kmp_team_list_t list; - // Initialize list of teams. - list = (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC( sizeof( kmp_team_list_item_t ) ); - list->entry = NULL; - list->next = NULL; + // Initialize list of teams. 
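The list built here is the sentinel-terminated, id-sorted structure described in __kmp_print_structure_team_accum above. A stand-alone sketch of that insertion pattern; names such as item_t and sorted_insert are illustrative only.

#include <stdio.h>
#include <stdlib.h>

typedef struct item {
  int id;            // meaningless in the sentinel (entry == NULL in the real code)
  struct item *next; // NULL only in the terminating sentinel
} item_t;

// Insert 'id' keeping ascending order. The node found at the insertion point
// (possibly the sentinel) is copied one slot down, so the terminating item
// always stays at the tail.
static void sorted_insert(item_t *list, int id) {
  item_t *l = list;
  while (l->next != NULL && l->id <= id)
    l = l->next;
  item_t *copy = (item_t *)malloc(sizeof(item_t));
  *copy = *l;
  l->id = id;
  l->next = copy;
}

int main(void) {
  item_t *list = (item_t *)calloc(1, sizeof(item_t)); // sentinel
  sorted_insert(list, 5);
  sorted_insert(list, 3);
  sorted_insert(list, 7);
  for (item_t *l = list; l->next != NULL; l = l->next)
    printf("%d ", l->id); // 3 5 7
  printf("\n");
  return 0; // freeing omitted for brevity
}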
+ list = + (kmp_team_list_item_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_team_list_item_t)); + list->entry = NULL; + list->next = NULL; - __kmp_printf( "\n------------------------------\nGlobal Thread Table\n------------------------------\n" ); - { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - __kmp_printf( "%2d", gtid ); - if ( __kmp_threads != NULL ) { - __kmp_printf( " %p", __kmp_threads[ gtid ] ); - }; // if - if ( __kmp_root != NULL ) { - __kmp_printf( " %p", __kmp_root[ gtid ] ); - }; // if - __kmp_printf( "\n" ); - }; // for gtid - } - - // Print out __kmp_threads array. - __kmp_printf( "\n------------------------------\nThreads\n------------------------------\n" ); - if ( __kmp_threads != NULL ) { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - kmp_info_t const * thread = __kmp_threads[ gtid ]; - if ( thread != NULL ) { - __kmp_printf( "GTID %2d %p:\n", gtid, thread ); - __kmp_printf( " Our Root: %p\n", thread->th.th_root ); - __kmp_print_structure_team( " Our Team: ", thread->th.th_team ); - __kmp_print_structure_team( " Serial Team: ", thread->th.th_serial_team ); - __kmp_printf( " Threads: %2d\n", thread->th.th_team_nproc ); - __kmp_print_structure_thread( " Master: ", thread->th.th_team_master ); - __kmp_printf( " Serialized?: %2d\n", thread->th.th_team_serialized ); - __kmp_printf( " Set NProc: %2d\n", thread->th.th_set_nproc ); + __kmp_printf("\n------------------------------\nGlobal Thread " + "Table\n------------------------------\n"); + { + int gtid; + for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { + __kmp_printf("%2d", gtid); + if (__kmp_threads != NULL) { + __kmp_printf(" %p", __kmp_threads[gtid]); + }; // if + if (__kmp_root != NULL) { + __kmp_printf(" %p", __kmp_root[gtid]); + }; // if + __kmp_printf("\n"); + }; // for gtid + } + + // Print out __kmp_threads array. + __kmp_printf("\n------------------------------\nThreads\n--------------------" + "----------\n"); + if (__kmp_threads != NULL) { + int gtid; + for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { + kmp_info_t const *thread = __kmp_threads[gtid]; + if (thread != NULL) { + __kmp_printf("GTID %2d %p:\n", gtid, thread); + __kmp_printf(" Our Root: %p\n", thread->th.th_root); + __kmp_print_structure_team(" Our Team: ", thread->th.th_team); + __kmp_print_structure_team(" Serial Team: ", + thread->th.th_serial_team); + __kmp_printf(" Threads: %2d\n", thread->th.th_team_nproc); + __kmp_print_structure_thread(" Master: ", + thread->th.th_team_master); + __kmp_printf(" Serialized?: %2d\n", thread->th.th_team_serialized); + __kmp_printf(" Set NProc: %2d\n", thread->th.th_set_nproc); #if OMP_40_ENABLED - __kmp_printf( " Set Proc Bind: %2d\n", thread->th.th_set_proc_bind ); -#endif - __kmp_print_structure_thread( " Next in pool: ", thread->th.th_next_pool ); - __kmp_printf( "\n" ); - __kmp_print_structure_team_accum( list, thread->th.th_team ); - __kmp_print_structure_team_accum( list, thread->th.th_serial_team ); - }; // if - }; // for gtid - } else { - __kmp_printf( "Threads array is not allocated.\n" ); - }; // if - - // Print out __kmp_root array. 
- __kmp_printf( "\n------------------------------\nUbers\n------------------------------\n" ); - if ( __kmp_root != NULL ) { - int gtid; - for ( gtid = 0; gtid < __kmp_threads_capacity; ++ gtid ) { - kmp_root_t const * root = __kmp_root[ gtid ]; - if ( root != NULL ) { - __kmp_printf( "GTID %2d %p:\n", gtid, root ); - __kmp_print_structure_team( " Root Team: ", root->r.r_root_team ); - __kmp_print_structure_team( " Hot Team: ", root->r.r_hot_team ); - __kmp_print_structure_thread( " Uber Thread: ", root->r.r_uber_thread ); - __kmp_printf( " Active?: %2d\n", root->r.r_active ); - __kmp_printf( " Nested?: %2d\n", root->r.r_nested ); - __kmp_printf( " In Parallel: %2d\n", root->r.r_in_parallel ); - __kmp_printf( "\n" ); - __kmp_print_structure_team_accum( list, root->r.r_root_team ); - __kmp_print_structure_team_accum( list, root->r.r_hot_team ); - }; // if - }; // for gtid - } else { - __kmp_printf( "Ubers array is not allocated.\n" ); - }; // if - - __kmp_printf( "\n------------------------------\nTeams\n------------------------------\n" ); - while ( list->next != NULL ) { - kmp_team_p const * team = list->entry; - int i; - __kmp_printf( "Team %2x %p:\n", team->t.t_id, team ); - __kmp_print_structure_team( " Parent Team: ", team->t.t_parent ); - __kmp_printf( " Master TID: %2d\n", team->t.t_master_tid ); - __kmp_printf( " Max threads: %2d\n", team->t.t_max_nproc ); - __kmp_printf( " Levels of serial: %2d\n", team->t.t_serialized ); - __kmp_printf( " Number threads: %2d\n", team->t.t_nproc ); - for ( i = 0; i < team->t.t_nproc; ++ i ) { - __kmp_printf( " Thread %2d: ", i ); - __kmp_print_structure_thread( "", team->t.t_threads[ i ] ); - }; // for i - __kmp_print_structure_team( " Next in pool: ", team->t.t_next_pool ); - __kmp_printf( "\n" ); - list = list->next; - }; // while - - // Print out __kmp_thread_pool and __kmp_team_pool. - __kmp_printf( "\n------------------------------\nPools\n------------------------------\n" ); - __kmp_print_structure_thread( "Thread pool: ", (kmp_info_t *)__kmp_thread_pool ); - __kmp_print_structure_team( "Team pool: ", (kmp_team_t *)__kmp_team_pool ); - __kmp_printf( "\n" ); - - // Free team list. - while ( list != NULL ) { - kmp_team_list_item_t * item = list; - list = list->next; - KMP_INTERNAL_FREE( item ); - }; // while - + __kmp_printf(" Set Proc Bind: %2d\n", thread->th.th_set_proc_bind); +#endif + __kmp_print_structure_thread(" Next in pool: ", + thread->th.th_next_pool); + __kmp_printf("\n"); + __kmp_print_structure_team_accum(list, thread->th.th_team); + __kmp_print_structure_team_accum(list, thread->th.th_serial_team); + }; // if + }; // for gtid + } else { + __kmp_printf("Threads array is not allocated.\n"); + }; // if + + // Print out __kmp_root array. 
+ __kmp_printf("\n------------------------------\nUbers\n----------------------" + "--------\n"); + if (__kmp_root != NULL) { + int gtid; + for (gtid = 0; gtid < __kmp_threads_capacity; ++gtid) { + kmp_root_t const *root = __kmp_root[gtid]; + if (root != NULL) { + __kmp_printf("GTID %2d %p:\n", gtid, root); + __kmp_print_structure_team(" Root Team: ", root->r.r_root_team); + __kmp_print_structure_team(" Hot Team: ", root->r.r_hot_team); + __kmp_print_structure_thread(" Uber Thread: ", + root->r.r_uber_thread); + __kmp_printf(" Active?: %2d\n", root->r.r_active); + __kmp_printf(" Nested?: %2d\n", root->r.r_nested); + __kmp_printf(" In Parallel: %2d\n", root->r.r_in_parallel); + __kmp_printf("\n"); + __kmp_print_structure_team_accum(list, root->r.r_root_team); + __kmp_print_structure_team_accum(list, root->r.r_hot_team); + }; // if + }; // for gtid + } else { + __kmp_printf("Ubers array is not allocated.\n"); + }; // if + + __kmp_printf("\n------------------------------\nTeams\n----------------------" + "--------\n"); + while (list->next != NULL) { + kmp_team_p const *team = list->entry; + int i; + __kmp_printf("Team %2x %p:\n", team->t.t_id, team); + __kmp_print_structure_team(" Parent Team: ", team->t.t_parent); + __kmp_printf(" Master TID: %2d\n", team->t.t_master_tid); + __kmp_printf(" Max threads: %2d\n", team->t.t_max_nproc); + __kmp_printf(" Levels of serial: %2d\n", team->t.t_serialized); + __kmp_printf(" Number threads: %2d\n", team->t.t_nproc); + for (i = 0; i < team->t.t_nproc; ++i) { + __kmp_printf(" Thread %2d: ", i); + __kmp_print_structure_thread("", team->t.t_threads[i]); + }; // for i + __kmp_print_structure_team(" Next in pool: ", team->t.t_next_pool); + __kmp_printf("\n"); + list = list->next; + }; // while + + // Print out __kmp_thread_pool and __kmp_team_pool. + __kmp_printf("\n------------------------------\nPools\n----------------------" + "--------\n"); + __kmp_print_structure_thread("Thread pool: ", + (kmp_info_t *)__kmp_thread_pool); + __kmp_print_structure_team("Team pool: ", + (kmp_team_t *)__kmp_team_pool); + __kmp_printf("\n"); + + // Free team list. 
+ while (list != NULL) { + kmp_team_list_item_t *item = list; + list = list->next; + KMP_INTERNAL_FREE(item); + }; // while } #endif - //--------------------------------------------------------------------------- // Stuff for per-thread fast random number generator // Table of primes - static const unsigned __kmp_primes[] = { - 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, - 0xba5703f5, 0xb495a877, 0xe1626741, 0x79695e6b, - 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, - 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, - 0xbe4d6fe9, 0x5f15e201, 0x99afc3fd, 0xf3f16801, - 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, - 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, - 0x085a3d61, 0x46eb5ea7, 0x3d9910ed, 0x2e687b5b, - 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, - 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, - 0x54581edb, 0xf2480f45, 0x0bb9288f, 0xef1affc7, - 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, - 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, - 0xfc411073, 0xc3749363, 0xb892d829, 0x3549366b, - 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, - 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f -}; + 0x9e3779b1, 0xffe6cc59, 0x2109f6dd, 0x43977ab5, 0xba5703f5, 0xb495a877, + 0xe1626741, 0x79695e6b, 0xbc98c09f, 0xd5bee2b3, 0x287488f9, 0x3af18231, + 0x9677cd4d, 0xbe3a6929, 0xadc6a877, 0xdcf0674b, 0xbe4d6fe9, 0x5f15e201, + 0x99afc3fd, 0xf3f16801, 0xe222cfff, 0x24ba5fdb, 0x0620452d, 0x79f149e3, + 0xc8b93f49, 0x972702cd, 0xb07dd827, 0x6c97d5ed, 0x085a3d61, 0x46eb5ea7, + 0x3d9910ed, 0x2e687b5b, 0x29609227, 0x6eb081f1, 0x0954c4e1, 0x9d114db9, + 0x542acfa9, 0xb3e6bd7b, 0x0742d917, 0xe9f3ffa7, 0x54581edb, 0xf2480f45, + 0x0bb9288f, 0xef1affc7, 0x85fa0ca7, 0x3ccc14db, 0xe6baf34b, 0x343377f7, + 0x5ca19031, 0xe6d9293b, 0xf0a9f391, 0x5d2e980b, 0xfc411073, 0xc3749363, + 0xb892d829, 0x3549366b, 0x629750ad, 0xb98294e5, 0x892d9483, 0xc235baf3, + 0x3d2402a3, 0x6bdef3c9, 0xbec333cd, 0x40c9520f}; //--------------------------------------------------------------------------- // __kmp_get_random: Get a random number using a linear congruential method. 
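A stand-alone sketch of the generator defined below: a 32-bit linear congruential step x = a*x + 1 (mod 2^32) whose high 16 bits are returned, with the multiplier taken from __kmp_primes and the seed derived from the thread id. The kmp_rng_sketch_t type and helper names are illustrative only.

#include <stdio.h>

typedef struct {
  unsigned a; // per-thread multiplier, picked from __kmp_primes[tid % nprimes]
  unsigned x; // current state
} kmp_rng_sketch_t;

static void rng_init(kmp_rng_sketch_t *g, unsigned a, unsigned tid) {
  g->a = a;
  g->x = (tid + 1) * a + 1; // same seeding as __kmp_init_random
}

static unsigned short rng_next(kmp_rng_sketch_t *g) {
  unsigned short r = (unsigned short)(g->x >> 16); // high bits are better mixed
  g->x = g->x * g->a + 1;                          // x = a*x + 1 (mod 2^32)
  return r;
}

int main(void) {
  kmp_rng_sketch_t g;
  rng_init(&g, 0x9e3779b1u, 0); // first entry of the table above, tid 0
  for (int i = 0; i < 4; ++i)
    printf("%u\n", (unsigned)rng_next(&g));
  return 0;
}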
- -unsigned short -__kmp_get_random( kmp_info_t * thread ) -{ +unsigned short __kmp_get_random(kmp_info_t *thread) { unsigned x = thread->th.th_x; - unsigned short r = x>>16; + unsigned short r = x >> 16; - thread->th.th_x = x*thread->th.th_a+1; + thread->th.th_x = x * thread->th.th_a + 1; KA_TRACE(30, ("__kmp_get_random: THREAD: %d, RETURN: %u\n", - thread->th.th_info.ds.ds_tid, r) ); + thread->th.th_info.ds.ds_tid, r)); return r; } //-------------------------------------------------------- // __kmp_init_random: Initialize a random number generator - -void -__kmp_init_random( kmp_info_t * thread ) -{ +void __kmp_init_random(kmp_info_t *thread) { unsigned seed = thread->th.th_info.ds.ds_tid; - thread->th.th_a = __kmp_primes[seed%(sizeof(__kmp_primes)/sizeof(__kmp_primes[0]))]; - thread->th.th_x = (seed+1)*thread->th.th_a+1; - KA_TRACE(30, ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a) ); + thread->th.th_a = + __kmp_primes[seed % (sizeof(__kmp_primes) / sizeof(__kmp_primes[0]))]; + thread->th.th_x = (seed + 1) * thread->th.th_a + 1; + KA_TRACE(30, + ("__kmp_init_random: THREAD: %u; A: %u\n", seed, thread->th.th_a)); } - #if KMP_OS_WINDOWS -/* reclaim array entries for root threads that are already dead, returns number reclaimed */ -static int -__kmp_reclaim_dead_roots(void) { - int i, r = 0; - - for(i = 0; i < __kmp_threads_capacity; ++i) { - if( KMP_UBER_GTID( i ) && - !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && - !__kmp_root[i]->r.r_active ) { // AC: reclaim only roots died in non-active state - r += __kmp_unregister_root_other_thread(i); - } - } - return r; +/* reclaim array entries for root threads that are already dead, returns number + * reclaimed */ +static int __kmp_reclaim_dead_roots(void) { + int i, r = 0; + + for (i = 0; i < __kmp_threads_capacity; ++i) { + if (KMP_UBER_GTID(i) && + !__kmp_still_running((kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[i])) && + !__kmp_root[i] + ->r.r_active) { // AC: reclaim only roots died in non-active state + r += __kmp_unregister_root_other_thread(i); + } + } + return r; } #endif -/* - This function attempts to create free entries in __kmp_threads and __kmp_root, and returns the number of - free entries generated. - - For Windows* OS static library, the first mechanism used is to reclaim array entries for root threads that are - already dead. - - On all platforms, expansion is attempted on the arrays __kmp_threads_ and __kmp_root, with appropriate - update to __kmp_threads_capacity. Array capacity is increased by doubling with clipping to - __kmp_tp_capacity, if threadprivate cache array has been created. - Synchronization with __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. - - After any dead root reclamation, if the clipping value allows array expansion to result in the generation - of a total of nWish free slots, the function does that expansion. If not, but the clipping value allows - array expansion to result in the generation of a total of nNeed free slots, the function does that expansion. - Otherwise, nothing is done beyond the possible initial root thread reclamation. However, if nNeed is zero, - a best-effort attempt is made to fulfil nWish as far as possible, i.e. the function will attempt to create - as many free slots as possible up to nWish. - - If any argument is negative, the behavior is undefined. 
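The growth policy spelled out above (double the capacity, clipped to the allowed maximum, until the required minimum fits) can be isolated into a few lines. A sketch with an illustrative next_capacity() helper, assuming a positive current capacity and a minimum the caller has already checked fits under the maximum:

#include <stdio.h>

static int next_capacity(int current, int minimum_required, int max_allowed) {
  int cap = current;
  do { // double, but never past max_allowed (mirrors the newCapacity loop)
    cap = (cap <= (max_allowed >> 1)) ? (cap << 1) : max_allowed;
  } while (cap < minimum_required);
  return cap;
}

int main(void) {
  printf("%d\n", next_capacity(32, 100, 32768));   // 128
  printf("%d\n", next_capacity(48, 30000, 32768)); // clipped to 32768
  return 0;
}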
-*/ -static int -__kmp_expand_threads(int nWish, int nNeed) { - int added = 0; - int old_tp_cached; - int __kmp_actual_max_nth; - - if(nNeed > nWish) /* normalize the arguments */ - nWish = nNeed; -#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB -/* only for Windows static library */ - /* reclaim array entries for root threads that are already dead */ - added = __kmp_reclaim_dead_roots(); - - if(nNeed) { - nNeed -= added; - if(nNeed < 0) - nNeed = 0; - } - if(nWish) { - nWish -= added; - if(nWish < 0) - nWish = 0; - } -#endif - if(nWish <= 0) - return added; - - while(1) { - int nTarget; - int minimumRequiredCapacity; - int newCapacity; - kmp_info_t **newThreads; - kmp_root_t **newRoot; - - // - // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. - // If __kmp_max_nth is set to some value less than __kmp_sys_max_nth - // by the user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may - // become > __kmp_max_nth in one of two ways: - // - // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] - // may not be resused by another thread, so we may need to increase - // __kmp_threads_capacity to __kmp_max_threads + 1. - // - // 2) New foreign root(s) are encountered. We always register new - // foreign roots. This may cause a smaller # of threads to be - // allocated at subsequent parallel regions, but the worker threads - // hang around (and eventually go to sleep) and need slots in the - // __kmp_threads[] array. - // - // Anyway, that is the reason for moving the check to see if - // __kmp_max_threads was exceeded into __kmp_reseerve_threads() - // instead of having it performed here. -BB - // - old_tp_cached = __kmp_tp_cached; - __kmp_actual_max_nth = old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth; - KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); - - /* compute expansion headroom to check if we can expand and whether to aim for nWish or nNeed */ - nTarget = nWish; - if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { - /* can't fulfil nWish, so try nNeed */ - if(nNeed) { - nTarget = nNeed; - if(__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { - /* possible expansion too small -- give up */ - break; - } - } else { - /* best-effort */ - nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; - if(!nTarget) { - /* can expand at all -- give up */ - break; - } - } - } - minimumRequiredCapacity = __kmp_threads_capacity + nTarget; - - newCapacity = __kmp_threads_capacity; - do{ - newCapacity = - newCapacity <= (__kmp_actual_max_nth >> 1) ? - (newCapacity << 1) : - __kmp_actual_max_nth; - } while(newCapacity < minimumRequiredCapacity); - newThreads = (kmp_info_t**) __kmp_allocate((sizeof(kmp_info_t*) + sizeof(kmp_root_t*)) * newCapacity + CACHE_LINE); - newRoot = (kmp_root_t**) ((char*)newThreads + sizeof(kmp_info_t*) * newCapacity ); - KMP_MEMCPY(newThreads, __kmp_threads, __kmp_threads_capacity * sizeof(kmp_info_t*)); - KMP_MEMCPY(newRoot, __kmp_root, __kmp_threads_capacity * sizeof(kmp_root_t*)); - memset(newThreads + __kmp_threads_capacity, 0, - (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t*)); - memset(newRoot + __kmp_threads_capacity, 0, - (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t*)); - - if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { - /* __kmp_tp_cached has changed, i.e. 
__kmpc_threadprivate_cached has allocated a threadprivate cache - while we were allocating the expanded array, and our new capacity is larger than the threadprivate - cache capacity, so we should deallocate the expanded arrays and try again. This is the first check - of a double-check pair. - */ - __kmp_free(newThreads); - continue; /* start over and try again */ - } - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - if(!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { - /* Same check as above, but this time with the lock so we can be sure if we can succeed. */ - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - __kmp_free(newThreads); - continue; /* start over and try again */ - } else { - /* success */ - // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be investigated. - // - *(kmp_info_t**volatile*)&__kmp_threads = newThreads; - *(kmp_root_t**volatile*)&__kmp_root = newRoot; - added += newCapacity - __kmp_threads_capacity; - *(volatile int*)&__kmp_threads_capacity = newCapacity; - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - break; /* succeeded, so we can exit the loop */ - } - } - return added; -} - -/* register the current thread as a root thread and obtain our gtid */ -/* we must have the __kmp_initz_lock held at this point */ -/* Argument TRUE only if are the thread that calls from __kmp_do_serial_initialize() */ -int -__kmp_register_root( int initial_thread ) -{ - kmp_info_t *root_thread; - kmp_root_t *root; - int gtid; - int capacity; - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - KA_TRACE( 20, ("__kmp_register_root: entered\n")); - KMP_MB(); +/* This function attempts to create free entries in __kmp_threads and + __kmp_root, and returns the number of free entries generated. + For Windows* OS static library, the first mechanism used is to reclaim array + entries for root threads that are already dead. - /* - 2007-03-02: + On all platforms, expansion is attempted on the arrays __kmp_threads_ and + __kmp_root, with appropriate update to __kmp_threads_capacity. Array + capacity is increased by doubling with clipping to __kmp_tp_capacity, if + threadprivate cache array has been created. Synchronization with + __kmpc_threadprivate_cached is done using __kmp_tp_cached_lock. - If initial thread did not invoke OpenMP RTL yet, and this thread is not an initial one, - "__kmp_all_nth >= __kmp_threads_capacity" condition does not work as expected -- it may - return false (that means there is at least one empty slot in __kmp_threads array), but it - is possible the only free slot is #0, which is reserved for initial thread and so cannot be - used for this one. Following code workarounds this bug. - - However, right solution seems to be not reserving slot #0 for initial thread because: - (1) there is no magic in slot #0, - (2) we cannot detect initial thread reliably (the first thread which does serial - initialization may be not a real initial thread). - */ - capacity = __kmp_threads_capacity; - if ( ! 
initial_thread && TCR_PTR(__kmp_threads[0]) == NULL ) { - -- capacity; - }; // if - - /* see if there are too many threads */ - if ( __kmp_all_nth >= capacity && !__kmp_expand_threads( 1, 1 ) ) { - if ( __kmp_tp_cached ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantRegisterNewThread ), - KMP_HNT( Set_ALL_THREADPRIVATE, __kmp_tp_capacity ), - KMP_HNT( PossibleSystemLimitOnThreads ), - __kmp_msg_null - ); - } - else { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantRegisterNewThread ), - KMP_HNT( SystemLimitOnThreads ), - __kmp_msg_null - ); - } - }; // if + After any dead root reclamation, if the clipping value allows array expansion + to result in the generation of a total of nWish free slots, the function does + that expansion. If not, but the clipping value allows array expansion to + result in the generation of a total of nNeed free slots, the function does + that expansion. Otherwise, nothing is done beyond the possible initial root + thread reclamation. However, if nNeed is zero, a best-effort attempt is made + to fulfil nWish as far as possible, i.e. the function will attempt to create + as many free slots as possible up to nWish. - /* find an available thread slot */ - /* Don't reassign the zero slot since we need that to only be used by initial - thread */ - for( gtid=(initial_thread ? 0 : 1) ; TCR_PTR(__kmp_threads[gtid]) != NULL ; gtid++ ) - ; - KA_TRACE( 1, ("__kmp_register_root: found slot in threads array: T#%d\n", gtid )); - KMP_ASSERT( gtid < __kmp_threads_capacity ); + If any argument is negative, the behavior is undefined. */ +static int __kmp_expand_threads(int nWish, int nNeed) { + int added = 0; + int old_tp_cached; + int __kmp_actual_max_nth; - /* update global accounting */ - __kmp_all_nth ++; - TCW_4(__kmp_nth, __kmp_nth + 1); + if (nNeed > nWish) /* normalize the arguments */ + nWish = nNeed; +#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB + /* only for Windows static library */ + /* reclaim array entries for root threads that are already dead */ + added = __kmp_reclaim_dead_roots(); + + if (nNeed) { + nNeed -= added; + if (nNeed < 0) + nNeed = 0; + } + if (nWish) { + nWish -= added; + if (nWish < 0) + nWish = 0; + } +#endif + if (nWish <= 0) + return added; + while (1) { + int nTarget; + int minimumRequiredCapacity; + int newCapacity; + kmp_info_t **newThreads; + kmp_root_t **newRoot; + + // Note that __kmp_threads_capacity is not bounded by __kmp_max_nth. If + // __kmp_max_nth is set to some value less than __kmp_sys_max_nth by the + // user via OMP_THREAD_LIMIT, then __kmp_threads_capacity may become + // > __kmp_max_nth in one of two ways: // - // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) - // for low numbers of procs, and method #2 (keyed API call) for higher - // numbers of procs. + // 1) The initialization thread (gtid = 0) exits. __kmp_threads[0] + // may not be resused by another thread, so we may need to increase + // __kmp_threads_capacity to __kmp_max_threads + 1. // - if ( __kmp_adjust_gtid_mode ) { - if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { - if ( TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } - else { - if (TCR_4(__kmp_gtid_mode) != 1 ) { - TCW_4(__kmp_gtid_mode, 1); - } - } + // 2) New foreign root(s) are encountered. We always register new foreign + // roots. This may cause a smaller # of threads to be allocated at + // subsequent parallel regions, but the worker threads hang around (and + // eventually go to sleep) and need slots in the __kmp_threads[] array. 
+ // + // Anyway, that is the reason for moving the check to see if + // __kmp_max_threads was exceeded into __kmp_reseerve_threads() + // instead of having it performed here. -BB + old_tp_cached = __kmp_tp_cached; + __kmp_actual_max_nth = + old_tp_cached ? __kmp_tp_capacity : __kmp_sys_max_nth; + KMP_DEBUG_ASSERT(__kmp_actual_max_nth >= __kmp_threads_capacity); + + /* compute expansion headroom to check if we can expand and whether to aim + for nWish or nNeed */ + nTarget = nWish; + if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { + /* can't fulfil nWish, so try nNeed */ + if (nNeed) { + nTarget = nNeed; + if (__kmp_actual_max_nth - __kmp_threads_capacity < nTarget) { + /* possible expansion too small -- give up */ + break; + } + } else { + /* best-effort */ + nTarget = __kmp_actual_max_nth - __kmp_threads_capacity; + if (!nTarget) { + /* can expand at all -- give up */ + break; + } + } + } + minimumRequiredCapacity = __kmp_threads_capacity + nTarget; + + newCapacity = __kmp_threads_capacity; + do { + newCapacity = newCapacity <= (__kmp_actual_max_nth >> 1) + ? (newCapacity << 1) + : __kmp_actual_max_nth; + } while (newCapacity < minimumRequiredCapacity); + newThreads = (kmp_info_t **)__kmp_allocate( + (sizeof(kmp_info_t *) + sizeof(kmp_root_t *)) * newCapacity + + CACHE_LINE); + newRoot = (kmp_root_t **)((char *)newThreads + + sizeof(kmp_info_t *) * newCapacity); + KMP_MEMCPY(newThreads, __kmp_threads, + __kmp_threads_capacity * sizeof(kmp_info_t *)); + KMP_MEMCPY(newRoot, __kmp_root, + __kmp_threads_capacity * sizeof(kmp_root_t *)); + memset(newThreads + __kmp_threads_capacity, 0, + (newCapacity - __kmp_threads_capacity) * sizeof(kmp_info_t *)); + memset(newRoot + __kmp_threads_capacity, 0, + (newCapacity - __kmp_threads_capacity) * sizeof(kmp_root_t *)); + + if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { + /* __kmp_tp_cached has changed, i.e. __kmpc_threadprivate_cached has + allocated a threadprivate cache while we were allocating the expanded + array, and our new capacity is larger than the threadprivate cache + capacity, so we should deallocate the expanded arrays and try again. + This is the first check of a double-check pair. */ + __kmp_free(newThreads); + continue; /* start over and try again */ + } + __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); + if (!old_tp_cached && __kmp_tp_cached && newCapacity > __kmp_tp_capacity) { + /* Same check as above, but this time with the lock so we can be sure if + we can succeed. */ + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + __kmp_free(newThreads); + continue; /* start over and try again */ + } else { + /* success */ + // __kmp_free( __kmp_threads ); // ATT: It leads to crash. Need to be + // investigated. + *(kmp_info_t * *volatile *)&__kmp_threads = newThreads; + *(kmp_root_t * *volatile *)&__kmp_root = newRoot; + added += newCapacity - __kmp_threads_capacity; + *(volatile int *)&__kmp_threads_capacity = newCapacity; + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + break; /* succeeded, so we can exit the loop */ + } + } + return added; +} + +/* Register the current thread as a root thread and obtain our gtid. We must + have the __kmp_initz_lock held at this point. 
Argument TRUE only if are the + thread that calls from __kmp_do_serial_initialize() */ +int __kmp_register_root(int initial_thread) { + kmp_info_t *root_thread; + kmp_root_t *root; + int gtid; + int capacity; + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); + KA_TRACE(20, ("__kmp_register_root: entered\n")); + KMP_MB(); + + /* 2007-03-02: + If initial thread did not invoke OpenMP RTL yet, and this thread is not an + initial one, "__kmp_all_nth >= __kmp_threads_capacity" condition does not + work as expected -- it may return false (that means there is at least one + empty slot in __kmp_threads array), but it is possible the only free slot + is #0, which is reserved for initial thread and so cannot be used for this + one. Following code workarounds this bug. + + However, right solution seems to be not reserving slot #0 for initial + thread because: + (1) there is no magic in slot #0, + (2) we cannot detect initial thread reliably (the first thread which does + serial initialization may be not a real initial thread). + */ + capacity = __kmp_threads_capacity; + if (!initial_thread && TCR_PTR(__kmp_threads[0]) == NULL) { + --capacity; + }; // if + + /* see if there are too many threads */ + if (__kmp_all_nth >= capacity && !__kmp_expand_threads(1, 1)) { + if (__kmp_tp_cached) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread), + KMP_HNT(Set_ALL_THREADPRIVATE, __kmp_tp_capacity), + KMP_HNT(PossibleSystemLimitOnThreads), __kmp_msg_null); + } else { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantRegisterNewThread), + KMP_HNT(SystemLimitOnThreads), __kmp_msg_null); + } + }; // if + + /* find an available thread slot */ + /* Don't reassign the zero slot since we need that to only be used by initial + thread */ + for (gtid = (initial_thread ? 0 : 1); TCR_PTR(__kmp_threads[gtid]) != NULL; + gtid++) + ; + KA_TRACE(1, + ("__kmp_register_root: found slot in threads array: T#%d\n", gtid)); + KMP_ASSERT(gtid < __kmp_threads_capacity); + + /* update global accounting */ + __kmp_all_nth++; + TCW_4(__kmp_nth, __kmp_nth + 1); + + // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low + // numbers of procs, and method #2 (keyed API call) for higher numbers. + if (__kmp_adjust_gtid_mode) { + if (__kmp_all_nth >= __kmp_tls_gtid_min) { + if (TCR_4(__kmp_gtid_mode) != 2) { + TCW_4(__kmp_gtid_mode, 2); + } + } else { + if (TCR_4(__kmp_gtid_mode) != 1) { + TCW_4(__kmp_gtid_mode, 1); + } } + } #ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } + /* Adjust blocktime to zero if necessary */ + /* Middle initialization might not have occurred yet */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + if (__kmp_nth > __kmp_avail_proc) { + __kmp_zero_bt = TRUE; } + } #endif /* KMP_ADJUST_BLOCKTIME */ - /* setup this new hierarchy */ - if( ! ( root = __kmp_root[gtid] )) { - root = __kmp_root[gtid] = (kmp_root_t*) __kmp_allocate( sizeof(kmp_root_t) ); - KMP_DEBUG_ASSERT( ! root->r.r_root_team ); - } + /* setup this new hierarchy */ + if (!(root = __kmp_root[gtid])) { + root = __kmp_root[gtid] = (kmp_root_t *)__kmp_allocate(sizeof(kmp_root_t)); + KMP_DEBUG_ASSERT(!root->r.r_root_team); + } #if KMP_STATS_ENABLED - // Initialize stats as soon as possible (right after gtid assignment). 
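The slot search reformatted above is a plain linear scan over __kmp_threads that starts at index 1 unless the caller is the initial thread, because slot 0 is reserved for it; the capacity check above has already ensured there is room (or the runtime has aborted). A hypothetical standalone version of that scan, with the helper name and parameters invented for illustration, not taken from the runtime:

    /* Find the first free slot in a thread table, keeping slot 0 for the
       initial thread only. Returns -1 if the table is full, which the real
       code rules out by expanding the arrays first. */
    static int find_free_slot(void *table[], int capacity, int initial_thread) {
      int gtid = initial_thread ? 0 : 1;
      while (gtid < capacity && table[gtid] != NULL)
        ++gtid;
      return gtid < capacity ? gtid : -1;
    }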
- __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); - KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); - KMP_SET_THREAD_STATE(SERIAL_REGION); - KMP_INIT_PARTITIONED_TIMERS(OMP_serial); -#endif - __kmp_initialize_root( root ); - - /* setup new root thread structure */ - if( root->r.r_uber_thread ) { - root_thread = root->r.r_uber_thread; - } else { - root_thread = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); - if ( __kmp_storage_map ) { - __kmp_print_thread_storage_map( root_thread, gtid ); - } - root_thread->th.th_info .ds.ds_gtid = gtid; - root_thread->th.th_root = root; - if( __kmp_env_consistency_check ) { - root_thread->th.th_cons = __kmp_allocate_cons_stack( gtid ); - } - #if USE_FAST_MEMORY - __kmp_initialize_fast_memory( root_thread ); - #endif /* USE_FAST_MEMORY */ - - #if KMP_USE_BGET - KMP_DEBUG_ASSERT( root_thread->th.th_local.bget_data == NULL ); - __kmp_initialize_bget( root_thread ); - #endif - __kmp_init_random( root_thread ); // Initialize random number generator - } - - /* setup the serial team held in reserve by the root thread */ - if( ! root_thread->th.th_serial_team ) { - kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); - KF_TRACE( 10, ( "__kmp_register_root: before serial_team\n" ) ); - - root_thread->th.th_serial_team = __kmp_allocate_team( root, 1, 1, + // Initialize stats as soon as possible (right after gtid assignment). + __kmp_stats_thread_ptr = __kmp_stats_list->push_back(gtid); + KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); + KMP_SET_THREAD_STATE(SERIAL_REGION); + KMP_INIT_PARTITIONED_TIMERS(OMP_serial); +#endif + __kmp_initialize_root(root); + + /* setup new root thread structure */ + if (root->r.r_uber_thread) { + root_thread = root->r.r_uber_thread; + } else { + root_thread = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t)); + if (__kmp_storage_map) { + __kmp_print_thread_storage_map(root_thread, gtid); + } + root_thread->th.th_info.ds.ds_gtid = gtid; + root_thread->th.th_root = root; + if (__kmp_env_consistency_check) { + root_thread->th.th_cons = __kmp_allocate_cons_stack(gtid); + } +#if USE_FAST_MEMORY + __kmp_initialize_fast_memory(root_thread); +#endif /* USE_FAST_MEMORY */ + +#if KMP_USE_BGET + KMP_DEBUG_ASSERT(root_thread->th.th_local.bget_data == NULL); + __kmp_initialize_bget(root_thread); +#endif + __kmp_init_random(root_thread); // Initialize random number generator + } + + /* setup the serial team held in reserve by the root thread */ + if (!root_thread->th.th_serial_team) { + kmp_internal_control_t r_icvs = __kmp_get_global_icvs(); + KF_TRACE(10, ("__kmp_register_root: before serial_team\n")); + root_thread->th.th_serial_team = + __kmp_allocate_team(root, 1, 1, #if OMPT_SUPPORT - 0, // root parallel id + 0, // root parallel id #endif #if OMP_40_ENABLED - proc_bind_default, + proc_bind_default, #endif - &r_icvs, - 0 USE_NESTED_HOT_ARG(NULL) ); - } - KMP_ASSERT( root_thread->th.th_serial_team ); - KF_TRACE( 10, ( "__kmp_register_root: after serial_team = %p\n", - root_thread->th.th_serial_team ) ); + &r_icvs, 0 USE_NESTED_HOT_ARG(NULL)); + } + KMP_ASSERT(root_thread->th.th_serial_team); + KF_TRACE(10, ("__kmp_register_root: after serial_team = %p\n", + root_thread->th.th_serial_team)); - /* drop root_thread into place */ - TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); + /* drop root_thread into place */ + TCW_SYNC_PTR(__kmp_threads[gtid], root_thread); - root->r.r_root_team->t.t_threads[0] = root_thread; - root->r.r_hot_team ->t.t_threads[0] = root_thread; - root_thread->th.th_serial_team->t.t_threads[0] = 
root_thread; - root_thread->th.th_serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for execution (it is unused for now). - root->r.r_uber_thread = root_thread; + root->r.r_root_team->t.t_threads[0] = root_thread; + root->r.r_hot_team->t.t_threads[0] = root_thread; + root_thread->th.th_serial_team->t.t_threads[0] = root_thread; + // AC: the team created in reserve, not for execution (it is unused for now). + root_thread->th.th_serial_team->t.t_serialized = 0; + root->r.r_uber_thread = root_thread; - /* initialize the thread, get it ready to go */ - __kmp_initialize_info( root_thread, root->r.r_root_team, 0, gtid ); - TCW_4(__kmp_init_gtid, TRUE); + /* initialize the thread, get it ready to go */ + __kmp_initialize_info(root_thread, root->r.r_root_team, 0, gtid); + TCW_4(__kmp_init_gtid, TRUE); - /* prepare the master thread for get_gtid() */ - __kmp_gtid_set_specific( gtid ); + /* prepare the master thread for get_gtid() */ + __kmp_gtid_set_specific(gtid); #if USE_ITT_BUILD - __kmp_itt_thread_name( gtid ); + __kmp_itt_thread_name(gtid); #endif /* USE_ITT_BUILD */ - #ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; - #endif - __kmp_create_worker( gtid, root_thread, __kmp_stksize ); - KMP_DEBUG_ASSERT( __kmp_gtid_get_specific() == gtid ); - - KA_TRACE( 20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, plain=%u\n", - gtid, __kmp_gtid_from_tid( 0, root->r.r_hot_team ), - root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, - KMP_INIT_BARRIER_STATE ) ); - { // Initialize barrier data. - int b; - for ( b = 0; b < bs_last_barrier; ++ b ) { - root_thread->th.th_bar[ b ].bb.b_arrived = KMP_INIT_BARRIER_STATE; +#ifdef KMP_TDATA_GTID + __kmp_gtid = gtid; +#endif + __kmp_create_worker(gtid, root_thread, __kmp_stksize); + KMP_DEBUG_ASSERT(__kmp_gtid_get_specific() == gtid); + + KA_TRACE(20, ("__kmp_register_root: T#%d init T#%d(%d:%d) arrived: join=%u, " + "plain=%u\n", + gtid, __kmp_gtid_from_tid(0, root->r.r_hot_team), + root->r.r_hot_team->t.t_id, 0, KMP_INIT_BARRIER_STATE, + KMP_INIT_BARRIER_STATE)); + { // Initialize barrier data. 
+ int b; + for (b = 0; b < bs_last_barrier; ++b) { + root_thread->th.th_bar[b].bb.b_arrived = KMP_INIT_BARRIER_STATE; #if USE_DEBUGGER - root_thread->th.th_bar[ b ].bb.b_worker_arrived = 0; + root_thread->th.th_bar[b].bb.b_worker_arrived = 0; #endif - }; // for - } - KMP_DEBUG_ASSERT( root->r.r_hot_team->t.t_bar[ bs_forkjoin_barrier ].b_arrived == KMP_INIT_BARRIER_STATE ); + }; // for + } + KMP_DEBUG_ASSERT(root->r.r_hot_team->t.t_bar[bs_forkjoin_barrier].b_arrived == + KMP_INIT_BARRIER_STATE); #if KMP_AFFINITY_SUPPORTED -# if OMP_40_ENABLED - root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; - root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; -# endif +#if OMP_40_ENABLED + root_thread->th.th_current_place = KMP_PLACE_UNDEFINED; + root_thread->th.th_new_place = KMP_PLACE_UNDEFINED; + root_thread->th.th_first_place = KMP_PLACE_UNDEFINED; + root_thread->th.th_last_place = KMP_PLACE_UNDEFINED; +#endif - if ( TCR_4(__kmp_init_middle) ) { - __kmp_affinity_set_init_mask( gtid, TRUE ); - } + if (TCR_4(__kmp_init_middle)) { + __kmp_affinity_set_init_mask(gtid, TRUE); + } #endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_root_counter ++; + __kmp_root_counter++; - KMP_MB(); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + KMP_MB(); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); - return gtid; + return gtid; } #if KMP_NESTED_HOT_TEAMS -static int -__kmp_free_hot_teams( kmp_root_t *root, kmp_info_t *thr, int level, const int max_level ) -{ - int i, n, nth; - kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; - if( !hot_teams || !hot_teams[level].hot_team ) { - return 0; - } - KMP_DEBUG_ASSERT( level < max_level ); - kmp_team_t *team = hot_teams[level].hot_team; - nth = hot_teams[level].hot_team_nth; - n = nth - 1; // master is not freed - if( level < max_level - 1 ) { - for( i = 0; i < nth; ++i ) { - kmp_info_t *th = team->t.t_threads[i]; - n += __kmp_free_hot_teams( root, th, level + 1, max_level ); - if( i > 0 && th->th.th_hot_teams ) { - __kmp_free( th->th.th_hot_teams ); - th->th.th_hot_teams = NULL; - } - } - } - __kmp_free_team( root, team, NULL ); - return n; +static int __kmp_free_hot_teams(kmp_root_t *root, kmp_info_t *thr, int level, + const int max_level) { + int i, n, nth; + kmp_hot_team_ptr_t *hot_teams = thr->th.th_hot_teams; + if (!hot_teams || !hot_teams[level].hot_team) { + return 0; + } + KMP_DEBUG_ASSERT(level < max_level); + kmp_team_t *team = hot_teams[level].hot_team; + nth = hot_teams[level].hot_team_nth; + n = nth - 1; // master is not freed + if (level < max_level - 1) { + for (i = 0; i < nth; ++i) { + kmp_info_t *th = team->t.t_threads[i]; + n += __kmp_free_hot_teams(root, th, level + 1, max_level); + if (i > 0 && th->th.th_hot_teams) { + __kmp_free(th->th.th_hot_teams); + th->th.th_hot_teams = NULL; + } + } + } + __kmp_free_team(root, team, NULL); + return n; } #endif -/* Resets a root thread and clear its root and hot teams. - Returns the number of __kmp_threads entries directly and indirectly freed. -*/ -static int -__kmp_reset_root(int gtid, kmp_root_t *root) -{ - kmp_team_t * root_team = root->r.r_root_team; - kmp_team_t * hot_team = root->r.r_hot_team; - int n = hot_team->t.t_nproc; - int i; +// Resets a root thread and clear its root and hot teams. +// Returns the number of __kmp_threads entries directly and indirectly freed. 
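Part of the "indirectly freed" count mentioned above comes from __kmp_free_hot_teams, which walks the nested hot-team hierarchy depth-first: each team contributes its worker count (the master slot is not freed) plus whatever nested hot teams hang off its threads. A toy model of that counting pattern, with invented types standing in for the kmp_* structures:

    typedef struct toy_team {
      int nth;                  /* threads in this team, master included */
      struct toy_team **nested; /* per-thread nested team, entries may be NULL */
    } toy_team_t;

    /* Count the thread entries a depth-first free of this subtree would
       release: nth - 1 for the current team, plus all nested subtrees. */
    static int toy_count_freed(const toy_team_t *team) {
      if (team == NULL)
        return 0;
      int n = team->nth - 1;
      for (int i = 0; i < team->nth; ++i)
        n += toy_count_freed(team->nested ? team->nested[i] : NULL);
      return n;
    }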
+static int __kmp_reset_root(int gtid, kmp_root_t *root) { + kmp_team_t *root_team = root->r.r_root_team; + kmp_team_t *hot_team = root->r.r_hot_team; + int n = hot_team->t.t_nproc; + int i; - KMP_DEBUG_ASSERT( ! root->r.r_active ); + KMP_DEBUG_ASSERT(!root->r.r_active); - root->r.r_root_team = NULL; - root->r.r_hot_team = NULL; - // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team before call - // to __kmp_free_team(). - __kmp_free_team( root, root_team USE_NESTED_HOT_ARG(NULL) ); + root->r.r_root_team = NULL; + root->r.r_hot_team = NULL; + // __kmp_free_team() does not free hot teams, so we have to clear r_hot_team + // before call to __kmp_free_team(). + __kmp_free_team(root, root_team USE_NESTED_HOT_ARG(NULL)); #if KMP_NESTED_HOT_TEAMS - if( __kmp_hot_teams_max_level > 0 ) { // need to free nested hot teams and their threads if any - for( i = 0; i < hot_team->t.t_nproc; ++i ) { - kmp_info_t *th = hot_team->t.t_threads[i]; - if( __kmp_hot_teams_max_level > 1 ) { - n += __kmp_free_hot_teams( root, th, 1, __kmp_hot_teams_max_level ); - } - if( th->th.th_hot_teams ) { - __kmp_free( th->th.th_hot_teams ); - th->th.th_hot_teams = NULL; - } - } - } -#endif - __kmp_free_team( root, hot_team USE_NESTED_HOT_ARG(NULL) ); - - // - // Before we can reap the thread, we need to make certain that all - // other threads in the teams that had this root as ancestor have stopped trying to steal tasks. - // - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - __kmp_wait_to_unref_task_teams(); - } + if (__kmp_hot_teams_max_level > + 0) { // need to free nested hot teams and their threads if any + for (i = 0; i < hot_team->t.t_nproc; ++i) { + kmp_info_t *th = hot_team->t.t_threads[i]; + if (__kmp_hot_teams_max_level > 1) { + n += __kmp_free_hot_teams(root, th, 1, __kmp_hot_teams_max_level); + } + if (th->th.th_hot_teams) { + __kmp_free(th->th.th_hot_teams); + th->th.th_hot_teams = NULL; + } + } + } +#endif + __kmp_free_team(root, hot_team USE_NESTED_HOT_ARG(NULL)); + + // Before we can reap the thread, we need to make certain that all other + // threads in the teams that had this root as ancestor have stopped trying to + // steal tasks. + if (__kmp_tasking_mode != tskm_immediate_exec) { + __kmp_wait_to_unref_task_teams(); + } - #if KMP_OS_WINDOWS - /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ - KA_TRACE( 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", - (LPVOID)&(root->r.r_uber_thread->th), - root->r.r_uber_thread->th.th_info.ds.ds_thread ) ); - __kmp_free_handle( root->r.r_uber_thread->th.th_info.ds.ds_thread ); - #endif /* KMP_OS_WINDOWS */ +#if KMP_OS_WINDOWS + /* Close Handle of root duplicated in __kmp_create_worker (tr #62919) */ + KA_TRACE( + 10, ("__kmp_reset_root: free handle, th = %p, handle = %" KMP_UINTPTR_SPEC + "\n", + (LPVOID) & (root->r.r_uber_thread->th), + root->r.r_uber_thread->th.th_info.ds.ds_thread)); + __kmp_free_handle(root->r.r_uber_thread->th.th_info.ds.ds_thread); +#endif /* KMP_OS_WINDOWS */ #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - int gtid = __kmp_get_gtid(); - __ompt_thread_end(ompt_thread_initial, gtid); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { + int gtid = __kmp_get_gtid(); + __ompt_thread_end(ompt_thread_initial, gtid); + } #endif - TCW_4(__kmp_nth, __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. 
- __kmp_reap_thread( root->r.r_uber_thread, 1 ); + TCW_4(__kmp_nth, + __kmp_nth - 1); // __kmp_reap_thread will decrement __kmp_all_nth. + __kmp_reap_thread(root->r.r_uber_thread, 1); - // We canot put root thread to __kmp_thread_pool, so we have to reap it istead of freeing. - root->r.r_uber_thread = NULL; - /* mark root as no longer in use */ - root->r.r_begin = FALSE; + // We canot put root thread to __kmp_thread_pool, so we have to reap it istead + // of freeing. + root->r.r_uber_thread = NULL; + /* mark root as no longer in use */ + root->r.r_begin = FALSE; - return n; + return n; } -void -__kmp_unregister_root_current_thread( int gtid ) -{ - KA_TRACE( 1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid )); - /* this lock should be ok, since unregister_root_current_thread is never called during - * and abort, only during a normal close. furthermore, if you have the - * forkjoin lock, you should never try to get the initz lock */ - - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KC_TRACE( 10, ("__kmp_unregister_root_current_thread: already finished, exiting T#%d\n", gtid )); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - return; - } - kmp_root_t *root = __kmp_root[gtid]; - - KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); - KMP_ASSERT( KMP_UBER_GTID( gtid )); - KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); - KMP_ASSERT( root->r.r_active == FALSE ); - - - KMP_MB(); +void __kmp_unregister_root_current_thread(int gtid) { + KA_TRACE(1, ("__kmp_unregister_root_current_thread: enter T#%d\n", gtid)); + /* this lock should be ok, since unregister_root_current_thread is never + called during an abort, only during a normal close. furthermore, if you + have the forkjoin lock, you should never try to get the initz lock */ + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); + if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { + KC_TRACE(10, ("__kmp_unregister_root_current_thread: already finished, " + "exiting T#%d\n", + gtid)); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); + return; + } + kmp_root_t *root = __kmp_root[gtid]; + + KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); + KMP_ASSERT(KMP_UBER_GTID(gtid)); + KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); + KMP_ASSERT(root->r.r_active == FALSE); + + KMP_MB(); #if OMP_45_ENABLED - kmp_info_t * thread = __kmp_threads[gtid]; - kmp_team_t * team = thread->th.th_team; - kmp_task_team_t * task_team = thread->th.th_task_team; + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_team_t *team = thread->th.th_team; + kmp_task_team_t *task_team = thread->th.th_task_team; - // we need to wait for the proxy tasks before finishing the thread - if ( task_team != NULL && task_team->tt.tt_found_proxy_tasks ) { + // we need to wait for the proxy tasks before finishing the thread + if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { #if OMPT_SUPPORT - // the runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = ompt_state_undefined; + // the runtime is shutting down so we won't report any events + thread->th.ompt_thread_info.state = ompt_state_undefined; #endif - __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); - } + __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); + } #endif - __kmp_reset_root(gtid, root); + __kmp_reset_root(gtid, root); - /* free up this thread slot */ - __kmp_gtid_set_specific( KMP_GTID_DNE ); + /* free up this thread slot */ + 
__kmp_gtid_set_specific(KMP_GTID_DNE); #ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_DNE; + __kmp_gtid = KMP_GTID_DNE; #endif - KMP_MB(); - KC_TRACE( 10, ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid )); + KMP_MB(); + KC_TRACE(10, + ("__kmp_unregister_root_current_thread: T#%d unregistered\n", gtid)); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); } #if KMP_OS_WINDOWS /* __kmp_forkjoin_lock must be already held - Unregisters a root thread that is not the current thread. Returns the number of - __kmp_threads entries freed as a result. - */ -static int -__kmp_unregister_root_other_thread( int gtid ) -{ - kmp_root_t *root = __kmp_root[gtid]; - int r; - - KA_TRACE( 1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid )); - KMP_DEBUG_ASSERT( __kmp_threads && __kmp_threads[gtid] ); - KMP_ASSERT( KMP_UBER_GTID( gtid )); - KMP_ASSERT( root == __kmp_threads[gtid]->th.th_root ); - KMP_ASSERT( root->r.r_active == FALSE ); - - r = __kmp_reset_root(gtid, root); - KC_TRACE( 10, ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid )); - return r; + Unregisters a root thread that is not the current thread. Returns the number + of __kmp_threads entries freed as a result. */ +static int __kmp_unregister_root_other_thread(int gtid) { + kmp_root_t *root = __kmp_root[gtid]; + int r; + + KA_TRACE(1, ("__kmp_unregister_root_other_thread: enter T#%d\n", gtid)); + KMP_DEBUG_ASSERT(__kmp_threads && __kmp_threads[gtid]); + KMP_ASSERT(KMP_UBER_GTID(gtid)); + KMP_ASSERT(root == __kmp_threads[gtid]->th.th_root); + KMP_ASSERT(root->r.r_active == FALSE); + + r = __kmp_reset_root(gtid, root); + KC_TRACE(10, + ("__kmp_unregister_root_other_thread: T#%d unregistered\n", gtid)); + return r; } #endif #if KMP_DEBUG void __kmp_task_info() { - kmp_int32 gtid = __kmp_entry_gtid(); - kmp_int32 tid = __kmp_tid_from_gtid( gtid ); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *steam = this_thr->th.th_serial_team; - kmp_team_t *team = this_thr->th.th_team; + kmp_int32 gtid = __kmp_entry_gtid(); + kmp_int32 tid = __kmp_tid_from_gtid(gtid); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *steam = this_thr->th.th_serial_team; + kmp_team_t *team = this_thr->th.th_team; - __kmp_printf( "__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p ptask=%p\n", - gtid, tid, this_thr, team, this_thr->th.th_current_task, team->t.t_implicit_task_taskdata[tid].td_parent ); + __kmp_printf("__kmp_task_info: gtid=%d tid=%d t_thread=%p team=%p curtask=%p " + "ptask=%p\n", + gtid, tid, this_thr, team, this_thr->th.th_current_task, + team->t.t_implicit_task_taskdata[tid].td_parent); } #endif // KMP_DEBUG -/* TODO optimize with one big memclr, take out what isn't needed, - * split responsibility to workers as much as possible, and delay - * initialization of features as much as possible */ -static void -__kmp_initialize_info( kmp_info_t *this_thr, kmp_team_t *team, int tid, int gtid ) -{ - /* this_thr->th.th_info.ds.ds_gtid is setup in kmp_allocate_thread/create_worker - * this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ - kmp_info_t *master = team->t.t_threads[0]; - KMP_DEBUG_ASSERT( this_thr != NULL ); - KMP_DEBUG_ASSERT( this_thr->th.th_serial_team ); - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( master ); - KMP_DEBUG_ASSERT( master->th.th_root ); - - KMP_MB(); - - TCW_SYNC_PTR(this_thr->th.th_team, team); - - 
this_thr->th.th_info.ds.ds_tid = tid; - this_thr->th.th_set_nproc = 0; - if (__kmp_tasking_mode != tskm_immediate_exec) - // When tasking is possible, threads are not safe to reap until they are - // done tasking; this will be set when tasking code is exited in wait - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - else // no tasking --> always safe to reap - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; +/* TODO optimize with one big memclr, take out what isn't needed, split + responsibility to workers as much as possible, and delay initialization of + features as much as possible */ +static void __kmp_initialize_info(kmp_info_t *this_thr, kmp_team_t *team, + int tid, int gtid) { + /* this_thr->th.th_info.ds.ds_gtid is setup in + kmp_allocate_thread/create_worker. + this_thr->th.th_serial_team is setup in __kmp_allocate_thread */ + kmp_info_t *master = team->t.t_threads[0]; + KMP_DEBUG_ASSERT(this_thr != NULL); + KMP_DEBUG_ASSERT(this_thr->th.th_serial_team); + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(team->t.t_threads); + KMP_DEBUG_ASSERT(team->t.t_dispatch); + KMP_DEBUG_ASSERT(master); + KMP_DEBUG_ASSERT(master->th.th_root); + + KMP_MB(); + + TCW_SYNC_PTR(this_thr->th.th_team, team); + + this_thr->th.th_info.ds.ds_tid = tid; + this_thr->th.th_set_nproc = 0; + if (__kmp_tasking_mode != tskm_immediate_exec) + // When tasking is possible, threads are not safe to reap until they are + // done tasking; this will be set when tasking code is exited in wait + this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; + else // no tasking --> always safe to reap + this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; #if OMP_40_ENABLED - this_thr->th.th_set_proc_bind = proc_bind_default; -# if KMP_AFFINITY_SUPPORTED - this_thr->th.th_new_place = this_thr->th.th_current_place; -# endif + this_thr->th.th_set_proc_bind = proc_bind_default; +#if KMP_AFFINITY_SUPPORTED + this_thr->th.th_new_place = this_thr->th.th_current_place; #endif - this_thr->th.th_root = master->th.th_root; +#endif + this_thr->th.th_root = master->th.th_root; - /* setup the thread's cache of the team structure */ - this_thr->th.th_team_nproc = team->t.t_nproc; - this_thr->th.th_team_master = master; - this_thr->th.th_team_serialized = team->t.t_serialized; - TCW_PTR(this_thr->th.th_sleep_loc, NULL); + /* setup the thread's cache of the team structure */ + this_thr->th.th_team_nproc = team->t.t_nproc; + this_thr->th.th_team_master = master; + this_thr->th.th_team_serialized = team->t.t_serialized; + TCW_PTR(this_thr->th.th_sleep_loc, NULL); - KMP_DEBUG_ASSERT( team->t.t_implicit_task_taskdata ); + KMP_DEBUG_ASSERT(team->t.t_implicit_task_taskdata); - KF_TRACE( 10, ( "__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task ) ); + KF_TRACE(10, ("__kmp_initialize_info1: T#%d:%d this_thread=%p curtask=%p\n", + tid, gtid, this_thr, this_thr->th.th_current_task)); - __kmp_init_implicit_task( this_thr->th.th_team_master->th.th_ident, this_thr, team, tid, TRUE ); + __kmp_init_implicit_task(this_thr->th.th_team_master->th.th_ident, this_thr, + team, tid, TRUE); - KF_TRACE( 10, ( "__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", - tid, gtid, this_thr, this_thr->th.th_current_task ) ); - // TODO: Initialize ICVs from parent; GEH - isn't that already done in __kmp_initialize_team()? 
+ KF_TRACE(10, ("__kmp_initialize_info2: T#%d:%d this_thread=%p curtask=%p\n", + tid, gtid, this_thr, this_thr->th.th_current_task)); + // TODO: Initialize ICVs from parent; GEH - isn't that already done in + // __kmp_initialize_team()? - /* TODO no worksharing in speculative threads */ - this_thr->th.th_dispatch = &team->t.t_dispatch[ tid ]; + /* TODO no worksharing in speculative threads */ + this_thr->th.th_dispatch = &team->t.t_dispatch[tid]; - this_thr->th.th_local.this_construct = 0; + this_thr->th.th_local.this_construct = 0; #ifdef BUILD_TV - this_thr->th.th_local.tv_data = 0; + this_thr->th.th_local.tv_data = 0; #endif - if ( ! this_thr->th.th_pri_common ) { - this_thr->th.th_pri_common = (struct common_table *) __kmp_allocate( sizeof(struct common_table) ); - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( - gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, - sizeof( struct common_table ), "th_%d.th_pri_common\n", gtid - ); - }; // if - this_thr->th.th_pri_head = NULL; + if (!this_thr->th.th_pri_common) { + this_thr->th.th_pri_common = + (struct common_table *)__kmp_allocate(sizeof(struct common_table)); + if (__kmp_storage_map) { + __kmp_print_storage_map_gtid( + gtid, this_thr->th.th_pri_common, this_thr->th.th_pri_common + 1, + sizeof(struct common_table), "th_%d.th_pri_common\n", gtid); }; // if - - /* Initialize dynamic dispatch */ - { - volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; - /* - * Use team max_nproc since this will never change for the team. - */ - size_t disp_size = sizeof( dispatch_private_info_t ) * - ( team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers ); - KD_TRACE( 10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, team->t.t_max_nproc ) ); - KMP_ASSERT( dispatch ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( dispatch == &team->t.t_dispatch[ tid ] ); - - dispatch->th_disp_index = 0; + this_thr->th.th_pri_head = NULL; + }; // if + + /* Initialize dynamic dispatch */ + { + volatile kmp_disp_t *dispatch = this_thr->th.th_dispatch; + // Use team max_nproc since this will never change for the team. + size_t disp_size = + sizeof(dispatch_private_info_t) * + (team->t.t_max_nproc == 1 ? 1 : __kmp_dispatch_num_buffers); + KD_TRACE(10, ("__kmp_initialize_info: T#%d max_nproc: %d\n", gtid, + team->t.t_max_nproc)); + KMP_ASSERT(dispatch); + KMP_DEBUG_ASSERT(team->t.t_dispatch); + KMP_DEBUG_ASSERT(dispatch == &team->t.t_dispatch[tid]); + + dispatch->th_disp_index = 0; #if OMP_45_ENABLED - dispatch->th_doacross_buf_idx = 0; -#endif - if( ! dispatch->th_disp_buffer ) { - dispatch->th_disp_buffer = (dispatch_private_info_t *) __kmp_allocate( disp_size ); + dispatch->th_doacross_buf_idx = 0; +#endif + if (!dispatch->th_disp_buffer) { + dispatch->th_disp_buffer = + (dispatch_private_info_t *)__kmp_allocate(disp_size); + + if (__kmp_storage_map) { + __kmp_print_storage_map_gtid( + gtid, &dispatch->th_disp_buffer[0], + &dispatch->th_disp_buffer[team->t.t_max_nproc == 1 + ? 1 + : __kmp_dispatch_num_buffers], + disp_size, "th_%d.th_dispatch.th_disp_buffer " + "(team_%d.t_dispatch[%d].th_disp_buffer)", + gtid, team->t.t_id, gtid); + } + } else { + memset(&dispatch->th_disp_buffer[0], '\0', disp_size); + } - if ( __kmp_storage_map ) { - __kmp_print_storage_map_gtid( gtid, &dispatch->th_disp_buffer[ 0 ], - &dispatch->th_disp_buffer[ team->t.t_max_nproc == 1 ? 
1 : __kmp_dispatch_num_buffers ],
-                                          disp_size, "th_%d.th_dispatch.th_disp_buffer "
-                                          "(team_%d.t_dispatch[%d].th_disp_buffer)",
-                                          gtid, team->t.t_id, gtid );
-            }
-        } else {
-            memset( & dispatch->th_disp_buffer[0], '\0', disp_size );
-        }
+    dispatch->th_dispatch_pr_current = 0;
+    dispatch->th_dispatch_sh_current = 0;

-        dispatch->th_dispatch_pr_current = 0;
-        dispatch->th_dispatch_sh_current = 0;
+    dispatch->th_deo_fcn = 0; /* ORDERED */
+    dispatch->th_dxo_fcn = 0; /* END ORDERED */
+  }

-        dispatch->th_deo_fcn = 0; /* ORDERED */
-        dispatch->th_dxo_fcn = 0; /* END ORDERED */
-    }
+  this_thr->th.th_next_pool = NULL;

-    this_thr->th.th_next_pool = NULL;
+  if (!this_thr->th.th_task_state_memo_stack) {
+    size_t i;
+    this_thr->th.th_task_state_memo_stack =
+        (kmp_uint8 *)__kmp_allocate(4 * sizeof(kmp_uint8));
+    this_thr->th.th_task_state_top = 0;
+    this_thr->th.th_task_state_stack_sz = 4;
+    for (i = 0; i < this_thr->th.th_task_state_stack_sz;
+         ++i) // zero init the stack
+      this_thr->th.th_task_state_memo_stack[i] = 0;
+  }

-    if (!this_thr->th.th_task_state_memo_stack) {
-        size_t i;
-        this_thr->th.th_task_state_memo_stack = (kmp_uint8 *) __kmp_allocate( 4*sizeof(kmp_uint8) );
-        this_thr->th.th_task_state_top = 0;
-        this_thr->th.th_task_state_stack_sz = 4;
-        for (i=0; i<this_thr->th.th_task_state_stack_sz; ++i) // zero init the stack
-            this_thr->th.th_task_state_memo_stack[i] = 0;
-    }
-
-    KMP_DEBUG_ASSERT( !this_thr->th.th_spin_here );
-    KMP_DEBUG_ASSERT( this_thr->th.th_next_waiting == 0 );
+  KMP_DEBUG_ASSERT(!this_thr->th.th_spin_here);
+  KMP_DEBUG_ASSERT(this_thr->th.th_next_waiting == 0);

-    KMP_MB();
+  KMP_MB();
 }

-
-/* allocate a new thread for the requesting team. this is only called from within a
- * forkjoin critical section. we will first try to get an available thread from the
- * thread pool. if none is available, we will fork a new one assuming we are able
- * to create a new one. this should be assured, as the caller should check on this
- * first.
- */
-kmp_info_t *
-__kmp_allocate_thread( kmp_root_t *root, kmp_team_t *team, int new_tid )
-{
-    kmp_team_t *serial_team;
-    kmp_info_t *new_thr;
-    int new_gtid;
-
-    KA_TRACE( 20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid() ));
-    KMP_DEBUG_ASSERT( root && team );
+/* allocate a new thread for the requesting team. this is only called from
+   within a forkjoin critical section. we will first try to get an available
+   thread from the thread pool. if none is available, we will fork a new one
+   assuming we are able to create a new one. this should be assured, as the
+   caller should check on this first. */
+kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
+                                  int new_tid) {
+  kmp_team_t *serial_team;
+  kmp_info_t *new_thr;
+  int new_gtid;
+
+  KA_TRACE(20, ("__kmp_allocate_thread: T#%d\n", __kmp_get_gtid()));
+  KMP_DEBUG_ASSERT(root && team);
 #if !KMP_NESTED_HOT_TEAMS
-    KMP_DEBUG_ASSERT( KMP_MASTER_GTID( __kmp_get_gtid() ));
+  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(__kmp_get_gtid()));
 #endif
-    KMP_MB();
+  KMP_MB();

-    /* first, try to get one from the thread pool */
-    if ( __kmp_thread_pool ) {
+  /* first, try to get one from the thread pool */
+  if (__kmp_thread_pool) {

-        new_thr = (kmp_info_t*)__kmp_thread_pool;
-        __kmp_thread_pool = (volatile kmp_info_t *) new_thr->th.th_next_pool;
-        if ( new_thr == __kmp_thread_pool_insert_pt ) {
-            __kmp_thread_pool_insert_pt = NULL;
-        }
-        TCW_4(new_thr->th.th_in_pool, FALSE);
-        //
-        // Don't touch th_active_in_pool or th_active.
-        // The worker thread adjusts those flags as it sleeps/awakens.
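The pool handling above is an intrusive LIFO free list: idle kmp_info_t records are chained through th_next_pool, and reuse simply pops the head, clearing the insertion hint if it pointed at the popped node. A minimal sketch of that pop, with an invented node type and globals standing in for kmp_info_t, __kmp_thread_pool and __kmp_thread_pool_insert_pt:

    typedef struct pool_node {
      struct pool_node *next; /* plays the role of th.th_next_pool */
    } pool_node_t;

    static pool_node_t *pool_head = NULL; /* like __kmp_thread_pool */
    static pool_node_t *insert_pt = NULL; /* like __kmp_thread_pool_insert_pt */

    static pool_node_t *pool_pop(void) {
      pool_node_t *node = pool_head;
      if (node != NULL) {
        pool_head = node->next;
        if (node == insert_pt) /* keep the insertion hint valid */
          insert_pt = NULL;
      }
      return node;
    }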
- // - __kmp_thread_pool_nth--; + new_thr = (kmp_info_t *)__kmp_thread_pool; + __kmp_thread_pool = (volatile kmp_info_t *)new_thr->th.th_next_pool; + if (new_thr == __kmp_thread_pool_insert_pt) { + __kmp_thread_pool_insert_pt = NULL; + } + TCW_4(new_thr->th.th_in_pool, FALSE); + // Don't touch th_active_in_pool or th_active. + // The worker thread adjusts those flags as it sleeps/awakens. + __kmp_thread_pool_nth--; - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", - __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid )); - KMP_ASSERT( ! new_thr->th.th_team ); - KMP_DEBUG_ASSERT( __kmp_nth < __kmp_threads_capacity ); - KMP_DEBUG_ASSERT( __kmp_thread_pool_nth >= 0 ); + KA_TRACE(20, ("__kmp_allocate_thread: T#%d using thread T#%d\n", + __kmp_get_gtid(), new_thr->th.th_info.ds.ds_gtid)); + KMP_ASSERT(!new_thr->th.th_team); + KMP_DEBUG_ASSERT(__kmp_nth < __kmp_threads_capacity); + KMP_DEBUG_ASSERT(__kmp_thread_pool_nth >= 0); - /* setup the thread structure */ - __kmp_initialize_info( new_thr, team, new_tid, new_thr->th.th_info.ds.ds_gtid ); - KMP_DEBUG_ASSERT( new_thr->th.th_serial_team ); + /* setup the thread structure */ + __kmp_initialize_info(new_thr, team, new_tid, + new_thr->th.th_info.ds.ds_gtid); + KMP_DEBUG_ASSERT(new_thr->th.th_serial_team); - TCW_4(__kmp_nth, __kmp_nth + 1); + TCW_4(__kmp_nth, __kmp_nth + 1); - new_thr->th.th_task_state = 0; - new_thr->th.th_task_state_top = 0; - new_thr->th.th_task_state_stack_sz = 4; + new_thr->th.th_task_state = 0; + new_thr->th.th_task_state_top = 0; + new_thr->th.th_task_state_stack_sz = 4; #ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessar y */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } - } + /* Adjust blocktime back to zero if necessary */ + /* Middle initialization might not have occurred yet */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + if (__kmp_nth > __kmp_avail_proc) { + __kmp_zero_bt = TRUE; + } + } #endif /* KMP_ADJUST_BLOCKTIME */ #if KMP_DEBUG - // If thread entered pool via __kmp_free_thread, wait_flag should != KMP_BARRIER_PARENT_FLAG. - int b; - kmp_balign_t * balign = new_thr->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + // If thread entered pool via __kmp_free_thread, wait_flag should != + // KMP_BARRIER_PARENT_FLAG. + int b; + kmp_balign_t *balign = new_thr->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); #endif - KF_TRACE( 10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", - __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid )); - - KMP_MB(); - return new_thr; - } - - - /* no, well fork a new one */ - KMP_ASSERT( __kmp_nth == __kmp_all_nth ); - KMP_ASSERT( __kmp_all_nth < __kmp_threads_capacity ); - -#if KMP_USE_MONITOR - // - // If this is the first worker thread the RTL is creating, then also - // launch the monitor thread. We try to do this as early as possible. - // - if ( ! TCR_4( __kmp_init_monitor ) ) { - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( ! TCR_4( __kmp_init_monitor ) ) { - KF_TRACE( 10, ( "before __kmp_create_monitor\n" ) ); - TCW_4( __kmp_init_monitor, 1 ); - __kmp_create_monitor( & __kmp_monitor ); - KF_TRACE( 10, ( "after __kmp_create_monitor\n" ) ); - #if KMP_OS_WINDOWS - // AC: wait until monitor has started. 
This is a fix for CQ232808. - // The reason is that if the library is loaded/unloaded in a loop with small (parallel) - // work in between, then there is high probability that monitor thread started after - // the library shutdown. At shutdown it is too late to cope with the problem, because - // when the master is in DllMain (process detach) the monitor has no chances to start - // (it is blocked), and master has no means to inform the monitor that the library has gone, - // because all the memory which the monitor can access is going to be released/reset. - while ( TCR_4(__kmp_init_monitor) < 2 ) { - KMP_YIELD( TRUE ); - } - KF_TRACE( 10, ( "after monitor thread has started\n" ) ); - #endif - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - } -#endif + KF_TRACE(10, ("__kmp_allocate_thread: T#%d using thread %p T#%d\n", + __kmp_get_gtid(), new_thr, new_thr->th.th_info.ds.ds_gtid)); KMP_MB(); - for( new_gtid=1 ; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid ) { - KMP_DEBUG_ASSERT( new_gtid < __kmp_threads_capacity ); - } - - /* allocate space for it. */ - new_thr = (kmp_info_t*) __kmp_allocate( sizeof(kmp_info_t) ); - - TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr); - - if ( __kmp_storage_map ) { - __kmp_print_thread_storage_map( new_thr, new_gtid ); - } + return new_thr; + } - /* add the reserve serialized team, initialized from the team's master thread */ - { - kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs( team ); - KF_TRACE( 10, ( "__kmp_allocate_thread: before th_serial/serial_team\n" ) ); + /* no, well fork a new one */ + KMP_ASSERT(__kmp_nth == __kmp_all_nth); + KMP_ASSERT(__kmp_all_nth < __kmp_threads_capacity); +#if KMP_USE_MONITOR + // If this is the first worker thread the RTL is creating, then also + // launch the monitor thread. We try to do this as early as possible. + if (!TCR_4(__kmp_init_monitor)) { + __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); + if (!TCR_4(__kmp_init_monitor)) { + KF_TRACE(10, ("before __kmp_create_monitor\n")); + TCW_4(__kmp_init_monitor, 1); + __kmp_create_monitor(&__kmp_monitor); + KF_TRACE(10, ("after __kmp_create_monitor\n")); +#if KMP_OS_WINDOWS + // AC: wait until monitor has started. This is a fix for CQ232808. + // The reason is that if the library is loaded/unloaded in a loop with + // small (parallel) work in between, then there is high probability that + // monitor thread started after the library shutdown. At shutdown it is + // too late to cope with the problem, because when the master is in + // DllMain (process detach) the monitor has no chances to start (it is + // blocked), and master has no means to inform the monitor that the + // library has gone, because all the memory which the monitor can access + // is going to be released/reset. + while (TCR_4(__kmp_init_monitor) < 2) { + KMP_YIELD(TRUE); + } + KF_TRACE(10, ("after monitor thread has started\n")); +#endif + } + __kmp_release_bootstrap_lock(&__kmp_monitor_lock); + } +#endif + + KMP_MB(); + for (new_gtid = 1; TCR_PTR(__kmp_threads[new_gtid]) != NULL; ++new_gtid) { + KMP_DEBUG_ASSERT(new_gtid < __kmp_threads_capacity); + } + + /* allocate space for it. 
*/
+  new_thr = (kmp_info_t *)__kmp_allocate(sizeof(kmp_info_t));
+
+  TCW_SYNC_PTR(__kmp_threads[new_gtid], new_thr);
+
+  if (__kmp_storage_map) {
+    __kmp_print_thread_storage_map(new_thr, new_gtid);
+  }
+
+  // add the reserve serialized team, initialized from the team's master thread
+  {
+    kmp_internal_control_t r_icvs = __kmp_get_x_global_icvs(team);
+    KF_TRACE(10, ("__kmp_allocate_thread: before th_serial/serial_team\n"));
     new_thr->th.th_serial_team = serial_team =
-        (kmp_team_t*) __kmp_allocate_team( root, 1, 1,
+        (kmp_team_t *)__kmp_allocate_team(root, 1, 1,
 #if OMPT_SUPPORT
-                                           0, // root parallel id
+                                          0, // root parallel id
 #endif
 #if OMP_40_ENABLED
-                                           proc_bind_default,
+                                          proc_bind_default,
 #endif
-                                           &r_icvs,
-                                           0 USE_NESTED_HOT_ARG(NULL) );
+                                          &r_icvs, 0 USE_NESTED_HOT_ARG(NULL));
   }
-    KMP_ASSERT ( serial_team );
-    serial_team->t.t_serialized = 0;   // AC: the team created in reserve, not for execution (it is unused for now).
-    serial_team->t.t_threads[0] = new_thr;
-    KF_TRACE( 10, ( "__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
-                    new_thr ) );
+  KMP_ASSERT(serial_team);
+  serial_team->t.t_serialized = 0; // AC: the team created in reserve, not for
+                                   // execution (it is unused for now).
+  serial_team->t.t_threads[0] = new_thr;
+  KF_TRACE(10,
+           ("__kmp_allocate_thread: after th_serial/serial_team : new_thr=%p\n",
+            new_thr));

-    /* setup the thread structures */
-    __kmp_initialize_info( new_thr, team, new_tid, new_gtid );
+  /* setup the thread structures */
+  __kmp_initialize_info(new_thr, team, new_tid, new_gtid);

-    #if USE_FAST_MEMORY
-        __kmp_initialize_fast_memory( new_thr );
-    #endif /* USE_FAST_MEMORY */
+#if USE_FAST_MEMORY
+  __kmp_initialize_fast_memory(new_thr);
+#endif /* USE_FAST_MEMORY */

-    #if KMP_USE_BGET
-        KMP_DEBUG_ASSERT( new_thr->th.th_local.bget_data == NULL );
-        __kmp_initialize_bget( new_thr );
-    #endif
+#if KMP_USE_BGET
+  KMP_DEBUG_ASSERT(new_thr->th.th_local.bget_data == NULL);
+  __kmp_initialize_bget(new_thr);
+#endif

-    __kmp_init_random( new_thr ); // Initialize random number generator
+  __kmp_init_random(new_thr); // Initialize random number generator

-    /* Initialize these only once when thread is grabbed for a team allocation */
-    KA_TRACE( 20, ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
-                   __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE ));
+  /* Initialize these only once when thread is grabbed for a team allocation */
+  KA_TRACE(20,
+           ("__kmp_allocate_thread: T#%d init go fork=%u, plain=%u\n",
+            __kmp_get_gtid(), KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE));

-    int b;
-    kmp_balign_t * balign = new_thr->th.th_bar;
-    for(b=0; b<bs_last_barrier; ++b) {
-        balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
-        balign[b].bb.team = NULL;
-        balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
-        balign[b].bb.use_oncore_barrier = 0;
-    }
+  int b;
+  kmp_balign_t *balign = new_thr->th.th_bar;
+  for (b = 0; b < bs_last_barrier; ++b) {
+    balign[b].bb.b_go = KMP_INIT_BARRIER_STATE;
+    balign[b].bb.team = NULL;
+    balign[b].bb.wait_flag = KMP_BARRIER_NOT_WAITING;
+    balign[b].bb.use_oncore_barrier = 0;
+  }

-    new_thr->th.th_spin_here = FALSE;
-    new_thr->th.th_next_waiting = 0;
+  new_thr->th.th_spin_here = FALSE;
+  new_thr->th.th_next_waiting = 0;
 #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
-    new_thr->th.th_current_place = KMP_PLACE_UNDEFINED;
-    new_thr->th.th_new_place = KMP_PLACE_UNDEFINED;
-    new_thr->th.th_first_place = KMP_PLACE_UNDEFINED;
-    new_thr->th.th_last_place = KMP_PLACE_UNDEFINED;
-#endif
-
-    TCW_4(new_thr->th.th_in_pool, FALSE);
-    new_thr->th.th_active_in_pool = FALSE;
-    TCW_4(new_thr->th.th_active, TRUE);
-
-    /* adjust the global counters */
-    __kmp_all_nth ++;
-    __kmp_nth ++;
-
-    //
-    // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp
search) - // for low numbers of procs, and method #2 (keyed API call) for higher - // numbers of procs. - // - if ( __kmp_adjust_gtid_mode ) { - if ( __kmp_all_nth >= __kmp_tls_gtid_min ) { - if ( TCR_4(__kmp_gtid_mode) != 2) { - TCW_4(__kmp_gtid_mode, 2); - } - } - else { - if (TCR_4(__kmp_gtid_mode) != 1 ) { - TCW_4(__kmp_gtid_mode, 1); - } - } + new_thr->th.th_current_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_new_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_first_place = KMP_PLACE_UNDEFINED; + new_thr->th.th_last_place = KMP_PLACE_UNDEFINED; +#endif + + TCW_4(new_thr->th.th_in_pool, FALSE); + new_thr->th.th_active_in_pool = FALSE; + TCW_4(new_thr->th.th_active, TRUE); + + /* adjust the global counters */ + __kmp_all_nth++; + __kmp_nth++; + + // if __kmp_adjust_gtid_mode is set, then we use method #1 (sp search) for low + // numbers of procs, and method #2 (keyed API call) for higher numbers. + if (__kmp_adjust_gtid_mode) { + if (__kmp_all_nth >= __kmp_tls_gtid_min) { + if (TCR_4(__kmp_gtid_mode) != 2) { + TCW_4(__kmp_gtid_mode, 2); + } + } else { + if (TCR_4(__kmp_gtid_mode) != 1) { + TCW_4(__kmp_gtid_mode, 1); + } } + } #ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to zero if necessary */ - /* Middle initialization might not have occurred yet */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } + /* Adjust blocktime back to zero if necessary */ + /* Middle initialization might not have occurred yet */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + if (__kmp_nth > __kmp_avail_proc) { + __kmp_zero_bt = TRUE; } + } #endif /* KMP_ADJUST_BLOCKTIME */ - /* actually fork it and create the new worker thread */ - KF_TRACE( 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr )); - __kmp_create_worker( new_gtid, new_thr, __kmp_stksize ); - KF_TRACE( 10, ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr )); - - KA_TRACE( 20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), new_gtid )); - KMP_MB(); - return new_thr; + /* actually fork it and create the new worker thread */ + KF_TRACE( + 10, ("__kmp_allocate_thread: before __kmp_create_worker: %p\n", new_thr)); + __kmp_create_worker(new_gtid, new_thr, __kmp_stksize); + KF_TRACE(10, + ("__kmp_allocate_thread: after __kmp_create_worker: %p\n", new_thr)); + + KA_TRACE(20, ("__kmp_allocate_thread: T#%d forked T#%d\n", __kmp_get_gtid(), + new_gtid)); + KMP_MB(); + return new_thr; } -/* - * reinitialize team for reuse. - * - * The hot team code calls this case at every fork barrier, so EPCC barrier - * test are extremely sensitive to changes in it, esp. writes to the team - * struct, which cause a cache invalidation in all threads. - * - * IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! - */ -static void -__kmp_reinitialize_team( kmp_team_t *team, kmp_internal_control_t *new_icvs, ident_t *loc ) { - KF_TRACE( 10, ( "__kmp_reinitialize_team: enter this_thread=%p team=%p\n", - team->t.t_threads[0], team ) ); - KMP_DEBUG_ASSERT( team && new_icvs); - KMP_DEBUG_ASSERT( ( ! 
TCR_4(__kmp_init_parallel) ) || new_icvs->nproc ); - KMP_CHECK_UPDATE(team->t.t_ident, loc); - - KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); - - // Copy ICVs to the master thread's implicit taskdata - __kmp_init_implicit_task( loc, team->t.t_threads[0], team, 0, FALSE ); - copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); - - KF_TRACE( 10, ( "__kmp_reinitialize_team: exit this_thread=%p team=%p\n", - team->t.t_threads[0], team ) ); +/* Reinitialize team for reuse. + The hot team code calls this case at every fork barrier, so EPCC barrier + test are extremely sensitive to changes in it, esp. writes to the team + struct, which cause a cache invalidation in all threads. + IF YOU TOUCH THIS ROUTINE, RUN EPCC C SYNCBENCH ON A BIG-IRON MACHINE!!! */ +static void __kmp_reinitialize_team(kmp_team_t *team, + kmp_internal_control_t *new_icvs, + ident_t *loc) { + KF_TRACE(10, ("__kmp_reinitialize_team: enter this_thread=%p team=%p\n", + team->t.t_threads[0], team)); + KMP_DEBUG_ASSERT(team && new_icvs); + KMP_DEBUG_ASSERT((!TCR_4(__kmp_init_parallel)) || new_icvs->nproc); + KMP_CHECK_UPDATE(team->t.t_ident, loc); + + KMP_CHECK_UPDATE(team->t.t_id, KMP_GEN_TEAM_ID()); + + // Copy ICVs to the master thread's implicit taskdata + __kmp_init_implicit_task(loc, team->t.t_threads[0], team, 0, FALSE); + copy_icvs(&team->t.t_implicit_task_taskdata[0].td_icvs, new_icvs); + + KF_TRACE(10, ("__kmp_reinitialize_team: exit this_thread=%p team=%p\n", + team->t.t_threads[0], team)); } +/* Initialize the team data structure. + This assumes the t_threads and t_max_nproc are already set. + Also, we don't touch the arguments */ +static void __kmp_initialize_team(kmp_team_t *team, int new_nproc, + kmp_internal_control_t *new_icvs, + ident_t *loc) { + KF_TRACE(10, ("__kmp_initialize_team: enter: team=%p\n", team)); -/* initialize the team data structure - * this assumes the t_threads and t_max_nproc are already set - * also, we don't touch the arguments */ -static void -__kmp_initialize_team( - kmp_team_t * team, - int new_nproc, - kmp_internal_control_t * new_icvs, - ident_t * loc -) { - KF_TRACE( 10, ( "__kmp_initialize_team: enter: team=%p\n", team ) ); - - /* verify */ - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( new_nproc <= team->t.t_max_nproc ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_MB(); + /* verify */ + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(new_nproc <= team->t.t_max_nproc); + KMP_DEBUG_ASSERT(team->t.t_threads); + KMP_MB(); - team->t.t_master_tid = 0; /* not needed */ - /* team->t.t_master_bar; not needed */ - team->t.t_serialized = new_nproc > 1 ? 0 : 1; - team->t.t_nproc = new_nproc; + team->t.t_master_tid = 0; /* not needed */ + /* team->t.t_master_bar; not needed */ + team->t.t_serialized = new_nproc > 1 ? 
0 : 1; + team->t.t_nproc = new_nproc; - /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ - team->t.t_next_pool = NULL; - /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess up hot team */ + /* team->t.t_parent = NULL; TODO not needed & would mess up hot team */ + team->t.t_next_pool = NULL; + /* memset( team->t.t_threads, 0, sizeof(kmp_info_t*)*new_nproc ); would mess + * up hot team */ - TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ - team->t.t_invoke = NULL; /* not needed */ + TCW_SYNC_PTR(team->t.t_pkfn, NULL); /* not needed */ + team->t.t_invoke = NULL; /* not needed */ - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - team->t.t_sched = new_icvs->sched; + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + team->t.t_sched = new_icvs->sched; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - team->t.t_fp_control_saved = FALSE; /* not needed */ - team->t.t_x87_fpu_control_word = 0; /* not needed */ - team->t.t_mxcsr = 0; /* not needed */ + team->t.t_fp_control_saved = FALSE; /* not needed */ + team->t.t_x87_fpu_control_word = 0; /* not needed */ + team->t.t_mxcsr = 0; /* not needed */ #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - team->t.t_construct = 0; - __kmp_init_lock( & team->t.t_single_lock ); + team->t.t_construct = 0; + __kmp_init_lock(&team->t.t_single_lock); - team->t.t_ordered .dt.t_value = 0; - team->t.t_master_active = FALSE; + team->t.t_ordered.dt.t_value = 0; + team->t.t_master_active = FALSE; - memset( & team->t.t_taskq, '\0', sizeof( kmp_taskq_t )); + memset(&team->t.t_taskq, '\0', sizeof(kmp_taskq_t)); #ifdef KMP_DEBUG - team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ + team->t.t_copypriv_data = NULL; /* not necessary, but nice for debugging */ #endif - team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ + team->t.t_copyin_counter = 0; /* for barrier-free copyin implementation */ - team->t.t_control_stack_top = NULL; + team->t.t_control_stack_top = NULL; - __kmp_reinitialize_team( team, new_icvs, loc ); + __kmp_reinitialize_team(team, new_icvs, loc); - KMP_MB(); - KF_TRACE( 10, ( "__kmp_initialize_team: exit: team=%p\n", team ) ); + KMP_MB(); + KF_TRACE(10, ("__kmp_initialize_team: exit: team=%p\n", team)); } #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED /* Sets full mask for thread and returns old mask, no changes to structures. */ static void -__kmp_set_thread_affinity_mask_full_tmp( kmp_affin_mask_t *old_mask ) -{ - if ( KMP_AFFINITY_CAPABLE() ) { - int status; - if ( old_mask != NULL ) { - status = __kmp_get_system_affinity( old_mask, TRUE ); - int error = errno; - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( ChangeThreadAffMaskError ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - } - __kmp_set_system_affinity( __kmp_affin_fullMask, TRUE ); - } +__kmp_set_thread_affinity_mask_full_tmp(kmp_affin_mask_t *old_mask) { + if (KMP_AFFINITY_CAPABLE()) { + int status; + if (old_mask != NULL) { + status = __kmp_get_system_affinity(old_mask, TRUE); + int error = errno; + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(ChangeThreadAffMaskError), + KMP_ERR(error), __kmp_msg_null); + } + } + __kmp_set_system_affinity(__kmp_affin_fullMask, TRUE); + } } #endif #if OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED -// // __kmp_partition_places() is the heart of the OpenMP 4.0 affinity mechanism. 
// It calculats the worker + master thread's partition based upon the parent // thread's partition, and binds each worker to a thread in their partition. // The master thread's partition should already include its current binding. -// -static void -__kmp_partition_places( kmp_team_t *team, int update_master_only ) -{ - // - // Copy the master thread's place partion to the team struct - // - kmp_info_t *master_th = team->t.t_threads[0]; - KMP_DEBUG_ASSERT( master_th != NULL ); - kmp_proc_bind_t proc_bind = team->t.t_proc_bind; - int first_place = master_th->th.th_first_place; - int last_place = master_th->th.th_last_place; - int masters_place = master_th->th.th_current_place; - team->t.t_first_place = first_place; - team->t.t_last_place = last_place; - - KA_TRACE( 20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) bound to place %d partition = [%d,%d]\n", - proc_bind, __kmp_gtid_from_thread( team->t.t_threads[0] ), team->t.t_id, - masters_place, first_place, last_place ) ); - - switch ( proc_bind ) { - - case proc_bind_default: - // - // serial teams might have the proc_bind policy set to - // proc_bind_default. It doesn't matter, as we don't - // rebind the master thread for any proc_bind policy. - // - KMP_DEBUG_ASSERT( team->t.t_nproc == 1 ); - break; - - case proc_bind_master: - { - int f; - int n_th = team->t.t_nproc; - for ( f = 1; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = masters_place; - - KA_TRACE( 100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, masters_place, first_place, last_place ) ); - } +static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { + // Copy the master thread's place partion to the team struct + kmp_info_t *master_th = team->t.t_threads[0]; + KMP_DEBUG_ASSERT(master_th != NULL); + kmp_proc_bind_t proc_bind = team->t.t_proc_bind; + int first_place = master_th->th.th_first_place; + int last_place = master_th->th.th_last_place; + int masters_place = master_th->th.th_current_place; + team->t.t_first_place = first_place; + team->t.t_last_place = last_place; + + KA_TRACE(20, ("__kmp_partition_places: enter: proc_bind = %d T#%d(%d:0) " + "bound to place %d partition = [%d,%d]\n", + proc_bind, __kmp_gtid_from_thread(team->t.t_threads[0]), + team->t.t_id, masters_place, first_place, last_place)); + + switch (proc_bind) { + + case proc_bind_default: + // serial teams might have the proc_bind policy set to proc_bind_default. 
It + // doesn't matter, as we don't rebind master thread for any proc_bind policy + KMP_DEBUG_ASSERT(team->t.t_nproc == 1); + break; + + case proc_bind_master: { + int f; + int n_th = team->t.t_nproc; + for (f = 1; f < n_th; f++) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT(th != NULL); + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = masters_place; + + KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d " + "partition = [%d,%d]\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, + f, masters_place, first_place, last_place)); + } + } break; + + case proc_bind_close: { + int f; + int n_th = team->t.t_nproc; + int n_places; + if (first_place <= last_place) { + n_places = last_place - first_place + 1; + } else { + n_places = __kmp_affinity_num_masks - first_place + last_place + 1; + } + if (n_th <= n_places) { + int place = masters_place; + for (f = 1; f < n_th; f++) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT(th != NULL); + + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; } - break; + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = place; - case proc_bind_close: - { - int f; - int n_th = team->t.t_nproc; - int n_places; - if ( first_place <= last_place ) { - n_places = last_place - first_place + 1; - } - else { - n_places = __kmp_affinity_num_masks - first_place + last_place + 1; - } - if ( n_th <= n_places ) { - int place = masters_place; - for ( f = 1; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - - KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, place, first_place, last_place ) ); - } - } - else { - int S, rem, gap, s_count; - S = n_th / n_places; - s_count = 0; - rem = n_th - ( S * n_places ); - gap = rem > 0 ? 
n_places/rem : n_places; - int place = masters_place; - int gap_ct = gap; - for ( f = 0; f < n_th; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = first_place; - th->th.th_last_place = last_place; - th->th.th_new_place = place; - s_count++; - - if ( (s_count == S) && rem && (gap_ct == gap) ) { - // do nothing, add an extra thread to place on next iteration - } - else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { - // we added an extra thread to this place; move to next place - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count = 0; - gap_ct = 1; - rem--; - } - else if (s_count == S) { // place full; don't add extra - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - gap_ct++; - s_count = 0; - } - - KA_TRACE( 100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, th->th.th_new_place, first_place, - last_place ) ); - } - KMP_DEBUG_ASSERT( place == masters_place ); - } + KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " + "partition = [%d,%d]\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), + team->t.t_id, f, place, first_place, last_place)); + } + } else { + int S, rem, gap, s_count; + S = n_th / n_places; + s_count = 0; + rem = n_th - (S * n_places); + gap = rem > 0 ? n_places / rem : n_places; + int place = masters_place; + int gap_ct = gap; + for (f = 0; f < n_th; f++) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT(th != NULL); + + th->th.th_first_place = first_place; + th->th.th_last_place = last_place; + th->th.th_new_place = place; + s_count++; + + if ((s_count == S) && rem && (gap_ct == gap)) { + // do nothing, add an extra thread to place on next iteration + } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { + // we added an extra thread to this place; move to next place + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + s_count = 0; + gap_ct = 1; + rem--; + } else if (s_count == S) { // place full; don't add extra + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + gap_ct++; + s_count = 0; + } + + KA_TRACE(100, + ("__kmp_partition_places: close: T#%d(%d:%d) place %d " + "partition = [%d,%d]\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, + th->th.th_new_place, first_place, last_place)); + } + KMP_DEBUG_ASSERT(place == masters_place); + } + } break; + + case proc_bind_spread: { + int f; + int n_th = team->t.t_nproc; + int n_places; + int thidx; + if (first_place <= last_place) { + n_places = last_place - first_place + 1; + } else { + n_places = __kmp_affinity_num_masks - first_place + last_place + 1; + } + if (n_th <= n_places) { + int place = masters_place; + int S = n_places / n_th; + int s_count, rem, gap, gap_ct; + rem = n_places - n_th * S; + gap = rem ? 
n_th / rem : 1; + gap_ct = gap; + thidx = n_th; + if (update_master_only == 1) + thidx = 1; + for (f = 0; f < thidx; f++) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT(th != NULL); + + th->th.th_first_place = place; + th->th.th_new_place = place; + s_count = 1; + while (s_count < S) { + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + s_count++; + } + if (rem && (gap_ct == gap)) { + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + rem--; + gap_ct = 0; + } + th->th.th_last_place = place; + gap_ct++; + + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; } - break; - case proc_bind_spread: - { - int f; - int n_th = team->t.t_nproc; - int n_places; - int thidx; - if ( first_place <= last_place ) { - n_places = last_place - first_place + 1; - } - else { - n_places = __kmp_affinity_num_masks - first_place + last_place + 1; - } - if ( n_th <= n_places ) { - int place = masters_place; - int S = n_places/n_th; - int s_count, rem, gap, gap_ct; - rem = n_places - n_th*S; - gap = rem ? n_th/rem : 1; - gap_ct = gap; - thidx = n_th; - if (update_master_only == 1) - thidx = 1; - for ( f = 0; f < thidx; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = place; - th->th.th_new_place = place; - s_count = 1; - while (s_count < S) { - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count++; - } - if (rem && (gap_ct == gap)) { - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - rem--; - gap_ct = 0; - } - th->th.th_last_place = place; - gap_ct++; - - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - - KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), - team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place ) ); - } - KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); - } - else { - int S, rem, gap, s_count; - S = n_th / n_places; - s_count = 0; - rem = n_th - ( S * n_places ); - gap = rem > 0 ? 
n_places/rem : n_places; - int place = masters_place; - int gap_ct = gap; - thidx = n_th; - if (update_master_only == 1) - thidx = 1; - for ( f = 0; f < thidx; f++ ) { - kmp_info_t *th = team->t.t_threads[f]; - KMP_DEBUG_ASSERT( th != NULL ); - - th->th.th_first_place = place; - th->th.th_last_place = place; - th->th.th_new_place = place; - s_count++; - - if ( (s_count == S) && rem && (gap_ct == gap) ) { - // do nothing, add an extra thread to place on next iteration - } - else if ( (s_count == S+1) && rem && (gap_ct == gap) ) { - // we added an extra thread to this place; move on to next place - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - s_count = 0; - gap_ct = 1; - rem--; - } - else if (s_count == S) { // place is full; don't add extra thread - if ( place == last_place ) { - place = first_place; - } - else if ( place == (int)(__kmp_affinity_num_masks - 1) ) { - place = 0; - } - else { - place++; - } - gap_ct++; - s_count = 0; - } - - KA_TRACE( 100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d partition = [%d,%d]\n", - __kmp_gtid_from_thread( team->t.t_threads[f] ), + KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " + "partition = [%d,%d]\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), team->t.t_id, f, th->th.th_new_place, - th->th.th_first_place, th->th.th_last_place) ); - } - KMP_DEBUG_ASSERT( update_master_only || place == masters_place ); - } - } - break; - - default: - break; + th->th.th_first_place, th->th.th_last_place)); + } + KMP_DEBUG_ASSERT(update_master_only || place == masters_place); + } else { + int S, rem, gap, s_count; + S = n_th / n_places; + s_count = 0; + rem = n_th - (S * n_places); + gap = rem > 0 ? n_places / rem : n_places; + int place = masters_place; + int gap_ct = gap; + thidx = n_th; + if (update_master_only == 1) + thidx = 1; + for (f = 0; f < thidx; f++) { + kmp_info_t *th = team->t.t_threads[f]; + KMP_DEBUG_ASSERT(th != NULL); + + th->th.th_first_place = place; + th->th.th_last_place = place; + th->th.th_new_place = place; + s_count++; + + if ((s_count == S) && rem && (gap_ct == gap)) { + // do nothing, add an extra thread to place on next iteration + } else if ((s_count == S + 1) && rem && (gap_ct == gap)) { + // we added an extra thread to this place; move on to next place + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + s_count = 0; + gap_ct = 1; + rem--; + } else if (s_count == S) { // place is full; don't add extra thread + if (place == last_place) { + place = first_place; + } else if (place == (int)(__kmp_affinity_num_masks - 1)) { + place = 0; + } else { + place++; + } + gap_ct++; + s_count = 0; + } + + KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " + "partition = [%d,%d]\n", + __kmp_gtid_from_thread(team->t.t_threads[f]), + team->t.t_id, f, th->th.th_new_place, + th->th.th_first_place, th->th.th_last_place)); + } + KMP_DEBUG_ASSERT(update_master_only || place == masters_place); } + } break; + + default: + break; + } - KA_TRACE( 20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id ) ); + KA_TRACE(20, ("__kmp_partition_places: exit T#%d\n", team->t.t_id)); } #endif /* OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED */ -/* allocate a new team data structure to use. take one off of the free pool if available */ +/* allocate a new team data structure to use. 
take one off of the free pool if + available */ kmp_team_t * -__kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, +__kmp_allocate_team(kmp_root_t *root, int new_nproc, int max_nproc, #if OMPT_SUPPORT - ompt_parallel_id_t ompt_parallel_id, + ompt_parallel_id_t ompt_parallel_id, #endif #if OMP_40_ENABLED - kmp_proc_bind_t new_proc_bind, + kmp_proc_bind_t new_proc_bind, #endif - kmp_internal_control_t *new_icvs, - int argc USE_NESTED_HOT_ARG(kmp_info_t *master) ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); - int f; - kmp_team_t *team; - int use_hot_team = ! root->r.r_active; - int level = 0; - - KA_TRACE( 20, ("__kmp_allocate_team: called\n")); - KMP_DEBUG_ASSERT( new_nproc >=1 && argc >=0 ); - KMP_DEBUG_ASSERT( max_nproc >= new_nproc ); - KMP_MB(); + kmp_internal_control_t *new_icvs, + int argc USE_NESTED_HOT_ARG(kmp_info_t *master)) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(KMP_allocate_team); + int f; + kmp_team_t *team; + int use_hot_team = !root->r.r_active; + int level = 0; -#if KMP_NESTED_HOT_TEAMS - kmp_hot_team_ptr_t *hot_teams; - if( master ) { - team = master->th.th_team; - level = team->t.t_active_level; - if( master->th.th_teams_microtask ) { // in teams construct? - if( master->th.th_teams_size.nteams > 1 && ( // #teams > 1 - team->t.t_pkfn == (microtask_t)__kmp_teams_master || // inner fork of the teams - master->th.th_teams_level < team->t.t_level ) ) { // or nested parallel inside the teams - ++level; // not increment if #teams==1, or for outer fork of the teams; increment otherwise - } - } - hot_teams = master->th.th_hot_teams; - if( level < __kmp_hot_teams_max_level && hot_teams && hot_teams[level].hot_team ) - { // hot team has already been allocated for given level - use_hot_team = 1; - } else { - use_hot_team = 0; - } + KA_TRACE(20, ("__kmp_allocate_team: called\n")); + KMP_DEBUG_ASSERT(new_nproc >= 1 && argc >= 0); + KMP_DEBUG_ASSERT(max_nproc >= new_nproc); + KMP_MB(); + +#if KMP_NESTED_HOT_TEAMS + kmp_hot_team_ptr_t *hot_teams; + if (master) { + team = master->th.th_team; + level = team->t.t_active_level; + if (master->th.th_teams_microtask) { // in teams construct? + if (master->th.th_teams_size.nteams > 1 && + ( // #teams > 1 + team->t.t_pkfn == + (microtask_t)__kmp_teams_master || // inner fork of the teams + master->th.th_teams_level < + team->t.t_level)) { // or nested parallel inside the teams + ++level; // not increment if #teams==1, or for outer fork of the teams; + // increment otherwise + } + } + hot_teams = master->th.th_hot_teams; + if (level < __kmp_hot_teams_max_level && hot_teams && + hot_teams[level] + .hot_team) { // hot team has already been allocated for given level + use_hot_team = 1; + } else { + use_hot_team = 0; } + } #endif - // Optimization to use a "hot" team - if( use_hot_team && new_nproc > 1 ) { - KMP_DEBUG_ASSERT( new_nproc == max_nproc ); + // Optimization to use a "hot" team + if (use_hot_team && new_nproc > 1) { + KMP_DEBUG_ASSERT(new_nproc == max_nproc); #if KMP_NESTED_HOT_TEAMS - team = hot_teams[level].hot_team; + team = hot_teams[level].hot_team; #else - team = root->r.r_hot_team; + team = root->r.r_hot_team; #endif #if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p before reinit\n", - team->t.t_task_team[0], team->t.t_task_team[1] )); - } -#endif - - // Has the number of threads changed? - /* Let's assume the most common case is that the number of threads is unchanged, and - put that case first. 
*/ - if (team->t.t_nproc == new_nproc) { // Check changes in number of threads - KA_TRACE( 20, ("__kmp_allocate_team: reusing hot team\n" )); - // This case can mean that omp_set_num_threads() was called and the hot team size - // was already reduced, so we check the special flag - if ( team->t.t_size_changed == -1 ) { - team->t.t_size_changed = 1; - } else { - KMP_CHECK_UPDATE(team->t.t_size_changed, 0); - } - - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - kmp_r_sched_t new_sched = new_icvs->sched; - if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || - team->t.t_sched.chunk != new_sched.chunk) - team->t.t_sched = new_sched; // set master's schedule as new run-time schedule - - __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); - - KF_TRACE( 10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); - __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); + if (__kmp_tasking_mode != tskm_immediate_exec) { + KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " + "task_team[1] = %p before reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1])); + } +#endif + + // Has the number of threads changed? + /* Let's assume the most common case is that the number of threads is + unchanged, and put that case first. */ + if (team->t.t_nproc == new_nproc) { // Check changes in number of threads + KA_TRACE(20, ("__kmp_allocate_team: reusing hot team\n")); + // This case can mean that omp_set_num_threads() was called and the hot + // team size + // was already reduced, so we check the special flag + if (team->t.t_size_changed == -1) { + team->t.t_size_changed = 1; + } else { + KMP_CHECK_UPDATE(team->t.t_size_changed, 0); + } + + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + kmp_r_sched_t new_sched = new_icvs->sched; + if (team->t.t_sched.r_sched_type != new_sched.r_sched_type || + team->t.t_sched.chunk != new_sched.chunk) + team->t.t_sched = + new_sched; // set master's schedule as new run-time schedule + + __kmp_reinitialize_team(team, new_icvs, + root->r.r_uber_thread->th.th_ident); + + KF_TRACE(10, ("__kmp_allocate_team2: T#%d, this_thread=%p team=%p\n", 0, + team->t.t_threads[0], team)); + __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); #if OMP_40_ENABLED -# if KMP_AFFINITY_SUPPORTED - if ( ( team->t.t_size_changed == 0 ) - && ( team->t.t_proc_bind == new_proc_bind ) ) { - if (new_proc_bind == proc_bind_spread) { - __kmp_partition_places(team, 1); // add flag to update only master for spread - } - KA_TRACE( 200, ("__kmp_allocate_team: reusing hot team #%d bindings: proc_bind = %d, partition = [%d,%d]\n", - team->t.t_id, new_proc_bind, team->t.t_first_place, - team->t.t_last_place ) ); - } - else { - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); - __kmp_partition_places( team ); - } -# else - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -# endif /* KMP_AFFINITY_SUPPORTED */ +#if KMP_AFFINITY_SUPPORTED + if ((team->t.t_size_changed == 0) && + (team->t.t_proc_bind == new_proc_bind)) { + if (new_proc_bind == proc_bind_spread) { + __kmp_partition_places( + team, 1); // add flag to update only master for spread + } + KA_TRACE(200, ("__kmp_allocate_team: reusing hot team #%d bindings: " + "proc_bind = %d, partition = [%d,%d]\n", + team->t.t_id, new_proc_bind, team->t.t_first_place, + team->t.t_last_place)); + } else { + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); + __kmp_partition_places(team); + } +#else + 
KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); +#endif /* KMP_AFFINITY_SUPPORTED */ #endif /* OMP_40_ENABLED */ - } - else if( team->t.t_nproc > new_nproc ) { - KA_TRACE( 20, ("__kmp_allocate_team: decreasing hot team thread count to %d\n", new_nproc )); + } else if (team->t.t_nproc > new_nproc) { + KA_TRACE(20, + ("__kmp_allocate_team: decreasing hot team thread count to %d\n", + new_nproc)); - team->t.t_size_changed = 1; + team->t.t_size_changed = 1; #if KMP_NESTED_HOT_TEAMS - if( __kmp_hot_teams_mode == 0 ) { - // AC: saved number of threads should correspond to team's value in this mode, - // can be bigger in mode 1, when hot team has some threads in reserve - KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); - hot_teams[level].hot_team_nth = new_nproc; + if (__kmp_hot_teams_mode == 0) { + // AC: saved number of threads should correspond to team's value in this + // mode, can be bigger in mode 1, when hot team has threads in reserve + KMP_DEBUG_ASSERT(hot_teams[level].hot_team_nth == team->t.t_nproc); + hot_teams[level].hot_team_nth = new_nproc; #endif // KMP_NESTED_HOT_TEAMS - /* release the extra threads we don't need any more */ - for( f = new_nproc ; f < team->t.t_nproc ; f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); - if ( __kmp_tasking_mode != tskm_immediate_exec) { - // When decreasing team size, threads no longer in the team should unref task team. - team->t.t_threads[f]->th.th_task_team = NULL; - } - __kmp_free_thread( team->t.t_threads[ f ] ); - team->t.t_threads[ f ] = NULL; - } + /* release the extra threads we don't need any more */ + for (f = new_nproc; f < team->t.t_nproc; f++) { + KMP_DEBUG_ASSERT(team->t.t_threads[f]); + if (__kmp_tasking_mode != tskm_immediate_exec) { + // When decreasing team size, threads no longer in the team should + // unref task team. 
+ team->t.t_threads[f]->th.th_task_team = NULL; + } + __kmp_free_thread(team->t.t_threads[f]); + team->t.t_threads[f] = NULL; + } #if KMP_NESTED_HOT_TEAMS - } // (__kmp_hot_teams_mode == 0) - else { - // When keeping extra threads in team, switch threads to wait on own b_go flag - for (f=new_nproc; ft.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; - for (int b=0; bt.t_nproc; ++f) { + KMP_DEBUG_ASSERT(team->t.t_threads[f]); + kmp_balign_t *balign = team->t.t_threads[f]->th.th_bar; + for (int b = 0; b < bs_last_barrier; ++b) { + if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) { + balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; } + KMP_CHECK_UPDATE(balign[b].bb.leaf_kids, 0); + } + } + } #endif // KMP_NESTED_HOT_TEAMS - team->t.t_nproc = new_nproc; - // TODO???: team->t.t_max_active_levels = new_max_active_levels; - if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || - team->t.t_sched.chunk != new_icvs->sched.chunk) - team->t.t_sched = new_icvs->sched; - __kmp_reinitialize_team( team, new_icvs, root->r.r_uber_thread->th.th_ident ); - - /* update the remaining threads */ - for(f = 0; f < new_nproc; ++f) { - team->t.t_threads[f]->th.th_team_nproc = new_nproc; - } - // restore the current task state of the master thread: should be the implicit task - KF_TRACE( 10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", - 0, team->t.t_threads[0], team ) ); - - __kmp_push_current_task_to_thread( team->t.t_threads[ 0 ], team, 0 ); + team->t.t_nproc = new_nproc; + // TODO???: team->t.t_max_active_levels = new_max_active_levels; + if (team->t.t_sched.r_sched_type != new_icvs->sched.r_sched_type || + team->t.t_sched.chunk != new_icvs->sched.chunk) + team->t.t_sched = new_icvs->sched; + __kmp_reinitialize_team(team, new_icvs, + root->r.r_uber_thread->th.th_ident); + + /* update the remaining threads */ + for (f = 0; f < new_nproc; ++f) { + team->t.t_threads[f]->th.th_team_nproc = new_nproc; + } + // restore the current task state of the master thread: should be the + // implicit task + KF_TRACE(10, ("__kmp_allocate_team: T#%d, this_thread=%p team=%p\n", 0, + team->t.t_threads[0], team)); + + __kmp_push_current_task_to_thread(team->t.t_threads[0], team, 0); #ifdef KMP_DEBUG - for ( f = 0; f < team->t.t_nproc; f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } + for (f = 0; f < team->t.t_nproc; f++) { + KMP_DEBUG_ASSERT(team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == + team->t.t_nproc); + } #endif #if OMP_40_ENABLED - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -# if KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); -# endif + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); +#if KMP_AFFINITY_SUPPORTED + __kmp_partition_places(team); #endif - } - else { // team->t.t_nproc < new_nproc +#endif + } else { // team->t.t_nproc < new_nproc #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - kmp_affin_mask_t *old_mask; - if ( KMP_AFFINITY_CAPABLE() ) { - KMP_CPU_ALLOC(old_mask); - } + kmp_affin_mask_t *old_mask; + if (KMP_AFFINITY_CAPABLE()) { + KMP_CPU_ALLOC(old_mask); + } #endif - KA_TRACE( 20, ("__kmp_allocate_team: increasing hot team thread count to %d\n", new_nproc )); + KA_TRACE(20, + ("__kmp_allocate_team: increasing hot team thread count to %d\n", + new_nproc)); - team->t.t_size_changed = 1; + team->t.t_size_changed = 1; #if KMP_NESTED_HOT_TEAMS - int avail_threads = hot_teams[level].hot_team_nth; - if( 
new_nproc < avail_threads ) - avail_threads = new_nproc; - kmp_info_t **other_threads = team->t.t_threads; - for ( f = team->t.t_nproc; f < avail_threads; ++f ) { - // Adjust barrier data of reserved threads (if any) of the team - // Other data will be set in __kmp_initialize_info() below. - int b; - kmp_balign_t * balign = other_threads[f]->th.th_bar; - for ( b = 0; b < bs_last_barrier; ++ b ) { - balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + int avail_threads = hot_teams[level].hot_team_nth; + if (new_nproc < avail_threads) + avail_threads = new_nproc; + kmp_info_t **other_threads = team->t.t_threads; + for (f = team->t.t_nproc; f < avail_threads; ++f) { + // Adjust barrier data of reserved threads (if any) of the team + // Other data will be set in __kmp_initialize_info() below. + int b; + kmp_balign_t *balign = other_threads[f]->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); #if USE_DEBUGGER - balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; -#endif - } - } - if( hot_teams[level].hot_team_nth >= new_nproc ) { - // we have all needed threads in reserve, no need to allocate any - // this only possible in mode 1, cannot have reserved threads in mode 0 - KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); - team->t.t_nproc = new_nproc; // just get reserved threads involved - } else { - // we may have some threads in reserve, but not enough - team->t.t_nproc = hot_teams[level].hot_team_nth; // get reserved threads involved if any - hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; +#endif + } + } + if (hot_teams[level].hot_team_nth >= new_nproc) { + // we have all needed threads in reserve, no need to allocate any + // this only possible in mode 1, cannot have reserved threads in mode 0 + KMP_DEBUG_ASSERT(__kmp_hot_teams_mode == 1); + team->t.t_nproc = new_nproc; // just get reserved threads involved + } else { + // we may have some threads in reserve, but not enough + team->t.t_nproc = + hot_teams[level] + .hot_team_nth; // get reserved threads involved if any + hot_teams[level].hot_team_nth = new_nproc; // adjust hot team max size #endif // KMP_NESTED_HOT_TEAMS - if(team->t.t_max_nproc < new_nproc) { - /* reallocate larger arrays */ - __kmp_reallocate_team_arrays(team, new_nproc); - __kmp_reinitialize_team( team, new_icvs, NULL ); - } + if (team->t.t_max_nproc < new_nproc) { + /* reallocate larger arrays */ + __kmp_reallocate_team_arrays(team, new_nproc); + __kmp_reinitialize_team(team, new_icvs, NULL); + } #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - /* Temporarily set full mask for master thread before - creation of workers. The reason is that workers inherit - the affinity from master, so if a lot of workers are - created on the single core quickly, they don't get - a chance to set their own affinity for a long time. 
- */ - __kmp_set_thread_affinity_mask_full_tmp( old_mask ); -#endif - - /* allocate new threads for the hot team */ - for( f = team->t.t_nproc ; f < new_nproc ; f++ ) { - kmp_info_t * new_worker = __kmp_allocate_thread( root, team, f ); - KMP_DEBUG_ASSERT( new_worker ); - team->t.t_threads[ f ] = new_worker; - - KA_TRACE( 20, ("__kmp_allocate_team: team %d init T#%d arrived: join=%llu, plain=%llu\n", - team->t.t_id, __kmp_gtid_from_tid( f, team ), team->t.t_id, f, - team->t.t_bar[bs_forkjoin_barrier].b_arrived, - team->t.t_bar[bs_plain_barrier].b_arrived ) ); - - { // Initialize barrier data for new threads. - int b; - kmp_balign_t * balign = new_worker->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + /* Temporarily set full mask for master thread before creation of + workers. The reason is that workers inherit the affinity from master, + so if a lot of workers are created on the single core quickly, they + don't get a chance to set their own affinity for a long time. */ + __kmp_set_thread_affinity_mask_full_tmp(old_mask); +#endif + + /* allocate new threads for the hot team */ + for (f = team->t.t_nproc; f < new_nproc; f++) { + kmp_info_t *new_worker = __kmp_allocate_thread(root, team, f); + KMP_DEBUG_ASSERT(new_worker); + team->t.t_threads[f] = new_worker; + + KA_TRACE(20, + ("__kmp_allocate_team: team %d init T#%d arrived: " + "join=%llu, plain=%llu\n", + team->t.t_id, __kmp_gtid_from_tid(f, team), team->t.t_id, f, + team->t.t_bar[bs_forkjoin_barrier].b_arrived, + team->t.t_bar[bs_plain_barrier].b_arrived)); + + { // Initialize barrier data for new threads. + int b; + kmp_balign_t *balign = new_worker->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != + KMP_BARRIER_PARENT_FLAG); #if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; #endif - } - } } + } + } #if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED - if ( KMP_AFFINITY_CAPABLE() ) { - /* Restore initial master thread's affinity mask */ - __kmp_set_system_affinity( old_mask, TRUE ); - KMP_CPU_FREE(old_mask); - } + if (KMP_AFFINITY_CAPABLE()) { + /* Restore initial master thread's affinity mask */ + __kmp_set_system_affinity(old_mask, TRUE); + KMP_CPU_FREE(old_mask); + } #endif #if KMP_NESTED_HOT_TEAMS - } // end of check of t_nproc vs. new_nproc vs. hot_team_nth + } // end of check of t_nproc vs. new_nproc vs. hot_team_nth #endif // KMP_NESTED_HOT_TEAMS - /* make sure everyone is syncronized */ - int old_nproc = team->t.t_nproc; // save old value and use to update only new threads below - __kmp_initialize_team( team, new_nproc, new_icvs, root->r.r_uber_thread->th.th_ident ); - - /* reinitialize the threads */ - KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); - for (f=0; f < team->t.t_nproc; ++f) - __kmp_initialize_info( team->t.t_threads[ f ], team, f, __kmp_gtid_from_tid( f, team ) ); - if (level) { // set th_task_state for new threads in nested hot team - // __kmp_initialize_info() no longer zeroes th_task_state, so we should only need to set the - // th_task_state for the new threads. th_task_state for master thread will not be accurate until - // after this in __kmp_fork_call(), so we look to the master's memo_stack to get the correct value. 
- for (f=old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = team->t.t_threads[0]->th.th_task_state_memo_stack[level]; - } - else { // set th_task_state for new threads in non-nested hot team - int old_state = team->t.t_threads[0]->th.th_task_state; // copy master's state - for (f=old_nproc; f < team->t.t_nproc; ++f) - team->t.t_threads[f]->th.th_task_state = old_state; - } + /* make sure everyone is syncronized */ + int old_nproc = team->t.t_nproc; // save old value and use to update only + // new threads below + __kmp_initialize_team(team, new_nproc, new_icvs, + root->r.r_uber_thread->th.th_ident); + + /* reinitialize the threads */ + KMP_DEBUG_ASSERT(team->t.t_nproc == new_nproc); + for (f = 0; f < team->t.t_nproc; ++f) + __kmp_initialize_info(team->t.t_threads[f], team, f, + __kmp_gtid_from_tid(f, team)); + if (level) { // set th_task_state for new threads in nested hot team + // __kmp_initialize_info() no longer zeroes th_task_state, so we should + // only need to set the th_task_state for the new threads. th_task_state + // for master thread will not be accurate until after this in + // __kmp_fork_call(), so we look to the master's memo_stack to get the + // correct value. + for (f = old_nproc; f < team->t.t_nproc; ++f) + team->t.t_threads[f]->th.th_task_state = + team->t.t_threads[0]->th.th_task_state_memo_stack[level]; + } else { // set th_task_state for new threads in non-nested hot team + int old_state = + team->t.t_threads[0]->th.th_task_state; // copy master's state + for (f = old_nproc; f < team->t.t_nproc; ++f) + team->t.t_threads[f]->th.th_task_state = old_state; + } #ifdef KMP_DEBUG - for ( f = 0; f < team->t.t_nproc; ++ f ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } + for (f = 0; f < team->t.t_nproc; ++f) { + KMP_DEBUG_ASSERT(team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == + team->t.t_nproc); + } #endif #if OMP_40_ENABLED - KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); -# if KMP_AFFINITY_SUPPORTED - __kmp_partition_places( team ); -# endif + KMP_CHECK_UPDATE(team->t.t_proc_bind, new_proc_bind); +#if KMP_AFFINITY_SUPPORTED + __kmp_partition_places(team); +#endif #endif - } // Check changes in number of threads + } // Check changes in number of threads #if OMP_40_ENABLED - kmp_info_t *master = team->t.t_threads[0]; - if( master->th.th_teams_microtask ) { - for( f = 1; f < new_nproc; ++f ) { - // propagate teams construct specific info to workers - kmp_info_t *thr = team->t.t_threads[f]; - thr->th.th_teams_microtask = master->th.th_teams_microtask; - thr->th.th_teams_level = master->th.th_teams_level; - thr->th.th_teams_size = master->th.th_teams_size; - } - } + kmp_info_t *master = team->t.t_threads[0]; + if (master->th.th_teams_microtask) { + for (f = 1; f < new_nproc; ++f) { + // propagate teams construct specific info to workers + kmp_info_t *thr = team->t.t_threads[f]; + thr->th.th_teams_microtask = master->th.th_teams_microtask; + thr->th.th_teams_level = master->th.th_teams_level; + thr->th.th_teams_size = master->th.th_teams_size; + } + } #endif /* OMP_40_ENABLED */ #if KMP_NESTED_HOT_TEAMS - if( level ) { - // Sync barrier state for nested hot teams, not needed for outermost hot team. 
- for( f = 1; f < new_nproc; ++f ) { - kmp_info_t *thr = team->t.t_threads[f]; - int b; - kmp_balign_t * balign = thr->th.th_bar; - for( b = 0; b < bs_last_barrier; ++ b ) { - balign[ b ].bb.b_arrived = team->t.t_bar[ b ].b_arrived; - KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); + if (level) { + // Sync barrier state for nested hot teams, not needed for outermost hot + // team. + for (f = 1; f < new_nproc; ++f) { + kmp_info_t *thr = team->t.t_threads[f]; + int b; + kmp_balign_t *balign = thr->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + balign[b].bb.b_arrived = team->t.t_bar[b].b_arrived; + KMP_DEBUG_ASSERT(balign[b].bb.wait_flag != KMP_BARRIER_PARENT_FLAG); #if USE_DEBUGGER - balign[ b ].bb.b_worker_arrived = team->t.t_bar[ b ].b_team_arrived; + balign[b].bb.b_worker_arrived = team->t.t_bar[b].b_team_arrived; #endif - } - } } + } + } #endif // KMP_NESTED_HOT_TEAMS - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries( argc, team, TRUE ); - KMP_CHECK_UPDATE(team->t.t_argc, argc); - // - // The hot team re-uses the previous task team, - // if untouched during the previous release->gather phase. - // + /* reallocate space for arguments if necessary */ + __kmp_alloc_argv_entries(argc, team, TRUE); + KMP_CHECK_UPDATE(team->t.t_argc, argc); + // The hot team re-uses the previous task team, + // if untouched during the previous release->gather phase. - KF_TRACE( 10, ( " hot_team = %p\n", team ) ); + KF_TRACE(10, (" hot_team = %p\n", team)); #if KMP_DEBUG - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - KA_TRACE( 20, ("__kmp_allocate_team: hot team task_team[0] = %p task_team[1] = %p after reinit\n", - team->t.t_task_team[0], team->t.t_task_team[1] )); - } + if (__kmp_tasking_mode != tskm_immediate_exec) { + KA_TRACE(20, ("__kmp_allocate_team: hot team task_team[0] = %p " + "task_team[1] = %p after reinit\n", + team->t.t_task_team[0], team->t.t_task_team[1])); + } #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_id); #endif - KMP_MB(); - - return team; - } - - /* next, let's try to take one from the team pool */ KMP_MB(); - for( team = (kmp_team_t*) __kmp_team_pool ; (team) ; ) - { - /* TODO: consider resizing undersized teams instead of reaping them, now that we have a resizing mechanism */ - if ( team->t.t_max_nproc >= max_nproc ) { - /* take this team from the team pool */ - __kmp_team_pool = team->t.t_next_pool; - - /* setup the team for fresh use */ - __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); - - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1]) ); - team->t.t_task_team[0] = NULL; - team->t.t_task_team[1] = NULL; - - /* reallocate space for arguments if necessary */ - __kmp_alloc_argv_entries( argc, team, TRUE ); - KMP_CHECK_UPDATE(team->t.t_argc, argc); - - KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - { // Initialize barrier data. 
- int b; - for ( b = 0; b < bs_last_barrier; ++ b) { - team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; + + return team; + } + + /* next, let's try to take one from the team pool */ + KMP_MB(); + for (team = (kmp_team_t *)__kmp_team_pool; (team);) { + /* TODO: consider resizing undersized teams instead of reaping them, now + that we have a resizing mechanism */ + if (team->t.t_max_nproc >= max_nproc) { + /* take this team from the team pool */ + __kmp_team_pool = team->t.t_next_pool; + + /* setup the team for fresh use */ + __kmp_initialize_team(team, new_nproc, new_icvs, NULL); + + KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and " + "task_team[1] %p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1])); + team->t.t_task_team[0] = NULL; + team->t.t_task_team[1] = NULL; + + /* reallocate space for arguments if necessary */ + __kmp_alloc_argv_entries(argc, team, TRUE); + KMP_CHECK_UPDATE(team->t.t_argc, argc); + + KA_TRACE( + 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", + team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); + { // Initialize barrier data. + int b; + for (b = 0; b < bs_last_barrier; ++b) { + team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; #if USE_DEBUGGER - team->t.t_bar[ b ].b_master_arrived = 0; - team->t.t_bar[ b ].b_team_arrived = 0; + team->t.t_bar[b].b_master_arrived = 0; + team->t.t_bar[b].b_team_arrived = 0; #endif - } - } + } + } #if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; + team->t.t_proc_bind = new_proc_bind; #endif - KA_TRACE( 20, ("__kmp_allocate_team: using team from pool %d.\n", team->t.t_id )); + KA_TRACE(20, ("__kmp_allocate_team: using team from pool %d.\n", + team->t.t_id)); #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); + __ompt_team_assign_id(team, ompt_parallel_id); #endif - KMP_MB(); - - return team; - } + KMP_MB(); - /* reap team if it is too small, then loop back and check the next one */ - /* not sure if this is wise, but, will be redone during the hot-teams rewrite. */ - /* TODO: Use technique to find the right size hot-team, don't reap them */ - team = __kmp_reap_team( team ); - __kmp_team_pool = team; + return team; } - /* nothing available in the pool, no matter, make a new team! */ - KMP_MB(); - team = (kmp_team_t*) __kmp_allocate( sizeof( kmp_team_t ) ); +/* reap team if it is too small, then loop back and check the next one */ +// not sure if this is wise, but, will be redone during the hot-teams rewrite. +/* TODO: Use technique to find the right size hot-team, don't reap them */ + team = __kmp_reap_team(team); + __kmp_team_pool = team; + } - /* and set it up */ - team->t.t_max_nproc = max_nproc; - /* NOTE well, for some reason allocating one big buffer and dividing it - * up seems to really hurt performance a lot on the P4, so, let's not use - * this... */ - __kmp_allocate_team_arrays( team, max_nproc ); + /* nothing available in the pool, no matter, make a new team! 
*/ + KMP_MB(); + team = (kmp_team_t *)__kmp_allocate(sizeof(kmp_team_t)); - KA_TRACE( 20, ( "__kmp_allocate_team: making a new team\n" ) ); - __kmp_initialize_team( team, new_nproc, new_icvs, NULL ); + /* and set it up */ + team->t.t_max_nproc = max_nproc; + /* NOTE well, for some reason allocating one big buffer and dividing it up + seems to really hurt performance a lot on the P4, so, let's not use this */ + __kmp_allocate_team_arrays(team, max_nproc); - KA_TRACE( 20, ( "__kmp_allocate_team: setting task_team[0] %p and task_team[1] %p to NULL\n", - &team->t.t_task_team[0], &team->t.t_task_team[1] ) ); - team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate - team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes memory, no need to duplicate + KA_TRACE(20, ("__kmp_allocate_team: making a new team\n")); + __kmp_initialize_team(team, new_nproc, new_icvs, NULL); - if ( __kmp_storage_map ) { - __kmp_print_team_storage_map( "team", team, team->t.t_id, new_nproc ); - } + KA_TRACE(20, ("__kmp_allocate_team: setting task_team[0] %p and task_team[1] " + "%p to NULL\n", + &team->t.t_task_team[0], &team->t.t_task_team[1])); + team->t.t_task_team[0] = NULL; // to be removed, as __kmp_allocate zeroes + // memory, no need to duplicate + team->t.t_task_team[1] = NULL; // to be removed, as __kmp_allocate zeroes + // memory, no need to duplicate - /* allocate space for arguments */ - __kmp_alloc_argv_entries( argc, team, FALSE ); - team->t.t_argc = argc; + if (__kmp_storage_map) { + __kmp_print_team_storage_map("team", team, team->t.t_id, new_nproc); + } - KA_TRACE( 20, ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", - team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE )); - { // Initialize barrier data. - int b; - for ( b = 0; b < bs_last_barrier; ++ b ) { - team->t.t_bar[ b ].b_arrived = KMP_INIT_BARRIER_STATE; + /* allocate space for arguments */ + __kmp_alloc_argv_entries(argc, team, FALSE); + team->t.t_argc = argc; + + KA_TRACE(20, + ("__kmp_allocate_team: team %d init arrived: join=%u, plain=%u\n", + team->t.t_id, KMP_INIT_BARRIER_STATE, KMP_INIT_BARRIER_STATE)); + { // Initialize barrier data. + int b; + for (b = 0; b < bs_last_barrier; ++b) { + team->t.t_bar[b].b_arrived = KMP_INIT_BARRIER_STATE; #if USE_DEBUGGER - team->t.t_bar[ b ].b_master_arrived = 0; - team->t.t_bar[ b ].b_team_arrived = 0; + team->t.t_bar[b].b_master_arrived = 0; + team->t.t_bar[b].b_team_arrived = 0; #endif - } } + } #if OMP_40_ENABLED - team->t.t_proc_bind = new_proc_bind; + team->t.t_proc_bind = new_proc_bind; #endif #if OMPT_SUPPORT - __ompt_team_assign_id(team, ompt_parallel_id); - team->t.ompt_serialized_team_info = NULL; + __ompt_team_assign_id(team, ompt_parallel_id); + team->t.ompt_serialized_team_info = NULL; #endif - KMP_MB(); + KMP_MB(); - KA_TRACE( 20, ("__kmp_allocate_team: done creating a new team %d.\n", team->t.t_id )); + KA_TRACE(20, ("__kmp_allocate_team: done creating a new team %d.\n", + team->t.t_id)); - return team; + return team; } /* TODO implement hot-teams at all levels */ @@ -5224,136 +5231,138 @@ __kmp_allocate_team( kmp_root_t *root, int new_nproc, int max_nproc, /* free the team. return it to the team pool. 
release all the threads * associated with it */ -void -__kmp_free_team( kmp_root_t *root, kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master) ) -{ - int f; - KA_TRACE( 20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), team->t.t_id )); - - /* verify state */ - KMP_DEBUG_ASSERT( root ); - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_nproc <= team->t.t_max_nproc ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - - int use_hot_team = team == root->r.r_hot_team; +void __kmp_free_team(kmp_root_t *root, + kmp_team_t *team USE_NESTED_HOT_ARG(kmp_info_t *master)) { + int f; + KA_TRACE(20, ("__kmp_free_team: T#%d freeing team %d\n", __kmp_get_gtid(), + team->t.t_id)); + + /* verify state */ + KMP_DEBUG_ASSERT(root); + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(team->t.t_nproc <= team->t.t_max_nproc); + KMP_DEBUG_ASSERT(team->t.t_threads); + + int use_hot_team = team == root->r.r_hot_team; #if KMP_NESTED_HOT_TEAMS - int level; - kmp_hot_team_ptr_t *hot_teams; - if( master ) { - level = team->t.t_active_level - 1; - if( master->th.th_teams_microtask ) { // in teams construct? - if( master->th.th_teams_size.nteams > 1 ) { - ++level; // level was not increased in teams construct for team_of_masters - } - if( team->t.t_pkfn != (microtask_t)__kmp_teams_master && - master->th.th_teams_level == team->t.t_level ) { - ++level; // level was not increased in teams construct for team_of_workers before the parallel - } // team->t.t_level will be increased inside parallel - } - hot_teams = master->th.th_hot_teams; - if( level < __kmp_hot_teams_max_level ) { - KMP_DEBUG_ASSERT( team == hot_teams[level].hot_team ); - use_hot_team = 1; - } - } + int level; + kmp_hot_team_ptr_t *hot_teams; + if (master) { + level = team->t.t_active_level - 1; + if (master->th.th_teams_microtask) { // in teams construct? + if (master->th.th_teams_size.nteams > 1) { + ++level; // level was not increased in teams construct for + // team_of_masters + } + if (team->t.t_pkfn != (microtask_t)__kmp_teams_master && + master->th.th_teams_level == team->t.t_level) { + ++level; // level was not increased in teams construct for + // team_of_workers before the parallel + } // team->t.t_level will be increased inside parallel + } + hot_teams = master->th.th_hot_teams; + if (level < __kmp_hot_teams_max_level) { + KMP_DEBUG_ASSERT(team == hot_teams[level].hot_team); + use_hot_team = 1; + } + } #endif // KMP_NESTED_HOT_TEAMS - /* team is done working */ - TCW_SYNC_PTR(team->t.t_pkfn, NULL); // Important for Debugging Support Library. - team->t.t_copyin_counter = 0; // init counter for possible reuse - // Do not reset pointer to parent team to NULL for hot teams. - - /* if we are non-hot team, release our threads */ - if( ! use_hot_team ) { - if (__kmp_tasking_mode != tskm_immediate_exec) { - // Wait for threads to reach reapable state - for (f = 1; f < team->t.t_nproc; ++f) { - KMP_DEBUG_ASSERT(team->t.t_threads[f]); - kmp_info_t *th = team->t.t_threads[f]; - volatile kmp_uint32 *state = &th->th.th_reap_state; - while (*state != KMP_SAFE_TO_REAP) { + /* team is done working */ + TCW_SYNC_PTR(team->t.t_pkfn, + NULL); // Important for Debugging Support Library. + team->t.t_copyin_counter = 0; // init counter for possible reuse + // Do not reset pointer to parent team to NULL for hot teams. 
+ + /* if we are non-hot team, release our threads */ + if (!use_hot_team) { + if (__kmp_tasking_mode != tskm_immediate_exec) { + // Wait for threads to reach reapable state + for (f = 1; f < team->t.t_nproc; ++f) { + KMP_DEBUG_ASSERT(team->t.t_threads[f]); + kmp_info_t *th = team->t.t_threads[f]; + volatile kmp_uint32 *state = &th->th.th_reap_state; + while (*state != KMP_SAFE_TO_REAP) { #if KMP_OS_WINDOWS - // On Windows a thread can be killed at any time, check this - DWORD ecode; - if (!__kmp_is_thread_alive(th, &ecode)) { - *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread - break; - } -#endif - // first check if thread is sleeping - kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); - if (fl.is_sleeping()) - fl.resume(__kmp_gtid_from_thread(th)); - KMP_CPU_PAUSE(); - } - } - - // Delete task teams - int tt_idx; - for (tt_idx=0; tt_idx<2; ++tt_idx) { - kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; - if ( task_team != NULL ) { - for (f=0; ft.t_nproc; ++f) { // Have all threads unref task teams - team->t.t_threads[f]->th.th_task_team = NULL; - } - KA_TRACE( 20, ( "__kmp_free_team: T#%d deactivating task_team %p on team %d\n", __kmp_get_gtid(), task_team, team->t.t_id ) ); + // On Windows a thread can be killed at any time, check this + DWORD ecode; + if (!__kmp_is_thread_alive(th, &ecode)) { + *state = KMP_SAFE_TO_REAP; // reset the flag for dead thread + break; + } +#endif + // first check if thread is sleeping + kmp_flag_64 fl(&th->th.th_bar[bs_forkjoin_barrier].bb.b_go, th); + if (fl.is_sleeping()) + fl.resume(__kmp_gtid_from_thread(th)); + KMP_CPU_PAUSE(); + } + } + + // Delete task teams + int tt_idx; + for (tt_idx = 0; tt_idx < 2; ++tt_idx) { + kmp_task_team_t *task_team = team->t.t_task_team[tt_idx]; + if (task_team != NULL) { + for (f = 0; f < team->t.t_nproc; + ++f) { // Have all threads unref task teams + team->t.t_threads[f]->th.th_task_team = NULL; + } + KA_TRACE( + 20, + ("__kmp_free_team: T#%d deactivating task_team %p on team %d\n", + __kmp_get_gtid(), task_team, team->t.t_id)); #if KMP_NESTED_HOT_TEAMS - __kmp_free_task_team( master, task_team ); + __kmp_free_task_team(master, task_team); #endif - team->t.t_task_team[tt_idx] = NULL; - } - } + team->t.t_task_team[tt_idx] = NULL; } + } + } - // Reset pointer to parent team only for non-hot teams. - team->t.t_parent = NULL; - team->t.t_level = 0; - team->t.t_active_level = 0; - - /* free the worker threads */ - for ( f = 1; f < team->t.t_nproc; ++ f ) { - KMP_DEBUG_ASSERT( team->t.t_threads[ f ] ); - __kmp_free_thread( team->t.t_threads[ f ] ); - team->t.t_threads[ f ] = NULL; - } + // Reset pointer to parent team only for non-hot teams. + team->t.t_parent = NULL; + team->t.t_level = 0; + team->t.t_active_level = 0; - /* put the team back in the team pool */ - /* TODO limit size of team pool, call reap_team if pool too large */ - team->t.t_next_pool = (kmp_team_t*) __kmp_team_pool; - __kmp_team_pool = (volatile kmp_team_t*) team; + /* free the worker threads */ + for (f = 1; f < team->t.t_nproc; ++f) { + KMP_DEBUG_ASSERT(team->t.t_threads[f]); + __kmp_free_thread(team->t.t_threads[f]); + team->t.t_threads[f] = NULL; } - KMP_MB(); -} + /* put the team back in the team pool */ + /* TODO limit size of team pool, call reap_team if pool too large */ + team->t.t_next_pool = (kmp_team_t *)__kmp_team_pool; + __kmp_team_pool = (volatile kmp_team_t *)team; + } + KMP_MB(); +} /* reap the team. 
destroy it, reclaim all its resources and free its memory */ -kmp_team_t * -__kmp_reap_team( kmp_team_t *team ) -{ - kmp_team_t *next_pool = team->t.t_next_pool; +kmp_team_t *__kmp_reap_team(kmp_team_t *team) { + kmp_team_t *next_pool = team->t.t_next_pool; - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); - KMP_DEBUG_ASSERT( team->t.t_threads ); - KMP_DEBUG_ASSERT( team->t.t_argv ); + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(team->t.t_dispatch); + KMP_DEBUG_ASSERT(team->t.t_disp_buffer); + KMP_DEBUG_ASSERT(team->t.t_threads); + KMP_DEBUG_ASSERT(team->t.t_argv); - /* TODO clean the threads that are a part of this? */ + /* TODO clean the threads that are a part of this? */ - /* free stuff */ + /* free stuff */ + __kmp_free_team_arrays(team); + if (team->t.t_argv != &team->t.t_inline_argv[0]) + __kmp_free((void *)team->t.t_argv); + __kmp_free(team); - __kmp_free_team_arrays( team ); - if ( team->t.t_argv != &team->t.t_inline_argv[0] ) - __kmp_free( (void*) team->t.t_argv ); - __kmp_free( team ); - - KMP_MB(); - return next_pool; + KMP_MB(); + return next_pool; } -// // Free the thread. Don't reap it, just place it on the pool of available // threads. // @@ -5379,2343 +5388,2248 @@ __kmp_reap_team( kmp_team_t *team ) // grew and shrunk. // // Now, for single-level parallelism, the OMP tid is alway == gtid. -// -void -__kmp_free_thread( kmp_info_t *this_th ) -{ - int gtid; - kmp_info_t **scan; - - KA_TRACE( 20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", - __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid )); - - KMP_DEBUG_ASSERT( this_th ); - - // When moving thread to pool, switch thread to wait on own b_go flag, and uninitialized (NULL team). - int b; - kmp_balign_t *balign = this_th->th.th_bar; - for (b=0; bth.th_task_state = 0; - - /* put thread back on the free pool */ - TCW_PTR(this_th->th.th_team, NULL); - TCW_PTR(this_th->th.th_root, NULL); - TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ - - // - // If the __kmp_thread_pool_insert_pt is already past the new insert - // point, then we need to re-scan the entire list. - // - gtid = this_th->th.th_info.ds.ds_gtid; - if ( __kmp_thread_pool_insert_pt != NULL ) { - KMP_DEBUG_ASSERT( __kmp_thread_pool != NULL ); - if ( __kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid ) { - __kmp_thread_pool_insert_pt = NULL; - } - } - - // - // Scan down the list to find the place to insert the thread. - // scan is the address of a link in the list, possibly the address of - // __kmp_thread_pool itself. - // - // In the absence of nested parallism, the for loop will have 0 iterations. - // - if ( __kmp_thread_pool_insert_pt != NULL ) { - scan = &( __kmp_thread_pool_insert_pt->th.th_next_pool ); - } - else { - scan = (kmp_info_t **)&__kmp_thread_pool; - } - for (; ( *scan != NULL ) && ( (*scan)->th.th_info.ds.ds_gtid < gtid ); - scan = &( (*scan)->th.th_next_pool ) ); - - // - // Insert the new element on the list, and set __kmp_thread_pool_insert_pt - // to its address. 
- // - TCW_PTR(this_th->th.th_next_pool, *scan); - __kmp_thread_pool_insert_pt = *scan = this_th; - KMP_DEBUG_ASSERT( ( this_th->th.th_next_pool == NULL ) - || ( this_th->th.th_info.ds.ds_gtid - < this_th->th.th_next_pool->th.th_info.ds.ds_gtid ) ); - TCW_4(this_th->th.th_in_pool, TRUE); - __kmp_thread_pool_nth++; - - TCW_4(__kmp_nth, __kmp_nth - 1); +void __kmp_free_thread(kmp_info_t *this_th) { + int gtid; + kmp_info_t **scan; + + KA_TRACE(20, ("__kmp_free_thread: T#%d putting T#%d back on free pool.\n", + __kmp_get_gtid(), this_th->th.th_info.ds.ds_gtid)); + + KMP_DEBUG_ASSERT(this_th); + + // When moving thread to pool, switch thread to wait on own b_go flag, and + // uninitialized (NULL team). + int b; + kmp_balign_t *balign = this_th->th.th_bar; + for (b = 0; b < bs_last_barrier; ++b) { + if (balign[b].bb.wait_flag == KMP_BARRIER_PARENT_FLAG) + balign[b].bb.wait_flag = KMP_BARRIER_SWITCH_TO_OWN_FLAG; + balign[b].bb.team = NULL; + balign[b].bb.leaf_kids = 0; + } + this_th->th.th_task_state = 0; + + /* put thread back on the free pool */ + TCW_PTR(this_th->th.th_team, NULL); + TCW_PTR(this_th->th.th_root, NULL); + TCW_PTR(this_th->th.th_dispatch, NULL); /* NOT NEEDED */ + + // If the __kmp_thread_pool_insert_pt is already past the new insert + // point, then we need to re-scan the entire list. + gtid = this_th->th.th_info.ds.ds_gtid; + if (__kmp_thread_pool_insert_pt != NULL) { + KMP_DEBUG_ASSERT(__kmp_thread_pool != NULL); + if (__kmp_thread_pool_insert_pt->th.th_info.ds.ds_gtid > gtid) { + __kmp_thread_pool_insert_pt = NULL; + } + } + + // Scan down the list to find the place to insert the thread. + // scan is the address of a link in the list, possibly the address of + // __kmp_thread_pool itself. + // + // In the absence of nested parallism, the for loop will have 0 iterations. + if (__kmp_thread_pool_insert_pt != NULL) { + scan = &(__kmp_thread_pool_insert_pt->th.th_next_pool); + } else { + scan = (kmp_info_t **)&__kmp_thread_pool; + } + for (; (*scan != NULL) && ((*scan)->th.th_info.ds.ds_gtid < gtid); + scan = &((*scan)->th.th_next_pool)) + ; + + // Insert the new element on the list, and set __kmp_thread_pool_insert_pt + // to its address. 
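/* The scan above is the classic pointer-to-pointer sorted insert, with
   __kmp_thread_pool_insert_pt caching the last insertion point so that a run
   of frees with ascending gtids does not rescan the whole list.  A minimal
   sketch of the same idea with a simplified, hypothetical node type: */
typedef struct pool_node {
  int gtid;
  struct pool_node *next;
} pool_node_t;

static pool_node_t *pool_head = NULL;      // sorted ascending by gtid
static pool_node_t *pool_insert_pt = NULL; // cached hint, may be NULL

static void pool_insert_sorted(pool_node_t *n) {
  // Drop the hint if it already sits past the new node's position.
  if (pool_insert_pt != NULL && pool_insert_pt->gtid > n->gtid)
    pool_insert_pt = NULL;
  // scan holds the address of the link to rewrite (possibly &pool_head).
  pool_node_t **scan = pool_insert_pt ? &pool_insert_pt->next : &pool_head;
  for (; *scan != NULL && (*scan)->gtid < n->gtid; scan = &(*scan)->next)
    ;
  n->next = *scan; // splice in, keeping the list sorted
  *scan = n;
  pool_insert_pt = n; // remember where we inserted for the next call
}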
+ TCW_PTR(this_th->th.th_next_pool, *scan); + __kmp_thread_pool_insert_pt = *scan = this_th; + KMP_DEBUG_ASSERT((this_th->th.th_next_pool == NULL) || + (this_th->th.th_info.ds.ds_gtid < + this_th->th.th_next_pool->th.th_info.ds.ds_gtid)); + TCW_4(this_th->th.th_in_pool, TRUE); + __kmp_thread_pool_nth++; + + TCW_4(__kmp_nth, __kmp_nth - 1); #ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to user setting or default if necessary */ - /* Middle initialization might never have occurred */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth <= __kmp_avail_proc ) { - __kmp_zero_bt = FALSE; - } - } + /* Adjust blocktime back to user setting or default if necessary */ + /* Middle initialization might never have occurred */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); + if (__kmp_nth <= __kmp_avail_proc) { + __kmp_zero_bt = FALSE; + } + } #endif /* KMP_ADJUST_BLOCKTIME */ - KMP_MB(); + KMP_MB(); } - /* ------------------------------------------------------------------------ */ -void * -__kmp_launch_thread( kmp_info_t *this_thr ) -{ - int gtid = this_thr->th.th_info.ds.ds_gtid; -/* void *stack_data;*/ - kmp_team_t *(*volatile pteam); +void *__kmp_launch_thread(kmp_info_t *this_thr) { + int gtid = this_thr->th.th_info.ds.ds_gtid; + /* void *stack_data;*/ + kmp_team_t *(*volatile pteam); - KMP_MB(); - KA_TRACE( 10, ("__kmp_launch_thread: T#%d start\n", gtid ) ); + KMP_MB(); + KA_TRACE(10, ("__kmp_launch_thread: T#%d start\n", gtid)); - if( __kmp_env_consistency_check ) { - this_thr->th.th_cons = __kmp_allocate_cons_stack( gtid ); // ATT: Memory leak? - } + if (__kmp_env_consistency_check) { + this_thr->th.th_cons = __kmp_allocate_cons_stack(gtid); // ATT: Memory leak? + } #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - this_thr->th.ompt_thread_info.wait_id = 0; - this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - __ompt_thread_begin(ompt_thread_worker, gtid); - } + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + this_thr->th.ompt_thread_info.wait_id = 0; + this_thr->th.ompt_thread_info.idle_frame = __builtin_frame_address(0); + if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { + __ompt_thread_begin(ompt_thread_worker, gtid); } + } #endif - /* This is the place where threads wait for work */ - while( ! 
TCR_4(__kmp_global.g.g_done) ) { - KMP_DEBUG_ASSERT( this_thr == __kmp_threads[ gtid ] ); - KMP_MB(); + /* This is the place where threads wait for work */ + while (!TCR_4(__kmp_global.g.g_done)) { + KMP_DEBUG_ASSERT(this_thr == __kmp_threads[gtid]); + KMP_MB(); - /* wait for work to do */ - KA_TRACE( 20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid )); + /* wait for work to do */ + KA_TRACE(20, ("__kmp_launch_thread: T#%d waiting for work\n", gtid)); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_idle; - } + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_idle; + } #endif - /* No tid yet since not part of a team */ - __kmp_fork_barrier( gtid, KMP_GTID_DNE ); + /* No tid yet since not part of a team */ + __kmp_fork_barrier(gtid, KMP_GTID_DNE); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } #endif - pteam = (kmp_team_t *(*))(& this_thr->th.th_team); + pteam = (kmp_team_t * (*))(&this_thr->th.th_team); - /* have we been allocated? */ - if ( TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done) ) { + /* have we been allocated? */ + if (TCR_SYNC_PTR(*pteam) && !TCR_4(__kmp_global.g.g_done)) { #if OMPT_SUPPORT - ompt_task_info_t *task_info; - ompt_parallel_id_t my_parallel_id; - if (ompt_enabled) { - task_info = __ompt_get_taskinfo(0); - my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; - } -#endif - /* we were just woken up, so run our new task */ - if ( TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL ) { - int rc; - KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", - gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); - - updateHWFPControl (*pteam); + ompt_task_info_t *task_info; + ompt_parallel_id_t my_parallel_id; + if (ompt_enabled) { + task_info = __ompt_get_taskinfo(0); + my_parallel_id = (*pteam)->t.ompt_team_info.parallel_id; + } +#endif + /* we were just woken up, so run our new task */ + if (TCR_SYNC_PTR((*pteam)->t.t_pkfn) != NULL) { + int rc; + KA_TRACE(20, + ("__kmp_launch_thread: T#%d(%d:%d) invoke microtask = %p\n", + gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), + (*pteam)->t.t_pkfn)); + + updateHWFPControl(*pteam); #if OMPT_SUPPORT - if (ompt_enabled) { - this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; - // Initialize OMPT task id for implicit task. - int tid = __kmp_tid_from_gtid(gtid); - task_info->task_id = __ompt_task_id_new(tid); - } + if (ompt_enabled) { + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + // Initialize OMPT task id for implicit task. 
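/* Zooming out from the OMPT bookkeeping at this point: stripped of tracing,
   the loop this block sits in is the usual fork/join worker skeleton.  A
   schematic sketch, where the extern helpers are hypothetical stand-ins for
   the runtime's barrier and invocation entry points: */
extern volatile int g_done;                 // stand-in for __kmp_global.g.g_done
extern void wait_at_fork_barrier(int gtid); // park until the master forks
extern int team_assigned(int gtid);         // were we handed a team to run?
extern int invoke_microtask(int gtid);      // run the outlined parallel body
extern void wait_at_join_barrier(int gtid); // rendezvous before going idle

static void worker_main_loop(int gtid) {
  while (!g_done) {
    wait_at_fork_barrier(gtid);        // sleep here between parallel regions
    if (team_assigned(gtid) && !g_done) {
      int rc = invoke_microtask(gtid); // the runtime asserts rc != 0
      (void)rc;
      wait_at_join_barrier(gtid);      // master reclaims the team after this
    }
  }
}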
+ int tid = __kmp_tid_from_gtid(gtid); + task_info->task_id = __ompt_task_id_new(tid); + } #endif - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - rc = (*pteam)->t.t_invoke( gtid ); - } - KMP_ASSERT( rc ); + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + rc = (*pteam)->t.t_invoke(gtid); + } + KMP_ASSERT(rc); #if OMPT_SUPPORT - if (ompt_enabled) { - /* no frame set while outside task */ - task_info->frame.exit_runtime_frame = NULL; + if (ompt_enabled) { + /* no frame set while outside task */ + task_info->frame.exit_runtime_frame = NULL; - this_thr->th.ompt_thread_info.state = ompt_state_overhead; - } + this_thr->th.ompt_thread_info.state = ompt_state_overhead; + } #endif - KMP_MB(); - KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", - gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), (*pteam)->t.t_pkfn)); - } - /* join barrier after parallel region */ - __kmp_join_barrier( gtid ); + KMP_MB(); + KA_TRACE(20, ("__kmp_launch_thread: T#%d(%d:%d) done microtask = %p\n", + gtid, (*pteam)->t.t_id, __kmp_tid_from_gtid(gtid), + (*pteam)->t.t_pkfn)); + } + /* join barrier after parallel region */ + __kmp_join_barrier(gtid); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { - // don't access *pteam here: it may have already been freed - // by the master thread behind the barrier (possible race) - ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( - my_parallel_id, task_info->task_id); - } - task_info->frame.exit_runtime_frame = NULL; - task_info->task_id = 0; - } -#endif + if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)) { + // don't access *pteam here: it may have already been freed + // by the master thread behind the barrier (possible race) + ompt_callbacks.ompt_callback(ompt_event_implicit_task_end)( + my_parallel_id, task_info->task_id); } + task_info->frame.exit_runtime_frame = NULL; + task_info->task_id = 0; + } +#endif } - TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); + } + TCR_SYNC_PTR((intptr_t)__kmp_global.g.g_done); #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_thread_end)) { - __ompt_thread_end(ompt_thread_worker, gtid); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_thread_end)) { + __ompt_thread_end(ompt_thread_worker, gtid); + } #endif - this_thr->th.th_task_team = NULL; - /* run the destructors for the threadprivate data for this thread */ - __kmp_common_destroy_gtid( gtid ); + this_thr->th.th_task_team = NULL; + /* run the destructors for the threadprivate data for this thread */ + __kmp_common_destroy_gtid(gtid); - KA_TRACE( 10, ("__kmp_launch_thread: T#%d done\n", gtid ) ); - KMP_MB(); - return this_thr; + KA_TRACE(10, ("__kmp_launch_thread: T#%d done\n", gtid)); + KMP_MB(); + return this_thr; } /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -void -__kmp_internal_end_dest( void *specific_gtid ) -{ - #if KMP_COMPILER_ICC - #pragma warning( push ) - #pragma warning( disable: 810 ) // conversion from "void *" to "int" may lose significant bits - #endif - // Make sure no significant bits are lost - int gtid = (kmp_intptr_t)specific_gtid - 1; - #if KMP_COMPILER_ICC - #pragma warning( pop ) - #endif - - KA_TRACE( 30, ("__kmp_internal_end_dest: T#%d\n", gtid)); - /* NOTE: the gtid is stored as 
gitd+1 in the thread-local-storage - * this is because 0 is reserved for the nothing-stored case */ - - /* josh: One reason for setting the gtid specific data even when it is being - destroyed by pthread is to allow gtid lookup through thread specific data - (__kmp_gtid_get_specific). Some of the code, especially stat code, - that gets executed in the call to __kmp_internal_end_thread, actually - gets the gtid through the thread specific data. Setting it here seems - rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread - to run smoothly. - todo: get rid of this after we remove the dependence on - __kmp_gtid_get_specific - */ - if(gtid >= 0 && KMP_UBER_GTID(gtid)) - __kmp_gtid_set_specific( gtid ); - #ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; - #endif - __kmp_internal_end_thread( gtid ); +void __kmp_internal_end_dest(void *specific_gtid) { +#if KMP_COMPILER_ICC +#pragma warning(push) +#pragma warning(disable : 810) // conversion from "void *" to "int" may lose +// significant bits +#endif + // Make sure no significant bits are lost + int gtid = (kmp_intptr_t)specific_gtid - 1; +#if KMP_COMPILER_ICC +#pragma warning(pop) +#endif + + KA_TRACE(30, ("__kmp_internal_end_dest: T#%d\n", gtid)); + /* NOTE: the gtid is stored as gitd+1 in the thread-local-storage + * this is because 0 is reserved for the nothing-stored case */ + + /* josh: One reason for setting the gtid specific data even when it is being + destroyed by pthread is to allow gtid lookup through thread specific data + (__kmp_gtid_get_specific). Some of the code, especially stat code, + that gets executed in the call to __kmp_internal_end_thread, actually + gets the gtid through the thread specific data. Setting it here seems + rather inelegant and perhaps wrong, but allows __kmp_internal_end_thread + to run smoothly. + todo: get rid of this after we remove the dependence on + __kmp_gtid_get_specific */ + if (gtid >= 0 && KMP_UBER_GTID(gtid)) + __kmp_gtid_set_specific(gtid); +#ifdef KMP_TDATA_GTID + __kmp_gtid = gtid; +#endif + __kmp_internal_end_thread(gtid); } #if KMP_OS_UNIX && KMP_DYNAMIC_LIB -// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases destructors work -// perfectly, but in real libomp.so I have no evidence it is ever called. However, -fini linker -// option in makefile.mk works fine. +// 2009-09-08 (lev): It looks the destructor does not work. In simple test cases +// destructors work perfectly, but in real libomp.so I have no evidence it is +// ever called. However, -fini linker option in makefile.mk works fine. -__attribute__(( destructor )) -void -__kmp_internal_end_dtor( void ) -{ - __kmp_internal_end_atexit(); -} - -void -__kmp_internal_end_fini( void ) -{ - __kmp_internal_end_atexit(); +__attribute__((destructor)) void __kmp_internal_end_dtor(void) { + __kmp_internal_end_atexit(); } +void __kmp_internal_end_fini(void) { __kmp_internal_end_atexit(); } + +#endif + +/* [Windows] josh: when the atexit handler is called, there may still be more + than one thread alive */ +void __kmp_internal_end_atexit(void) { + KA_TRACE(30, ("__kmp_internal_end_atexit\n")); + /* [Windows] + josh: ideally, we want to completely shutdown the library in this atexit + handler, but stat code that depends on thread specific data for gtid fails + because that data becomes unavailable at some point during the shutdown, so + we call __kmp_internal_end_thread instead. 
We should eventually remove the + dependency on __kmp_get_specific_gtid in the stat code and use + __kmp_internal_end_library to cleanly shutdown the library. + + // TODO: Can some of this comment about GVS be removed? + I suspect that the offending stat code is executed when the calling thread + tries to clean up a dead root thread's data structures, resulting in GVS + code trying to close the GVS structures for that thread, but since the stat + code uses __kmp_get_specific_gtid to get the gtid with the assumption that + the calling thread is cleaning up itself instead of another thread, it get + confused. This happens because allowing a thread to unregister and cleanup + another thread is a recent modification for addressing an issue. + Based on the current design (20050722), a thread may end up + trying to unregister another thread only if thread death does not trigger + the calling of __kmp_internal_end_thread. For Linux* OS, there is the + thread specific data destructor function to detect thread death. For + Windows dynamic, there is DllMain(THREAD_DETACH). For Windows static, there + is nothing. Thus, the workaround is applicable only for Windows static + stat library. */ + __kmp_internal_end_library(-1); +#if KMP_OS_WINDOWS + __kmp_close_console(); #endif - -/* [Windows] josh: when the atexit handler is called, there may still be more than one thread alive */ -void -__kmp_internal_end_atexit( void ) -{ - KA_TRACE( 30, ( "__kmp_internal_end_atexit\n" ) ); - /* [Windows] - josh: ideally, we want to completely shutdown the library in this atexit handler, but - stat code that depends on thread specific data for gtid fails because that data becomes - unavailable at some point during the shutdown, so we call __kmp_internal_end_thread - instead. We should eventually remove the dependency on __kmp_get_specific_gtid in the - stat code and use __kmp_internal_end_library to cleanly shutdown the library. - -// TODO: Can some of this comment about GVS be removed? - I suspect that the offending stat code is executed when the calling thread tries to - clean up a dead root thread's data structures, resulting in GVS code trying to close - the GVS structures for that thread, but since the stat code uses - __kmp_get_specific_gtid to get the gtid with the assumption that the calling thread is - cleaning up itself instead of another thread, it gets confused. This happens because - allowing a thread to unregister and cleanup another thread is a recent modification for - addressing an issue with Maxon Cinema4D. Based on the current design (20050722), a - thread may end up trying to unregister another thread only if thread death does not - trigger the calling of __kmp_internal_end_thread. For Linux* OS, there is the thread - specific data destructor function to detect thread death. For Windows dynamic, there - is DllMain(THREAD_DETACH). For Windows static, there is nothing. Thus, the - workaround is applicable only for Windows static stat library. - */ - __kmp_internal_end_library( -1 ); - #if KMP_OS_WINDOWS - __kmp_close_console(); - #endif } -static void -__kmp_reap_thread( - kmp_info_t * thread, - int is_root -) { +static void __kmp_reap_thread(kmp_info_t *thread, int is_root) { + // It is assumed __kmp_forkjoin_lock is acquired. - // It is assumed __kmp_forkjoin_lock is acquired. + int gtid; - int gtid; + KMP_DEBUG_ASSERT(thread != NULL); - KMP_DEBUG_ASSERT( thread != NULL ); + gtid = thread->th.th_info.ds.ds_gtid; - gtid = thread->th.th_info.ds.ds_gtid; + if (!is_root) { - if ( ! 
is_root ) { + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + /* Assume the threads are at the fork barrier here */ + KA_TRACE( + 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", + gtid)); + /* Need release fence here to prevent seg faults for tree forkjoin barrier + * (GEH) */ + ANNOTATE_HAPPENS_BEFORE(thread); + kmp_flag_64 flag(&thread->th.th_bar[bs_forkjoin_barrier].bb.b_go, thread); + __kmp_release_64(&flag); + }; // if - if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { - /* Assume the threads are at the fork barrier here */ - KA_TRACE( 20, ("__kmp_reap_thread: releasing T#%d from fork barrier for reap\n", gtid ) ); - /* Need release fence here to prevent seg faults for tree forkjoin barrier (GEH) */ - ANNOTATE_HAPPENS_BEFORE(thread); - kmp_flag_64 flag(&thread->th.th_bar[ bs_forkjoin_barrier ].bb.b_go, thread); - __kmp_release_64(&flag); - }; // if + // Terminate OS thread. + __kmp_reap_worker(thread); - // Terminate OS thread. - __kmp_reap_worker( thread ); - - // - // The thread was killed asynchronously. If it was actively - // spinning in the thread pool, decrement the global count. - // - // There is a small timing hole here - if the worker thread was - // just waking up after sleeping in the pool, had reset it's - // th_active_in_pool flag but not decremented the global counter - // __kmp_thread_pool_active_nth yet, then the global counter - // might not get updated. - // - // Currently, this can only happen as the library is unloaded, - // so there are no harmful side effects. - // - if ( thread->th.th_active_in_pool ) { - thread->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } + // The thread was killed asynchronously. If it was actively + // spinning in the thread pool, decrement the global count. + // + // There is a small timing hole here - if the worker thread was just waking + // up after sleeping in the pool, had reset it's th_active_in_pool flag but + // not decremented the global counter __kmp_thread_pool_active_nth yet, then + // the global counter might not get updated. + // + // Currently, this can only happen as the library is unloaded, + // so there are no harmful side effects. + if (thread->th.th_active_in_pool) { + thread->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); + } - // Decrement # of [worker] threads in the pool. - KMP_DEBUG_ASSERT( __kmp_thread_pool_nth > 0 ); - --__kmp_thread_pool_nth; - }; // if + // Decrement # of [worker] threads in the pool. + KMP_DEBUG_ASSERT(__kmp_thread_pool_nth > 0); + --__kmp_thread_pool_nth; + }; // if - __kmp_free_implicit_task(thread); + __kmp_free_implicit_task(thread); - // Free the fast memory for tasking - #if USE_FAST_MEMORY - __kmp_free_fast_memory( thread ); - #endif /* USE_FAST_MEMORY */ +// Free the fast memory for tasking +#if USE_FAST_MEMORY + __kmp_free_fast_memory(thread); +#endif /* USE_FAST_MEMORY */ - __kmp_suspend_uninitialize_thread( thread ); + __kmp_suspend_uninitialize_thread(thread); - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] == thread ); - TCW_SYNC_PTR(__kmp_threads[gtid], NULL); + KMP_DEBUG_ASSERT(__kmp_threads[gtid] == thread); + TCW_SYNC_PTR(__kmp_threads[gtid], NULL); - -- __kmp_all_nth; - // __kmp_nth was decremented when thread is added to the pool. + --__kmp_all_nth; +// __kmp_nth was decremented when thread is added to the pool. 
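/* Reaping a pooled worker, as done above, is a two-step hand-off: first make
   sure the thread is not parked on its fork-barrier go flag (only needed when
   blocktime is finite), then join the OS thread and reconcile the pool
   counters.  A minimal sketch of that ordering, with hypothetical stand-ins
   for the flag release and the OS-level join: */
extern void release_go_flag(int gtid); // stand-in for the kmp_flag_64 release
extern void join_os_thread(int gtid);  // stand-in for __kmp_reap_worker
extern int pool_active_count;          // threads counted as spinning in pool
extern int pool_count;                 // threads parked in the pool

static void reap_pooled_worker(int gtid, int was_active_in_pool) {
  release_go_flag(gtid); // wake it so it can reach a reapable state
  join_os_thread(gtid);  // blocks until the OS thread has exited
  if (was_active_in_pool)
    --pool_active_count; // it died while still counted as active
  --pool_count;          // one fewer worker parked in the pool
}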
#ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime back to user setting or default if necessary */ - /* Middle initialization might never have occurred */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth <= __kmp_avail_proc ) { - __kmp_zero_bt = FALSE; - } - } + /* Adjust blocktime back to user setting or default if necessary */ + /* Middle initialization might never have occurred */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); + if (__kmp_nth <= __kmp_avail_proc) { + __kmp_zero_bt = FALSE; + } + } #endif /* KMP_ADJUST_BLOCKTIME */ - /* free the memory being used */ - if( __kmp_env_consistency_check ) { - if ( thread->th.th_cons ) { - __kmp_free_cons_stack( thread->th.th_cons ); - thread->th.th_cons = NULL; - }; // if - } - - if ( thread->th.th_pri_common != NULL ) { - __kmp_free( thread->th.th_pri_common ); - thread->th.th_pri_common = NULL; + /* free the memory being used */ + if (__kmp_env_consistency_check) { + if (thread->th.th_cons) { + __kmp_free_cons_stack(thread->th.th_cons); + thread->th.th_cons = NULL; }; // if + } - if (thread->th.th_task_state_memo_stack != NULL) { - __kmp_free(thread->th.th_task_state_memo_stack); - thread->th.th_task_state_memo_stack = NULL; - } + if (thread->th.th_pri_common != NULL) { + __kmp_free(thread->th.th_pri_common); + thread->th.th_pri_common = NULL; + }; // if - #if KMP_USE_BGET - if ( thread->th.th_local.bget_data != NULL ) { - __kmp_finalize_bget( thread ); - }; // if - #endif + if (thread->th.th_task_state_memo_stack != NULL) { + __kmp_free(thread->th.th_task_state_memo_stack); + thread->th.th_task_state_memo_stack = NULL; + } + +#if KMP_USE_BGET + if (thread->th.th_local.bget_data != NULL) { + __kmp_finalize_bget(thread); + }; // if +#endif #if KMP_AFFINITY_SUPPORTED - if ( thread->th.th_affin_mask != NULL ) { - KMP_CPU_FREE( thread->th.th_affin_mask ); - thread->th.th_affin_mask = NULL; - }; // if + if (thread->th.th_affin_mask != NULL) { + KMP_CPU_FREE(thread->th.th_affin_mask); + thread->th.th_affin_mask = NULL; + }; // if #endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_reap_team( thread->th.th_serial_team ); - thread->th.th_serial_team = NULL; - __kmp_free( thread ); + __kmp_reap_team(thread->th.th_serial_team); + thread->th.th_serial_team = NULL; + __kmp_free(thread); - KMP_MB(); + KMP_MB(); } // __kmp_reap_thread -static void -__kmp_internal_end(void) -{ - int i; +static void __kmp_internal_end(void) { + int i; + + /* First, unregister the library */ + __kmp_unregister_library(); + +#if KMP_OS_WINDOWS + /* In Win static library, we can't tell when a root actually dies, so we + reclaim the data structures for any root threads that have died but not + unregistered themselves, in order to shut down cleanly. + In Win dynamic library we also can't tell when a thread dies. */ + __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of +// dead roots +#endif + + for (i = 0; i < __kmp_threads_capacity; i++) + if (__kmp_root[i]) + if (__kmp_root[i]->r.r_active) + break; + KMP_MB(); /* Flush all pending memory write invalidates. */ + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - /* First, unregister the library */ - __kmp_unregister_library(); - - #if KMP_OS_WINDOWS - /* In Win static library, we can't tell when a root actually dies, so we - reclaim the data structures for any root threads that have died but not - unregistered themselves, in order to shut down cleanly. 
- In Win dynamic library we also can't tell when a thread dies. - */ - __kmp_reclaim_dead_roots(); // AC: moved here to always clean resources of dead roots - #endif - - for( i=0 ; i<__kmp_threads_capacity ; i++ ) - if( __kmp_root[i] ) - if( __kmp_root[i]->r.r_active ) - break; - KMP_MB(); /* Flush all pending memory write invalidates. */ - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - - if ( i < __kmp_threads_capacity ) { + if (i < __kmp_threads_capacity) { #if KMP_USE_MONITOR - // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? - KMP_MB(); /* Flush all pending memory write invalidates. */ - - // - // Need to check that monitor was initialized before reaping it. - // If we are called form __kmp_atfork_child (which sets - // __kmp_init_parallel = 0), then __kmp_monitor will appear to - // contain valid data, but it is only valid in the parent process, - // not the child. - // - // New behavior (201008): instead of keying off of the flag - // __kmp_init_parallel, the monitor thread creation is keyed off - // of the new flag __kmp_init_monitor. - // - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( TCR_4( __kmp_init_monitor ) ) { - __kmp_reap_monitor( & __kmp_monitor ); - TCW_4( __kmp_init_monitor, 0 ); - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); + // 2009-09-08 (lev): Other alive roots found. Why do we kill the monitor?? + KMP_MB(); /* Flush all pending memory write invalidates. */ + +// Need to check that monitor was initialized before reaping it. If we are +// called form __kmp_atfork_child (which sets __kmp_init_parallel = 0), then +// __kmp_monitor will appear to contain valid data, but it is only valid in the +// parent process, not the child. + // New behavior (201008): instead of keying off of the flag + // __kmp_init_parallel, the monitor thread creation is keyed off + // of the new flag __kmp_init_monitor. + __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); + if (TCR_4(__kmp_init_monitor)) { + __kmp_reap_monitor(&__kmp_monitor); + TCW_4(__kmp_init_monitor, 0); + } + __kmp_release_bootstrap_lock(&__kmp_monitor_lock); + KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); #endif // KMP_USE_MONITOR - } else { - /* TODO move this to cleanup code */ - #ifdef KMP_DEBUG - /* make sure that everything has properly ended */ - for ( i = 0; i < __kmp_threads_capacity; i++ ) { - if( __kmp_root[i] ) { -// KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: there can be uber threads alive here - KMP_ASSERT( ! __kmp_root[i]->r.r_active ); // TODO: can they be active? - } - } - #endif + } else { +/* TODO move this to cleanup code */ +#ifdef KMP_DEBUG + /* make sure that everything has properly ended */ + for (i = 0; i < __kmp_threads_capacity; i++) { + if (__kmp_root[i]) { + // KMP_ASSERT( ! KMP_UBER_GTID( i ) ); // AC: + // there can be uber threads alive here + KMP_ASSERT(!__kmp_root[i]->r.r_active); // TODO: can they be active? + } + } +#endif - KMP_MB(); + KMP_MB(); - // Reap the worker threads. - // This is valid for now, but be careful if threads are reaped sooner. - while ( __kmp_thread_pool != NULL ) { // Loop thru all the thread in the pool. - // Get the next thread from the pool. - kmp_info_t * thread = (kmp_info_t *) __kmp_thread_pool; - __kmp_thread_pool = thread->th.th_next_pool; - // Reap it. 
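/* The shutdown path here drains each intrusive free list by detaching the
   head before destroying it, so the list stays consistent at every step.  A
   minimal sketch of that drain pattern, reusing the hypothetical pool_node_t
   from the insertion sketch further up: */
static void pool_drain(pool_node_t **head, void (*destroy)(pool_node_t *)) {
  while (*head != NULL) {
    pool_node_t *n = *head; // detach the current head first ...
    *head = n->next;
    n->next = NULL;
    destroy(n);             // ... then reap/free it
  }
}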
- KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); - thread->th.th_next_pool = NULL; - thread->th.th_in_pool = FALSE; - __kmp_reap_thread( thread, 0 ); - }; // while - __kmp_thread_pool_insert_pt = NULL; - - // Reap teams. - while ( __kmp_team_pool != NULL ) { // Loop thru all the teams in the pool. - // Get the next team from the pool. - kmp_team_t * team = (kmp_team_t *) __kmp_team_pool; - __kmp_team_pool = team->t.t_next_pool; - // Reap it. - team->t.t_next_pool = NULL; - __kmp_reap_team( team ); - }; // while - - __kmp_reap_task_teams( ); - - for ( i = 0; i < __kmp_threads_capacity; ++ i ) { - // TBD: Add some checking... - // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); - } + // Reap the worker threads. + // This is valid for now, but be careful if threads are reaped sooner. + while (__kmp_thread_pool != NULL) { // Loop thru all the thread in the pool. + // Get the next thread from the pool. + kmp_info_t *thread = (kmp_info_t *)__kmp_thread_pool; + __kmp_thread_pool = thread->th.th_next_pool; + // Reap it. + KMP_DEBUG_ASSERT(thread->th.th_reap_state == KMP_SAFE_TO_REAP); + thread->th.th_next_pool = NULL; + thread->th.th_in_pool = FALSE; + __kmp_reap_thread(thread, 0); + }; // while + __kmp_thread_pool_insert_pt = NULL; + + // Reap teams. + while (__kmp_team_pool != NULL) { // Loop thru all the teams in the pool. + // Get the next team from the pool. + kmp_team_t *team = (kmp_team_t *)__kmp_team_pool; + __kmp_team_pool = team->t.t_next_pool; + // Reap it. + team->t.t_next_pool = NULL; + __kmp_reap_team(team); + }; // while - /* Make sure all threadprivate destructors get run by joining with all worker - threads before resetting this flag */ - TCW_SYNC_4(__kmp_init_common, FALSE); + __kmp_reap_task_teams(); - KA_TRACE( 10, ("__kmp_internal_end: all workers reaped\n" ) ); - KMP_MB(); + for (i = 0; i < __kmp_threads_capacity; ++i) { + // TBD: Add some checking... + // Something like KMP_DEBUG_ASSERT( __kmp_thread[ i ] == NULL ); + } + + /* Make sure all threadprivate destructors get run by joining with all + worker threads before resetting this flag */ + TCW_SYNC_4(__kmp_init_common, FALSE); + + KA_TRACE(10, ("__kmp_internal_end: all workers reaped\n")); + KMP_MB(); #if KMP_USE_MONITOR - // - // See note above: One of the possible fixes for CQ138434 / CQ140126 - // - // FIXME: push both code fragments down and CSE them? - // push them into __kmp_cleanup() ? - // - __kmp_acquire_bootstrap_lock( & __kmp_monitor_lock ); - if ( TCR_4( __kmp_init_monitor ) ) { - __kmp_reap_monitor( & __kmp_monitor ); - TCW_4( __kmp_init_monitor, 0 ); - } - __kmp_release_bootstrap_lock( & __kmp_monitor_lock ); - KA_TRACE( 10, ("__kmp_internal_end: monitor reaped\n" ) ); + // See note above: One of the possible fixes for CQ138434 / CQ140126 + // + // FIXME: push both code fragments down and CSE them? + // push them into __kmp_cleanup() ? + __kmp_acquire_bootstrap_lock(&__kmp_monitor_lock); + if (TCR_4(__kmp_init_monitor)) { + __kmp_reap_monitor(&__kmp_monitor); + TCW_4(__kmp_init_monitor, 0); + } + __kmp_release_bootstrap_lock(&__kmp_monitor_lock); + KA_TRACE(10, ("__kmp_internal_end: monitor reaped\n")); #endif - } /* else !__kmp_global.t_active */ - TCW_4(__kmp_init_gtid, FALSE); - KMP_MB(); /* Flush all pending memory write invalidates. */ + } /* else !__kmp_global.t_active */ + TCW_4(__kmp_init_gtid, FALSE); + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - __kmp_cleanup(); + __kmp_cleanup(); #if OMPT_SUPPORT - ompt_fini(); + ompt_fini(); #endif } -void -__kmp_internal_end_library( int gtid_req ) -{ - /* if we have already cleaned up, don't try again, it wouldn't be pretty */ - /* this shouldn't be a race condition because __kmp_internal_end() is the - * only place to clear __kmp_serial_init */ - /* we'll check this later too, after we get the lock */ - // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundaant, - // because the next check will work in any case. - if( __kmp_global.g.g_abort ) { - KA_TRACE( 11, ("__kmp_internal_end_library: abort, exiting\n" )); - /* TODO abort? */ - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KA_TRACE( 10, ("__kmp_internal_end_library: already finished\n" )); +void __kmp_internal_end_library(int gtid_req) { + /* if we have already cleaned up, don't try again, it wouldn't be pretty */ + /* this shouldn't be a race condition because __kmp_internal_end() is the + only place to clear __kmp_serial_init */ + /* we'll check this later too, after we get the lock */ + // 2009-09-06: We do not set g_abort without setting g_done. This check looks + // redundaant, because the next check will work in any case. + if (__kmp_global.g.g_abort) { + KA_TRACE(11, ("__kmp_internal_end_library: abort, exiting\n")); + /* TODO abort? */ + return; + } + if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { + KA_TRACE(10, ("__kmp_internal_end_library: already finished\n")); + return; + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* find out who we are and what we should do */ + { + int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific(); + KA_TRACE( + 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req)); + if (gtid == KMP_GTID_SHUTDOWN) { + KA_TRACE(10, ("__kmp_internal_end_library: !__kmp_init_runtime, system " + "already shutdown\n")); + return; + } else if (gtid == KMP_GTID_MONITOR) { + KA_TRACE(10, ("__kmp_internal_end_library: monitor thread, gtid not " + "registered, or system shutdown\n")); + return; + } else if (gtid == KMP_GTID_DNE) { + KA_TRACE(10, ("__kmp_internal_end_library: gtid not registered or system " + "shutdown\n")); + /* we don't know who we are, but we may still shutdown the library */ + } else if (KMP_UBER_GTID(gtid)) { + /* unregister ourselves as an uber thread. gtid is no longer valid */ + if (__kmp_root[gtid]->r.r_active) { + __kmp_global.g.g_abort = -1; + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); + KA_TRACE(10, + ("__kmp_internal_end_library: root still active, abort T#%d\n", + gtid)); return; + } else { + KA_TRACE( + 10, + ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid)); + __kmp_unregister_root_current_thread(gtid); + } + } else { +/* worker threads may call this function through the atexit handler, if they + * call exit() */ +/* For now, skip the usual subsequent processing and just dump the debug buffer. + TODO: do a thorough shutdown instead */ +#ifdef DUMP_DEBUG_ON_EXIT + if (__kmp_debug_buf) + __kmp_dump_debug_buffer(); +#endif + return; } + } + /* synchronize the termination process */ + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); + /* have we already finished */ + if (__kmp_global.g.g_abort) { + KA_TRACE(10, ("__kmp_internal_end_library: abort, exiting\n")); + /* TODO abort? 
*/ + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } + if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* find out who we are and what we should do */ - { - int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); - KA_TRACE( 10, ("__kmp_internal_end_library: enter T#%d (%d)\n", gtid, gtid_req )); - if( gtid == KMP_GTID_SHUTDOWN ) { - KA_TRACE( 10, ("__kmp_internal_end_library: !__kmp_init_runtime, system already shutdown\n" )); - return; - } else if( gtid == KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_internal_end_library: monitor thread, gtid not registered, or system shutdown\n" )); - return; - } else if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ("__kmp_internal_end_library: gtid not registered or system shutdown\n" )); - /* we don't know who we are, but we may still shutdown the library */ - } else if( KMP_UBER_GTID( gtid )) { - /* unregister ourselves as an uber thread. gtid is no longer valid */ - if( __kmp_root[gtid]->r.r_active ) { - __kmp_global.g.g_abort = -1; - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - KA_TRACE( 10, ("__kmp_internal_end_library: root still active, abort T#%d\n", gtid )); - return; - } else { - KA_TRACE( 10, ("__kmp_internal_end_library: unregistering sibling T#%d\n", gtid )); - __kmp_unregister_root_current_thread( gtid ); - } - } else { - /* worker threads may call this function through the atexit handler, if they call exit() */ - /* For now, skip the usual subsequent processing and just dump the debug buffer. - TODO: do a thorough shutdown instead - */ - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer( ); - #endif - return; - } - } - /* synchronize the termination process */ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - /* have we already finished */ - if( __kmp_global.g.g_abort ) { - KA_TRACE( 10, ("__kmp_internal_end_library: abort, exiting\n" )); - /* TODO abort? */ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - - /* We need this lock to enforce mutex between this reading of - __kmp_threads_capacity and the writing by __kmp_register_root. - Alternatively, we can use a counter of roots that is - atomically updated by __kmp_get_global_thread_id_reg, - __kmp_do_serial_initialize and __kmp_internal_end_*. - */ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); + /* We need this lock to enforce mutex between this reading of + __kmp_threads_capacity and the writing by __kmp_register_root. + Alternatively, we can use a counter of roots that is atomically updated by + __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and + __kmp_internal_end_*. 
*/ + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); - /* now we can safely conduct the actual termination */ - __kmp_internal_end(); + /* now we can safely conduct the actual termination */ + __kmp_internal_end(); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); - KA_TRACE( 10, ("__kmp_internal_end_library: exit\n" ) ); + KA_TRACE(10, ("__kmp_internal_end_library: exit\n")); - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer(); - #endif +#ifdef DUMP_DEBUG_ON_EXIT + if (__kmp_debug_buf) + __kmp_dump_debug_buffer(); +#endif - #if KMP_OS_WINDOWS - __kmp_close_console(); - #endif +#if KMP_OS_WINDOWS + __kmp_close_console(); +#endif - __kmp_fini_allocator(); + __kmp_fini_allocator(); } // __kmp_internal_end_library -void -__kmp_internal_end_thread( int gtid_req ) -{ - int i; - - /* if we have already cleaned up, don't try again, it wouldn't be pretty */ - /* this shouldn't be a race condition because __kmp_internal_end() is the - * only place to clear __kmp_serial_init */ - /* we'll check this later too, after we get the lock */ - // 2009-09-06: We do not set g_abort without setting g_done. This check looks redundant, - // because the next check will work in any case. - if( __kmp_global.g.g_abort ) { - KA_TRACE( 11, ("__kmp_internal_end_thread: abort, exiting\n" )); - /* TODO abort? */ - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: already finished\n" )); - return; - } - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* find out who we are and what we should do */ - { - int gtid = (gtid_req>=0) ? gtid_req : __kmp_gtid_get_specific(); - KA_TRACE( 10, ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req )); - if( gtid == KMP_GTID_SHUTDOWN ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system already shutdown\n" )); - return; - } else if( gtid == KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: monitor thread, gtid not registered, or system shutdown\n" )); - return; - } else if( gtid == KMP_GTID_DNE ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: gtid not registered or system shutdown\n" )); - return; - /* we don't know who we are */ - } else if( KMP_UBER_GTID( gtid )) { - /* unregister ourselves as an uber thread. gtid is no longer valid */ - if( __kmp_root[gtid]->r.r_active ) { - __kmp_global.g.g_abort = -1; - TCW_SYNC_4(__kmp_global.g.g_done, TRUE); - KA_TRACE( 10, ("__kmp_internal_end_thread: root still active, abort T#%d\n", gtid )); - return; - } else { - KA_TRACE( 10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", gtid )); - __kmp_unregister_root_current_thread( gtid ); - } - } else { - /* just a worker thread, let's leave */ - KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid )); - - if ( gtid >= 0 ) { - __kmp_threads[gtid]->th.th_task_team = NULL; - } - - KA_TRACE( 10, ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", gtid )); - return; - } - } - #if defined KMP_DYNAMIC_LIB - // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber thread, - // because we will better shutdown later in the library destructor. - // The reason of this change is performance problem when non-openmp thread - // in a loop forks and joins many openmp threads. 
We can save a lot of time - // keeping worker threads alive until the program shutdown. - // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) and - // Windows(DPD200287443) that occurs when using critical sections from foreign threads. - KA_TRACE( 10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req) ); - return; - #endif - /* synchronize the termination process */ - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - - /* have we already finished */ - if( __kmp_global.g.g_abort ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: abort, exiting\n" )); - /* TODO abort? */ - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - if( TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); +void __kmp_internal_end_thread(int gtid_req) { + int i; + + /* if we have already cleaned up, don't try again, it wouldn't be pretty */ + /* this shouldn't be a race condition because __kmp_internal_end() is the + * only place to clear __kmp_serial_init */ + /* we'll check this later too, after we get the lock */ + // 2009-09-06: We do not set g_abort without setting g_done. This check looks + // redundant, because the next check will work in any case. + if (__kmp_global.g.g_abort) { + KA_TRACE(11, ("__kmp_internal_end_thread: abort, exiting\n")); + /* TODO abort? */ + return; + } + if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { + KA_TRACE(10, ("__kmp_internal_end_thread: already finished\n")); + return; + } + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + /* find out who we are and what we should do */ + { + int gtid = (gtid_req >= 0) ? gtid_req : __kmp_gtid_get_specific(); + KA_TRACE(10, + ("__kmp_internal_end_thread: enter T#%d (%d)\n", gtid, gtid_req)); + if (gtid == KMP_GTID_SHUTDOWN) { + KA_TRACE(10, ("__kmp_internal_end_thread: !__kmp_init_runtime, system " + "already shutdown\n")); + return; + } else if (gtid == KMP_GTID_MONITOR) { + KA_TRACE(10, ("__kmp_internal_end_thread: monitor thread, gtid not " + "registered, or system shutdown\n")); + return; + } else if (gtid == KMP_GTID_DNE) { + KA_TRACE(10, ("__kmp_internal_end_thread: gtid not registered or system " + "shutdown\n")); + return; + /* we don't know who we are */ + } else if (KMP_UBER_GTID(gtid)) { + /* unregister ourselves as an uber thread. gtid is no longer valid */ + if (__kmp_root[gtid]->r.r_active) { + __kmp_global.g.g_abort = -1; + TCW_SYNC_4(__kmp_global.g.g_done, TRUE); + KA_TRACE(10, + ("__kmp_internal_end_thread: root still active, abort T#%d\n", + gtid)); return; - } - - /* We need this lock to enforce mutex between this reading of - __kmp_threads_capacity and the writing by __kmp_register_root. - Alternatively, we can use a counter of roots that is - atomically updated by __kmp_get_global_thread_id_reg, - __kmp_do_serial_initialize and __kmp_internal_end_*. - */ - - /* should we finish the run-time? are all siblings done? 
*/ - __kmp_acquire_bootstrap_lock( &__kmp_forkjoin_lock ); - - for ( i = 0; i < __kmp_threads_capacity; ++ i ) { - if ( KMP_UBER_GTID( i ) ) { - KA_TRACE( 10, ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i )); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - }; - } + } else { + KA_TRACE(10, ("__kmp_internal_end_thread: unregistering sibling T#%d\n", + gtid)); + __kmp_unregister_root_current_thread(gtid); + } + } else { + /* just a worker thread, let's leave */ + KA_TRACE(10, ("__kmp_internal_end_thread: worker thread T#%d\n", gtid)); + + if (gtid >= 0) { + __kmp_threads[gtid]->th.th_task_team = NULL; + } + + KA_TRACE(10, + ("__kmp_internal_end_thread: worker thread done, exiting T#%d\n", + gtid)); + return; + } + } +#if defined KMP_DYNAMIC_LIB + // AC: lets not shutdown the Linux* OS dynamic library at the exit of uber + // thread, because we will better shutdown later in the library destructor. + // The reason of this change is performance problem when non-openmp thread in + // a loop forks and joins many openmp threads. We can save a lot of time + // keeping worker threads alive until the program shutdown. + // OM: Removed Linux* OS restriction to fix the crash on OS X* (DPD200239966) + // and Windows(DPD200287443) that occurs when using critical sections from + // foreign threads. + KA_TRACE(10, ("__kmp_internal_end_thread: exiting T#%d\n", gtid_req)); + return; +#endif + /* synchronize the termination process */ + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); + + /* have we already finished */ + if (__kmp_global.g.g_abort) { + KA_TRACE(10, ("__kmp_internal_end_thread: abort, exiting\n")); + /* TODO abort? */ + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } + if (TCR_4(__kmp_global.g.g_done) || !__kmp_init_serial) { + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } + + /* We need this lock to enforce mutex between this reading of + __kmp_threads_capacity and the writing by __kmp_register_root. + Alternatively, we can use a counter of roots that is atomically updated by + __kmp_get_global_thread_id_reg, __kmp_do_serial_initialize and + __kmp_internal_end_*. */ + + /* should we finish the run-time? are all siblings done? 
*/ + __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock); + + for (i = 0; i < __kmp_threads_capacity; ++i) { + if (KMP_UBER_GTID(i)) { + KA_TRACE( + 10, + ("__kmp_internal_end_thread: remaining sibling task: gtid==%d\n", i)); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + }; + } - /* now we can safely conduct the actual termination */ + /* now we can safely conduct the actual termination */ - __kmp_internal_end(); + __kmp_internal_end(); - __kmp_release_bootstrap_lock( &__kmp_forkjoin_lock ); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); - KA_TRACE( 10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req ) ); + KA_TRACE(10, ("__kmp_internal_end_thread: exit T#%d\n", gtid_req)); - #ifdef DUMP_DEBUG_ON_EXIT - if ( __kmp_debug_buf ) - __kmp_dump_debug_buffer(); - #endif +#ifdef DUMP_DEBUG_ON_EXIT + if (__kmp_debug_buf) + __kmp_dump_debug_buffer(); +#endif } // __kmp_internal_end_thread -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Library registration stuff. -static long __kmp_registration_flag = 0; - // Random value used to indicate library initialization. -static char * __kmp_registration_str = NULL; - // Value to be saved in env var __KMP_REGISTERED_LIB_. - - -static inline -char * -__kmp_reg_status_name() { - /* - On RHEL 3u5 if linked statically, getpid() returns different values in each thread. - If registration and unregistration go in different threads (omp_misc_other_root_exit.cpp test case), - the name of registered_lib_env env var can not be found, because the name will contain different pid. - */ - return __kmp_str_format( "__KMP_REGISTERED_LIB_%d", (int) getpid() ); +static long __kmp_registration_flag = 0; +// Random value used to indicate library initialization. +static char *__kmp_registration_str = NULL; +// Value to be saved in env var __KMP_REGISTERED_LIB_. + +static inline char *__kmp_reg_status_name() { + /* On RHEL 3u5 if linked statically, getpid() returns different values in + each thread. If registration and unregistration go in different threads + (omp_misc_other_root_exit.cpp test case), the name of registered_lib_env + env var can not be found, because the name will contain different pid. */ + return __kmp_str_format("__KMP_REGISTERED_LIB_%d", (int)getpid()); } // __kmp_reg_status_get +void __kmp_register_library_startup(void) { -void -__kmp_register_library_startup( - void -) { - - char * name = __kmp_reg_status_name(); // Name of the environment variable. - int done = 0; - union { - double dtime; - long ltime; - } time; - #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - __kmp_initialize_system_tick(); - #endif - __kmp_read_system_time( & time.dtime ); - __kmp_registration_flag = 0xCAFE0000L | ( time.ltime & 0x0000FFFFL ); - __kmp_registration_str = - __kmp_str_format( - "%p-%lx-%s", - & __kmp_registration_flag, - __kmp_registration_flag, - KMP_LIBRARY_FILE - ); - - KA_TRACE( 50, ( "__kmp_register_library_startup: %s=\"%s\"\n", name, __kmp_registration_str ) ); - - while ( ! done ) { - - char * value = NULL; // Actual value of the environment variable. - - // Set environment variable, but do not overwrite if it is exist. - __kmp_env_set( name, __kmp_registration_str, 0 ); - // Check the variable is written. 
- value = __kmp_env_get( name ); - if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { - - done = 1; // Ok, environment variable set successfully, exit the loop. + char *name = __kmp_reg_status_name(); // Name of the environment variable. + int done = 0; + union { + double dtime; + long ltime; + } time; +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + __kmp_initialize_system_tick(); +#endif + __kmp_read_system_time(&time.dtime); + __kmp_registration_flag = 0xCAFE0000L | (time.ltime & 0x0000FFFFL); + __kmp_registration_str = + __kmp_str_format("%p-%lx-%s", &__kmp_registration_flag, + __kmp_registration_flag, KMP_LIBRARY_FILE); - } else { + KA_TRACE(50, ("__kmp_register_library_startup: %s=\"%s\"\n", name, + __kmp_registration_str)); + + while (!done) { - // Oops. Write failed. Another copy of OpenMP RTL is in memory. - // Check whether it alive or dead. - int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. - char * tail = value; - char * flag_addr_str = NULL; - char * flag_val_str = NULL; - char const * file_name = NULL; - __kmp_str_split( tail, '-', & flag_addr_str, & tail ); - __kmp_str_split( tail, '-', & flag_val_str, & tail ); - file_name = tail; - if ( tail != NULL ) { - long * flag_addr = 0; - long flag_val = 0; - KMP_SSCANF( flag_addr_str, "%p", & flag_addr ); - KMP_SSCANF( flag_val_str, "%lx", & flag_val ); - if ( flag_addr != 0 && flag_val != 0 && strcmp( file_name, "" ) != 0 ) { - // First, check whether environment-encoded address is mapped into addr space. - // If so, dereference it to see if it still has the right value. - - if ( __kmp_is_address_mapped( flag_addr ) && * flag_addr == flag_val ) { - neighbor = 1; - } else { - // If not, then we know the other copy of the library is no longer running. - neighbor = 2; - }; // if - }; // if - }; // if - switch ( neighbor ) { - case 0 : // Cannot parse environment variable -- neighbor status unknown. - // Assume it is the incompatible format of future version of the library. - // Assume the other library is alive. - // WARN( ... ); // TODO: Issue a warning. - file_name = "unknown library"; - // Attention! Falling to the next case. That's intentional. - case 1 : { // Neighbor is alive. - // Check it is allowed. - char * duplicate_ok = __kmp_env_get( "KMP_DUPLICATE_LIB_OK" ); - if ( ! __kmp_str_match_true( duplicate_ok ) ) { - // That's not allowed. Issue fatal error. - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( DuplicateLibrary, KMP_LIBRARY_FILE, file_name ), - KMP_HNT( DuplicateLibrary ), - __kmp_msg_null - ); - }; // if - KMP_INTERNAL_FREE( duplicate_ok ); - __kmp_duplicate_library_ok = 1; - done = 1; // Exit the loop. - } break; - case 2 : { // Neighbor is dead. - // Clear the variable and try to register library again. - __kmp_env_unset( name ); - } break; - default : { - KMP_DEBUG_ASSERT( 0 ); - } break; - }; // switch + char *value = NULL; // Actual value of the environment variable. + + // Set environment variable, but do not overwrite if it is exist. + __kmp_env_set(name, __kmp_registration_str, 0); + // Check the variable is written. + value = __kmp_env_get(name); + if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { + + done = 1; // Ok, environment variable set successfully, exit the loop. + + } else { + // Oops. Write failed. Another copy of OpenMP RTL is in memory. + // Check whether it alive or dead. + int neighbor = 0; // 0 -- unknown status, 1 -- alive, 2 -- dead. 
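/* What the probe below amounts to: the registration value has the form
   "%p-%lx-%s" (address of this copy's flag, the flag's value, library file
   name), and a foreign value counts as "alive" only if that address is still
   mapped in this process and still holds the advertised value.  A compact
   sketch of the decode and probe, where address_is_mapped() is a hypothetical
   stand-in for the platform-specific __kmp_is_address_mapped(): */
#include <stdio.h>
extern int address_is_mapped(void *p); // hypothetical stand-in

static int neighbor_status(const char *value) {
  void *flag_addr = NULL;
  unsigned long flag_val = 0;
  if (sscanf(value, "%p-%lx-", &flag_addr, &flag_val) != 2 ||
      flag_addr == NULL || flag_val == 0)
    return 0; // cannot parse: status unknown, treated as "assume alive"
  if (address_is_mapped(flag_addr) &&
      *(volatile long *)flag_addr == (long)flag_val)
    return 1; // another copy of the runtime is live in this address space
  return 2;   // stale registration left behind by a dead process
}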
+ char *tail = value; + char *flag_addr_str = NULL; + char *flag_val_str = NULL; + char const *file_name = NULL; + __kmp_str_split(tail, '-', &flag_addr_str, &tail); + __kmp_str_split(tail, '-', &flag_val_str, &tail); + file_name = tail; + if (tail != NULL) { + long *flag_addr = 0; + long flag_val = 0; + KMP_SSCANF(flag_addr_str, "%p", &flag_addr); + KMP_SSCANF(flag_val_str, "%lx", &flag_val); + if (flag_addr != 0 && flag_val != 0 && strcmp(file_name, "") != 0) { + // First, check whether environment-encoded address is mapped into + // addr space. + // If so, dereference it to see if it still has the right value. + if (__kmp_is_address_mapped(flag_addr) && *flag_addr == flag_val) { + neighbor = 1; + } else { + // If not, then we know the other copy of the library is no longer + // running. + neighbor = 2; + }; // if }; // if - KMP_INTERNAL_FREE( (void *) value ); + }; // if + switch (neighbor) { + case 0: // Cannot parse environment variable -- neighbor status unknown. + // Assume it is the incompatible format of future version of the + // library. Assume the other library is alive. + // WARN( ... ); // TODO: Issue a warning. + file_name = "unknown library"; + // Attention! Falling to the next case. That's intentional. + case 1: { // Neighbor is alive. + // Check it is allowed. + char *duplicate_ok = __kmp_env_get("KMP_DUPLICATE_LIB_OK"); + if (!__kmp_str_match_true(duplicate_ok)) { + // That's not allowed. Issue fatal error. + __kmp_msg(kmp_ms_fatal, + KMP_MSG(DuplicateLibrary, KMP_LIBRARY_FILE, file_name), + KMP_HNT(DuplicateLibrary), __kmp_msg_null); + }; // if + KMP_INTERNAL_FREE(duplicate_ok); + __kmp_duplicate_library_ok = 1; + done = 1; // Exit the loop. + } break; + case 2: { // Neighbor is dead. + // Clear the variable and try to register library again. + __kmp_env_unset(name); + } break; + default: { KMP_DEBUG_ASSERT(0); } break; + }; // switch - }; // while - KMP_INTERNAL_FREE( (void *) name ); + }; // if + KMP_INTERNAL_FREE((void *)value); -} // func __kmp_register_library_startup + }; // while + KMP_INTERNAL_FREE((void *)name); +} // func __kmp_register_library_startup -void -__kmp_unregister_library( void ) { +void __kmp_unregister_library(void) { - char * name = __kmp_reg_status_name(); - char * value = __kmp_env_get( name ); + char *name = __kmp_reg_status_name(); + char *value = __kmp_env_get(name); - KMP_DEBUG_ASSERT( __kmp_registration_flag != 0 ); - KMP_DEBUG_ASSERT( __kmp_registration_str != NULL ); - if ( value != NULL && strcmp( value, __kmp_registration_str ) == 0 ) { - // Ok, this is our variable. Delete it. - __kmp_env_unset( name ); - }; // if + KMP_DEBUG_ASSERT(__kmp_registration_flag != 0); + KMP_DEBUG_ASSERT(__kmp_registration_str != NULL); + if (value != NULL && strcmp(value, __kmp_registration_str) == 0) { + // Ok, this is our variable. Delete it. + __kmp_env_unset(name); + }; // if - KMP_INTERNAL_FREE( __kmp_registration_str ); - KMP_INTERNAL_FREE( value ); - KMP_INTERNAL_FREE( name ); + KMP_INTERNAL_FREE(__kmp_registration_str); + KMP_INTERNAL_FREE(value); + KMP_INTERNAL_FREE(name); - __kmp_registration_flag = 0; - __kmp_registration_str = NULL; + __kmp_registration_flag = 0; + __kmp_registration_str = NULL; } // __kmp_unregister_library - // End of Library registration stuff. 
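/* Taken together, registration is a small environment-variable handshake:
   each copy of the runtime publishes "<flag address>-<flag value>-<library
   file>" under a pid-suffixed name, refuses to overwrite an existing value,
   and on shutdown removes the variable only if it still holds its own string,
   so it never clobbers a sibling copy's registration.  A simplified,
   POSIX-only model of that flow, with setenv/getenv/unsetenv standing in for
   __kmp_env_set/__kmp_env_get/__kmp_env_unset: */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

static long my_flag;
static char my_value[128], var_name[64];

static int try_register(const char *libfile) {
  snprintf(var_name, sizeof(var_name), "__KMP_REGISTERED_LIB_%d",
           (int)getpid());
  // The runtime mixes low bits of the system time into the flag; any
  // per-copy value works for this sketch.
  my_flag = 0xCAFE0000L | (long)(getpid() & 0xFFFF);
  snprintf(my_value, sizeof(my_value), "%p-%lx-%s", (void *)&my_flag,
           (unsigned long)my_flag, libfile);
  setenv(var_name, my_value, 0 /* do not overwrite */);
  const char *now = getenv(var_name);
  return now != NULL && strcmp(now, my_value) == 0; // did this copy win?
}

static void unregister(void) {
  const char *now = getenv(var_name);
  if (now != NULL && strcmp(now, my_value) == 0) // still our registration?
    unsetenv(var_name);
}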
-// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) -static void __kmp_check_mic_type() -{ - kmp_cpuid_t cpuid_state = {0}; - kmp_cpuid_t * cs_p = &cpuid_state; - __kmp_x86_cpuid(1, 0, cs_p); - // We don't support mic1 at the moment - if( (cs_p->eax & 0xff0) == 0xB10 ) { - __kmp_mic_type = mic2; - } else if( (cs_p->eax & 0xf0ff0) == 0x50670 ) { - __kmp_mic_type = mic3; - } else { - __kmp_mic_type = non_mic; - } +static void __kmp_check_mic_type() { + kmp_cpuid_t cpuid_state = {0}; + kmp_cpuid_t *cs_p = &cpuid_state; + __kmp_x86_cpuid(1, 0, cs_p); + // We don't support mic1 at the moment + if ((cs_p->eax & 0xff0) == 0xB10) { + __kmp_mic_type = mic2; + } else if ((cs_p->eax & 0xf0ff0) == 0x50670) { + __kmp_mic_type = mic3; + } else { + __kmp_mic_type = non_mic; + } } #endif /* KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) */ -static void -__kmp_do_serial_initialize( void ) -{ - int i, gtid; - int size; +static void __kmp_do_serial_initialize(void) { + int i, gtid; + int size; - KA_TRACE( 10, ("__kmp_do_serial_initialize: enter\n" ) ); + KA_TRACE(10, ("__kmp_do_serial_initialize: enter\n")); - KMP_DEBUG_ASSERT( sizeof( kmp_int32 ) == 4 ); - KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == 4 ); - KMP_DEBUG_ASSERT( sizeof( kmp_int64 ) == 8 ); - KMP_DEBUG_ASSERT( sizeof( kmp_uint64 ) == 8 ); - KMP_DEBUG_ASSERT( sizeof( kmp_intptr_t ) == sizeof( void * ) ); + KMP_DEBUG_ASSERT(sizeof(kmp_int32) == 4); + KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == 4); + KMP_DEBUG_ASSERT(sizeof(kmp_int64) == 8); + KMP_DEBUG_ASSERT(sizeof(kmp_uint64) == 8); + KMP_DEBUG_ASSERT(sizeof(kmp_intptr_t) == sizeof(void *)); #if OMPT_SUPPORT - ompt_pre_init(); + ompt_pre_init(); #endif - __kmp_validate_locks(); + __kmp_validate_locks(); - /* Initialize internal memory allocator */ - __kmp_init_allocator(); + /* Initialize internal memory allocator */ + __kmp_init_allocator(); - /* Register the library startup via an environment variable - and check to see whether another copy of the library is already - registered. */ + /* Register the library startup via an environment variable and check to see + whether another copy of the library is already registered. 
*/ - __kmp_register_library_startup( ); + __kmp_register_library_startup(); - /* TODO reinitialization of library */ - if( TCR_4(__kmp_global.g.g_done) ) { - KA_TRACE( 10, ("__kmp_do_serial_initialize: reinitialization of library\n" ) ); - } + /* TODO reinitialization of library */ + if (TCR_4(__kmp_global.g.g_done)) { + KA_TRACE(10, ("__kmp_do_serial_initialize: reinitialization of library\n")); + } - __kmp_global.g.g_abort = 0; - TCW_SYNC_4(__kmp_global.g.g_done, FALSE); + __kmp_global.g.g_abort = 0; + TCW_SYNC_4(__kmp_global.g.g_done, FALSE); - /* initialize the locks */ +/* initialize the locks */ #if KMP_USE_ADAPTIVE_LOCKS #if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_init_speculative_stats(); + __kmp_init_speculative_stats(); #endif #endif #if KMP_STATS_ENABLED - __kmp_stats_init(); -#endif - __kmp_init_lock( & __kmp_global_lock ); - __kmp_init_queuing_lock( & __kmp_dispatch_lock ); - __kmp_init_lock( & __kmp_debug_lock ); - __kmp_init_atomic_lock( & __kmp_atomic_lock ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_1i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_2i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_4i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_4r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8i ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_8c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_10r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_16r ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_16c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_20c ); - __kmp_init_atomic_lock( & __kmp_atomic_lock_32c ); - __kmp_init_bootstrap_lock( & __kmp_forkjoin_lock ); - __kmp_init_bootstrap_lock( & __kmp_exit_lock ); + __kmp_stats_init(); +#endif + __kmp_init_lock(&__kmp_global_lock); + __kmp_init_queuing_lock(&__kmp_dispatch_lock); + __kmp_init_lock(&__kmp_debug_lock); + __kmp_init_atomic_lock(&__kmp_atomic_lock); + __kmp_init_atomic_lock(&__kmp_atomic_lock_1i); + __kmp_init_atomic_lock(&__kmp_atomic_lock_2i); + __kmp_init_atomic_lock(&__kmp_atomic_lock_4i); + __kmp_init_atomic_lock(&__kmp_atomic_lock_4r); + __kmp_init_atomic_lock(&__kmp_atomic_lock_8i); + __kmp_init_atomic_lock(&__kmp_atomic_lock_8r); + __kmp_init_atomic_lock(&__kmp_atomic_lock_8c); + __kmp_init_atomic_lock(&__kmp_atomic_lock_10r); + __kmp_init_atomic_lock(&__kmp_atomic_lock_16r); + __kmp_init_atomic_lock(&__kmp_atomic_lock_16c); + __kmp_init_atomic_lock(&__kmp_atomic_lock_20c); + __kmp_init_atomic_lock(&__kmp_atomic_lock_32c); + __kmp_init_bootstrap_lock(&__kmp_forkjoin_lock); + __kmp_init_bootstrap_lock(&__kmp_exit_lock); #if KMP_USE_MONITOR - __kmp_init_bootstrap_lock( & __kmp_monitor_lock ); + __kmp_init_bootstrap_lock(&__kmp_monitor_lock); #endif - __kmp_init_bootstrap_lock( & __kmp_tp_cached_lock ); + __kmp_init_bootstrap_lock(&__kmp_tp_cached_lock); - /* conduct initialization and initial setup of configuration */ + /* conduct initialization and initial setup of configuration */ - __kmp_runtime_initialize(); + __kmp_runtime_initialize(); #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - __kmp_check_mic_type(); + __kmp_check_mic_type(); #endif - // Some global variable initialization moved here from kmp_env_initialize() +// Some global variable initialization moved here from kmp_env_initialize() #ifdef KMP_DEBUG - kmp_diag = 0; -#endif - __kmp_abort_delay = 0; - - // From __kmp_init_dflt_team_nth() - /* assume the entire machine will be used */ - __kmp_dflt_team_nth_ub = __kmp_xproc; - if( __kmp_dflt_team_nth_ub < KMP_MIN_NTH ) { - 
__kmp_dflt_team_nth_ub = KMP_MIN_NTH; - } - if( __kmp_dflt_team_nth_ub > __kmp_sys_max_nth ) { - __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; - } - __kmp_max_nth = __kmp_sys_max_nth; - - // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" part - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; + kmp_diag = 0; +#endif + __kmp_abort_delay = 0; + + // From __kmp_init_dflt_team_nth() + /* assume the entire machine will be used */ + __kmp_dflt_team_nth_ub = __kmp_xproc; + if (__kmp_dflt_team_nth_ub < KMP_MIN_NTH) { + __kmp_dflt_team_nth_ub = KMP_MIN_NTH; + } + if (__kmp_dflt_team_nth_ub > __kmp_sys_max_nth) { + __kmp_dflt_team_nth_ub = __kmp_sys_max_nth; + } + __kmp_max_nth = __kmp_sys_max_nth; + + // Three vars below moved here from __kmp_env_initialize() "KMP_BLOCKTIME" + // part + __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; #if KMP_USE_MONITOR - __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); -#endif - // From "KMP_LIBRARY" part of __kmp_env_initialize() - __kmp_library = library_throughput; - // From KMP_SCHEDULE initialization - __kmp_static = kmp_sch_static_balanced; - // AC: do not use analytical here, because it is non-monotonous - //__kmp_guided = kmp_sch_guided_iterative_chunked; - //__kmp_auto = kmp_sch_guided_analytical_chunked; // AC: it is the default, no need to repeate assignment - // Barrier initialization. Moved here from __kmp_env_initialize() Barrier branch bit control and barrier method - // control parts - #if KMP_FAST_REDUCTION_BARRIER - #define kmp_reduction_barrier_gather_bb ((int)1) - #define kmp_reduction_barrier_release_bb ((int)1) - #define kmp_reduction_barrier_gather_pat bp_hyper_bar - #define kmp_reduction_barrier_release_pat bp_hyper_bar - #endif // KMP_FAST_REDUCTION_BARRIER - for ( i=bs_plain_barrier; i 0 ); - if ( __kmp_avail_proc == 0 ) { - __kmp_avail_proc = __kmp_xproc; - } - - // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), correct them now - j = 0; - while ( ( j < __kmp_nested_nth.used ) && ! 
__kmp_nested_nth.nth[ j ] ) { - __kmp_nested_nth.nth[ j ] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_avail_proc; - j++; - } - - if ( __kmp_dflt_team_nth == 0 ) { + KMP_ASSERT(__kmp_xproc > 0); + if (__kmp_avail_proc == 0) { + __kmp_avail_proc = __kmp_xproc; + } + + // If there were empty places in num_threads list (OMP_NUM_THREADS=,,2,3), + // correct them now + j = 0; + while ((j < __kmp_nested_nth.used) && !__kmp_nested_nth.nth[j]) { + __kmp_nested_nth.nth[j] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = + __kmp_avail_proc; + j++; + } + + if (__kmp_dflt_team_nth == 0) { #ifdef KMP_DFLT_NTH_CORES - // - // Default #threads = #cores - // - __kmp_dflt_team_nth = __kmp_ncores; - KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_ncores (%d)\n", - __kmp_dflt_team_nth ) ); + // Default #threads = #cores + __kmp_dflt_team_nth = __kmp_ncores; + KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " + "__kmp_ncores (%d)\n", + __kmp_dflt_team_nth)); #else - // - // Default #threads = #available OS procs - // - __kmp_dflt_team_nth = __kmp_avail_proc; - KA_TRACE( 20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = __kmp_avail_proc(%d)\n", - __kmp_dflt_team_nth ) ); + // Default #threads = #available OS procs + __kmp_dflt_team_nth = __kmp_avail_proc; + KA_TRACE(20, ("__kmp_middle_initialize: setting __kmp_dflt_team_nth = " + "__kmp_avail_proc(%d)\n", + __kmp_dflt_team_nth)); #endif /* KMP_DFLT_NTH_CORES */ - } - - if ( __kmp_dflt_team_nth < KMP_MIN_NTH ) { - __kmp_dflt_team_nth = KMP_MIN_NTH; - } - if( __kmp_dflt_team_nth > __kmp_sys_max_nth ) { - __kmp_dflt_team_nth = __kmp_sys_max_nth; - } - - // - // There's no harm in continuing if the following check fails, - // but it indicates an error in the previous logic. - // - KMP_DEBUG_ASSERT( __kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub ); - - if ( __kmp_dflt_team_nth != prev_dflt_team_nth ) { - // - // Run through the __kmp_threads array and set the num threads icv - // for each root thread that is currently registered with the RTL - // (which has not already explicitly set its nthreads-var with a - // call to omp_set_num_threads()). - // - for ( i = 0; i < __kmp_threads_capacity; i++ ) { - kmp_info_t *thread = __kmp_threads[ i ]; - if ( thread == NULL ) continue; - if ( thread->th.th_current_task->td_icvs.nproc != 0 ) continue; - - set__nproc( __kmp_threads[ i ], __kmp_dflt_team_nth ); - } - } - KA_TRACE( 20, ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", - __kmp_dflt_team_nth) ); + } + + if (__kmp_dflt_team_nth < KMP_MIN_NTH) { + __kmp_dflt_team_nth = KMP_MIN_NTH; + } + if (__kmp_dflt_team_nth > __kmp_sys_max_nth) { + __kmp_dflt_team_nth = __kmp_sys_max_nth; + } + + // There's no harm in continuing if the following check fails, + // but it indicates an error in the previous logic. + KMP_DEBUG_ASSERT(__kmp_dflt_team_nth <= __kmp_dflt_team_nth_ub); + + if (__kmp_dflt_team_nth != prev_dflt_team_nth) { + // Run through the __kmp_threads array and set the num threads icv for each + // root thread that is currently registered with the RTL (which has not + // already explicitly set its nthreads-var with a call to + // omp_set_num_threads()). 
+ for (i = 0; i < __kmp_threads_capacity; i++) { + kmp_info_t *thread = __kmp_threads[i]; + if (thread == NULL) + continue; + if (thread->th.th_current_task->td_icvs.nproc != 0) + continue; + + set__nproc(__kmp_threads[i], __kmp_dflt_team_nth); + } + } + KA_TRACE( + 20, + ("__kmp_middle_initialize: final value for __kmp_dflt_team_nth = %d\n", + __kmp_dflt_team_nth)); #ifdef KMP_ADJUST_BLOCKTIME - /* Adjust blocktime to zero if necessary */ - /* now that __kmp_avail_proc is set */ - if ( !__kmp_env_blocktime && ( __kmp_avail_proc > 0 ) ) { - KMP_DEBUG_ASSERT( __kmp_avail_proc > 0 ); - if ( __kmp_nth > __kmp_avail_proc ) { - __kmp_zero_bt = TRUE; - } + /* Adjust blocktime to zero if necessary now that __kmp_avail_proc is set */ + if (!__kmp_env_blocktime && (__kmp_avail_proc > 0)) { + KMP_DEBUG_ASSERT(__kmp_avail_proc > 0); + if (__kmp_nth > __kmp_avail_proc) { + __kmp_zero_bt = TRUE; } + } #endif /* KMP_ADJUST_BLOCKTIME */ - /* we have finished middle initialization */ - TCW_SYNC_4(__kmp_init_middle, TRUE); + /* we have finished middle initialization */ + TCW_SYNC_4(__kmp_init_middle, TRUE); - KA_TRACE( 10, ("__kmp_do_middle_initialize: exit\n" ) ); + KA_TRACE(10, ("__kmp_do_middle_initialize: exit\n")); } -void -__kmp_middle_initialize( void ) -{ - if ( __kmp_init_middle ) { - return; - } - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if ( __kmp_init_middle ) { - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); - return; - } - __kmp_do_middle_initialize(); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); +void __kmp_middle_initialize(void) { + if (__kmp_init_middle) { + return; + } + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); + if (__kmp_init_middle) { + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } + __kmp_do_middle_initialize(); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); } -void -__kmp_parallel_initialize( void ) -{ - int gtid = __kmp_entry_gtid(); // this might be a new root - - /* synchronize parallel initialization (for sibling) */ - if( TCR_4(__kmp_init_parallel) ) return; - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); - if( TCR_4(__kmp_init_parallel) ) { __kmp_release_bootstrap_lock( &__kmp_initz_lock ); return; } - - /* TODO reinitialization after we have already shut down */ - if( TCR_4(__kmp_global.g.g_done) ) { - KA_TRACE( 10, ("__kmp_parallel_initialize: attempt to init while shutting down\n" ) ); - __kmp_infinite_loop(); - } +void __kmp_parallel_initialize(void) { + int gtid = __kmp_entry_gtid(); // this might be a new root + + /* synchronize parallel initialization (for sibling) */ + if (TCR_4(__kmp_init_parallel)) + return; + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); + if (TCR_4(__kmp_init_parallel)) { + __kmp_release_bootstrap_lock(&__kmp_initz_lock); + return; + } + + /* TODO reinitialization after we have already shut down */ + if (TCR_4(__kmp_global.g.g_done)) { + KA_TRACE( + 10, + ("__kmp_parallel_initialize: attempt to init while shutting down\n")); + __kmp_infinite_loop(); + } - /* jc: The lock __kmp_initz_lock is already held, so calling __kmp_serial_initialize - would cause a deadlock. So we call __kmp_do_serial_initialize directly. - */ - if( !__kmp_init_middle ) { - __kmp_do_middle_initialize(); - } + /* jc: The lock __kmp_initz_lock is already held, so calling + __kmp_serial_initialize would cause a deadlock. So we call + __kmp_do_serial_initialize directly. 
*/ + if (!__kmp_init_middle) { + __kmp_do_middle_initialize(); + } - /* begin initialization */ - KA_TRACE( 10, ("__kmp_parallel_initialize: enter\n" ) ); - KMP_ASSERT( KMP_UBER_GTID( gtid ) ); + /* begin initialization */ + KA_TRACE(10, ("__kmp_parallel_initialize: enter\n")); + KMP_ASSERT(KMP_UBER_GTID(gtid)); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Save the FP control regs. - // Worker threads will set theirs to these values at thread startup. - // - __kmp_store_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_store_mxcsr( &__kmp_init_mxcsr ); - __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; + // Save the FP control regs. + // Worker threads will set theirs to these values at thread startup. + __kmp_store_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); + __kmp_store_mxcsr(&__kmp_init_mxcsr); + __kmp_init_mxcsr &= KMP_X86_MXCSR_MASK; #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #if KMP_OS_UNIX -# if KMP_HANDLE_SIGNALS - /* must be after __kmp_serial_initialize */ - __kmp_install_signals( TRUE ); -# endif +#if KMP_HANDLE_SIGNALS + /* must be after __kmp_serial_initialize */ + __kmp_install_signals(TRUE); +#endif #endif - __kmp_suspend_initialize(); + __kmp_suspend_initialize(); #if defined(USE_LOAD_BALANCE) - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } + if (__kmp_global.g.g_dynamic_mode == dynamic_default) { + __kmp_global.g.g_dynamic_mode = dynamic_load_balance; + } #else - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } + if (__kmp_global.g.g_dynamic_mode == dynamic_default) { + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + } #endif - if ( __kmp_version ) { - __kmp_print_version_2(); - } + if (__kmp_version) { + __kmp_print_version_2(); + } - /* we have finished parallel initialization */ - TCW_SYNC_4(__kmp_init_parallel, TRUE); + /* we have finished parallel initialization */ + TCW_SYNC_4(__kmp_init_parallel, TRUE); - KMP_MB(); - KA_TRACE( 10, ("__kmp_parallel_initialize: exit\n" ) ); + KMP_MB(); + KA_TRACE(10, ("__kmp_parallel_initialize: exit\n")); - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); } - /* ------------------------------------------------------------------------ */ -void -__kmp_run_before_invoked_task( int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team ) -{ - kmp_disp_t *dispatch; +void __kmp_run_before_invoked_task(int gtid, int tid, kmp_info_t *this_thr, + kmp_team_t *team) { + kmp_disp_t *dispatch; - KMP_MB(); + KMP_MB(); - /* none of the threads have encountered any constructs, yet. */ - this_thr->th.th_local.this_construct = 0; + /* none of the threads have encountered any constructs, yet. 
*/ + this_thr->th.th_local.this_construct = 0; #if KMP_CACHE_MANAGE - KMP_CACHE_PREFETCH( &this_thr->th.th_bar[ bs_forkjoin_barrier ].bb.b_arrived ); + KMP_CACHE_PREFETCH(&this_thr->th.th_bar[bs_forkjoin_barrier].bb.b_arrived); #endif /* KMP_CACHE_MANAGE */ - dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); - KMP_DEBUG_ASSERT( dispatch ); - KMP_DEBUG_ASSERT( team->t.t_dispatch ); - //KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ this_thr->th.th_info.ds.ds_tid ] ); + dispatch = (kmp_disp_t *)TCR_PTR(this_thr->th.th_dispatch); + KMP_DEBUG_ASSERT(dispatch); + KMP_DEBUG_ASSERT(team->t.t_dispatch); + // KMP_DEBUG_ASSERT( this_thr->th.th_dispatch == &team->t.t_dispatch[ + // this_thr->th.th_info.ds.ds_tid ] ); - dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ + dispatch->th_disp_index = 0; /* reset the dispatch buffer counter */ #if OMP_45_ENABLED - dispatch->th_doacross_buf_idx = 0; /* reset the doacross dispatch buffer counter */ + dispatch->th_doacross_buf_idx = + 0; /* reset the doacross dispatch buffer counter */ #endif - if( __kmp_env_consistency_check ) - __kmp_push_parallel( gtid, team->t.t_ident ); + if (__kmp_env_consistency_check) + __kmp_push_parallel(gtid, team->t.t_ident); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ } -void -__kmp_run_after_invoked_task( int gtid, int tid, kmp_info_t *this_thr, - kmp_team_t *team ) -{ - if( __kmp_env_consistency_check ) - __kmp_pop_parallel( gtid, team->t.t_ident ); +void __kmp_run_after_invoked_task(int gtid, int tid, kmp_info_t *this_thr, + kmp_team_t *team) { + if (__kmp_env_consistency_check) + __kmp_pop_parallel(gtid, team->t.t_ident); - __kmp_finish_implicit_task(this_thr); + __kmp_finish_implicit_task(this_thr); } -int -__kmp_invoke_task_func( int gtid ) -{ - int rc; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *team = this_thr->th.th_team; +int __kmp_invoke_task_func(int gtid) { + int rc; + int tid = __kmp_tid_from_gtid(gtid); + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; - __kmp_run_before_invoked_task( gtid, tid, this_thr, team ); + __kmp_run_before_invoked_task(gtid, tid, this_thr, team); #if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_callee_enter( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about entering user's code - } + if (__itt_stack_caller_create_ptr) { + __kmp_itt_stack_callee_enter( + (__itt_caller) + team->t.t_stack_id); // inform ittnotify about entering user's code + } #endif /* USE_ITT_BUILD */ #if INCLUDE_SSC_MARKS - SSC_MARK_INVOKING(); + SSC_MARK_INVOKING(); #endif #if OMPT_SUPPORT - void *dummy; - void **exit_runtime_p; - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid]. 
- ompt_task_info.frame.exit_runtime_frame); - } else { - exit_runtime_p = &dummy; - } + void *dummy; + void **exit_runtime_p; + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; + + if (ompt_enabled) { + exit_runtime_p = &(team->t.t_implicit_task_taskdata[tid] + .ompt_task_info.frame.exit_runtime_frame); + } else { + exit_runtime_p = &dummy; + } #if OMPT_TRACE - my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { - ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)( - my_parallel_id, my_task_id); - } -#endif -#endif - - { - KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); - KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); - rc = __kmp_invoke_microtask( (microtask_t) TCR_SYNC_PTR(team->t.t_pkfn), - gtid, tid, (int) team->t.t_argc, (void **) team->t.t_argv + my_task_id = team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)) { + ompt_callbacks.ompt_callback(ompt_event_implicit_task_begin)(my_parallel_id, + my_task_id); + } +#endif +#endif + + { + KMP_TIME_PARTITIONED_BLOCK(OMP_parallel); + KMP_SET_THREAD_STATE_BLOCK(IMPLICIT_TASK); + rc = + __kmp_invoke_microtask((microtask_t)TCR_SYNC_PTR(team->t.t_pkfn), gtid, + tid, (int)team->t.t_argc, (void **)team->t.t_argv #if OMPT_SUPPORT - , exit_runtime_p + , + exit_runtime_p #endif - ); + ); #if OMPT_SUPPORT - *exit_runtime_p = NULL; + *exit_runtime_p = NULL; #endif - } + } #if USE_ITT_BUILD - if ( __itt_stack_caller_create_ptr ) { - __kmp_itt_stack_callee_leave( (__itt_caller)team->t.t_stack_id ); // inform ittnotify about leaving user's code - } + if (__itt_stack_caller_create_ptr) { + __kmp_itt_stack_callee_leave( + (__itt_caller) + team->t.t_stack_id); // inform ittnotify about leaving user's code + } #endif /* USE_ITT_BUILD */ - __kmp_run_after_invoked_task( gtid, tid, this_thr, team ); + __kmp_run_after_invoked_task(gtid, tid, this_thr, team); - return rc; + return rc; } #if OMP_40_ENABLED -void -__kmp_teams_master( int gtid ) -{ - // This routine is called by all master threads in teams construct - kmp_info_t *thr = __kmp_threads[ gtid ]; - kmp_team_t *team = thr->th.th_team; - ident_t *loc = team->t.t_ident; - thr->th.th_set_nproc = thr->th.th_teams_size.nth; - KMP_DEBUG_ASSERT( thr->th.th_teams_microtask ); - KMP_DEBUG_ASSERT( thr->th.th_set_nproc ); - KA_TRACE( 20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", - gtid, __kmp_tid_from_gtid( gtid ), thr->th.th_teams_microtask ) ); - // Launch league of teams now, but not let workers execute - // (they hang on fork barrier until next parallel) +void __kmp_teams_master(int gtid) { + // This routine is called by all master threads in teams construct + kmp_info_t *thr = __kmp_threads[gtid]; + kmp_team_t *team = thr->th.th_team; + ident_t *loc = team->t.t_ident; + thr->th.th_set_nproc = thr->th.th_teams_size.nth; + KMP_DEBUG_ASSERT(thr->th.th_teams_microtask); + KMP_DEBUG_ASSERT(thr->th.th_set_nproc); + KA_TRACE(20, ("__kmp_teams_master: T#%d, Tid %d, microtask %p\n", gtid, + __kmp_tid_from_gtid(gtid), thr->th.th_teams_microtask)); +// Launch league of teams now, but not let workers execute +// (they hang on fork barrier until next parallel) #if INCLUDE_SSC_MARKS - SSC_MARK_FORKING(); + SSC_MARK_FORKING(); #endif - __kmp_fork_call( loc, gtid, fork_context_intel, - 
team->t.t_argc, + __kmp_fork_call(loc, gtid, fork_context_intel, team->t.t_argc, #if OMPT_SUPPORT - (void *)thr->th.th_teams_microtask, // "unwrapped" task + (void *)thr->th.th_teams_microtask, // "unwrapped" task #endif - (microtask_t)thr->th.th_teams_microtask, // "wrapped" task - VOLATILE_CAST(launch_t) __kmp_invoke_task_func, - NULL ); + (microtask_t)thr->th.th_teams_microtask, // "wrapped" task + VOLATILE_CAST(launch_t) __kmp_invoke_task_func, NULL); #if INCLUDE_SSC_MARKS - SSC_MARK_JOINING(); + SSC_MARK_JOINING(); #endif - // AC: last parameter "1" eliminates join barrier which won't work because - // worker threads are in a fork barrier waiting for more parallel regions - __kmp_join_call( loc, gtid + // AC: last parameter "1" eliminates join barrier which won't work because + // worker threads are in a fork barrier waiting for more parallel regions + __kmp_join_call(loc, gtid #if OMPT_SUPPORT - , fork_context_intel + , + fork_context_intel #endif - , 1 ); + , + 1); } -int -__kmp_invoke_teams_master( int gtid ) -{ - kmp_info_t *this_thr = __kmp_threads[ gtid ]; - kmp_team_t *team = this_thr->th.th_team; - #if KMP_DEBUG - if ( !__kmp_threads[gtid]-> th.th_team->t.t_serialized ) - KMP_DEBUG_ASSERT( (void*)__kmp_threads[gtid]-> th.th_team->t.t_pkfn == (void*)__kmp_teams_master ); - #endif - __kmp_run_before_invoked_task( gtid, 0, this_thr, team ); - __kmp_teams_master( gtid ); - __kmp_run_after_invoked_task( gtid, 0, this_thr, team ); - return 1; +int __kmp_invoke_teams_master(int gtid) { + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *team = this_thr->th.th_team; +#if KMP_DEBUG + if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) + KMP_DEBUG_ASSERT((void *)__kmp_threads[gtid]->th.th_team->t.t_pkfn == + (void *)__kmp_teams_master); +#endif + __kmp_run_before_invoked_task(gtid, 0, this_thr, team); + __kmp_teams_master(gtid); + __kmp_run_after_invoked_task(gtid, 0, this_thr, team); + return 1; } #endif /* OMP_40_ENABLED */ /* this sets the requested number of threads for the next parallel region - * encountered by this team */ -/* since this should be enclosed in the forkjoin critical section it - * should avoid race conditions with assymmetrical nested parallelism */ + encountered by this team. since this should be enclosed in the forkjoin + critical section it should avoid race conditions with assymmetrical nested + parallelism */ -void -__kmp_push_num_threads( ident_t *id, int gtid, int num_threads ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; +void __kmp_push_num_threads(ident_t *id, int gtid, int num_threads) { + kmp_info_t *thr = __kmp_threads[gtid]; - if( num_threads > 0 ) - thr->th.th_set_nproc = num_threads; + if (num_threads > 0) + thr->th.th_set_nproc = num_threads; } #if OMP_40_ENABLED /* this sets the requested number of teams for the teams region and/or - * the number of threads for the next parallel region encountered */ -void -__kmp_push_num_teams( ident_t *id, int gtid, int num_teams, int num_threads ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT(num_teams >= 0); - KMP_DEBUG_ASSERT(num_threads >= 0); - - if( num_teams == 0 ) - num_teams = 1; // default number of teams is 1. - if( num_teams > __kmp_max_nth ) { // if too many teams requested? 
- if ( !__kmp_reserve_warn ) { - __kmp_reserve_warn = 1; - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, num_teams, __kmp_max_nth ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - num_teams = __kmp_max_nth; - } - // Set number of teams (number of threads in the outer "parallel" of the teams) - thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; - - // Remember the number of threads for inner parallel regions - if( num_threads == 0 ) { - if( !TCR_4(__kmp_init_middle) ) - __kmp_middle_initialize(); // get __kmp_avail_proc calculated - num_threads = __kmp_avail_proc / num_teams; - if( num_teams * num_threads > __kmp_max_nth ) { - // adjust num_threads w/o warning as it is not user setting - num_threads = __kmp_max_nth / num_teams; - } - } else { - if( num_teams * num_threads > __kmp_max_nth ) { - int new_threads = __kmp_max_nth / num_teams; - if ( !__kmp_reserve_warn ) { // user asked for too many threads - __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantFormThrTeam, num_threads, new_threads ), - KMP_HNT( Unset_ALL_THREADS ), - __kmp_msg_null - ); - } - num_threads = new_threads; - } - } - thr->th.th_teams_size.nth = num_threads; + the number of threads for the next parallel region encountered */ +void __kmp_push_num_teams(ident_t *id, int gtid, int num_teams, + int num_threads) { + kmp_info_t *thr = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(num_teams >= 0); + KMP_DEBUG_ASSERT(num_threads >= 0); + + if (num_teams == 0) + num_teams = 1; // default number of teams is 1. + if (num_teams > __kmp_max_nth) { // if too many teams requested? + if (!__kmp_reserve_warn) { + __kmp_reserve_warn = 1; + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, num_teams, __kmp_max_nth), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } + num_teams = __kmp_max_nth; + } + // Set number of teams (number of threads in the outer "parallel" of the + // teams) + thr->th.th_set_nproc = thr->th.th_teams_size.nteams = num_teams; + + // Remember the number of threads for inner parallel regions + if (num_threads == 0) { + if (!TCR_4(__kmp_init_middle)) + __kmp_middle_initialize(); // get __kmp_avail_proc calculated + num_threads = __kmp_avail_proc / num_teams; + if (num_teams * num_threads > __kmp_max_nth) { + // adjust num_threads w/o warning as it is not user setting + num_threads = __kmp_max_nth / num_teams; + } + } else { + if (num_teams * num_threads > __kmp_max_nth) { + int new_threads = __kmp_max_nth / num_teams; + if (!__kmp_reserve_warn) { // user asked for too many threads + __kmp_reserve_warn = 1; // that conflicts with OMP_THREAD_LIMIT + __kmp_msg(kmp_ms_warning, + KMP_MSG(CantFormThrTeam, num_threads, new_threads), + KMP_HNT(Unset_ALL_THREADS), __kmp_msg_null); + } + num_threads = new_threads; + } + } + thr->th.th_teams_size.nth = num_threads; } - -// // Set the proc_bind var to use in the following parallel region. -// -void -__kmp_push_proc_bind( ident_t *id, int gtid, kmp_proc_bind_t proc_bind ) -{ - kmp_info_t *thr = __kmp_threads[gtid]; - thr->th.th_set_proc_bind = proc_bind; +void __kmp_push_proc_bind(ident_t *id, int gtid, kmp_proc_bind_t proc_bind) { + kmp_info_t *thr = __kmp_threads[gtid]; + thr->th.th_set_proc_bind = proc_bind; } #endif /* OMP_40_ENABLED */ /* Launch the worker threads into the microtask. 
*/ -void -__kmp_internal_fork( ident_t *id, int gtid, kmp_team_t *team ) -{ - kmp_info_t *this_thr = __kmp_threads[gtid]; +void __kmp_internal_fork(ident_t *id, int gtid, kmp_team_t *team) { + kmp_info_t *this_thr = __kmp_threads[gtid]; #ifdef KMP_DEBUG - int f; + int f; #endif /* KMP_DEBUG */ - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); - KMP_ASSERT( KMP_MASTER_GTID(gtid) ); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(this_thr->th.th_team == team); + KMP_ASSERT(KMP_MASTER_GTID(gtid)); + KMP_MB(); /* Flush all pending memory write invalidates. */ - team->t.t_construct = 0; /* no single directives seen yet */ - team->t.t_ordered.dt.t_value = 0; /* thread 0 enters the ordered section first */ + team->t.t_construct = 0; /* no single directives seen yet */ + team->t.t_ordered.dt.t_value = + 0; /* thread 0 enters the ordered section first */ - /* Reset the identifiers on the dispatch buffer */ - KMP_DEBUG_ASSERT( team->t.t_disp_buffer ); - if ( team->t.t_max_nproc > 1 ) { - int i; - for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { - team->t.t_disp_buffer[ i ].buffer_index = i; + /* Reset the identifiers on the dispatch buffer */ + KMP_DEBUG_ASSERT(team->t.t_disp_buffer); + if (team->t.t_max_nproc > 1) { + int i; + for (i = 0; i < __kmp_dispatch_num_buffers; ++i) { + team->t.t_disp_buffer[i].buffer_index = i; #if OMP_45_ENABLED - team->t.t_disp_buffer[i].doacross_buf_idx = i; + team->t.t_disp_buffer[i].doacross_buf_idx = i; #endif - } - } else { - team->t.t_disp_buffer[ 0 ].buffer_index = 0; + } + } else { + team->t.t_disp_buffer[0].buffer_index = 0; #if OMP_45_ENABLED - team->t.t_disp_buffer[0].doacross_buf_idx = 0; + team->t.t_disp_buffer[0].doacross_buf_idx = 0; #endif - } + } - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT( this_thr->th.th_team == team ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_ASSERT(this_thr->th.th_team == team); #ifdef KMP_DEBUG - for( f=0 ; ft.t_nproc ; f++ ) { - KMP_DEBUG_ASSERT( team->t.t_threads[f] && - team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc ); - } + for (f = 0; f < team->t.t_nproc; f++) { + KMP_DEBUG_ASSERT(team->t.t_threads[f] && + team->t.t_threads[f]->th.th_team_nproc == team->t.t_nproc); + } #endif /* KMP_DEBUG */ - /* release the worker threads so they may begin working */ - __kmp_fork_barrier( gtid, 0 ); + /* release the worker threads so they may begin working */ + __kmp_fork_barrier(gtid, 0); } +void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { + kmp_info_t *this_thr = __kmp_threads[gtid]; -void -__kmp_internal_join( ident_t *id, int gtid, kmp_team_t *team ) -{ - kmp_info_t *this_thr = __kmp_threads[gtid]; + KMP_DEBUG_ASSERT(team); + KMP_DEBUG_ASSERT(this_thr->th.th_team == team); + KMP_ASSERT(KMP_MASTER_GTID(gtid)); + KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_DEBUG_ASSERT( team ); - KMP_DEBUG_ASSERT( this_thr->th.th_team == team ); - KMP_ASSERT( KMP_MASTER_GTID(gtid) ); - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - - /* Join barrier after fork */ +/* Join barrier after fork */ #ifdef KMP_DEBUG - if (__kmp_threads[gtid] && __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc ) { - __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n",gtid, gtid, __kmp_threads[gtid]); - __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, team->t.t_nproc=%d\n", - gtid, __kmp_threads[gtid]->th.th_team_nproc, team, team->t.t_nproc); - __kmp_print_structure(); - } - KMP_DEBUG_ASSERT( __kmp_threads[gtid] && - __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc ); + if (__kmp_threads[gtid] && + __kmp_threads[gtid]->th.th_team_nproc != team->t.t_nproc) { + __kmp_printf("GTID: %d, __kmp_threads[%d]=%p\n", gtid, gtid, + __kmp_threads[gtid]); + __kmp_printf("__kmp_threads[%d]->th.th_team_nproc=%d, TEAM: %p, " + "team->t.t_nproc=%d\n", + gtid, __kmp_threads[gtid]->th.th_team_nproc, team, + team->t.t_nproc); + __kmp_print_structure(); + } + KMP_DEBUG_ASSERT(__kmp_threads[gtid] && + __kmp_threads[gtid]->th.th_team_nproc == team->t.t_nproc); #endif /* KMP_DEBUG */ - __kmp_join_barrier( gtid ); /* wait for everyone */ + __kmp_join_barrier(gtid); /* wait for everyone */ - KMP_MB(); /* Flush all pending memory write invalidates. */ - KMP_ASSERT( this_thr->th.th_team == team ); + KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_ASSERT(this_thr->th.th_team == team); } - -/* ------------------------------------------------------------------------ */ /* ------------------------------------------------------------------------ */ #ifdef USE_LOAD_BALANCE -// // Return the worker threads actively spinning in the hot team, if we // are at the outermost level of parallelism. Otherwise, return 0. -// -static int -__kmp_active_hot_team_nproc( kmp_root_t *root ) -{ - int i; - int retval; - kmp_team_t *hot_team; - - if ( root->r.r_active ) { - return 0; - } - hot_team = root->r.r_hot_team; - if ( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - return hot_team->t.t_nproc - 1; // Don't count master thread - } - - // - // Skip the master thread - it is accounted for elsewhere. - // - retval = 0; - for ( i = 1; i < hot_team->t.t_nproc; i++ ) { - if ( hot_team->t.t_threads[i]->th.th_active ) { - retval++; - } - } - return retval; +static int __kmp_active_hot_team_nproc(kmp_root_t *root) { + int i; + int retval; + kmp_team_t *hot_team; + + if (root->r.r_active) { + return 0; + } + hot_team = root->r.r_hot_team; + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + return hot_team->t.t_nproc - 1; // Don't count master thread + } + + // Skip the master thread - it is accounted for elsewhere. + retval = 0; + for (i = 1; i < hot_team->t.t_nproc; i++) { + if (hot_team->t.t_threads[i]->th.th_active) { + retval++; + } + } + return retval; } -// // Perform an automatic adjustment to the number of // threads used by the next parallel region. 
-// -static int -__kmp_load_balance_nproc( kmp_root_t *root, int set_nproc ) -{ - int retval; - int pool_active; - int hot_team_active; - int team_curr_active; - int system_active; - - KB_TRACE( 20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", - root, set_nproc ) ); - KMP_DEBUG_ASSERT( root ); - KMP_DEBUG_ASSERT( root->r.r_root_team->t.t_threads[0]->th.th_current_task->td_icvs.dynamic == TRUE ); - KMP_DEBUG_ASSERT( set_nproc > 1 ); - - if ( set_nproc == 1) { - KB_TRACE( 20, ("__kmp_load_balance_nproc: serial execution.\n" ) ); - return 1; - } - - // - // Threads that are active in the thread pool, active in the hot team - // for this particular root (if we are at the outer par level), and - // the currently executing thread (to become the master) are available - // to add to the new team, but are currently contributing to the system - // load, and must be accounted for. - // - pool_active = TCR_4(__kmp_thread_pool_active_nth); - hot_team_active = __kmp_active_hot_team_nproc( root ); - team_curr_active = pool_active + hot_team_active + 1; - - // - // Check the system load. - // - system_active = __kmp_get_load_balance( __kmp_avail_proc + team_curr_active ); - KB_TRACE( 30, ("__kmp_load_balance_nproc: system active = %d pool active = %d hot team active = %d\n", - system_active, pool_active, hot_team_active ) ); - - if ( system_active < 0 ) { - // - // There was an error reading the necessary info from /proc, - // so use the thread limit algorithm instead. Once we set - // __kmp_global.g.g_dynamic_mode = dynamic_thread_limit, - // we shouldn't wind up getting back here. - // - __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - KMP_WARNING( CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit" ); - - // - // Make this call behave like the thread limit algorithm. - // - retval = __kmp_avail_proc - __kmp_nth + (root->r.r_active ? 1 - : root->r.r_hot_team->t.t_nproc); - if ( retval > set_nproc ) { - retval = set_nproc; - } - if ( retval < KMP_MIN_NTH ) { - retval = KMP_MIN_NTH; - } - - KB_TRACE( 20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", retval ) ); - return retval; - } - - // - // There is a slight delay in the load balance algorithm in detecting - // new running procs. The real system load at this instant should be - // at least as large as the #active omp thread that are available to - // add to the team. - // - if ( system_active < team_curr_active ) { - system_active = team_curr_active; - } - retval = __kmp_avail_proc - system_active + team_curr_active; - if ( retval > set_nproc ) { - retval = set_nproc; - } - if ( retval < KMP_MIN_NTH ) { - retval = KMP_MIN_NTH; - } - - KB_TRACE( 20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval ) ); +static int __kmp_load_balance_nproc(kmp_root_t *root, int set_nproc) { + int retval; + int pool_active; + int hot_team_active; + int team_curr_active; + int system_active; + + KB_TRACE(20, ("__kmp_load_balance_nproc: called root:%p set_nproc:%d\n", root, + set_nproc)); + KMP_DEBUG_ASSERT(root); + KMP_DEBUG_ASSERT(root->r.r_root_team->t.t_threads[0] + ->th.th_current_task->td_icvs.dynamic == TRUE); + KMP_DEBUG_ASSERT(set_nproc > 1); + + if (set_nproc == 1) { + KB_TRACE(20, ("__kmp_load_balance_nproc: serial execution.\n")); + return 1; + } + + // Threads that are active in the thread pool, active in the hot team for this + // particular root (if we are at the outer par level), and the currently + // executing thread (to become the master) are available to add to the new + // team, but are currently contributing to the system load, and must be + // accounted for. + pool_active = TCR_4(__kmp_thread_pool_active_nth); + hot_team_active = __kmp_active_hot_team_nproc(root); + team_curr_active = pool_active + hot_team_active + 1; + + // Check the system load. + system_active = __kmp_get_load_balance(__kmp_avail_proc + team_curr_active); + KB_TRACE(30, ("__kmp_load_balance_nproc: system active = %d pool active = %d " + "hot team active = %d\n", + system_active, pool_active, hot_team_active)); + + if (system_active < 0) { + // There was an error reading the necessary info from /proc, so use the + // thread limit algorithm instead. Once we set __kmp_global.g.g_dynamic_mode + // = dynamic_thread_limit, we shouldn't wind up getting back here. + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + KMP_WARNING(CantLoadBalUsing, "KMP_DYNAMIC_MODE=thread limit"); + + // Make this call behave like the thread limit algorithm. + retval = __kmp_avail_proc - __kmp_nth + + (root->r.r_active ? 1 : root->r.r_hot_team->t.t_nproc); + if (retval > set_nproc) { + retval = set_nproc; + } + if (retval < KMP_MIN_NTH) { + retval = KMP_MIN_NTH; + } + + KB_TRACE(20, ("__kmp_load_balance_nproc: thread limit exit. retval:%d\n", + retval)); return retval; + } + + // There is a slight delay in the load balance algorithm in detecting new + // running procs. The real system load at this instant should be at least as + // large as the #active omp thread that are available to add to the team. + if (system_active < team_curr_active) { + system_active = team_curr_active; + } + retval = __kmp_avail_proc - system_active + team_curr_active; + if (retval > set_nproc) { + retval = set_nproc; + } + if (retval < KMP_MIN_NTH) { + retval = KMP_MIN_NTH; + } + + KB_TRACE(20, ("__kmp_load_balance_nproc: exit. 
retval:%d\n", retval)); + return retval; } // __kmp_load_balance_nproc() #endif /* USE_LOAD_BALANCE */ /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ /* NOTE: this is called with the __kmp_init_lock held */ -void -__kmp_cleanup( void ) -{ - int f; +void __kmp_cleanup(void) { + int f; - KA_TRACE( 10, ("__kmp_cleanup: enter\n" ) ); + KA_TRACE(10, ("__kmp_cleanup: enter\n")); - if (TCR_4(__kmp_init_parallel)) { + if (TCR_4(__kmp_init_parallel)) { #if KMP_HANDLE_SIGNALS - __kmp_remove_signals(); + __kmp_remove_signals(); #endif - TCW_4(__kmp_init_parallel, FALSE); - } + TCW_4(__kmp_init_parallel, FALSE); + } - if (TCR_4(__kmp_init_middle)) { + if (TCR_4(__kmp_init_middle)) { #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_uninitialize(); + __kmp_affinity_uninitialize(); #endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_cleanup_hierarchy(); - TCW_4(__kmp_init_middle, FALSE); - } + __kmp_cleanup_hierarchy(); + TCW_4(__kmp_init_middle, FALSE); + } + + KA_TRACE(10, ("__kmp_cleanup: go serial cleanup\n")); + + if (__kmp_init_serial) { + __kmp_runtime_destroy(); + __kmp_init_serial = FALSE; + } + + for (f = 0; f < __kmp_threads_capacity; f++) { + if (__kmp_root[f] != NULL) { + __kmp_free(__kmp_root[f]); + __kmp_root[f] = NULL; + } + } + __kmp_free(__kmp_threads); + // __kmp_threads and __kmp_root were allocated at once, as single block, so + // there is no need in freeing __kmp_root. + __kmp_threads = NULL; + __kmp_root = NULL; + __kmp_threads_capacity = 0; - KA_TRACE( 10, ("__kmp_cleanup: go serial cleanup\n" ) ); +#if KMP_USE_DYNAMIC_LOCK + __kmp_cleanup_indirect_user_locks(); +#else + __kmp_cleanup_user_locks(); +#endif - if (__kmp_init_serial) { - __kmp_runtime_destroy(); - __kmp_init_serial = FALSE; - } +#if KMP_AFFINITY_SUPPORTED + KMP_INTERNAL_FREE((void *)__kmp_cpuinfo_file); + __kmp_cpuinfo_file = NULL; +#endif /* KMP_AFFINITY_SUPPORTED */ - for ( f = 0; f < __kmp_threads_capacity; f++ ) { - if ( __kmp_root[ f ] != NULL ) { - __kmp_free( __kmp_root[ f ] ); - __kmp_root[ f ] = NULL; - } - } - __kmp_free( __kmp_threads ); - // __kmp_threads and __kmp_root were allocated at once, as single block, so there is no need in - // freeing __kmp_root. 
- __kmp_threads = NULL; - __kmp_root = NULL; - __kmp_threads_capacity = 0; +#if KMP_USE_ADAPTIVE_LOCKS +#if KMP_DEBUG_ADAPTIVE_LOCKS + __kmp_print_speculative_stats(); +#endif +#endif + KMP_INTERNAL_FREE(__kmp_nested_nth.nth); + __kmp_nested_nth.nth = NULL; + __kmp_nested_nth.size = 0; + __kmp_nested_nth.used = 0; + KMP_INTERNAL_FREE(__kmp_nested_proc_bind.bind_types); + __kmp_nested_proc_bind.bind_types = NULL; + __kmp_nested_proc_bind.size = 0; + __kmp_nested_proc_bind.used = 0; -#if KMP_USE_DYNAMIC_LOCK - __kmp_cleanup_indirect_user_locks(); -#else - __kmp_cleanup_user_locks(); -#endif - - #if KMP_AFFINITY_SUPPORTED - KMP_INTERNAL_FREE( (void *) __kmp_cpuinfo_file ); - __kmp_cpuinfo_file = NULL; - #endif /* KMP_AFFINITY_SUPPORTED */ - - #if KMP_USE_ADAPTIVE_LOCKS - #if KMP_DEBUG_ADAPTIVE_LOCKS - __kmp_print_speculative_stats(); - #endif - #endif - KMP_INTERNAL_FREE( __kmp_nested_nth.nth ); - __kmp_nested_nth.nth = NULL; - __kmp_nested_nth.size = 0; - __kmp_nested_nth.used = 0; - KMP_INTERNAL_FREE( __kmp_nested_proc_bind.bind_types ); - __kmp_nested_proc_bind.bind_types = NULL; - __kmp_nested_proc_bind.size = 0; - __kmp_nested_proc_bind.used = 0; - - __kmp_i18n_catclose(); + __kmp_i18n_catclose(); #if KMP_STATS_ENABLED - __kmp_stats_fini(); + __kmp_stats_fini(); #endif - KA_TRACE( 10, ("__kmp_cleanup: exit\n" ) ); + KA_TRACE(10, ("__kmp_cleanup: exit\n")); } /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -int -__kmp_ignore_mppbeg( void ) -{ - char *env; - - if ((env = getenv( "KMP_IGNORE_MPPBEG" )) != NULL) { - if (__kmp_str_match_false( env )) - return FALSE; - } - // By default __kmpc_begin() is no-op. - return TRUE; -} -int -__kmp_ignore_mppend( void ) -{ - char *env; +int __kmp_ignore_mppbeg(void) { + char *env; - if ((env = getenv( "KMP_IGNORE_MPPEND" )) != NULL) { - if (__kmp_str_match_false( env )) - return FALSE; - } - // By default __kmpc_end() is no-op. - return TRUE; + if ((env = getenv("KMP_IGNORE_MPPBEG")) != NULL) { + if (__kmp_str_match_false(env)) + return FALSE; + } + // By default __kmpc_begin() is no-op. + return TRUE; } -void -__kmp_internal_begin( void ) -{ - int gtid; - kmp_root_t *root; - - /* this is a very important step as it will register new sibling threads - * and assign these new uber threads a new gtid */ - gtid = __kmp_entry_gtid(); - root = __kmp_threads[ gtid ]->th.th_root; - KMP_ASSERT( KMP_UBER_GTID( gtid )); - - if( root->r.r_begin ) return; - __kmp_acquire_lock( &root->r.r_begin_lock, gtid ); - if( root->r.r_begin ) { - __kmp_release_lock( & root->r.r_begin_lock, gtid ); - return; - } - - root->r.r_begin = TRUE; +int __kmp_ignore_mppend(void) { + char *env; - __kmp_release_lock( & root->r.r_begin_lock, gtid ); + if ((env = getenv("KMP_IGNORE_MPPEND")) != NULL) { + if (__kmp_str_match_false(env)) + return FALSE; + } + // By default __kmpc_end() is no-op. 
+ return TRUE; } +void __kmp_internal_begin(void) { + int gtid; + kmp_root_t *root; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_user_set_library (enum library_type arg) -{ - int gtid; - kmp_root_t *root; - kmp_info_t *thread; - - /* first, make sure we are initialized so we can get our gtid */ + /* this is a very important step as it will register new sibling threads + and assign these new uber threads a new gtid */ + gtid = __kmp_entry_gtid(); + root = __kmp_threads[gtid]->th.th_root; + KMP_ASSERT(KMP_UBER_GTID(gtid)); - gtid = __kmp_entry_gtid(); - thread = __kmp_threads[ gtid ]; + if (root->r.r_begin) + return; + __kmp_acquire_lock(&root->r.r_begin_lock, gtid); + if (root->r.r_begin) { + __kmp_release_lock(&root->r.r_begin_lock, gtid); + return; + } - root = thread->th.th_root; + root->r.r_begin = TRUE; - KA_TRACE( 20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, library_serial )); - if (root->r.r_in_parallel) { /* Must be called in serial section of top-level thread */ - KMP_WARNING( SetLibraryIncorrectCall ); - return; - } + __kmp_release_lock(&root->r.r_begin_lock, gtid); +} - switch ( arg ) { - case library_serial : - thread->th.th_set_nproc = 0; - set__nproc( thread, 1 ); - break; - case library_turnaround : - thread->th.th_set_nproc = 0; - set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); - break; - case library_throughput : - thread->th.th_set_nproc = 0; - set__nproc( thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth : __kmp_dflt_team_nth_ub ); - break; - default: - KMP_FATAL( UnknownLibraryType, arg ); - } +/* ------------------------------------------------------------------------ */ - __kmp_aux_set_library ( arg ); +void __kmp_user_set_library(enum library_type arg) { + int gtid; + kmp_root_t *root; + kmp_info_t *thread; + + /* first, make sure we are initialized so we can get our gtid */ + + gtid = __kmp_entry_gtid(); + thread = __kmp_threads[gtid]; + + root = thread->th.th_root; + + KA_TRACE(20, ("__kmp_user_set_library: enter T#%d, arg: %d, %d\n", gtid, arg, + library_serial)); + if (root->r.r_in_parallel) { /* Must be called in serial section of top-level + thread */ + KMP_WARNING(SetLibraryIncorrectCall); + return; + } + + switch (arg) { + case library_serial: + thread->th.th_set_nproc = 0; + set__nproc(thread, 1); + break; + case library_turnaround: + thread->th.th_set_nproc = 0; + set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth + : __kmp_dflt_team_nth_ub); + break; + case library_throughput: + thread->th.th_set_nproc = 0; + set__nproc(thread, __kmp_dflt_team_nth ? __kmp_dflt_team_nth + : __kmp_dflt_team_nth_ub); + break; + default: + KMP_FATAL(UnknownLibraryType, arg); + } + + __kmp_aux_set_library(arg); } -void -__kmp_aux_set_stacksize( size_t arg ) -{ - if (! 
__kmp_init_serial) - __kmp_serial_initialize(); +void __kmp_aux_set_stacksize(size_t arg) { + if (!__kmp_init_serial) + __kmp_serial_initialize(); #if KMP_OS_DARWIN - if (arg & (0x1000 - 1)) { - arg &= ~(0x1000 - 1); - if(arg + 0x1000) /* check for overflow if we round up */ - arg += 0x1000; - } + if (arg & (0x1000 - 1)) { + arg &= ~(0x1000 - 1); + if (arg + 0x1000) /* check for overflow if we round up */ + arg += 0x1000; + } #endif - __kmp_acquire_bootstrap_lock( &__kmp_initz_lock ); + __kmp_acquire_bootstrap_lock(&__kmp_initz_lock); - /* only change the default stacksize before the first parallel region */ - if (! TCR_4(__kmp_init_parallel)) { - size_t value = arg; /* argument is in bytes */ + /* only change the default stacksize before the first parallel region */ + if (!TCR_4(__kmp_init_parallel)) { + size_t value = arg; /* argument is in bytes */ - if (value < __kmp_sys_min_stksize ) - value = __kmp_sys_min_stksize ; - else if (value > KMP_MAX_STKSIZE) - value = KMP_MAX_STKSIZE; + if (value < __kmp_sys_min_stksize) + value = __kmp_sys_min_stksize; + else if (value > KMP_MAX_STKSIZE) + value = KMP_MAX_STKSIZE; - __kmp_stksize = value; + __kmp_stksize = value; - __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ - } + __kmp_env_stksize = TRUE; /* was KMP_STACKSIZE specified? */ + } - __kmp_release_bootstrap_lock( &__kmp_initz_lock ); + __kmp_release_bootstrap_lock(&__kmp_initz_lock); } /* set the behaviour of the runtime library */ /* TODO this can cause some odd behaviour with sibling parallelism... */ -void -__kmp_aux_set_library (enum library_type arg) -{ - __kmp_library = arg; - - switch ( __kmp_library ) { - case library_serial : - { - KMP_INFORM( LibraryIsSerial ); - (void) __kmp_change_library( TRUE ); - } - break; - case library_turnaround : - (void) __kmp_change_library( TRUE ); - break; - case library_throughput : - (void) __kmp_change_library( FALSE ); - break; - default: - KMP_FATAL( UnknownLibraryType, arg ); - } +void __kmp_aux_set_library(enum library_type arg) { + __kmp_library = arg; + + switch (__kmp_library) { + case library_serial: { + KMP_INFORM(LibraryIsSerial); + (void)__kmp_change_library(TRUE); + } break; + case library_turnaround: + (void)__kmp_change_library(TRUE); + break; + case library_throughput: + (void)__kmp_change_library(FALSE); + break; + default: + KMP_FATAL(UnknownLibraryType, arg); + } } /* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ -void -__kmp_aux_set_blocktime (int arg, kmp_info_t *thread, int tid) -{ - int blocktime = arg; /* argument is in milliseconds */ +void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { + int blocktime = arg; /* argument is in milliseconds */ #if KMP_USE_MONITOR - int bt_intervals; + int bt_intervals; #endif - int bt_set; + int bt_set; - __kmp_save_internal_controls( thread ); + __kmp_save_internal_controls(thread); - /* Normalize and set blocktime for the teams */ - if (blocktime < KMP_MIN_BLOCKTIME) - blocktime = KMP_MIN_BLOCKTIME; - else if (blocktime > KMP_MAX_BLOCKTIME) - blocktime = KMP_MAX_BLOCKTIME; + /* Normalize and set blocktime for the teams */ + if (blocktime < KMP_MIN_BLOCKTIME) + blocktime = KMP_MIN_BLOCKTIME; + else if (blocktime > KMP_MAX_BLOCKTIME) + blocktime = KMP_MAX_BLOCKTIME; - set__blocktime_team( thread->th.th_team, tid, blocktime ); - set__blocktime_team( thread->th.th_serial_team, 0, blocktime ); + set__blocktime_team(thread->th.th_team, tid, blocktime); + 
set__blocktime_team(thread->th.th_serial_team, 0, blocktime); #if KMP_USE_MONITOR - /* Calculate and set blocktime intervals for the teams */ - bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); + /* Calculate and set blocktime intervals for the teams */ + bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME(blocktime, __kmp_monitor_wakeups); - set__bt_intervals_team( thread->th.th_team, tid, bt_intervals ); - set__bt_intervals_team( thread->th.th_serial_team, 0, bt_intervals ); + set__bt_intervals_team(thread->th.th_team, tid, bt_intervals); + set__bt_intervals_team(thread->th.th_serial_team, 0, bt_intervals); #endif - /* Set whether blocktime has been set to "TRUE" */ - bt_set = TRUE; + /* Set whether blocktime has been set to "TRUE" */ + bt_set = TRUE; - set__bt_set_team( thread->th.th_team, tid, bt_set ); - set__bt_set_team( thread->th.th_serial_team, 0, bt_set ); + set__bt_set_team(thread->th.th_team, tid, bt_set); + set__bt_set_team(thread->th.th_serial_team, 0, bt_set); #if KMP_USE_MONITOR - KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " - "bt_intervals=%d, monitor_updates=%d\n", - __kmp_gtid_from_tid(tid, thread->th.th_team), - thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, - __kmp_monitor_wakeups)); + KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d, " + "bt_intervals=%d, monitor_updates=%d\n", + __kmp_gtid_from_tid(tid, thread->th.th_team), + thread->th.th_team->t.t_id, tid, blocktime, bt_intervals, + __kmp_monitor_wakeups)); #else - KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", - __kmp_gtid_from_tid(tid, thread->th.th_team), - thread->th.th_team->t.t_id, tid, blocktime)); + KF_TRACE(10, ("kmp_set_blocktime: T#%d(%d:%d), blocktime=%d\n", + __kmp_gtid_from_tid(tid, thread->th.th_team), + thread->th.th_team->t.t_id, tid, blocktime)); #endif } -void -__kmp_aux_set_defaults( - char const * str, - int len -) { - if ( ! __kmp_init_serial ) { - __kmp_serial_initialize(); - }; - __kmp_env_initialize( str ); +void __kmp_aux_set_defaults(char const *str, int len) { + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + }; + __kmp_env_initialize(str); - if (__kmp_settings + if (__kmp_settings #if OMP_40_ENABLED - || __kmp_display_env || __kmp_display_env_verbose + || __kmp_display_env || __kmp_display_env_verbose #endif // OMP_40_ENABLED - ) { - __kmp_env_print(); - } + ) { + __kmp_env_print(); + } } // __kmp_aux_set_defaults /* ------------------------------------------------------------------------ */ - -/* - * internal fast reduction routines - */ +/* internal fast reduction routines */ PACKED_REDUCTION_METHOD_T -__kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid, - kmp_int32 num_vars, size_t reduce_size, void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), - kmp_critical_name *lck ) -{ +__kmp_determine_reduction_method( + ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size, + void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data), + kmp_critical_name *lck) { - // Default reduction method: critical construct ( lck != NULL, like in current PAROPT ) - // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method can be selected by RTL - // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method can be selected by RTL - // Finally, it's up to OpenMP RTL to make a decision on which method to select among generated by PAROPT. 
+ // Default reduction method: critical construct ( lck != NULL, like in current + // PAROPT ) + // If ( reduce_data!=NULL && reduce_func!=NULL ): the tree-reduction method + // can be selected by RTL + // If loc->flags contains KMP_IDENT_ATOMIC_REDUCE, the atomic reduce method + // can be selected by RTL + // Finally, it's up to OpenMP RTL to make a decision on which method to select + // among generated by PAROPT. - PACKED_REDUCTION_METHOD_T retval; + PACKED_REDUCTION_METHOD_T retval; - int team_size; + int team_size; - KMP_DEBUG_ASSERT( loc ); // it would be nice to test ( loc != 0 ) - KMP_DEBUG_ASSERT( lck ); // it would be nice to test ( lck != 0 ) + KMP_DEBUG_ASSERT(loc); // it would be nice to test ( loc != 0 ) + KMP_DEBUG_ASSERT(lck); // it would be nice to test ( lck != 0 ) - #define FAST_REDUCTION_ATOMIC_METHOD_GENERATED ( ( loc->flags & ( KMP_IDENT_ATOMIC_REDUCE ) ) == ( KMP_IDENT_ATOMIC_REDUCE ) ) - #define FAST_REDUCTION_TREE_METHOD_GENERATED ( ( reduce_data ) && ( reduce_func ) ) +#define FAST_REDUCTION_ATOMIC_METHOD_GENERATED \ + ((loc->flags & (KMP_IDENT_ATOMIC_REDUCE)) == (KMP_IDENT_ATOMIC_REDUCE)) +#define FAST_REDUCTION_TREE_METHOD_GENERATED ((reduce_data) && (reduce_func)) - retval = critical_reduce_block; + retval = critical_reduce_block; - team_size = __kmp_get_team_num_threads( global_tid ); // another choice of getting a team size ( with 1 dynamic deference ) is slower + // another choice of getting a team size (with 1 dynamic deference) is slower + team_size = __kmp_get_team_num_threads(global_tid); + if (team_size == 1) { - if( team_size == 1 ) { + retval = empty_reduce_block; - retval = empty_reduce_block; + } else { - } else { - - int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; + int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 +#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 - #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \ + KMP_OS_DARWIN - int teamsize_cutoff = 4; + int teamsize_cutoff = 4; #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - teamsize_cutoff = 8; - } + if (__kmp_mic_type != non_mic) { + teamsize_cutoff = 8; + } #endif - if( tree_available ) { - if( team_size <= teamsize_cutoff ) { - if ( atomic_available ) { - retval = atomic_reduce_block; - } - } else { - retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; - } - } else if ( atomic_available ) { - retval = atomic_reduce_block; - } - #else - #error "Unknown or unsupported OS" - #endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN + if (tree_available) { + if (team_size <= teamsize_cutoff) { + if (atomic_available) { + retval = atomic_reduce_block; + } + } else { + retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; + } + } else if (atomic_available) { + retval = atomic_reduce_block; + } +#else +#error "Unknown or unsupported OS" +#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || +// KMP_OS_DARWIN - #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS +#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS - #if KMP_OS_LINUX || KMP_OS_WINDOWS +#if KMP_OS_LINUX || KMP_OS_WINDOWS - // basic 
tuning + // basic tuning - if( atomic_available ) { - if( num_vars <= 2 ) { // && ( team_size <= 8 ) due to false-sharing ??? - retval = atomic_reduce_block; - } - } // otherwise: use critical section + if (atomic_available) { + if (num_vars <= 2) { // && ( team_size <= 8 ) due to false-sharing ??? + retval = atomic_reduce_block; + } + } // otherwise: use critical section - #elif KMP_OS_DARWIN +#elif KMP_OS_DARWIN - if( atomic_available && ( num_vars <= 3 ) ) { - retval = atomic_reduce_block; - } else if( tree_available ) { - if( ( reduce_size > ( 9 * sizeof( kmp_real64 ) ) ) && ( reduce_size < ( 2000 * sizeof( kmp_real64 ) ) ) ) { - retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; - } - } // otherwise: use critical section + if (atomic_available && (num_vars <= 3)) { + retval = atomic_reduce_block; + } else if (tree_available) { + if ((reduce_size > (9 * sizeof(kmp_real64))) && + (reduce_size < (2000 * sizeof(kmp_real64)))) { + retval = TREE_REDUCE_BLOCK_WITH_PLAIN_BARRIER; + } + } // otherwise: use critical section - #else - #error "Unknown or unsupported OS" - #endif +#else +#error "Unknown or unsupported OS" +#endif - #else - #error "Unknown or unsupported architecture" - #endif +#else +#error "Unknown or unsupported architecture" +#endif + } - } + // KMP_FORCE_REDUCTION - // KMP_FORCE_REDUCTION + // If the team is serialized (team_size == 1), ignore the forced reduction + // method and stay with the unsynchronized method (empty_reduce_block) + if (__kmp_force_reduction_method != reduction_method_not_defined && + team_size != 1) { - // If the team is serialized (team_size == 1), ignore the forced reduction - // method and stay with the unsynchronized method (empty_reduce_block) - if( __kmp_force_reduction_method != reduction_method_not_defined && team_size != 1) { + PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; - PACKED_REDUCTION_METHOD_T forced_retval = critical_reduce_block; + int atomic_available, tree_available; - int atomic_available, tree_available; + switch ((forced_retval = __kmp_force_reduction_method)) { + case critical_reduce_block: + KMP_ASSERT(lck); // lck should be != 0 + break; - switch( ( forced_retval = __kmp_force_reduction_method ) ) - { - case critical_reduce_block: - KMP_ASSERT( lck ); // lck should be != 0 - break; - - case atomic_reduce_block: - atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; - if( ! atomic_available ) { - KMP_WARNING(RedMethodNotSupported, "atomic"); - forced_retval = critical_reduce_block; - } - break; - - case tree_reduce_block: - tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; - if( ! 
tree_available ) { - KMP_WARNING(RedMethodNotSupported, "tree"); - forced_retval = critical_reduce_block; - } else { - #if KMP_FAST_REDUCTION_BARRIER - forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; - #endif - } - break; - - default: - KMP_ASSERT( 0 ); // "unsupported method specified" - } + case atomic_reduce_block: + atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED; + if (!atomic_available) { + KMP_WARNING(RedMethodNotSupported, "atomic"); + forced_retval = critical_reduce_block; + } + break; + + case tree_reduce_block: + tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED; + if (!tree_available) { + KMP_WARNING(RedMethodNotSupported, "tree"); + forced_retval = critical_reduce_block; + } else { +#if KMP_FAST_REDUCTION_BARRIER + forced_retval = TREE_REDUCE_BLOCK_WITH_REDUCTION_BARRIER; +#endif + } + break; - retval = forced_retval; + default: + KMP_ASSERT(0); // "unsupported method specified" } - KA_TRACE(10, ( "reduction method selected=%08x\n", retval ) ); + retval = forced_retval; + } + + KA_TRACE(10, ("reduction method selected=%08x\n", retval)); - #undef FAST_REDUCTION_TREE_METHOD_GENERATED - #undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED +#undef FAST_REDUCTION_TREE_METHOD_GENERATED +#undef FAST_REDUCTION_ATOMIC_METHOD_GENERATED - return ( retval ); + return (retval); } // this function is for testing set/get/determine reduce method -kmp_int32 -__kmp_get_reduce_method( void ) { - return ( ( __kmp_entry_thread()->th.th_local.packed_reduction_method ) >> 8 ); +kmp_int32 __kmp_get_reduce_method(void) { + return ((__kmp_entry_thread()->th.th_local.packed_reduction_method) >> 8); } - -/* ------------------------------------------------------------------------ */ diff --git a/openmp/runtime/src/kmp_safe_c_api.h b/openmp/runtime/src/kmp_safe_c_api.h index 1feaf8c..992f826 100644 --- a/openmp/runtime/src/kmp_safe_c_api.h +++ b/openmp/runtime/src/kmp_safe_c_api.h @@ -12,50 +12,49 @@ #ifndef KMP_SAFE_C_API_H #define KMP_SAFE_C_API_H -// // Replacement for banned C API -// // Not every unsafe call listed here is handled now, but keeping everything // in one place should be handy for future maintenance. #if KMP_OS_WINDOWS -# define RSIZE_MAX_STR ( 4UL << 10 ) // 4KB +#define RSIZE_MAX_STR (4UL << 10) // 4KB // _malloca was suggested, but it is not a drop-in replacement for _alloca -# define KMP_ALLOCA _alloca +#define KMP_ALLOCA _alloca -# define KMP_MEMCPY_S memcpy_s -# define KMP_SNPRINTF sprintf_s -# define KMP_SSCANF sscanf_s -# define KMP_STRCPY_S strcpy_s -# define KMP_STRNCPY_S strncpy_s +#define KMP_MEMCPY_S memcpy_s +#define KMP_SNPRINTF sprintf_s +#define KMP_SSCANF sscanf_s +#define KMP_STRCPY_S strcpy_s +#define KMP_STRNCPY_S strncpy_s // Use this only when buffer size is unknown -# define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt) +#define KMP_MEMCPY(dst, src, cnt) memcpy_s(dst, cnt, src, cnt) -# define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR) +#define KMP_STRLEN(str) strnlen_s(str, RSIZE_MAX_STR) // Use this only when buffer size is unknown -# define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt) +#define KMP_STRNCPY(dst, src, cnt) strncpy_s(dst, cnt, src, cnt) // _TRUNCATE insures buffer size > max string to print. -# define KMP_VSNPRINTF(dst, cnt, fmt, arg) vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg) +#define KMP_VSNPRINTF(dst, cnt, fmt, arg) \ + vsnprintf_s(dst, cnt, _TRUNCATE, fmt, arg) #else // KMP_OS_WINDOWS // For now, these macros use the existing API. 
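As a usage note on the wrappers above: call sites are written once against the KMP_* names and this header decides whether the bounds-checked *_s variant or the plain C function is used. A minimal sketch of such a call site follows, assuming the runtime's usual platform macros are already configured by the build; the helper name is hypothetical.

#include <cstring>
#include "kmp_safe_c_api.h"

// Copies src into a fixed-size buffer through the wrapper:
//   Windows:   strncpy_s(dst, dst_size, src, dst_size - 1)
//   elsewhere: strncpy(dst, src, dst_size - 1)
static void copy_name(char *dst, size_t dst_size, const char *src) {
  KMP_STRNCPY_S(dst, dst_size, src, dst_size - 1);
  dst[dst_size - 1] = '\0'; // the plain strncpy fallback does not guarantee termination
}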
-# define KMP_ALLOCA alloca -# define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt) -# define KMP_SNPRINTF snprintf -# define KMP_SSCANF sscanf -# define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src) -# define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt) -# define KMP_VSNPRINTF vsnprintf -# define KMP_STRNCPY strncpy -# define KMP_STRLEN strlen -# define KMP_MEMCPY memcpy +#define KMP_ALLOCA alloca +#define KMP_MEMCPY_S(dst, bsz, src, cnt) memcpy(dst, src, cnt) +#define KMP_SNPRINTF snprintf +#define KMP_SSCANF sscanf +#define KMP_STRCPY_S(dst, bsz, src) strcpy(dst, src) +#define KMP_STRNCPY_S(dst, bsz, src, cnt) strncpy(dst, src, cnt) +#define KMP_VSNPRINTF vsnprintf +#define KMP_STRNCPY strncpy +#define KMP_STRLEN strlen +#define KMP_MEMCPY memcpy #endif // KMP_OS_WINDOWS diff --git a/openmp/runtime/src/kmp_sched.cpp b/openmp/runtime/src/kmp_sched.cpp index 7ebbb62..c5d591c 100644 --- a/openmp/runtime/src/kmp_sched.cpp +++ b/openmp/runtime/src/kmp_sched.cpp @@ -13,21 +13,18 @@ //===----------------------------------------------------------------------===// -/* - * Static scheduling initialization. - * - * NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however - * it may change values between parallel regions. __kmp_max_nth - * is the largest value __kmp_nth may take, 1 is the smallest. - * - */ +/* Static scheduling initialization. + + NOTE: team->t.t_nproc is a constant inside of any dispatch loop, however + it may change values between parallel regions. __kmp_max_nth + is the largest value __kmp_nth may take, 1 is the smallest. */ #include "kmp.h" -#include "kmp_i18n.h" -#include "kmp_str.h" #include "kmp_error.h" -#include "kmp_stats.h" +#include "kmp_i18n.h" #include "kmp_itt.h" +#include "kmp_stats.h" +#include "kmp_str.h" #if OMPT_SUPPORT #include "ompt-specific.h" @@ -36,699 +33,694 @@ #ifdef KMP_DEBUG //------------------------------------------------------------------------- // template for debug prints specification ( d, u, lld, llu ) - char const * traits_t< int >::spec = "d"; - char const * traits_t< unsigned int >::spec = "u"; - char const * traits_t< long long >::spec = "lld"; - char const * traits_t< unsigned long long >::spec = "llu"; +char const *traits_t::spec = "d"; +char const *traits_t::spec = "u"; +char const *traits_t::spec = "lld"; +char const *traits_t::spec = "llu"; //------------------------------------------------------------------------- #endif -template< typename T > -static void -__kmp_for_static_init( - ident_t *loc, - kmp_int32 global_tid, - kmp_int32 schedtype, - kmp_int32 *plastiter, - T *plower, - T *pupper, - typename traits_t< T >::signed_t *pstride, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - KMP_COUNT_BLOCK(OMP_FOR_static); - KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling); - - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - /* this all has to be changed back to TID and such.. 
*/ - register kmp_int32 gtid = global_tid; - register kmp_uint32 tid; - register kmp_uint32 nth; - register UT trip_count; - register kmp_team_t *team; - register kmp_info_t *th = __kmp_threads[ gtid ]; +template +static void __kmp_for_static_init(ident_t *loc, kmp_int32 global_tid, + kmp_int32 schedtype, kmp_int32 *plastiter, + T *plower, T *pupper, + typename traits_t::signed_t *pstride, + typename traits_t::signed_t incr, + typename traits_t::signed_t chunk) { + KMP_COUNT_BLOCK(OMP_FOR_static); + KMP_TIME_PARTITIONED_BLOCK(FOR_static_scheduling); + + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + /* this all has to be changed back to TID and such.. */ + register kmp_int32 gtid = global_tid; + register kmp_uint32 tid; + register kmp_uint32 nth; + register UT trip_count; + register kmp_team_t *team; + register kmp_info_t *th = __kmp_threads[gtid]; #if OMPT_SUPPORT && OMPT_TRACE - ompt_team_info_t *team_info = NULL; - ompt_task_info_t *task_info = NULL; + ompt_team_info_t *team_info = NULL; + ompt_task_info_t *task_info = NULL; + + if (ompt_enabled) { + // Only fully initialize variables needed by OMPT if OMPT is enabled. + team_info = __ompt_get_teaminfo(0, NULL); + task_info = __ompt_get_taskinfo(0); + } +#endif - if (ompt_enabled) { - // Only fully initialize variables needed by OMPT if OMPT is enabled. - team_info = __ompt_get_teaminfo(0, NULL); - task_info = __ompt_get_taskinfo(0); - } + KMP_DEBUG_ASSERT(plastiter && plower && pupper && pstride); + KE_TRACE(10, ("__kmpc_for_static_init called (%d)\n", global_tid)); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," + " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec, traits_t::spec); + KD_TRACE(100, (buff, global_tid, schedtype, *plastiter, *plower, *pupper, + *pstride, incr, chunk)); + __kmp_str_free(&buff); + } #endif - KMP_DEBUG_ASSERT( plastiter && plower && pupper && pstride ); - KE_TRACE( 10, ("__kmpc_for_static_init called (%d)\n", global_tid)); - #ifdef KMP_DEBUG + if (__kmp_env_consistency_check) { + __kmp_push_workshare(global_tid, ct_pdo, loc); + if (incr == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, + loc); + } + } + /* special handling for zero-trip loops */ + if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { + if (plastiter != NULL) + *plastiter = FALSE; + /* leave pupper and plower set to entire iteration space */ + *pstride = incr; /* value should never be used */ +// *plower = *pupper - incr; +// let compiler bypass the illegal loop (like for(i=1;i<10;i--)) +// THE LINE COMMENTED ABOVE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE +// ON A ZERO-TRIP LOOP (lower=1, upper=0,stride=1) - JPH June 23, 2009. 
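A worked instance of the zero-trip guard above, with assumed values lower=1, upper=0, incr=1 (the same shape as the historical h_tests_1.f case mentioned in the comment):

#include <cstdio>

int main() {
  int lower = 1, upper = 0, incr = 1;
  // Same test as in __kmp_for_static_init: a positive increment with upper < lower
  // means there is nothing to distribute, so the routine returns without touching
  // the bounds and the caller's own loop test fails on its first evaluation.
  bool zero_trip = (incr > 0) ? (upper < lower) : (lower < upper);
  std::printf("zero_trip=%d\n", zero_trip); // prints zero_trip=1
}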
+#ifdef KMP_DEBUG { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: T#%%d sched=%%d liter=%%d iter=(%%%s," \ - " %%%s, %%%s) incr=%%%s chunk=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, global_tid, schedtype, *plastiter, - *plower, *pupper, *pstride, incr, chunk ) ); - __kmp_str_free( &buff ); + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmpc_for_static_init:(ZERO TRIP) liter=%%d " + "lower=%%%s upper=%%%s stride = %%%s " + "signed?<%s>, loc = %%s\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, + (buff, *plastiter, *plower, *pupper, *pstride, loc->psource)); + __kmp_str_free(&buff); } - #endif +#endif + KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); - if ( __kmp_env_consistency_check ) { - __kmp_push_workshare( global_tid, ct_pdo, loc ); - if ( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } +#if OMPT_SUPPORT && OMPT_TRACE + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); } - /* special handling for zero-trip loops */ - if ( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - if( plastiter != NULL ) - *plastiter = FALSE; - /* leave pupper and plower set to entire iteration space */ - *pstride = incr; /* value should never be used */ - // *plower = *pupper - incr; // let compiler bypass the illegal loop (like for(i=1;i<10;i--)) THIS LINE CAUSED shape2F/h_tests_1.f TO HAVE A FAILURE ON A ZERO-TRIP LOOP (lower=1,\ - upper=0,stride=1) - JPH June 23, 2009. - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init:(ZERO TRIP) liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>, loc = %%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride, loc->psource ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); +#endif + KMP_COUNT_VALUE(FOR_static_iterations, 0); + return; + } -#if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } +#if OMP_40_ENABLED + // Although there are schedule enumerations above kmp_ord_upper which are not + // schedules for "distribute", the only ones which are useful are dynamic, so + // cannot be seen here, since this codepath is only executed for static + // schedules. 
+ if (schedtype > kmp_ord_upper) { + // we are in DISTRIBUTE construct + schedtype += kmp_sch_static - + kmp_distribute_static; // AC: convert to usual schedule type + tid = th->th.th_team->t.t_master_tid; + team = th->th.th_team->t.t_parent; + } else #endif - KMP_COUNT_VALUE (FOR_static_iterations, 0); - return; - } + { + tid = __kmp_tid_from_gtid(global_tid); + team = th->th.th_team; + } - #if OMP_40_ENABLED - // Although there are schedule enumerations above kmp_ord_upper which are not schedules for "distribute", - // the only ones which are useful are dynamic, so cannot be seen here, since this codepath is only executed - // for static schedules. - if ( schedtype > kmp_ord_upper ) { - // we are in DISTRIBUTE construct - schedtype += kmp_sch_static - kmp_distribute_static; // AC: convert to usual schedule type - tid = th->th.th_team->t.t_master_tid; - team = th->th.th_team->t.t_parent; - } else - #endif + /* determine if "for" loop is an active worksharing construct */ + if (team->t.t_serialized) { + /* serialized parallel, each thread executes whole iteration space */ + if (plastiter != NULL) + *plastiter = TRUE; + /* leave pupper and plower set to entire iteration space */ + *pstride = + (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); + +#ifdef KMP_DEBUG { - tid = __kmp_tid_from_gtid( global_tid ); - team = th->th.th_team; + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " + "lower=%%%s upper=%%%s stride = %%%s\n", + traits_t::spec, traits_t::spec, + traits_t::spec); + KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); + __kmp_str_free(&buff); } - - /* determine if "for" loop is an active worksharing construct */ - if ( team -> t.t_serialized ) { - /* serialized parallel, each thread executes whole iteration space */ - if( plastiter != NULL ) - *plastiter = TRUE; - /* leave pupper and plower set to entire iteration space */ - *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); - - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); +#endif + KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } #endif - return; + return; + } + nth = team->t.t_nproc; + if (nth == 1) { + if (plastiter != NULL) + *plastiter = TRUE; + *pstride = + (incr > 0) ? 
(*pupper - *plower + 1) : (-(*plower - *pupper + 1)); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmpc_for_static_init: (serial) liter=%%d " + "lower=%%%s upper=%%%s stride = %%%s\n", + traits_t::spec, traits_t::spec, + traits_t::spec); + KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); + __kmp_str_free(&buff); } - nth = team->t.t_nproc; - if ( nth == 1 ) { - if( plastiter != NULL ) - *plastiter = TRUE; - *pstride = (incr > 0) ? (*pupper - *plower + 1) : (-(*plower - *pupper + 1)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: (serial) liter=%%d lower=%%%s upper=%%%s stride = %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); +#endif + KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, - team_info->microtask); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } #endif - return; + return; + } + + /* compute trip count */ + if (incr == 1) { + trip_count = *pupper - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupper + 1; + } else if (incr > 0) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(*pupper - *plower) / incr + 1; + } else { + trip_count = (UT)(*plower - *pupper) / (-incr) + 1; + } + + if (__kmp_env_consistency_check) { + /* tripcount overflow? */ + if (trip_count == 0 && *pupper != *plower) { + __kmp_error_construct(kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, + loc); } - - /* compute trip count */ - if ( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if (incr == -1) { - trip_count = *plower - *pupper + 1; - } else if ( incr > 0 ) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; + } + KMP_COUNT_VALUE(FOR_static_iterations, trip_count); + + /* compute remaining parameters */ + switch (schedtype) { + case kmp_sch_static: { + if (trip_count < nth) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || + __kmp_static == + kmp_sch_static_balanced); // Unknown static scheduling type. + if (tid < trip_count) { + *pupper = *plower = *plower + tid * incr; + } else { + *plower = *pupper + incr; + } + if (plastiter != NULL) + *plastiter = (tid == trip_count - 1); } else { - trip_count = (UT)(*plower - *pupper) / (-incr) + 1; - } - - if ( __kmp_env_consistency_check ) { - /* tripcount overflow? */ - if ( trip_count == 0 && *pupper != *plower ) { - __kmp_error_construct( kmp_i18n_msg_CnsIterationRangeTooLarge, ct_pdo, loc ); + if (__kmp_static == kmp_sch_static_balanced) { + register UT small_chunk = trip_count / nth; + register UT extras = trip_count % nth; + *plower += incr * (tid * small_chunk + (tid < extras ? tid : extras)); + *pupper = *plower + small_chunk * incr - (tid < extras ? 
0 : incr); + if (plastiter != NULL) + *plastiter = (tid == nth - 1); + } else { + register T big_chunk_inc_count = + (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; + register T old_upper = *pupper; + + KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); + // Unknown static scheduling type. + + *plower += tid * big_chunk_inc_count; + *pupper = *plower + big_chunk_inc_count - incr; + if (incr > 0) { + if (*pupper < *plower) + *pupper = traits_t::max_value; + if (plastiter != NULL) + *plastiter = *plower <= old_upper && *pupper > old_upper - incr; + if (*pupper > old_upper) + *pupper = old_upper; // tracker C73258 + } else { + if (*pupper > *plower) + *pupper = traits_t::min_value; + if (plastiter != NULL) + *plastiter = *plower >= old_upper && *pupper < old_upper - incr; + if (*pupper < old_upper) + *pupper = old_upper; // tracker C73258 } + } } - KMP_COUNT_VALUE (FOR_static_iterations, trip_count); - - /* compute remaining parameters */ - switch ( schedtype ) { - case kmp_sch_static: - { - if ( trip_count < nth ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - if ( tid < trip_count ) { - *pupper = *plower = *plower + tid * incr; - } else { - *plower = *pupper + incr; - } - if( plastiter != NULL ) - *plastiter = ( tid == trip_count - 1 ); - } else { - if ( __kmp_static == kmp_sch_static_balanced ) { - register UT small_chunk = trip_count / nth; - register UT extras = trip_count % nth; - *plower += incr * ( tid * small_chunk + ( tid < extras ? tid : extras ) ); - *pupper = *plower + small_chunk * incr - ( tid < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( tid == nth - 1 ); - } else { - register T big_chunk_inc_count = ( trip_count/nth + - ( ( trip_count % nth ) ? 1 : 0) ) * incr; - register T old_upper = *pupper; - - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. 
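The balanced/greedy arithmetic above is easiest to see with numbers. This standalone sketch partitions an assumed iteration space 0..9 (trip_count=10, incr=1) across nth=4 threads using both formulas; the values are illustrative only.

#include <algorithm>
#include <cstdio>

int main() {
  const int trip_count = 10, nth = 4, lower = 0, upper = 9, incr = 1;
  for (int tid = 0; tid < nth; ++tid) {
    // kmp_sch_static_balanced: the first 'extras' threads take one extra iteration
    int small_chunk = trip_count / nth, extras = trip_count % nth;
    int blo = lower + incr * (tid * small_chunk + std::min(tid, extras));
    int bup = blo + small_chunk * incr - (tid < extras ? 0 : incr);
    // kmp_sch_static_greedy: every thread takes ceil(trip_count/nth); the tail clamps
    int big_chunk = (trip_count / nth + (trip_count % nth ? 1 : 0)) * incr;
    int glo = lower + tid * big_chunk;
    int gup = std::min(glo + big_chunk - incr, upper);
    std::printf("tid %d: balanced [%d,%d]  greedy [%d,%d]\n", tid, blo, bup, glo, gup);
  }
}

Note how the balanced split keeps every thread within one iteration of the others, while the greedy split lets the last threads come up short (or empty) once the clamped upper bound is applied.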
- - *plower += tid * big_chunk_inc_count; - *pupper = *plower + big_chunk_inc_count - incr; - if ( incr > 0 ) { - if( *pupper < *plower ) - *pupper = traits_t::max_value; - if( plastiter != NULL ) - *plastiter = *plower <= old_upper && *pupper > old_upper - incr; - if ( *pupper > old_upper ) *pupper = old_upper; // tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = traits_t::min_value; - if( plastiter != NULL ) - *plastiter = *plower >= old_upper && *pupper < old_upper - incr; - if ( *pupper < old_upper ) *pupper = old_upper; // tracker C73258 - } - } - } - *pstride = trip_count; - break; - } - case kmp_sch_static_chunked: - { - register ST span; - if ( chunk < 1 ) { - chunk = 1; - } - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if( plastiter != NULL ) - *plastiter = (tid == ((trip_count - 1)/( UT )chunk) % nth); - break; - } + *pstride = trip_count; + break; + } + case kmp_sch_static_chunked: { + register ST span; + if (chunk < 1) { + chunk = 1; + } + span = chunk * incr; + *pstride = span * nth; + *plower = *plower + (span * tid); + *pupper = *plower + span - incr; + if (plastiter != NULL) + *plastiter = (tid == ((trip_count - 1) / (UT)chunk) % nth); + break; + } #if OMP_45_ENABLED - case kmp_sch_static_balanced_chunked: - { - register T old_upper = *pupper; - // round up to make sure the chunk is enough to cover all iterations - register UT span = (trip_count+nth-1) / nth; - - // perform chunk adjustment - chunk = (span + chunk - 1) & ~(chunk-1); - - span = chunk * incr; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if ( incr > 0 ) { - if ( *pupper > old_upper ) *pupper = old_upper; - } else - if ( *pupper < old_upper ) *pupper = old_upper; - - if( plastiter != NULL ) - *plastiter = ( tid == ((trip_count - 1)/( UT )chunk) ); - break; - } + case kmp_sch_static_balanced_chunked: { + register T old_upper = *pupper; + // round up to make sure the chunk is enough to cover all iterations + register UT span = (trip_count + nth - 1) / nth; + + // perform chunk adjustment + chunk = (span + chunk - 1) & ~(chunk - 1); + + span = chunk * incr; + *plower = *plower + (span * tid); + *pupper = *plower + span - incr; + if (incr > 0) { + if (*pupper > old_upper) + *pupper = old_upper; + } else if (*pupper < old_upper) + *pupper = old_upper; + + if (plastiter != NULL) + *plastiter = (tid == ((trip_count - 1) / (UT)chunk)); + break; + } #endif - default: - KMP_ASSERT2( 0, "__kmpc_for_static_init: unknown scheduling type" ); - break; - } + default: + KMP_ASSERT2(0, "__kmpc_for_static_init: unknown scheduling type"); + break; + } #if USE_ITT_BUILD - // Report loop metadata - if ( KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && __kmp_forkjoin_frames_mode == 3 && + // Report loop metadata + if (KMP_MASTER_TID(tid) && __itt_metadata_add_ptr && + __kmp_forkjoin_frames_mode == 3 && #if OMP_40_ENABLED - th->th.th_teams_microtask == NULL && + th->th.th_teams_microtask == NULL && #endif - team->t.t_active_level == 1 ) - { - kmp_uint64 cur_chunk = chunk; - // Calculate chunk in case it was not specified; it is specified for kmp_sch_static_chunked - if ( schedtype == kmp_sch_static ) { - cur_chunk = trip_count / nth + ( ( trip_count % nth ) ? 
1 : 0); - } - // 0 - "static" schedule - __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); + team->t.t_active_level == 1) { + kmp_uint64 cur_chunk = chunk; + // Calculate chunk in case it was not specified; it is specified for + // kmp_sch_static_chunked + if (schedtype == kmp_sch_static) { + cur_chunk = trip_count / nth + ((trip_count % nth) ? 1 : 0); } + // 0 - "static" schedule + __kmp_itt_metadata_loop(loc, 0, trip_count, cur_chunk); + } #endif - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_for_static_init: liter=%%d lower=%%%s upper=%%%s stride = %%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pstride ) ); - __kmp_str_free( &buff ); - } - #endif - KE_TRACE( 10, ("__kmpc_for_static_init: T#%d return\n", global_tid ) ); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmpc_for_static_init: liter=%%d lower=%%%s " + "upper=%%%s stride = %%%s signed?<%s>\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, (buff, *plastiter, *plower, *pupper, *pstride)); + __kmp_str_free(&buff); + } +#endif + KE_TRACE(10, ("__kmpc_for_static_init: T#%d return\n", global_tid)); #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { - ompt_callbacks.ompt_callback(ompt_event_loop_begin)( - team_info->parallel_id, task_info->task_id, team_info->microtask); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_loop_begin)) { + ompt_callbacks.ompt_callback(ompt_event_loop_begin)( + team_info->parallel_id, task_info->task_id, team_info->microtask); + } #endif - return; + return; } -template< typename T > -static void -__kmp_dist_for_static_init( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 schedule, - kmp_int32 *plastiter, - T *plower, - T *pupper, - T *pupperDist, - typename traits_t< T >::signed_t *pstride, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - KMP_COUNT_BLOCK(OMP_DISTRIBUTE); - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - register kmp_uint32 tid; - register kmp_uint32 nth; - register kmp_uint32 team_id; - register kmp_uint32 nteams; - register UT trip_count; - register kmp_team_t *team; - kmp_info_t * th; - - KMP_DEBUG_ASSERT( plastiter && plower && pupper && pupperDist && pstride ); - KE_TRACE( 10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, schedule, *plastiter, - *plower, *pupper, incr, chunk ) ); - __kmp_str_free( &buff ); - } - #endif +template +static void __kmp_dist_for_static_init(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedule, kmp_int32 *plastiter, + T *plower, T *pupper, T *pupperDist, + typename traits_t::signed_t *pstride, + typename traits_t::signed_t incr, + typename traits_t::signed_t chunk) { + KMP_COUNT_BLOCK(OMP_DISTRIBUTE); + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + 
register kmp_uint32 tid; + register kmp_uint32 nth; + register kmp_uint32 team_id; + register kmp_uint32 nteams; + register UT trip_count; + register kmp_team_t *team; + kmp_info_t *th; + + KMP_DEBUG_ASSERT(plastiter && plower && pupper && pupperDist && pstride); + KE_TRACE(10, ("__kmpc_dist_for_static_init called (%d)\n", gtid)); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_dist_for_static_init: T#%%d schedLoop=%%d liter=%%d " + "iter=(%%%s, %%%s, %%%s) chunk=%%%s signed?<%s>\n", + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, + (buff, gtid, schedule, *plastiter, *plower, *pupper, incr, chunk)); + __kmp_str_free(&buff); + } +#endif - if( __kmp_env_consistency_check ) { - __kmp_push_workshare( gtid, ct_pdo, loc ); - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); + if (__kmp_env_consistency_check) { + __kmp_push_workshare(gtid, ct_pdo, loc); + if (incr == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, + loc); + } + if (incr > 0 ? (*pupper < *plower) : (*plower < *pupper)) { + // The loop is illegal. + // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); + } + } + tid = __kmp_tid_from_gtid(gtid); + th = __kmp_threads[gtid]; + nth = th->th.th_team_nproc; + team = th->th.th_team; +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; +#endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute global trip count + if (incr == 1) { + trip_count = *pupper - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupper + 1; + } else if (incr > 0) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(*pupper - *plower) / incr + 1; + } else { + trip_count = (UT)(*plower - *pupper) / (-incr) + 1; + } + + *pstride = *pupper - *plower; // just in case (can be unused) + if (trip_count <= nteams) { + KMP_DEBUG_ASSERT( + __kmp_static == kmp_sch_static_greedy || + __kmp_static == + kmp_sch_static_balanced); // Unknown static scheduling type. + // only masters of some teams get single iteration, other threads get + // nothing + if (team_id < trip_count && tid == 0) { + *pupper = *pupperDist = *plower = *plower + team_id * incr; + } else { + *pupperDist = *pupper; + *plower = *pupper + incr; // compiler should skip loop body + } + if (plastiter != NULL) + *plastiter = (tid == 0 && team_id == trip_count - 1); + } else { + // Get the team's chunk first (each team gets at most one chunk) + if (__kmp_static == kmp_sch_static_balanced) { + register UT chunkD = trip_count / nteams; + register UT extras = trip_count % nteams; + *plower += + incr * (team_id * chunkD + (team_id < extras ? team_id : extras)); + *pupperDist = *plower + chunkD * incr - (team_id < extras ? 
0 : incr); + if (plastiter != NULL) + *plastiter = (team_id == nteams - 1); + } else { + register T chunk_inc_count = + (trip_count / nteams + ((trip_count % nteams) ? 1 : 0)) * incr; + register T upper = *pupper; + KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); + // Unknown static scheduling type. + *plower += team_id * chunk_inc_count; + *pupperDist = *plower + chunk_inc_count - incr; + // Check/correct bounds if needed + if (incr > 0) { + if (*pupperDist < *plower) + *pupperDist = traits_t::max_value; + if (plastiter != NULL) + *plastiter = *plower <= upper && *pupperDist > upper - incr; + if (*pupperDist > upper) + *pupperDist = upper; // tracker C73258 + if (*plower > *pupperDist) { + *pupper = *pupperDist; // no iterations available for the team + goto end; } - if( incr > 0 ? (*pupper < *plower) : (*plower < *pupper) ) { - // The loop is illegal. - // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); + } else { + if (*pupperDist > *plower) + *pupperDist = traits_t::min_value; + if (plastiter != NULL) + *plastiter = *plower >= upper && *pupperDist < upper - incr; + if (*pupperDist < upper) + *pupperDist = upper; // tracker C73258 + if (*plower < *pupperDist) { + *pupper = *pupperDist; // no iterations available for the team + goto end; } + } } - tid = __kmp_tid_from_gtid( gtid ); - th = __kmp_threads[gtid]; - nth = th->th.th_team_nproc; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute global trip count - if( incr == 1 ) { - trip_count = *pupper - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupper + 1; - } else if ( incr > 0 ) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupper - *plower) / incr + 1; + // Get the parallel loop chunk now (for thread) + // compute trip count for team's chunk + if (incr == 1) { + trip_count = *pupperDist - *plower + 1; + } else if (incr == -1) { + trip_count = *plower - *pupperDist + 1; + } else if (incr > 1) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(*pupperDist - *plower) / incr + 1; } else { - trip_count = (UT)(*plower - *pupper) / (-incr) + 1; + trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; } - - *pstride = *pupper - *plower; // just in case (can be unused) - if( trip_count <= nteams ) { + KMP_DEBUG_ASSERT(trip_count); + switch (schedule) { + case kmp_sch_static: { + if (trip_count <= nth) { KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - // only masters of some teams get single iteration, other threads get nothing - if( team_id < trip_count && tid == 0 ) { - *pupper = *pupperDist = *plower = *plower + team_id * incr; + __kmp_static == kmp_sch_static_greedy || + __kmp_static == + kmp_sch_static_balanced); // Unknown static scheduling type. 
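Because this routine splits twice, first across teams and then across the threads of each team, a numeric sketch helps. The example below uses an assumed space 0..9 with incr=1, nteams=2 and nth=2 threads per team, applying the balanced formula at both levels; all values are illustrative.

#include <algorithm>
#include <cstdio>

int main() {
  const int lower0 = 0, trip_count = 10, nteams = 2, nth = 2;
  for (int team_id = 0; team_id < nteams; ++team_id) {
    // team-level chunk (the kmp_sch_static_balanced branch, incr == 1)
    int chunkD = trip_count / nteams, extrasT = trip_count % nteams;
    int tlo = lower0 + team_id * chunkD + std::min(team_id, extrasT);
    int tup = tlo + chunkD - (team_id < extrasT ? 0 : 1);
    int team_trip = tup - tlo + 1;
    for (int tid = 0; tid < nth; ++tid) {
      // thread-level chunk inside the team's sub-range
      int chunkL = team_trip / nth, extras = team_trip % nth;
      int lo = tlo + tid * chunkL + std::min(tid, extras);
      int up = lo + chunkL - (tid < extras ? 0 : 1);
      std::printf("team %d thread %d: [%d,%d]\n", team_id, tid, lo, up);
    }
  }
}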
+ if (tid < trip_count) + *pupper = *plower = *plower + tid * incr; + else + *plower = *pupper + incr; // no iterations available + if (plastiter != NULL) + if (*plastiter != 0 && !(tid == trip_count - 1)) + *plastiter = 0; + } else { + if (__kmp_static == kmp_sch_static_balanced) { + register UT chunkL = trip_count / nth; + register UT extras = trip_count % nth; + *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); + *pupper = *plower + chunkL * incr - (tid < extras ? 0 : incr); + if (plastiter != NULL) + if (*plastiter != 0 && !(tid == nth - 1)) + *plastiter = 0; } else { - *pupperDist = *pupper; - *plower = *pupper + incr; // compiler should skip loop body - } - if( plastiter != NULL ) - *plastiter = ( tid == 0 && team_id == trip_count - 1 ); - } else { - // Get the team's chunk first (each team gets at most one chunk) - if( __kmp_static == kmp_sch_static_balanced ) { - register UT chunkD = trip_count / nteams; - register UT extras = trip_count % nteams; - *plower += incr * ( team_id * chunkD + ( team_id < extras ? team_id : extras ) ); - *pupperDist = *plower + chunkD * incr - ( team_id < extras ? 0 : incr ); - if( plastiter != NULL ) - *plastiter = ( team_id == nteams - 1 ); - } else { - register T chunk_inc_count = - ( trip_count / nteams + ( ( trip_count % nteams ) ? 1 : 0) ) * incr; - register T upper = *pupper; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. - *plower += team_id * chunk_inc_count; - *pupperDist = *plower + chunk_inc_count - incr; - // Check/correct bounds if needed - if( incr > 0 ) { - if( *pupperDist < *plower ) - *pupperDist = traits_t::max_value; - if( plastiter != NULL ) - *plastiter = *plower <= upper && *pupperDist > upper - incr; - if( *pupperDist > upper ) - *pupperDist = upper; // tracker C73258 - if( *plower > *pupperDist ) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } else { - if( *pupperDist > *plower ) - *pupperDist = traits_t::min_value; - if( plastiter != NULL ) - *plastiter = *plower >= upper && *pupperDist < upper - incr; - if( *pupperDist < upper ) - *pupperDist = upper; // tracker C73258 - if( *plower < *pupperDist ) { - *pupper = *pupperDist; // no iterations available for the team - goto end; - } - } - } - // Get the parallel loop chunk now (for thread) - // compute trip count for team's chunk - if( incr == 1 ) { - trip_count = *pupperDist - *plower + 1; - } else if(incr == -1) { - trip_count = *plower - *pupperDist + 1; - } else if ( incr > 1 ) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(*pupperDist - *plower) / incr + 1; - } else { - trip_count = (UT)(*plower - *pupperDist) / (-incr) + 1; - } - KMP_DEBUG_ASSERT( trip_count ); - switch( schedule ) { - case kmp_sch_static: - { - if( trip_count <= nth ) { - KMP_DEBUG_ASSERT( - __kmp_static == kmp_sch_static_greedy || \ - __kmp_static == kmp_sch_static_balanced - ); // Unknown static scheduling type. - if( tid < trip_count ) - *pupper = *plower = *plower + tid * incr; - else - *plower = *pupper + incr; // no iterations available - if( plastiter != NULL ) - if( *plastiter != 0 && !( tid == trip_count - 1 ) ) - *plastiter = 0; - } else { - if( __kmp_static == kmp_sch_static_balanced ) { - register UT chunkL = trip_count / nth; - register UT extras = trip_count % nth; - *plower += incr * (tid * chunkL + (tid < extras ? tid : extras)); - *pupper = *plower + chunkL * incr - (tid < extras ? 
0 : incr); - if( plastiter != NULL ) - if( *plastiter != 0 && !( tid == nth - 1 ) ) - *plastiter = 0; - } else { - register T chunk_inc_count = - ( trip_count / nth + ( ( trip_count % nth ) ? 1 : 0) ) * incr; - register T upper = *pupperDist; - KMP_DEBUG_ASSERT( __kmp_static == kmp_sch_static_greedy ); - // Unknown static scheduling type. - *plower += tid * chunk_inc_count; - *pupper = *plower + chunk_inc_count - incr; - if( incr > 0 ) { - if( *pupper < *plower ) - *pupper = traits_t::max_value; - if( plastiter != NULL ) - if( *plastiter != 0 && !(*plower <= upper && *pupper > upper - incr) ) - *plastiter = 0; - if( *pupper > upper ) - *pupper = upper;//tracker C73258 - } else { - if( *pupper > *plower ) - *pupper = traits_t::min_value; - if( plastiter != NULL ) - if( *plastiter != 0 && !(*plower >= upper && *pupper < upper - incr) ) - *plastiter = 0; - if( *pupper < upper ) - *pupper = upper;//tracker C73258 - } - } - } - break; - } - case kmp_sch_static_chunked: - { - register ST span; - if( chunk < 1 ) - chunk = 1; - span = chunk * incr; - *pstride = span * nth; - *plower = *plower + (span * tid); - *pupper = *plower + span - incr; - if( plastiter != NULL ) - if( *plastiter != 0 && !(tid == ((trip_count - 1) / ( UT )chunk) % nth) ) - *plastiter = 0; - break; - } - default: - KMP_ASSERT2( 0, "__kmpc_dist_for_static_init: unknown loop scheduling type" ); - break; + register T chunk_inc_count = + (trip_count / nth + ((trip_count % nth) ? 1 : 0)) * incr; + register T upper = *pupperDist; + KMP_DEBUG_ASSERT(__kmp_static == kmp_sch_static_greedy); + // Unknown static scheduling type. + *plower += tid * chunk_inc_count; + *pupper = *plower + chunk_inc_count - incr; + if (incr > 0) { + if (*pupper < *plower) + *pupper = traits_t::max_value; + if (plastiter != NULL) + if (*plastiter != 0 && + !(*plower <= upper && *pupper > upper - incr)) + *plastiter = 0; + if (*pupper > upper) + *pupper = upper; // tracker C73258 + } else { + if (*pupper > *plower) + *pupper = traits_t::min_value; + if (plastiter != NULL) + if (*plastiter != 0 && + !(*plower >= upper && *pupper < upper - incr)) + *plastiter = 0; + if (*pupper < upper) + *pupper = upper; // tracker C73258 + } } + } + break; } - end:; - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( - "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s "\ - "stride=%%%s signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< T >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, *plastiter, *plower, *pupper, *pupperDist, *pstride ) ); - __kmp_str_free( &buff ); + case kmp_sch_static_chunked: { + register ST span; + if (chunk < 1) + chunk = 1; + span = chunk * incr; + *pstride = span * nth; + *plower = *plower + (span * tid); + *pupper = *plower + span - incr; + if (plastiter != NULL) + if (*plastiter != 0 && !(tid == ((trip_count - 1) / (UT)chunk) % nth)) + *plastiter = 0; + break; } - #endif - KE_TRACE( 10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid ) ); - return; + default: + KMP_ASSERT2(0, + "__kmpc_dist_for_static_init: unknown loop scheduling type"); + break; + } + } +end:; +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format( + "__kmpc_dist_for_static_init: last=%%d lo=%%%s up=%%%s upDist=%%%s " + "stride=%%%s signed?<%s>\n", + traits_t::spec, traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, (buff, *plastiter, 
*plower, *pupper, *pupperDist, *pstride)); + __kmp_str_free(&buff); + } +#endif + KE_TRACE(10, ("__kmpc_dist_for_static_init: T#%d return\n", gtid)); + return; } -template< typename T > -static void -__kmp_team_static_init( - ident_t *loc, - kmp_int32 gtid, - kmp_int32 *p_last, - T *p_lb, - T *p_ub, - typename traits_t< T >::signed_t *p_st, - typename traits_t< T >::signed_t incr, - typename traits_t< T >::signed_t chunk -) { - // The routine returns the first chunk distributed to the team and - // stride for next chunks calculation. - // Last iteration flag set for the team that will execute - // the last iteration of the loop. - // The routine is called for dist_schedue(static,chunk) only. - typedef typename traits_t< T >::unsigned_t UT; - typedef typename traits_t< T >::signed_t ST; - kmp_uint32 team_id; - kmp_uint32 nteams; - UT trip_count; - T lower; - T upper; - ST span; - kmp_team_t *team; - kmp_info_t *th; - - KMP_DEBUG_ASSERT( p_last && p_lb && p_ub && p_st ); - KE_TRACE( 10, ("__kmp_team_static_init called (%d)\n", gtid)); - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmp_team_static_init enter: T#%%d liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec, traits_t< T >::spec ); - KD_TRACE(100, ( buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); - __kmp_str_free( &buff ); - } - #endif +template +static void __kmp_team_static_init(ident_t *loc, kmp_int32 gtid, + kmp_int32 *p_last, T *p_lb, T *p_ub, + typename traits_t::signed_t *p_st, + typename traits_t::signed_t incr, + typename traits_t::signed_t chunk) { + // The routine returns the first chunk distributed to the team and + // stride for next chunks calculation. + // Last iteration flag set for the team that will execute + // the last iteration of the loop. + // The routine is called for dist_schedue(static,chunk) only. + typedef typename traits_t::unsigned_t UT; + typedef typename traits_t::signed_t ST; + kmp_uint32 team_id; + kmp_uint32 nteams; + UT trip_count; + T lower; + T upper; + ST span; + kmp_team_t *team; + kmp_info_t *th; + + KMP_DEBUG_ASSERT(p_last && p_lb && p_ub && p_st); + KE_TRACE(10, ("__kmp_team_static_init called (%d)\n", gtid)); +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = __kmp_str_format("__kmp_team_static_init enter: T#%%d liter=%%d " + "iter=(%%%s, %%%s, %%%s) chunk %%%s; signed?<%s>\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec, + traits_t::spec); + KD_TRACE(100, (buff, gtid, *p_last, *p_lb, *p_ub, *p_st, chunk)); + __kmp_str_free(&buff); + } +#endif - lower = *p_lb; - upper = *p_ub; - if( __kmp_env_consistency_check ) { - if( incr == 0 ) { - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, loc ); - } - if( incr > 0 ? (upper < lower) : (lower < upper) ) { - // The loop is illegal. 
- // Some zero-trip loops maintained by compiler, e.g.: - // for(i=10;i<0;++i) // lower >= upper - run-time check - // for(i=0;i>10;--i) // lower <= upper - run-time check - // for(i=0;i>10;++i) // incr > 0 - compile-time check - // for(i=10;i<0;--i) // incr < 0 - compile-time check - // Compiler does not check the following illegal loops: - // for(i=0;i<10;i+=incr) // where incr<0 - // for(i=10;i>0;i-=incr) // where incr<0 - __kmp_error_construct( kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc ); - } + lower = *p_lb; + upper = *p_ub; + if (__kmp_env_consistency_check) { + if (incr == 0) { + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrZeroProhibited, ct_pdo, + loc); } - th = __kmp_threads[gtid]; - team = th->th.th_team; - #if OMP_40_ENABLED - KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct - nteams = th->th.th_teams_size.nteams; - #endif - team_id = team->t.t_master_tid; - KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); - - // compute trip count - if( incr == 1 ) { - trip_count = upper - lower + 1; - } else if(incr == -1) { - trip_count = lower - upper + 1; - } else if ( incr > 0 ) { - // upper-lower can exceed the limit of signed type - trip_count = (UT)(upper - lower) / incr + 1; - } else { - trip_count = (UT)(lower - upper) / (-incr) + 1; - } - if( chunk < 1 ) - chunk = 1; - span = chunk * incr; - *p_st = span * nteams; - *p_lb = lower + (span * team_id); - *p_ub = *p_lb + span - incr; - if ( p_last != NULL ) - *p_last = (team_id == ((trip_count - 1)/(UT)chunk) % nteams); - // Correct upper bound if needed - if( incr > 0 ) { - if( *p_ub < *p_lb ) // overflow? - *p_ub = traits_t::max_value; - if( *p_ub > upper ) - *p_ub = upper; // tracker C73258 - } else { // incr < 0 - if( *p_ub > *p_lb ) - *p_ub = traits_t::min_value; - if( *p_ub < upper ) - *p_ub = upper; // tracker C73258 - } - #ifdef KMP_DEBUG - { - const char * buff; - // create format specifiers before the debug output - buff = __kmp_str_format( "__kmp_team_static_init exit: T#%%d team%%u liter=%%d "\ - "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", - traits_t< T >::spec, traits_t< T >::spec, traits_t< ST >::spec, - traits_t< ST >::spec ); - KD_TRACE(100, ( buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk ) ); - __kmp_str_free( &buff ); + if (incr > 0 ? (upper < lower) : (lower < upper)) { + // The loop is illegal. 
+ // Some zero-trip loops maintained by compiler, e.g.: + // for(i=10;i<0;++i) // lower >= upper - run-time check + // for(i=0;i>10;--i) // lower <= upper - run-time check + // for(i=0;i>10;++i) // incr > 0 - compile-time check + // for(i=10;i<0;--i) // incr < 0 - compile-time check + // Compiler does not check the following illegal loops: + // for(i=0;i<10;i+=incr) // where incr<0 + // for(i=10;i>0;i-=incr) // where incr<0 + __kmp_error_construct(kmp_i18n_msg_CnsLoopIncrIllegal, ct_pdo, loc); } - #endif + } + th = __kmp_threads[gtid]; + team = th->th.th_team; +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT(th->th.th_teams_microtask); // we are in the teams construct + nteams = th->th.th_teams_size.nteams; +#endif + team_id = team->t.t_master_tid; + KMP_DEBUG_ASSERT(nteams == team->t.t_parent->t.t_nproc); + + // compute trip count + if (incr == 1) { + trip_count = upper - lower + 1; + } else if (incr == -1) { + trip_count = lower - upper + 1; + } else if (incr > 0) { + // upper-lower can exceed the limit of signed type + trip_count = (UT)(upper - lower) / incr + 1; + } else { + trip_count = (UT)(lower - upper) / (-incr) + 1; + } + if (chunk < 1) + chunk = 1; + span = chunk * incr; + *p_st = span * nteams; + *p_lb = lower + (span * team_id); + *p_ub = *p_lb + span - incr; + if (p_last != NULL) + *p_last = (team_id == ((trip_count - 1) / (UT)chunk) % nteams); + // Correct upper bound if needed + if (incr > 0) { + if (*p_ub < *p_lb) // overflow? + *p_ub = traits_t::max_value; + if (*p_ub > upper) + *p_ub = upper; // tracker C73258 + } else { // incr < 0 + if (*p_ub > *p_lb) + *p_ub = traits_t::min_value; + if (*p_ub < upper) + *p_ub = upper; // tracker C73258 + } +#ifdef KMP_DEBUG + { + const char *buff; + // create format specifiers before the debug output + buff = + __kmp_str_format("__kmp_team_static_init exit: T#%%d team%%u liter=%%d " + "iter=(%%%s, %%%s, %%%s) chunk %%%s\n", + traits_t::spec, traits_t::spec, + traits_t::spec, traits_t::spec); + KD_TRACE(100, (buff, gtid, team_id, *p_last, *p_lb, *p_ub, *p_st, chunk)); + __kmp_str_free(&buff); + } +#endif } -//-------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ extern "C" { - /*! @ingroup WORK_SHARING @param loc Source code location @@ -743,55 +735,54 @@ extern "C" { Each of the four functions here are identical apart from the argument types. -The functions compute the upper and lower bounds and stride to be used for the set of iterations -to be executed by the current thread from the statically scheduled loop that is described by the -initial values of the bounds, stride, increment and chunk size. +The functions compute the upper and lower bounds and stride to be used for the +set of iterations to be executed by the current thread from the statically +scheduled loop that is described by the initial values of the bounds, stride, +increment and chunk size. 
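A hedged sketch of the call pattern a compiler typically emits for a statically scheduled worksharing loop over 0..99. The entry point, kmp_sch_static and __kmpc_for_static_fini are declared in kmp.h; the wrapper name, gtid handling and loop body here are illustrative only.

#include "kmp.h"

static void lowered_omp_for(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 99, stride = 1, lastiter = 0;
  // Narrow the full range 0..99 down to this thread's sub-range.
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &lastiter,
                           &lower, &upper, &stride, /*incr=*/1, /*chunk=*/0);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // ... loop body runs only for this thread's iterations ...
  }
  __kmpc_for_static_fini(loc, gtid); // marks the end of the worksharing construct
}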
@{ */ -void -__kmpc_for_static_init_4( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int32 *plower, kmp_int32 *pupper, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_for_static_init< kmp_int32 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, + kmp_int32 *plastiter, kmp_int32 *plower, + kmp_int32 *pupper, kmp_int32 *pstride, + kmp_int32 incr, kmp_int32 chunk) { + __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, + pupper, pstride, incr, chunk); } /*! See @ref __kmpc_for_static_init_4 */ -void -__kmpc_for_static_init_4u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_for_static_init< kmp_uint32 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +void __kmpc_for_static_init_4u(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_uint32 *plower, kmp_uint32 *pupper, + kmp_int32 *pstride, kmp_int32 incr, + kmp_int32 chunk) { + __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, + pupper, pstride, incr, chunk); } /*! See @ref __kmpc_for_static_init_4 */ -void -__kmpc_for_static_init_8( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_int64 *plower, kmp_int64 *pupper, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_for_static_init< kmp_int64 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +void __kmpc_for_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, + kmp_int32 *plastiter, kmp_int64 *plower, + kmp_int64 *pupper, kmp_int64 *pstride, + kmp_int64 incr, kmp_int64 chunk) { + __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, + pupper, pstride, incr, chunk); } /*! See @ref __kmpc_for_static_init_4 */ -void -__kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_for_static_init< kmp_uint64 >( - loc, gtid, schedtype, plastiter, plower, pupper, pstride, incr, chunk ); +void __kmpc_for_static_init_8u(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedtype, kmp_int32 *plastiter, + kmp_uint64 *plower, kmp_uint64 *pupper, + kmp_int64 *pstride, kmp_int64 incr, + kmp_int64 chunk) { + __kmp_for_static_init(loc, gtid, schedtype, plastiter, plower, + pupper, pstride, incr, chunk); } /*! @} @@ -812,66 +803,62 @@ __kmpc_for_static_init_8u( ident_t *loc, kmp_int32 gtid, kmp_int32 schedtype, km Each of the four functions here are identical apart from the argument types. -The functions compute the upper and lower bounds and strides to be used for the set of iterations -to be executed by the current thread from the statically scheduled loop that is described by the -initial values of the bounds, strides, increment and chunks for parallel loop and distribute -constructs. +The functions compute the upper and lower bounds and strides to be used for the +set of iterations to be executed by the current thread from the statically +scheduled loop that is described by the initial values of the bounds, strides, +increment and chunks for parallel loop and distribute constructs. 
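A similar hedged sketch for the distribute variant: it is only meaningful inside a teams construct (the runtime asserts th_teams_microtask), and it returns one extra bound, the team's upper limit, alongside the calling thread's own sub-range. The wrapper name, bounds and body below are illustrative only.

#include "kmp.h"

static void lowered_dist_parallel_for(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 999, upperD = 999, stride = 1, lastiter = 0;
  __kmpc_dist_for_static_init_4(loc, gtid, kmp_sch_static, &lastiter,
                                &lower, &upper, &upperD, &stride,
                                /*incr=*/1, /*chunk=*/0);
  // upperD now bounds the chunk assigned to this team; lower..upper is the
  // calling thread's slice of that chunk.
  for (kmp_int32 i = lower; i <= upper; ++i) {
    // ... distributed-parallel loop body ...
  }
}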
@{ */ -void -__kmpc_dist_for_static_init_4( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int32 *plower, kmp_int32 *pupper, kmp_int32 *pupperD, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_dist_for_static_init< kmp_int32 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +void __kmpc_dist_for_static_init_4(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedule, kmp_int32 *plastiter, + kmp_int32 *plower, kmp_int32 *pupper, + kmp_int32 *pupperD, kmp_int32 *pstride, + kmp_int32 incr, kmp_int32 chunk) { + __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, + pupper, pupperD, pstride, incr, chunk); } /*! See @ref __kmpc_dist_for_static_init_4 */ -void -__kmpc_dist_for_static_init_4u( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint32 *plower, kmp_uint32 *pupper, kmp_uint32 *pupperD, - kmp_int32 *pstride, kmp_int32 incr, kmp_int32 chunk ) -{ - __kmp_dist_for_static_init< kmp_uint32 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +void __kmpc_dist_for_static_init_4u(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedule, kmp_int32 *plastiter, + kmp_uint32 *plower, kmp_uint32 *pupper, + kmp_uint32 *pupperD, kmp_int32 *pstride, + kmp_int32 incr, kmp_int32 chunk) { + __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, + pupper, pupperD, pstride, incr, chunk); } /*! See @ref __kmpc_dist_for_static_init_4 */ -void -__kmpc_dist_for_static_init_8( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_int64 *plower, kmp_int64 *pupper, kmp_int64 *pupperD, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_dist_for_static_init< kmp_int64 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +void __kmpc_dist_for_static_init_8(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedule, kmp_int32 *plastiter, + kmp_int64 *plower, kmp_int64 *pupper, + kmp_int64 *pupperD, kmp_int64 *pstride, + kmp_int64 incr, kmp_int64 chunk) { + __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, + pupper, pupperD, pstride, incr, chunk); } /*! See @ref __kmpc_dist_for_static_init_4 */ -void -__kmpc_dist_for_static_init_8u( - ident_t *loc, kmp_int32 gtid, kmp_int32 schedule, kmp_int32 *plastiter, - kmp_uint64 *plower, kmp_uint64 *pupper, kmp_uint64 *pupperD, - kmp_int64 *pstride, kmp_int64 incr, kmp_int64 chunk ) -{ - __kmp_dist_for_static_init< kmp_uint64 >( - loc, gtid, schedule, plastiter, plower, pupper, pupperD, pstride, incr, chunk ); +void __kmpc_dist_for_static_init_8u(ident_t *loc, kmp_int32 gtid, + kmp_int32 schedule, kmp_int32 *plastiter, + kmp_uint64 *plower, kmp_uint64 *pupper, + kmp_uint64 *pupperD, kmp_int64 *pstride, + kmp_int64 incr, kmp_int64 chunk) { + __kmp_dist_for_static_init(loc, gtid, schedule, plastiter, plower, + pupper, pupperD, pstride, incr, chunk); } /*! 
@} */ -//----------------------------------------------------------------------------------------- +//------------------------------------------------------------------------------ // Auxiliary routines for Distribute Parallel Loop construct implementation // Transfer call to template< type T > // __kmp_team_static_init( ident_t *loc, int gtid, @@ -889,60 +876,60 @@ __kmpc_dist_for_static_init_8u( @param incr Loop increment @param chunk The chunk size to block with -The functions compute the upper and lower bounds and stride to be used for the set of iterations -to be executed by the current team from the statically scheduled loop that is described by the -initial values of the bounds, stride, increment and chunk for the distribute construct as part of -composite distribute parallel loop construct. -These functions are all identical apart from the types of the arguments. +The functions compute the upper and lower bounds and stride to be used for the +set of iterations to be executed by the current team from the statically +scheduled loop that is described by the initial values of the bounds, stride, +increment and chunk for the distribute construct as part of composite distribute +parallel loop construct. These functions are all identical apart from the types +of the arguments. */ -void -__kmpc_team_static_init_4( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int32 *p_lb, kmp_int32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_int32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +void __kmpc_team_static_init_4(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int32 *p_lb, kmp_int32 *p_ub, + kmp_int32 *p_st, kmp_int32 incr, + kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, + chunk); } /*! See @ref __kmpc_team_static_init_4 */ -void -__kmpc_team_static_init_4u( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint32 *p_lb, kmp_uint32 *p_ub, kmp_int32 *p_st, kmp_int32 incr, kmp_int32 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_uint32 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +void __kmpc_team_static_init_4u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint32 *p_lb, kmp_uint32 *p_ub, + kmp_int32 *p_st, kmp_int32 incr, + kmp_int32 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, + chunk); } /*! See @ref __kmpc_team_static_init_4 */ -void -__kmpc_team_static_init_8( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_int64 *p_lb, kmp_int64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_int64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +void __kmpc_team_static_init_8(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_int64 *p_lb, kmp_int64 *p_ub, + kmp_int64 *p_st, kmp_int64 incr, + kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, + chunk); } /*! 
See @ref __kmpc_team_static_init_4 */ -void -__kmpc_team_static_init_8u( - ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, - kmp_uint64 *p_lb, kmp_uint64 *p_ub, kmp_int64 *p_st, kmp_int64 incr, kmp_int64 chunk ) -{ - KMP_DEBUG_ASSERT( __kmp_init_serial ); - __kmp_team_static_init< kmp_uint64 >( loc, gtid, p_last, p_lb, p_ub, p_st, incr, chunk ); +void __kmpc_team_static_init_8u(ident_t *loc, kmp_int32 gtid, kmp_int32 *p_last, + kmp_uint64 *p_lb, kmp_uint64 *p_ub, + kmp_int64 *p_st, kmp_int64 incr, + kmp_int64 chunk) { + KMP_DEBUG_ASSERT(__kmp_init_serial); + __kmp_team_static_init(loc, gtid, p_last, p_lb, p_ub, p_st, incr, + chunk); } /*! @} */ } // extern "C" - diff --git a/openmp/runtime/src/kmp_settings.cpp b/openmp/runtime/src/kmp_settings.cpp index dc3d39b..d8332a2 100644 --- a/openmp/runtime/src/kmp_settings.cpp +++ b/openmp/runtime/src/kmp_settings.cpp @@ -14,84 +14,80 @@ #include "kmp.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_environment.h" +#include "kmp_affinity.h" #include "kmp_atomic.h" -#include "kmp_itt.h" -#include "kmp_str.h" -#include "kmp_settings.h" +#include "kmp_environment.h" #include "kmp_i18n.h" -#include "kmp_lock.h" #include "kmp_io.h" -#include "kmp_affinity.h" +#include "kmp_itt.h" +#include "kmp_lock.h" +#include "kmp_settings.h" +#include "kmp_str.h" +#include "kmp_wrapper_getpid.h" #include // toupper() -static int __kmp_env_toPrint( char const * name, int flag ); +static int __kmp_env_toPrint(char const *name, int flag); bool __kmp_env_format = 0; // 0 - old format; 1 - new format -// ------------------------------------------------------------------------------------------------- + +// ----------------------------------------------------------------------------- // Helper string functions. Subject to move to kmp_str. 
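The __kmpc_team_static_init_* entry points in the kmp_static.cpp hunks above all forward to the templated __kmp_team_static_init, whose per-team blocking (span = chunk * incr, stride = span * nteams, lower bound offset by team_id) is easy to lose in the reformatted diff. As a reading aid, here is a minimal standalone sketch of that arithmetic for the simple incr > 0, no-overflow case; the helper name kmp_team_block_sketch and the driver values are invented and are not part of the patch.

#include <cstdio>

// Mirrors the per-team bound computation performed by __kmp_team_static_init
// for the distribute part of a composite "distribute parallel for".
static void kmp_team_block_sketch(int lower, int upper, int incr, int chunk,
                                  int nteams, int team_id, int *p_lb, int *p_ub,
                                  int *p_st) {
  if (chunk < 1)
    chunk = 1;
  int span = chunk * incr;        // iterations one team owns per round
  *p_st = span * nteams;          // stride from one round to the next
  *p_lb = lower + span * team_id; // this team's first iteration
  *p_ub = *p_lb + span - incr;    // this team's last iteration in the round
  if (incr > 0 && *p_ub > upper)  // clamp to the loop bound, as the runtime does
    *p_ub = upper;
}

int main() {
  // Distribute iterations 0..99 (incr 1, chunk 25) across 4 teams.
  for (int team = 0; team < 4; ++team) {
    int lb, ub, st;
    kmp_team_block_sketch(0, 99, 1, 25, 4, team, &lb, &ub, &st);
    printf("team %d: lb=%d ub=%d st=%d\n", team, lb, ub, st);
  }
  return 0; // prints 0..24, 25..49, 50..74, 75..99, each with stride 100
}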
-// ------------------------------------------------------------------------------------------------- -static double -__kmp_convert_to_double( char const * s ) -{ - double result; +static double __kmp_convert_to_double(char const *s) { + double result; - if ( KMP_SSCANF( s, "%lf", &result ) < 1 ) { - result = 0.0; - } + if (KMP_SSCANF(s, "%lf", &result) < 1) { + result = 0.0; + } - return result; + return result; } #ifdef KMP_DEBUG -static unsigned int -__kmp_readstr_with_sentinel(char *dest, char const * src, size_t len, char sentinel) { - unsigned int i; - for (i = 0; i < len; i++) { - if ((*src == '\0') || (*src == sentinel)) { - break; - } - *(dest++) = *(src++); +static unsigned int __kmp_readstr_with_sentinel(char *dest, char const *src, + size_t len, char sentinel) { + unsigned int i; + for (i = 0; i < len; i++) { + if ((*src == '\0') || (*src == sentinel)) { + break; } - *dest = '\0'; - return i; + *(dest++) = *(src++); + } + *dest = '\0'; + return i; } #endif -static int -__kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sentinel ) { - size_t l = 0; - - if(a == NULL) - a = ""; - if(b == NULL) - b = ""; - while(*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if(ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if(ca != cb) - return FALSE; - ++l; - ++a; - ++b; - } - return l >= len; +static int __kmp_match_with_sentinel(char const *a, char const *b, size_t len, + char sentinel) { + size_t l = 0; + + if (a == NULL) + a = ""; + if (b == NULL) + b = ""; + while (*a && *b && *b != sentinel) { + char ca = *a, cb = *b; + + if (ca >= 'a' && ca <= 'z') + ca -= 'a' - 'A'; + if (cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if (ca != cb) + return FALSE; + ++l; + ++a; + ++b; + } + return l >= len; } -// // Expected usage: // token is the token to check for. // buf is the string being parsed. // *end returns the char after the end of the token. // it is not modified unless a match occurs. // -// // Example 1: // // if (__kmp_match_str("token", buf, *end) { @@ -108,2405 +104,2295 @@ __kmp_match_with_sentinel( char const * a, char const * b, size_t len, char sent // **end = save; // buf = end; // } -// -static int -__kmp_match_str( char const *token, char const *buf, const char **end) { +static int __kmp_match_str(char const *token, char const *buf, + const char **end) { - KMP_ASSERT(token != NULL); - KMP_ASSERT(buf != NULL); - KMP_ASSERT(end != NULL); + KMP_ASSERT(token != NULL); + KMP_ASSERT(buf != NULL); + KMP_ASSERT(end != NULL); - while (*token && *buf) { - char ct = *token, cb = *buf; + while (*token && *buf) { + char ct = *token, cb = *buf; - if(ct >= 'a' && ct <= 'z') - ct -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if (ct != cb) - return FALSE; - ++token; - ++buf; - } - if (*token) { - return FALSE; - } - *end = buf; - return TRUE; + if (ct >= 'a' && ct <= 'z') + ct -= 'a' - 'A'; + if (cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if (ct != cb) + return FALSE; + ++token; + ++buf; + } + if (*token) { + return FALSE; + } + *end = buf; + return TRUE; } - -static size_t -__kmp_round4k( size_t size ) { - size_t _4k = 4 * 1024; - if ( size & ( _4k - 1 ) ) { - size &= ~ ( _4k - 1 ); - if ( size <= KMP_SIZE_T_MAX - _4k ) { - size += _4k; // Round up if there is no overflow. - }; // if +static size_t __kmp_round4k(size_t size) { + size_t _4k = 4 * 1024; + if (size & (_4k - 1)) { + size &= ~(_4k - 1); + if (size <= KMP_SIZE_T_MAX - _4k) { + size += _4k; // Round up if there is no overflow. 
}; // if - return size; + }; // if + return size; } // __kmp_round4k - -/* - Here, multipliers are like __kmp_convert_to_seconds, but floating-point - values are allowed, and the return value is in milliseconds. The default - multiplier is milliseconds. Returns INT_MAX only if the value specified - matches "infinit*". Returns -1 if specified string is invalid. -*/ -int -__kmp_convert_to_milliseconds( char const * data ) -{ - int ret, nvalues, factor; - char mult, extra; - double value; - - if (data == NULL) return (-1); - if ( __kmp_str_match( "infinit", -1, data)) return (INT_MAX); - value = (double) 0.0; +/* Here, multipliers are like __kmp_convert_to_seconds, but floating-point + values are allowed, and the return value is in milliseconds. The default + multiplier is milliseconds. Returns INT_MAX only if the value specified + matches "infinit*". Returns -1 if specified string is invalid. */ +int __kmp_convert_to_milliseconds(char const *data) { + int ret, nvalues, factor; + char mult, extra; + double value; + + if (data == NULL) + return (-1); + if (__kmp_str_match("infinit", -1, data)) + return (INT_MAX); + value = (double)0.0; + mult = '\0'; + nvalues = KMP_SSCANF(data, "%lf%c%c", &value, &mult, &extra); + if (nvalues < 1) + return (-1); + if (nvalues == 1) mult = '\0'; - nvalues = KMP_SSCANF (data, "%lf%c%c", &value, &mult, &extra); - if (nvalues < 1) return (-1); - if (nvalues == 1) mult = '\0'; - if (nvalues == 3) return (-1); - - if (value < 0) return (-1); - - switch (mult) { - case '\0': - /* default is milliseconds */ - factor = 1; - break; - case 's': case 'S': - factor = 1000; - break; - case 'm': case 'M': - factor = 1000 * 60; - break; - case 'h': case 'H': - factor = 1000 * 60 * 60; - break; - case 'd': case 'D': - factor = 1000 * 24 * 60 * 60; - break; - default: - return (-1); - } + if (nvalues == 3) + return (-1); + + if (value < 0) + return (-1); + + switch (mult) { + case '\0': + /* default is milliseconds */ + factor = 1; + break; + case 's': + case 'S': + factor = 1000; + break; + case 'm': + case 'M': + factor = 1000 * 60; + break; + case 'h': + case 'H': + factor = 1000 * 60 * 60; + break; + case 'd': + case 'D': + factor = 1000 * 24 * 60 * 60; + break; + default: + return (-1); + } - if ( value >= ( (INT_MAX-1) / factor) ) - ret = INT_MAX-1; /* Don't allow infinite value here */ - else - ret = (int) (value * (double) factor); /* truncate to int */ + if (value >= ((INT_MAX - 1) / factor)) + ret = INT_MAX - 1; /* Don't allow infinite value here */ + else + ret = (int)(value * (double)factor); /* truncate to int */ - return ret; + return ret; } - -static int -__kmp_strcasecmp_with_sentinel( char const * a, char const * b, char sentinel ) { - if(a == NULL) - a = ""; - if(b == NULL) - b = ""; - while(*a && *b && *b != sentinel) { - char ca = *a, cb = *b; - - if(ca >= 'a' && ca <= 'z') - ca -= 'a' - 'A'; - if(cb >= 'a' && cb <= 'z') - cb -= 'a' - 'A'; - if(ca != cb) - return (int)(unsigned char)*a - (int)(unsigned char)*b; - ++a; - ++b; - } - return *a ? - (*b && *b != sentinel) ? (int)(unsigned char)*a - (int)(unsigned char)*b : 1 : - (*b && *b != sentinel) ? 
-1 : 0; +static int __kmp_strcasecmp_with_sentinel(char const *a, char const *b, + char sentinel) { + if (a == NULL) + a = ""; + if (b == NULL) + b = ""; + while (*a && *b && *b != sentinel) { + char ca = *a, cb = *b; + + if (ca >= 'a' && ca <= 'z') + ca -= 'a' - 'A'; + if (cb >= 'a' && cb <= 'z') + cb -= 'a' - 'A'; + if (ca != cb) + return (int)(unsigned char)*a - (int)(unsigned char)*b; + ++a; + ++b; + } + return *a + ? (*b && *b != sentinel) + ? (int)(unsigned char)*a - (int)(unsigned char)*b + : 1 + : (*b && *b != sentinel) ? -1 : 0; } - -// ================================================================================================= +// ============================================================================= // Table structures and helper functions. -// ================================================================================================= -typedef struct __kmp_setting kmp_setting_t; -typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t; -typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t; -typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t; +typedef struct __kmp_setting kmp_setting_t; +typedef struct __kmp_stg_ss_data kmp_stg_ss_data_t; +typedef struct __kmp_stg_wp_data kmp_stg_wp_data_t; +typedef struct __kmp_stg_fr_data kmp_stg_fr_data_t; -typedef void ( * kmp_stg_parse_func_t )( char const * name, char const * value, void * data ); -typedef void ( * kmp_stg_print_func_t )( kmp_str_buf_t * buffer, char const * name, void * data ); +typedef void (*kmp_stg_parse_func_t)(char const *name, char const *value, + void *data); +typedef void (*kmp_stg_print_func_t)(kmp_str_buf_t *buffer, char const *name, + void *data); struct __kmp_setting { - char const * name; // Name of setting (environment variable). - kmp_stg_parse_func_t parse; // Parser function. - kmp_stg_print_func_t print; // Print function. - void * data; // Data passed to parser and printer. - int set; // Variable set during this "session" - // (__kmp_env_initialize() or kmp_set_defaults() call). - int defined; // Variable set in any "session". + char const *name; // Name of setting (environment variable). + kmp_stg_parse_func_t parse; // Parser function. + kmp_stg_print_func_t print; // Print function. + void *data; // Data passed to parser and printer. + int set; // Variable set during this "session" + // (__kmp_env_initialize() or kmp_set_defaults() call). + int defined; // Variable set in any "session". }; // struct __kmp_setting struct __kmp_stg_ss_data { - size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). + size_t factor; // Default factor: 1 for KMP_STACKSIZE, 1024 for others. + kmp_setting_t **rivals; // Array of pointers to rivals (including itself). }; // struct __kmp_stg_ss_data struct __kmp_stg_wp_data { - int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). + int omp; // 0 -- KMP_LIBRARY, 1 -- OMP_WAIT_POLICY. + kmp_setting_t **rivals; // Array of pointers to rivals (including itself). }; // struct __kmp_stg_wp_data struct __kmp_stg_fr_data { - int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION. - kmp_setting_t * * rivals; // Array of pointers to rivals (including itself). + int force; // 0 -- KMP_DETERMINISTIC_REDUCTION, 1 -- KMP_FORCE_REDUCTION. + kmp_setting_t **rivals; // Array of pointers to rivals (including itself). 
}; // struct __kmp_stg_fr_data -static int -__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const * name, // Name of variable. - char const * value, // Value of the variable. - kmp_setting_t * * rivals // List of rival settings (the list must include current one). -); - +static int __kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. + char const *name, // Name of variable. + char const *value, // Value of the variable. + kmp_setting_t **rivals // List of rival settings (must include current one). + ); -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Helper parse functions. -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_bool( - char const * name, - char const * value, - int * out -) { - if ( __kmp_str_match_true( value ) ) { - * out = TRUE; - } else if (__kmp_str_match_false( value ) ) { - * out = FALSE; - } else { - __kmp_msg( - kmp_ms_warning, - KMP_MSG( BadBoolValue, name, value ), - KMP_HNT( ValidBoolValues ), - __kmp_msg_null - ); - }; // if + +static void __kmp_stg_parse_bool(char const *name, char const *value, + int *out) { + if (__kmp_str_match_true(value)) { + *out = TRUE; + } else if (__kmp_str_match_false(value)) { + *out = FALSE; + } else { + __kmp_msg(kmp_ms_warning, KMP_MSG(BadBoolValue, name, value), + KMP_HNT(ValidBoolValues), __kmp_msg_null); + }; // if } // __kmp_stg_parse_bool -static void -__kmp_stg_parse_size( - char const * name, - char const * value, - size_t size_min, - size_t size_max, - int * is_specified, - size_t * out, - size_t factor -) { - char const * msg = NULL; - #if KMP_OS_DARWIN - size_min = __kmp_round4k( size_min ); - size_max = __kmp_round4k( size_max ); - #endif // KMP_OS_DARWIN - if ( value ) { - if ( is_specified != NULL ) { - * is_specified = 1; - }; // if - __kmp_str_to_size( value, out, factor, & msg ); - if ( msg == NULL ) { - if ( * out > size_max ) { - * out = size_max; - msg = KMP_I18N_STR( ValueTooLarge ); - } else if ( * out < size_min ) { - * out = size_min; - msg = KMP_I18N_STR( ValueTooSmall ); - } else { - #if KMP_OS_DARWIN - size_t round4k = __kmp_round4k( * out ); - if ( * out != round4k ) { - * out = round4k; - msg = KMP_I18N_STR( NotMultiple4K ); - }; // if - #endif - }; // if - } else { - // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to size_max silently. - if ( * out < size_min ) { - * out = size_max; - } - else if ( * out > size_max ) { - * out = size_max; - }; // if - }; // if - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- kmp_str_buf_t buf; - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print_size( & buf, * out ); - KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - KMP_INFORM( Using_str_Value, name, buf.str ); - __kmp_str_buf_free( & buf ); +static void __kmp_stg_parse_size(char const *name, char const *value, + size_t size_min, size_t size_max, + int *is_specified, size_t *out, + size_t factor) { + char const *msg = NULL; +#if KMP_OS_DARWIN + size_min = __kmp_round4k(size_min); + size_max = __kmp_round4k(size_max); +#endif // KMP_OS_DARWIN + if (value) { + if (is_specified != NULL) { + *is_specified = 1; + }; // if + __kmp_str_to_size(value, out, factor, &msg); + if (msg == NULL) { + if (*out > size_max) { + *out = size_max; + msg = KMP_I18N_STR(ValueTooLarge); + } else if (*out < size_min) { + *out = size_min; + msg = KMP_I18N_STR(ValueTooSmall); + } else { +#if KMP_OS_DARWIN + size_t round4k = __kmp_round4k(*out); + if (*out != round4k) { + *out = round4k; + msg = KMP_I18N_STR(NotMultiple4K); }; // if +#endif + }; // if + } else { + // If integer overflow occurred, * out == KMP_SIZE_T_MAX. Cut it to + // size_max silently. + if (*out < size_min) { + *out = size_max; + } else if (*out > size_max) { + *out = size_max; + }; // if + }; // if + if (msg != NULL) { + // Message is not empty. Print warning. + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_str_buf_print_size(&buf, *out); + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + KMP_INFORM(Using_str_Value, name, buf.str); + __kmp_str_buf_free(&buf); }; // if + }; // if } // __kmp_stg_parse_size #if KMP_AFFINITY_SUPPORTED -static void -__kmp_stg_parse_str( - char const * name, - char const * value, - char const * * out -) { - __kmp_str_free(out); - * out = __kmp_str_format( "%s", value ); +static void __kmp_stg_parse_str(char const *name, char const *value, + char const **out) { + __kmp_str_free(out); + *out = __kmp_str_format("%s", value); } // __kmp_stg_parse_str #endif -static void -__kmp_stg_parse_int( - char const * name, // I: Name of environment variable (used in warning messages). - char const * value, // I: Value of environment variable to parse. - int min, // I: Miminal allowed value. - int max, // I: Maximum allowed value. - int * out // O: Output (parsed) value. -) { - char const * msg = NULL; - kmp_uint64 uint = * out; - __kmp_str_to_uint( value, & uint, & msg ); - if ( msg == NULL ) { - if ( uint < (unsigned int)min ) { - msg = KMP_I18N_STR( ValueTooSmall ); - uint = min; - } else if ( uint > (unsigned int)max ) { - msg = KMP_I18N_STR( ValueTooLarge ); - uint = max; - }; // if - } else { - // If overflow occurred msg contains error message and uint is very big. Cut tmp it - // to INT_MAX. - if ( uint < (unsigned int)min ) { - uint = min; - } - else if ( uint > (unsigned int)max ) { - uint = max; - }; // if +static void __kmp_stg_parse_int( + char const + *name, // I: Name of environment variable (used in warning messages). + char const *value, // I: Value of environment variable to parse. + int min, // I: Miminal allowed value. + int max, // I: Maximum allowed value. + int *out // O: Output (parsed) value. + ) { + char const *msg = NULL; + kmp_uint64 uint = *out; + __kmp_str_to_uint(value, &uint, &msg); + if (msg == NULL) { + if (uint < (unsigned int)min) { + msg = KMP_I18N_STR(ValueTooSmall); + uint = min; + } else if (uint > (unsigned int)max) { + msg = KMP_I18N_STR(ValueTooLarge); + uint = max; }; // if - if ( msg != NULL ) { - // Message is not empty. Print warning. 
- kmp_str_buf_t buf; - KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - __kmp_str_buf_init( & buf ); - __kmp_str_buf_print( &buf, "%" KMP_UINT64_SPEC "", uint ); - KMP_INFORM( Using_uint64_Value, name, buf.str ); - __kmp_str_buf_free( &buf ); + } else { + // If overflow occurred msg contains error message and uint is very big. Cut + // tmp it to INT_MAX. + if (uint < (unsigned int)min) { + uint = min; + } else if (uint > (unsigned int)max) { + uint = max; }; // if - * out = uint; + }; // if + if (msg != NULL) { + // Message is not empty. Print warning. + kmp_str_buf_t buf; + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + __kmp_str_buf_init(&buf); + __kmp_str_buf_print(&buf, "%" KMP_UINT64_SPEC "", uint); + KMP_INFORM(Using_uint64_Value, name, buf.str); + __kmp_str_buf_free(&buf); + }; // if + *out = uint; } // __kmp_stg_parse_int - #if KMP_DEBUG_ADAPTIVE_LOCKS -static void -__kmp_stg_parse_file( - char const * name, - char const * value, - char * suffix, - char * * out -) { - char buffer[256]; - char *t; - int hasSuffix; - __kmp_str_free(out); - t = (char *) strrchr(value, '.'); - hasSuffix = t && __kmp_str_eqf( t, suffix ); - t = __kmp_str_format( "%s%s", value, hasSuffix ? "" : suffix ); - __kmp_expand_file_name( buffer, sizeof(buffer), t); - __kmp_str_free(&t); - * out = __kmp_str_format( "%s", buffer ); +static void __kmp_stg_parse_file(char const *name, char const *value, + char *suffix, char **out) { + char buffer[256]; + char *t; + int hasSuffix; + __kmp_str_free(out); + t = (char *)strrchr(value, '.'); + hasSuffix = t && __kmp_str_eqf(t, suffix); + t = __kmp_str_format("%s%s", value, hasSuffix ? "" : suffix); + __kmp_expand_file_name(buffer, sizeof(buffer), t); + __kmp_str_free(&t); + *out = __kmp_str_format("%s", buffer); } // __kmp_stg_parse_file #endif #ifdef KMP_DEBUG -static char * par_range_to_print = NULL; - -static void -__kmp_stg_parse_par_range( - char const * name, - char const * value, - int * out_range, - char * out_routine, - char * out_file, - int * out_lb, - int * out_ub -) { - size_t len = KMP_STRLEN( value + 1 ); - par_range_to_print = (char *) KMP_INTERNAL_MALLOC( len +1 ); - KMP_STRNCPY_S( par_range_to_print, len + 1, value, len + 1); - __kmp_par_range = +1; - __kmp_par_range_lb = 0; - __kmp_par_range_ub = INT_MAX; - for (;;) { - unsigned int len; - if (( value == NULL ) || ( *value == '\0' )) { - break; - } - if ( ! __kmp_strcasecmp_with_sentinel( "routine", value, '=' )) { - value = strchr( value, '=' ) + 1; - len = __kmp_readstr_with_sentinel( out_routine, - value, KMP_PAR_RANGE_ROUTINE_LEN - 1, ',' ); - if ( len == 0 ) { - goto par_range_error; - } - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if ( ! __kmp_strcasecmp_with_sentinel( "filename", value, '=' )) { - value = strchr( value, '=' ) + 1; - len = __kmp_readstr_with_sentinel( out_file, - value, KMP_PAR_RANGE_FILENAME_LEN - 1, ',' ); - if ( len == 0) { - goto par_range_error; - } - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if (( ! __kmp_strcasecmp_with_sentinel( "range", value, '=' )) - || ( ! __kmp_strcasecmp_with_sentinel( "incl_range", value, '=' ))) { - value = strchr( value, '=' ) + 1; - if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub ) != 2 ) { - goto par_range_error; - } - *out_range = +1; - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - if ( ! 
__kmp_strcasecmp_with_sentinel( "excl_range", value, '=' )) { - value = strchr( value, '=' ) + 1; - if ( KMP_SSCANF( value, "%d:%d", out_lb, out_ub) != 2 ) { - goto par_range_error; - } - *out_range = -1; - value = strchr( value, ',' ); - if ( value != NULL ) { - value++; - } - continue; - } - par_range_error: - KMP_WARNING( ParRangeSyntax, name ); - __kmp_par_range = 0; - break; +static char *par_range_to_print = NULL; + +static void __kmp_stg_parse_par_range(char const *name, char const *value, + int *out_range, char *out_routine, + char *out_file, int *out_lb, + int *out_ub) { + size_t len = KMP_STRLEN(value + 1); + par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1); + KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1); + __kmp_par_range = +1; + __kmp_par_range_lb = 0; + __kmp_par_range_ub = INT_MAX; + for (;;) { + unsigned int len; + if ((value == NULL) || (*value == '\0')) { + break; + } + if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) { + value = strchr(value, '=') + 1; + len = __kmp_readstr_with_sentinel(out_routine, value, + KMP_PAR_RANGE_ROUTINE_LEN - 1, ','); + if (len == 0) { + goto par_range_error; + } + value = strchr(value, ','); + if (value != NULL) { + value++; + } + continue; + } + if (!__kmp_strcasecmp_with_sentinel("filename", value, '=')) { + value = strchr(value, '=') + 1; + len = __kmp_readstr_with_sentinel(out_file, value, + KMP_PAR_RANGE_FILENAME_LEN - 1, ','); + if (len == 0) { + goto par_range_error; + } + value = strchr(value, ','); + if (value != NULL) { + value++; + } + continue; + } + if ((!__kmp_strcasecmp_with_sentinel("range", value, '=')) || + (!__kmp_strcasecmp_with_sentinel("incl_range", value, '='))) { + value = strchr(value, '=') + 1; + if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) { + goto par_range_error; + } + *out_range = +1; + value = strchr(value, ','); + if (value != NULL) { + value++; + } + continue; + } + if (!__kmp_strcasecmp_with_sentinel("excl_range", value, '=')) { + value = strchr(value, '=') + 1; + if (KMP_SSCANF(value, "%d:%d", out_lb, out_ub) != 2) { + goto par_range_error; + } + *out_range = -1; + value = strchr(value, ','); + if (value != NULL) { + value++; + } + continue; } + par_range_error: + KMP_WARNING(ParRangeSyntax, name); + __kmp_par_range = 0; + break; + } } // __kmp_stg_parse_par_range #endif -int -__kmp_initial_threads_capacity( int req_nproc ) -{ - int nth = 32; +int __kmp_initial_threads_capacity(int req_nproc) { + int nth = 32; - /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); + /* MIN( MAX( 32, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), + * __kmp_max_nth) */ + if (nth < (4 * req_nproc)) + nth = (4 * req_nproc); + if (nth < (4 * __kmp_xproc)) + nth = (4 * __kmp_xproc); - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; + if (nth > __kmp_max_nth) + nth = __kmp_max_nth; - return nth; + return nth; } +int __kmp_default_tp_capacity(int req_nproc, int max_nth, + int all_threads_specified) { + int nth = 128; -int -__kmp_default_tp_capacity( int req_nproc, int max_nth, int all_threads_specified) { - int nth = 128; - - if(all_threads_specified) - return max_nth; - /* MIN( MAX (128, 4 * $OMP_NUM_THREADS, 4 * omp_get_num_procs() ), __kmp_max_nth ) */ - if (nth < (4 * req_nproc)) - nth = (4 * req_nproc); - if (nth < (4 * __kmp_xproc)) - nth = (4 * __kmp_xproc); + if (all_threads_specified) + return max_nth; + /* MIN( MAX (128, 4 * 
$OMP_NUM_THREADS, 4 * omp_get_num_procs() ), + * __kmp_max_nth ) */ + if (nth < (4 * req_nproc)) + nth = (4 * req_nproc); + if (nth < (4 * __kmp_xproc)) + nth = (4 * __kmp_xproc); - if (nth > __kmp_max_nth) - nth = __kmp_max_nth; + if (nth > __kmp_max_nth) + nth = __kmp_max_nth; - return nth; + return nth; } - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Helper print functions. -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_print_bool( kmp_str_buf_t * buffer, char const * name, int value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_BOOL; - } else { - __kmp_str_buf_print( buffer, " %s=%s\n", name, value ? "true" : "false" ); - } +static void __kmp_stg_print_bool(kmp_str_buf_t *buffer, char const *name, + int value) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_BOOL; + } else { + __kmp_str_buf_print(buffer, " %s=%s\n", name, value ? "true" : "false"); + } } // __kmp_stg_print_bool -static void -__kmp_stg_print_int( kmp_str_buf_t * buffer, char const * name, int value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_INT; - } else { - __kmp_str_buf_print( buffer, " %s=%d\n", name, value ); - } +static void __kmp_stg_print_int(kmp_str_buf_t *buffer, char const *name, + int value) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_INT; + } else { + __kmp_str_buf_print(buffer, " %s=%d\n", name, value); + } } // __kmp_stg_print_int -static void -__kmp_stg_print_uint64( kmp_str_buf_t * buffer, char const * name, kmp_uint64 value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_UINT64; - } else { - __kmp_str_buf_print( buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value ); - } +static void __kmp_stg_print_uint64(kmp_str_buf_t *buffer, char const *name, + kmp_uint64 value) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_UINT64; + } else { + __kmp_str_buf_print(buffer, " %s=%" KMP_UINT64_SPEC "\n", name, value); + } } // __kmp_stg_print_uint64 -static void -__kmp_stg_print_str( kmp_str_buf_t * buffer, char const * name, char const * value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_STR; - } else { - __kmp_str_buf_print( buffer, " %s=%s\n", name, value ); - } +static void __kmp_stg_print_str(kmp_str_buf_t *buffer, char const *name, + char const *value) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_STR; + } else { + __kmp_str_buf_print(buffer, " %s=%s\n", name, value); + } } // __kmp_stg_print_str -static void -__kmp_stg_print_size( kmp_str_buf_t * buffer, char const * name, size_t value ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size( buffer, value ); - __kmp_str_buf_print( buffer, "'\n" ); - } else { - __kmp_str_buf_print( buffer, " %s=", name ); - __kmp_str_buf_print_size( buffer, value ); - __kmp_str_buf_print( buffer, "\n" ); - return; - } +static void __kmp_stg_print_size(kmp_str_buf_t *buffer, char const *name, + size_t value) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + __kmp_str_buf_print_size(buffer, value); + __kmp_str_buf_print(buffer, "'\n"); + } else { + __kmp_str_buf_print(buffer, " %s=", name); + __kmp_str_buf_print_size(buffer, value); + __kmp_str_buf_print(buffer, "\n"); + return; + } } // __kmp_stg_print_size - -// ================================================================================================= +// 
============================================================================= // Parse and print functions. -// ================================================================================================= -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ALL_THREADS, KMP_MAX_THREADS, OMP_THREAD_LIMIT -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_all_threads( char const * name, char const * value, void * data ) { - - kmp_setting_t * * rivals = (kmp_setting_t * *) data; - int rc; - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - }; // if - if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - __kmp_max_nth = __kmp_xproc; - __kmp_allThreadsSpecified = 1; - } else { - __kmp_stg_parse_int( name, value, 1, __kmp_sys_max_nth, & __kmp_max_nth ); - __kmp_allThreadsSpecified = 0; - } - K_DIAG( 1, ( "__kmp_max_nth == %d\n", __kmp_max_nth ) ); +static void __kmp_stg_parse_all_threads(char const *name, char const *value, + void *data) { + + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + }; // if + if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { + __kmp_max_nth = __kmp_xproc; + __kmp_allThreadsSpecified = 1; + } else { + __kmp_stg_parse_int(name, value, 1, __kmp_sys_max_nth, &__kmp_max_nth); + __kmp_allThreadsSpecified = 0; + } + K_DIAG(1, ("__kmp_max_nth == %d\n", __kmp_max_nth)); } // __kmp_stg_parse_all_threads -static void -__kmp_stg_print_all_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_max_nth ); +static void __kmp_stg_print_all_threads(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_max_nth); } // __kmp_stg_print_all_threads -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_BLOCKTIME -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_blocktime( char const * name, char const * value, void * data ) { - __kmp_dflt_blocktime = __kmp_convert_to_milliseconds( value ); - if ( __kmp_dflt_blocktime < 0 ) { - __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidValue, name, value ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_dflt_blocktime ); - __kmp_env_blocktime = FALSE; // Revert to default as if var not set. - } else { - if ( __kmp_dflt_blocktime < KMP_MIN_BLOCKTIME ) { - __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( SmallValue, name, value ), __kmp_msg_null ); - KMP_INFORM( MinValueUsing, name, __kmp_dflt_blocktime ); - } else if ( __kmp_dflt_blocktime > KMP_MAX_BLOCKTIME ) { - __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; - __kmp_msg( kmp_ms_warning, KMP_MSG( LargeValue, name, value ), __kmp_msg_null ); - KMP_INFORM( MaxValueUsing, name, __kmp_dflt_blocktime ); - }; // if - __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified. 
+ +static void __kmp_stg_parse_blocktime(char const *name, char const *value, + void *data) { + __kmp_dflt_blocktime = __kmp_convert_to_milliseconds(value); + if (__kmp_dflt_blocktime < 0) { + __kmp_dflt_blocktime = KMP_DEFAULT_BLOCKTIME; + __kmp_msg(kmp_ms_warning, KMP_MSG(InvalidValue, name, value), + __kmp_msg_null); + KMP_INFORM(Using_int_Value, name, __kmp_dflt_blocktime); + __kmp_env_blocktime = FALSE; // Revert to default as if var not set. + } else { + if (__kmp_dflt_blocktime < KMP_MIN_BLOCKTIME) { + __kmp_dflt_blocktime = KMP_MIN_BLOCKTIME; + __kmp_msg(kmp_ms_warning, KMP_MSG(SmallValue, name, value), + __kmp_msg_null); + KMP_INFORM(MinValueUsing, name, __kmp_dflt_blocktime); + } else if (__kmp_dflt_blocktime > KMP_MAX_BLOCKTIME) { + __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; + __kmp_msg(kmp_ms_warning, KMP_MSG(LargeValue, name, value), + __kmp_msg_null); + KMP_INFORM(MaxValueUsing, name, __kmp_dflt_blocktime); }; // if + __kmp_env_blocktime = TRUE; // KMP_BLOCKTIME was specified. + }; // if #if KMP_USE_MONITOR - // calculate number of monitor thread wakeup intervals corresponding to blocktime. - __kmp_monitor_wakeups = KMP_WAKEUPS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); - __kmp_bt_intervals = KMP_INTERVALS_FROM_BLOCKTIME( __kmp_dflt_blocktime, __kmp_monitor_wakeups ); + // calculate number of monitor thread wakeup intervals corresponding to + // blocktime. + __kmp_monitor_wakeups = + KMP_WAKEUPS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); + __kmp_bt_intervals = + KMP_INTERVALS_FROM_BLOCKTIME(__kmp_dflt_blocktime, __kmp_monitor_wakeups); #endif - K_DIAG( 1, ( "__kmp_env_blocktime == %d\n", __kmp_env_blocktime ) ); - if ( __kmp_env_blocktime ) { - K_DIAG( 1, ( "__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime ) ); - } + K_DIAG(1, ("__kmp_env_blocktime == %d\n", __kmp_env_blocktime)); + if (__kmp_env_blocktime) { + K_DIAG(1, ("__kmp_dflt_blocktime == %d\n", __kmp_dflt_blocktime)); + } } // __kmp_stg_parse_blocktime -static void -__kmp_stg_print_blocktime( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_dflt_blocktime ); +static void __kmp_stg_print_blocktime(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_dflt_blocktime); } // __kmp_stg_print_blocktime -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_DUPLICATE_LIB_OK -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_duplicate_lib_ok( char const * name, char const * value, void * data ) { - /* actually this variable is not supported, - put here for compatibility with earlier builds and for static/dynamic combination */ - __kmp_stg_parse_bool( name, value, & __kmp_duplicate_library_ok ); +static void __kmp_stg_parse_duplicate_lib_ok(char const *name, + char const *value, void *data) { + /* actually this variable is not supported, put here for compatibility with + earlier builds and for static/dynamic combination */ + __kmp_stg_parse_bool(name, value, &__kmp_duplicate_library_ok); } // __kmp_stg_parse_duplicate_lib_ok -static void -__kmp_stg_print_duplicate_lib_ok( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_duplicate_library_ok ); +static void __kmp_stg_print_duplicate_lib_ok(kmp_str_buf_t *buffer, + char const 
*name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_duplicate_library_ok); } // __kmp_stg_print_duplicate_lib_ok -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_INHERIT_FP_CONTROL -// ------------------------------------------------------------------------------------------------- #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -static void -__kmp_stg_parse_inherit_fp_control( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_inherit_fp_control ); +static void __kmp_stg_parse_inherit_fp_control(char const *name, + char const *value, void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_inherit_fp_control); } // __kmp_stg_parse_inherit_fp_control -static void -__kmp_stg_print_inherit_fp_control( kmp_str_buf_t * buffer, char const * name, void * data ) { +static void __kmp_stg_print_inherit_fp_control(kmp_str_buf_t *buffer, + char const *name, void *data) { #if KMP_DEBUG - __kmp_stg_print_bool( buffer, name, __kmp_inherit_fp_control ); + __kmp_stg_print_bool(buffer, name, __kmp_inherit_fp_control); #endif /* KMP_DEBUG */ } // __kmp_stg_print_inherit_fp_control #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_LIBRARY, OMP_WAIT_POLICY -// ------------------------------------------------------------------------------------------------- static char const *blocktime_str = NULL; -static void -__kmp_stg_parse_wait_policy( char const * name, char const * value, void * data ) { +static void __kmp_stg_parse_wait_policy(char const *name, char const *value, + void *data) { - kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; - int rc; + kmp_stg_wp_data_t *wait = (kmp_stg_wp_data_t *)data; + int rc; - rc = __kmp_stg_check_rivals( name, value, wait->rivals ); - if ( rc ) { - return; - }; // if + rc = __kmp_stg_check_rivals(name, value, wait->rivals); + if (rc) { + return; + }; // if - if ( wait->omp ) { - if ( __kmp_str_match( "ACTIVE", 1, value ) ) { - __kmp_library = library_turnaround; - if ( blocktime_str == NULL ) { - // KMP_BLOCKTIME not specified, so set default to "infinite". - __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; - } - } else if ( __kmp_str_match( "PASSIVE", 1, value ) ) { - __kmp_library = library_throughput; - if ( blocktime_str == NULL ) { - // KMP_BLOCKTIME not specified, so set default to 0. - __kmp_dflt_blocktime = 0; - } - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if + if (wait->omp) { + if (__kmp_str_match("ACTIVE", 1, value)) { + __kmp_library = library_turnaround; + if (blocktime_str == NULL) { + // KMP_BLOCKTIME not specified, so set default to "infinite". + __kmp_dflt_blocktime = KMP_MAX_BLOCKTIME; + } + } else if (__kmp_str_match("PASSIVE", 1, value)) { + __kmp_library = library_throughput; + if (blocktime_str == NULL) { + // KMP_BLOCKTIME not specified, so set default to 0. 
+ __kmp_dflt_blocktime = 0; + } } else { - if ( __kmp_str_match( "serial", 1, value ) ) { /* S */ - __kmp_library = library_serial; - } else if ( __kmp_str_match( "throughput", 2, value ) ) { /* TH */ - __kmp_library = library_throughput; - } else if ( __kmp_str_match( "turnaround", 2, value ) ) { /* TU */ - __kmp_library = library_turnaround; - } else if ( __kmp_str_match( "dedicated", 1, value ) ) { /* D */ - __kmp_library = library_turnaround; - } else if ( __kmp_str_match( "multiuser", 1, value ) ) { /* M */ - __kmp_library = library_throughput; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if + KMP_WARNING(StgInvalidValue, name, value); + }; // if + } else { + if (__kmp_str_match("serial", 1, value)) { /* S */ + __kmp_library = library_serial; + } else if (__kmp_str_match("throughput", 2, value)) { /* TH */ + __kmp_library = library_throughput; + } else if (__kmp_str_match("turnaround", 2, value)) { /* TU */ + __kmp_library = library_turnaround; + } else if (__kmp_str_match("dedicated", 1, value)) { /* D */ + __kmp_library = library_turnaround; + } else if (__kmp_str_match("multiuser", 1, value)) { /* M */ + __kmp_library = library_throughput; + } else { + KMP_WARNING(StgInvalidValue, name, value); }; // if - __kmp_aux_set_library( __kmp_library ); + }; // if + __kmp_aux_set_library(__kmp_library); } // __kmp_stg_parse_wait_policy -static void -__kmp_stg_print_wait_policy( kmp_str_buf_t * buffer, char const * name, void * data ) { - - kmp_stg_wp_data_t * wait = (kmp_stg_wp_data_t *) data; - char const * value = NULL; - - if ( wait->omp ) { - switch ( __kmp_library ) { - case library_turnaround : { - value = "ACTIVE"; - } break; - case library_throughput : { - value = "PASSIVE"; - } break; - }; // switch - } else { - switch ( __kmp_library ) { - case library_serial : { - value = "serial"; - } break; - case library_turnaround : { - value = "turnaround"; - } break; - case library_throughput : { - value = "throughput"; - } break; - }; // switch - }; // if - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - }; // if +static void __kmp_stg_print_wait_policy(kmp_str_buf_t *buffer, char const *name, + void *data) { + + kmp_stg_wp_data_t *wait = (kmp_stg_wp_data_t *)data; + char const *value = NULL; + + if (wait->omp) { + switch (__kmp_library) { + case library_turnaround: { + value = "ACTIVE"; + } break; + case library_throughput: { + value = "PASSIVE"; + } break; + }; // switch + } else { + switch (__kmp_library) { + case library_serial: { + value = "serial"; + } break; + case library_turnaround: { + value = "turnaround"; + } break; + case library_throughput: { + value = "throughput"; + } break; + }; // switch + }; // if + if (value != NULL) { + __kmp_stg_print_str(buffer, name, value); + }; // if } // __kmp_stg_print_wait_policy #if KMP_USE_MONITOR -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_MONITOR_STACKSIZE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_monitor_stacksize( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - __kmp_sys_min_stksize, - KMP_MAX_STKSIZE, - NULL, - & __kmp_monitor_stksize, - 1 - ); -} // __kmp_stg_parse_monitor_stacksize -static void -__kmp_stg_print_monitor_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( 
__kmp_env_format ) { - if ( __kmp_monitor_stksize > 0 ) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( __kmp_monitor_stksize > 0 ) { - __kmp_str_buf_print_size( buffer, __kmp_monitor_stksize ); - } else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - if( __kmp_env_format && __kmp_monitor_stksize ) { - __kmp_str_buf_print( buffer, "'\n"); - } +static void __kmp_stg_parse_monitor_stacksize(char const *name, + char const *value, void *data) { + __kmp_stg_parse_size(name, value, __kmp_sys_min_stksize, KMP_MAX_STKSIZE, + NULL, &__kmp_monitor_stksize, 1); +} // __kmp_stg_parse_monitor_stacksize +static void __kmp_stg_print_monitor_stacksize(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + if (__kmp_monitor_stksize > 0) + KMP_STR_BUF_PRINT_NAME_EX(name); + else + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + if (__kmp_monitor_stksize > 0) { + __kmp_str_buf_print_size(buffer, __kmp_monitor_stksize); + } else { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } + if (__kmp_env_format && __kmp_monitor_stksize) { + __kmp_str_buf_print(buffer, "'\n"); + } } // __kmp_stg_print_monitor_stacksize #endif // KMP_USE_MONITOR -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_SETTINGS -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_settings( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_settings ); +static void __kmp_stg_parse_settings(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_settings); } // __kmp_stg_parse_settings -static void -__kmp_stg_print_settings( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_settings ); +static void __kmp_stg_print_settings(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_settings); } // __kmp_stg_print_settings -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_STACKPAD -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_stackpad( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, // Env var name - value, // Env var value - KMP_MIN_STKPADDING, // Min value - KMP_MAX_STKPADDING, // Max value - & __kmp_stkpadding // Var to initialize - ); + +static void __kmp_stg_parse_stackpad(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, // Env var name + value, // Env var value + KMP_MIN_STKPADDING, // Min value + KMP_MAX_STKPADDING, // Max value + &__kmp_stkpadding // Var to initialize + ); } // __kmp_stg_parse_stackpad -static void -__kmp_stg_print_stackpad( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_stkpadding ); +static void __kmp_stg_print_stackpad(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_stkpadding); } // __kmp_stg_print_stackpad -// 
------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_STACKOFFSET -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_stackoffset( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, // Env var name - value, // Env var value - KMP_MIN_STKOFFSET, // Min value - KMP_MAX_STKOFFSET, // Max value - NULL, // - & __kmp_stkoffset, // Var to initialize - 1 - ); + +static void __kmp_stg_parse_stackoffset(char const *name, char const *value, + void *data) { + __kmp_stg_parse_size(name, // Env var name + value, // Env var value + KMP_MIN_STKOFFSET, // Min value + KMP_MAX_STKOFFSET, // Max value + NULL, // + &__kmp_stkoffset, // Var to initialize + 1); } // __kmp_stg_parse_stackoffset -static void -__kmp_stg_print_stackoffset( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_stkoffset ); +static void __kmp_stg_print_stackoffset(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_size(buffer, name, __kmp_stkoffset); } // __kmp_stg_print_stackoffset -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_STACKSIZE, OMP_STACKSIZE, GOMP_STACKSIZE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_stacksize( char const * name, char const * value, void * data ) { +static void __kmp_stg_parse_stacksize(char const *name, char const *value, + void *data) { - kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; - int rc; + kmp_stg_ss_data_t *stacksize = (kmp_stg_ss_data_t *)data; + int rc; - rc = __kmp_stg_check_rivals( name, value, stacksize->rivals ); - if ( rc ) { - return; - }; // if - __kmp_stg_parse_size( - name, // Env var name - value, // Env var value - __kmp_sys_min_stksize, // Min value - KMP_MAX_STKSIZE, // Max value - & __kmp_env_stksize, // - & __kmp_stksize, // Var to initialize - stacksize->factor - ); + rc = __kmp_stg_check_rivals(name, value, stacksize->rivals); + if (rc) { + return; + }; // if + __kmp_stg_parse_size(name, // Env var name + value, // Env var value + __kmp_sys_min_stksize, // Min value + KMP_MAX_STKSIZE, // Max value + &__kmp_env_stksize, // + &__kmp_stksize, // Var to initialize + stacksize->factor); } // __kmp_stg_parse_stacksize -// This function is called for printing both KMP_STACKSIZE (factor is 1) and OMP_STACKSIZE (factor is 1024). -// Currently it is not possible to print OMP_STACKSIZE value in bytes. We can consider adding this -// possibility by a customer request in future. -static void -__kmp_stg_print_stacksize( kmp_str_buf_t * buffer, char const * name, void * data ) { - kmp_stg_ss_data_t * stacksize = (kmp_stg_ss_data_t *) data; - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? __kmp_stksize / stacksize->factor : __kmp_stksize ); - __kmp_str_buf_print( buffer, "'\n" ); - } else { - __kmp_str_buf_print( buffer, " %s=", name ); - __kmp_str_buf_print_size( buffer, (__kmp_stksize % 1024) ? 
__kmp_stksize / stacksize->factor : __kmp_stksize ); - __kmp_str_buf_print( buffer, "\n" ); - } +// This function is called for printing both KMP_STACKSIZE (factor is 1) and +// OMP_STACKSIZE (factor is 1024). Currently it is not possible to print +// OMP_STACKSIZE value in bytes. We can consider adding this possibility by a +// customer request in future. +static void __kmp_stg_print_stacksize(kmp_str_buf_t *buffer, char const *name, + void *data) { + kmp_stg_ss_data_t *stacksize = (kmp_stg_ss_data_t *)data; + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + __kmp_str_buf_print_size(buffer, (__kmp_stksize % 1024) + ? __kmp_stksize / stacksize->factor + : __kmp_stksize); + __kmp_str_buf_print(buffer, "'\n"); + } else { + __kmp_str_buf_print(buffer, " %s=", name); + __kmp_str_buf_print_size(buffer, (__kmp_stksize % 1024) + ? __kmp_stksize / stacksize->factor + : __kmp_stksize); + __kmp_str_buf_print(buffer, "\n"); + } } // __kmp_stg_print_stacksize -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_VERSION -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_version( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_version ); +static void __kmp_stg_parse_version(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_version); } // __kmp_stg_parse_version -static void -__kmp_stg_print_version( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_version ); +static void __kmp_stg_print_version(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_version); } // __kmp_stg_print_version -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_WARNINGS -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_warnings( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_generate_warnings ); - if (__kmp_generate_warnings != kmp_warnings_off) { // AC: we have only 0/1 values documented, - __kmp_generate_warnings = kmp_warnings_explicit; // so reset it to explicit in order to - } // distinguish from default setting + +static void __kmp_stg_parse_warnings(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_generate_warnings); + if (__kmp_generate_warnings != kmp_warnings_off) { + // AC: only 0/1 values documented, so reset to explicit to distinguish from + // default setting + __kmp_generate_warnings = kmp_warnings_explicit; + } } // __kmp_env_parse_warnings -static void -__kmp_stg_print_warnings( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_generate_warnings ); // AC: TODO: change to print_int? -} // __kmp_env_print_warnings // (needs documentation change)... +static void __kmp_stg_print_warnings(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool( + buffer, name, __kmp_generate_warnings); // AC: TODO: change to print_int? +} // __kmp_env_print_warnings // (needs + // documentation change)... 
-// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OMP_NESTED, OMP_NUM_THREADS -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_nested( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_dflt_nested ); +static void __kmp_stg_parse_nested(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_dflt_nested); } // __kmp_stg_parse_nested -static void -__kmp_stg_print_nested( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_dflt_nested ); +static void __kmp_stg_print_nested(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_dflt_nested); } // __kmp_stg_print_nested -static void -__kmp_parse_nested_num_threads( const char *var, const char *env, kmp_nested_nthreads_t *nth_array ) -{ - const char *next = env; - const char *scan = next; +static void __kmp_parse_nested_num_threads(const char *var, const char *env, + kmp_nested_nthreads_t *nth_array) { + const char *next = env; + const char *scan = next; - int total = 0; // Count elements that were set. It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas + int total = 0; // Count elements that were set. It'll be used as an array size + int prev_comma = FALSE; // For correct processing sequential commas - // Count the number of values in the env. var string - for ( ; ; ) { - SKIP_WS( next ); + // Count the number of values in the env. var string + for (;;) { + SKIP_WS(next); - if ( *next == '\0' ) { - break; - } - // Next character is not an integer or not a comma => end of list - if ( ( ( *next < '0' ) || ( *next > '9' ) ) && ( *next !=',') ) { - KMP_WARNING( NthSyntaxError, var, env ); - return; - } - // The next character is ',' - if ( *next == ',' ) { - // ',' is the fisrt character - if ( total == 0 || prev_comma ) { - total++; - } - prev_comma = TRUE; - next++; //skip ',' - SKIP_WS( next ); - } - // Next character is a digit - if ( *next >= '0' && *next <= '9' ) { - prev_comma = FALSE; - SKIP_DIGITS( next ); - total++; - const char *tmp = next; - SKIP_WS( tmp ); - if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { - KMP_WARNING( NthSpacesNotAllowed, var, env ); - return; - } - } + if (*next == '\0') { + break; } - KMP_DEBUG_ASSERT( total > 0 ); - if( total <= 0 ) { - KMP_WARNING( NthSyntaxError, var, env ); + // Next character is not an integer or not a comma => end of list + if (((*next < '0') || (*next > '9')) && (*next != ',')) { + KMP_WARNING(NthSyntaxError, var, env); + return; + } + // The next character is ',' + if (*next == ',') { + // ',' is the fisrt character + if (total == 0 || prev_comma) { + total++; + } + prev_comma = TRUE; + next++; // skip ',' + SKIP_WS(next); + } + // Next character is a digit + if (*next >= '0' && *next <= '9') { + prev_comma = FALSE; + SKIP_DIGITS(next); + total++; + const char *tmp = next; + SKIP_WS(tmp); + if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) { + KMP_WARNING(NthSpacesNotAllowed, var, env); return; + } } + } + KMP_DEBUG_ASSERT(total > 0); + if (total <= 0) { + KMP_WARNING(NthSyntaxError, var, env); + return; + } - // Check if the nested nthreads array exists - if ( ! 
nth_array->nth ) { - // Allocate an array of double size - nth_array->nth = ( int * )KMP_INTERNAL_MALLOC( sizeof( int ) * total * 2 ); - if ( nth_array->nth == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - nth_array->size = total * 2; - } else { - if ( nth_array->size < total ) { - // Increase the array size - do { - nth_array->size *= 2; - } while ( nth_array->size < total ); - - nth_array->nth = (int *) KMP_INTERNAL_REALLOC( - nth_array->nth, sizeof( int ) * nth_array->size ); - if ( nth_array->nth == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - } + // Check if the nested nthreads array exists + if (!nth_array->nth) { + // Allocate an array of double size + nth_array->nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int) * total * 2); + if (nth_array->nth == NULL) { + KMP_FATAL(MemoryAllocFailed); + } + nth_array->size = total * 2; + } else { + if (nth_array->size < total) { + // Increase the array size + do { + nth_array->size *= 2; + } while (nth_array->size < total); + + nth_array->nth = (int *)KMP_INTERNAL_REALLOC( + nth_array->nth, sizeof(int) * nth_array->size); + if (nth_array->nth == NULL) { + KMP_FATAL(MemoryAllocFailed); + } } - nth_array->used = total; - int i = 0; + } + nth_array->used = total; + int i = 0; - prev_comma = FALSE; - total = 0; - // Save values in the array - for ( ; ; ) { - SKIP_WS( scan ); - if ( *scan == '\0' ) { - break; - } - // The next character is ',' - if ( *scan == ',' ) { - // ',' in the beginning of the list - if ( total == 0 ) { - // The value is supposed to be equal to __kmp_avail_proc but it is unknown at the moment. - // So let's put a placeholder (#threads = 0) to correct it later. - nth_array->nth[i++] = 0; - total++; - }else if ( prev_comma ) { - // Num threads is inherited from the previous level - nth_array->nth[i] = nth_array->nth[i - 1]; - i++; - total++; - } - prev_comma = TRUE; - scan++; //skip ',' - SKIP_WS( scan ); - } - // Next character is a digit - if ( *scan >= '0' && *scan <= '9' ) { - int num; - const char *buf = scan; - char const * msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS( scan ); - total++; - - num = __kmp_str_to_int( buf, *scan ); - if ( num < KMP_MIN_NTH ) { - msg = KMP_I18N_STR( ValueTooSmall ); - num = KMP_MIN_NTH; - } else if ( num > __kmp_sys_max_nth ) { - msg = KMP_I18N_STR( ValueTooLarge ); - num = __kmp_sys_max_nth; - } - if ( msg != NULL ) { - // Message is not empty. Print warning. - KMP_WARNING( ParseSizeIntWarn, var, env, msg ); - KMP_INFORM( Using_int_Value, var, num ); - } - nth_array->nth[i++] = num; - } + prev_comma = FALSE; + total = 0; + // Save values in the array + for (;;) { + SKIP_WS(scan); + if (*scan == '\0') { + break; + } + // The next character is ',' + if (*scan == ',') { + // ',' in the beginning of the list + if (total == 0) { + // The value is supposed to be equal to __kmp_avail_proc but it is + // unknown at the moment. + // So let's put a placeholder (#threads = 0) to correct it later. 
+ nth_array->nth[i++] = 0; + total++; + } else if (prev_comma) { + // Num threads is inherited from the previous level + nth_array->nth[i] = nth_array->nth[i - 1]; + i++; + total++; + } + prev_comma = TRUE; + scan++; // skip ',' + SKIP_WS(scan); + } + // Next character is a digit + if (*scan >= '0' && *scan <= '9') { + int num; + const char *buf = scan; + char const *msg = NULL; + prev_comma = FALSE; + SKIP_DIGITS(scan); + total++; + + num = __kmp_str_to_int(buf, *scan); + if (num < KMP_MIN_NTH) { + msg = KMP_I18N_STR(ValueTooSmall); + num = KMP_MIN_NTH; + } else if (num > __kmp_sys_max_nth) { + msg = KMP_I18N_STR(ValueTooLarge); + num = __kmp_sys_max_nth; + } + if (msg != NULL) { + // Message is not empty. Print warning. + KMP_WARNING(ParseSizeIntWarn, var, env, msg); + KMP_INFORM(Using_int_Value, var, num); + } + nth_array->nth[i++] = num; } + } } -static void -__kmp_stg_parse_num_threads( char const * name, char const * value, void * data ) { - // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! - if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - // The array of 1 element - __kmp_nested_nth.nth = ( int* )KMP_INTERNAL_MALLOC( sizeof( int ) ); - __kmp_nested_nth.size = __kmp_nested_nth.used = 1; - __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = __kmp_xproc; - } else { - __kmp_parse_nested_num_threads( name, value, & __kmp_nested_nth ); - if ( __kmp_nested_nth.nth ) { - __kmp_dflt_team_nth = __kmp_nested_nth.nth[0]; - if ( __kmp_dflt_team_nth_ub < __kmp_dflt_team_nth ) { - __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth; - } - } - }; // if - K_DIAG( 1, ( "__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth ) ); +static void __kmp_stg_parse_num_threads(char const *name, char const *value, + void *data) { + // TODO: Remove this option. OMP_NUM_THREADS is a list of positive integers! 
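// [Editor's note, not part of this patch.] For context on the list form
// handled below: each comma-separated value sets the thread count for one
// nesting level, e.g. OMP_NUM_THREADS="4,3,2" requests 4 threads at the
// outermost parallel level, 3 at the next level and 2 below that. As
// implemented in __kmp_parse_nested_num_threads above, a leading comma leaves
// a placeholder that is later filled from __kmp_avail_proc, and a repeated
// comma inherits the previous level's value.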
+ if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { + // The array of 1 element + __kmp_nested_nth.nth = (int *)KMP_INTERNAL_MALLOC(sizeof(int)); + __kmp_nested_nth.size = __kmp_nested_nth.used = 1; + __kmp_nested_nth.nth[0] = __kmp_dflt_team_nth = __kmp_dflt_team_nth_ub = + __kmp_xproc; + } else { + __kmp_parse_nested_num_threads(name, value, &__kmp_nested_nth); + if (__kmp_nested_nth.nth) { + __kmp_dflt_team_nth = __kmp_nested_nth.nth[0]; + if (__kmp_dflt_team_nth_ub < __kmp_dflt_team_nth) { + __kmp_dflt_team_nth_ub = __kmp_dflt_team_nth; + } + } + }; // if + K_DIAG(1, ("__kmp_dflt_team_nth == %d\n", __kmp_dflt_team_nth)); } // __kmp_stg_parse_num_threads -static void -__kmp_stg_print_num_threads( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( __kmp_nested_nth.used ) { - kmp_str_buf_t buf; - __kmp_str_buf_init( &buf ); - for ( int i = 0; i < __kmp_nested_nth.used; i++) { - __kmp_str_buf_print( &buf, "%d", __kmp_nested_nth.nth[i] ); - if ( i < __kmp_nested_nth.used - 1 ) { - __kmp_str_buf_print( &buf, "," ); - } - } - __kmp_str_buf_print( buffer, "='%s'\n", buf.str ); - __kmp_str_buf_free(&buf); - } else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); +static void __kmp_stg_print_num_threads(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + if (__kmp_nested_nth.used) { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + for (int i = 0; i < __kmp_nested_nth.used; i++) { + __kmp_str_buf_print(&buf, "%d", __kmp_nested_nth.nth[i]); + if (i < __kmp_nested_nth.used - 1) { + __kmp_str_buf_print(&buf, ","); + } } + __kmp_str_buf_print(buffer, "='%s'\n", buf.str); + __kmp_str_buf_free(&buf); + } else { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } } // __kmp_stg_print_num_threads -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OpenMP 3.0: KMP_TASKING, OMP_MAX_ACTIVE_LEVELS, -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_tasking( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, (int)tskm_max, (int *)&__kmp_tasking_mode ); +static void __kmp_stg_parse_tasking(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, (int)tskm_max, + (int *)&__kmp_tasking_mode); } // __kmp_stg_parse_tasking -static void -__kmp_stg_print_tasking( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_tasking_mode ); +static void __kmp_stg_print_tasking(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_tasking_mode); } // __kmp_stg_print_tasking -static void -__kmp_stg_parse_task_stealing( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, 1, (int *)&__kmp_task_stealing_constraint ); +static void __kmp_stg_parse_task_stealing(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, 1, + (int *)&__kmp_task_stealing_constraint); } // __kmp_stg_parse_task_stealing -static void -__kmp_stg_print_task_stealing( kmp_str_buf_t * buffer, char const * name, 
void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_task_stealing_constraint ); +static void __kmp_stg_print_task_stealing(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_task_stealing_constraint); } // __kmp_stg_print_task_stealing -static void -__kmp_stg_parse_max_active_levels( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_dflt_max_active_levels ); +static void __kmp_stg_parse_max_active_levels(char const *name, + char const *value, void *data) { + __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, + &__kmp_dflt_max_active_levels); } // __kmp_stg_parse_max_active_levels -static void -__kmp_stg_print_max_active_levels( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_dflt_max_active_levels ); +static void __kmp_stg_print_max_active_levels(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_dflt_max_active_levels); } // __kmp_stg_print_max_active_levels #if OMP_40_ENABLED -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OpenMP 4.0: OMP_DEFAULT_DEVICE -// ------------------------------------------------------------------------------------------------- -static void __kmp_stg_parse_default_device(char const *name, char const *value, void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_DEFAULT_DEVICE_LIMIT, &__kmp_default_device); +static void __kmp_stg_parse_default_device(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, KMP_MAX_DEFAULT_DEVICE_LIMIT, + &__kmp_default_device); } // __kmp_stg_parse_default_device -static void __kmp_stg_print_default_device(kmp_str_buf_t *buffer, char const *name, void *data) { +static void __kmp_stg_print_default_device(kmp_str_buf_t *buffer, + char const *name, void *data) { __kmp_stg_print_int(buffer, name, __kmp_default_device); } // __kmp_stg_print_default_device #endif #if OMP_45_ENABLED -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OpenMP 4.5: OMP_MAX_TASK_PRIORITY -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_max_task_priority(char const *name, char const *value, void *data) { - __kmp_stg_parse_int(name, value, 0, KMP_MAX_TASK_PRIORITY_LIMIT, &__kmp_max_task_priority); +static void __kmp_stg_parse_max_task_priority(char const *name, + char const *value, void *data) { + __kmp_stg_parse_int(name, value, 0, KMP_MAX_TASK_PRIORITY_LIMIT, + &__kmp_max_task_priority); } // __kmp_stg_parse_max_task_priority -static void -__kmp_stg_print_max_task_priority(kmp_str_buf_t *buffer, char const *name, void *data) { - __kmp_stg_print_int(buffer, name, __kmp_max_task_priority); +static void __kmp_stg_print_max_task_priority(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_max_task_priority); } // __kmp_stg_print_max_task_priority #endif // OMP_45_ENABLED -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_DISP_NUM_BUFFERS -// 
------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_disp_buffers( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_serial) ) { - KMP_WARNING( EnvSerialWarn, name ); - return; - } // read value before serial initialization only - __kmp_stg_parse_int( name, value, 1, KMP_MAX_NTH, & __kmp_dispatch_num_buffers ); +static void __kmp_stg_parse_disp_buffers(char const *name, char const *value, + void *data) { + if (TCR_4(__kmp_init_serial)) { + KMP_WARNING(EnvSerialWarn, name); + return; + } // read value before serial initialization only + __kmp_stg_parse_int(name, value, 1, KMP_MAX_NTH, &__kmp_dispatch_num_buffers); } // __kmp_stg_parse_disp_buffers -static void -__kmp_stg_print_disp_buffers( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_dispatch_num_buffers ); +static void __kmp_stg_print_disp_buffers(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_dispatch_num_buffers); } // __kmp_stg_print_disp_buffers #if KMP_NESTED_HOT_TEAMS -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_HOT_TEAMS_MAX_LEVEL, KMP_HOT_TEAMS_MODE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_hot_teams_level( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_max_level ); +static void __kmp_stg_parse_hot_teams_level(char const *name, char const *value, + void *data) { + if (TCR_4(__kmp_init_parallel)) { + KMP_WARNING(EnvParallelWarn, name); + return; + } // read value before first parallel only + __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, + &__kmp_hot_teams_max_level); } // __kmp_stg_parse_hot_teams_level -static void -__kmp_stg_print_hot_teams_level( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_hot_teams_max_level ); +static void __kmp_stg_print_hot_teams_level(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_hot_teams_max_level); } // __kmp_stg_print_hot_teams_level -static void -__kmp_stg_parse_hot_teams_mode( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_int( name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, & __kmp_hot_teams_mode ); +static void __kmp_stg_parse_hot_teams_mode(char const *name, char const *value, + void *data) { + if (TCR_4(__kmp_init_parallel)) { + KMP_WARNING(EnvParallelWarn, name); + return; + } // read value before first parallel only + __kmp_stg_parse_int(name, value, 0, KMP_MAX_ACTIVE_LEVELS_LIMIT, + &__kmp_hot_teams_mode); } // __kmp_stg_parse_hot_teams_mode -static void -__kmp_stg_print_hot_teams_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_hot_teams_mode ); +static void __kmp_stg_print_hot_teams_mode(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, 
__kmp_hot_teams_mode); } // __kmp_stg_print_hot_teams_mode #endif // KMP_NESTED_HOT_TEAMS -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_HANDLE_SIGNALS -// ------------------------------------------------------------------------------------------------- #if KMP_HANDLE_SIGNALS -static void -__kmp_stg_parse_handle_signals( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_handle_signals ); +static void __kmp_stg_parse_handle_signals(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_handle_signals); } // __kmp_stg_parse_handle_signals -static void -__kmp_stg_print_handle_signals( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_handle_signals ); +static void __kmp_stg_print_handle_signals(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_handle_signals); } // __kmp_stg_print_handle_signals #endif // KMP_HANDLE_SIGNALS -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_X_DEBUG, KMP_DEBUG, KMP_DEBUG_BUF_*, KMP_DIAG -// ------------------------------------------------------------------------------------------------- #ifdef KMP_DEBUG -#define KMP_STG_X_DEBUG( x ) \ - static void __kmp_stg_parse_##x##_debug( char const * name, char const * value, void * data ) { \ - __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_##x##_debug ); \ - } /* __kmp_stg_parse_x_debug */ \ - static void __kmp_stg_print_##x##_debug( kmp_str_buf_t * buffer, char const * name, void * data ) { \ - __kmp_stg_print_int( buffer, name, kmp_##x##_debug ); \ - } /* __kmp_stg_print_x_debug */ - -KMP_STG_X_DEBUG( a ) -KMP_STG_X_DEBUG( b ) -KMP_STG_X_DEBUG( c ) -KMP_STG_X_DEBUG( d ) -KMP_STG_X_DEBUG( e ) -KMP_STG_X_DEBUG( f ) +#define KMP_STG_X_DEBUG(x) \ + static void __kmp_stg_parse_##x##_debug(char const *name, char const *value, \ + void *data) { \ + __kmp_stg_parse_int(name, value, 0, INT_MAX, &kmp_##x##_debug); \ + } /* __kmp_stg_parse_x_debug */ \ + static void __kmp_stg_print_##x##_debug(kmp_str_buf_t *buffer, \ + char const *name, void *data) { \ + __kmp_stg_print_int(buffer, name, kmp_##x##_debug); \ + } /* __kmp_stg_print_x_debug */ + +KMP_STG_X_DEBUG(a) +KMP_STG_X_DEBUG(b) +KMP_STG_X_DEBUG(c) +KMP_STG_X_DEBUG(d) +KMP_STG_X_DEBUG(e) +KMP_STG_X_DEBUG(f) #undef KMP_STG_X_DEBUG -static void -__kmp_stg_parse_debug( char const * name, char const * value, void * data ) { - int debug = 0; - __kmp_stg_parse_int( name, value, 0, INT_MAX, & debug ); - if ( kmp_a_debug < debug ) { - kmp_a_debug = debug; - }; // if - if ( kmp_b_debug < debug ) { - kmp_b_debug = debug; - }; // if - if ( kmp_c_debug < debug ) { - kmp_c_debug = debug; - }; // if - if ( kmp_d_debug < debug ) { - kmp_d_debug = debug; - }; // if - if ( kmp_e_debug < debug ) { - kmp_e_debug = debug; - }; // if - if ( kmp_f_debug < debug ) { - kmp_f_debug = debug; - }; // if +static void __kmp_stg_parse_debug(char const *name, char const *value, + void *data) { + int debug = 0; + __kmp_stg_parse_int(name, value, 0, INT_MAX, &debug); + if (kmp_a_debug < debug) { + kmp_a_debug = debug; + }; // if + if (kmp_b_debug < debug) { + kmp_b_debug = debug; + }; // if + if (kmp_c_debug < debug) { + kmp_c_debug 
= debug; + }; // if + if (kmp_d_debug < debug) { + kmp_d_debug = debug; + }; // if + if (kmp_e_debug < debug) { + kmp_e_debug = debug; + }; // if + if (kmp_f_debug < debug) { + kmp_f_debug = debug; + }; // if } // __kmp_stg_parse_debug -static void -__kmp_stg_parse_debug_buf( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_debug_buf ); - // !!! TODO: Move buffer initialization of of this file! It may works incorrectly if - // KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or KMP_DEBUG_BUF_CHARS. - if ( __kmp_debug_buf ) { - int i; - int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars; - - /* allocate and initialize all entries in debug buffer to empty */ - __kmp_debug_buffer = (char *) __kmp_page_allocate( elements * sizeof( char ) ); - for ( i = 0; i < elements; i += __kmp_debug_buf_chars ) - __kmp_debug_buffer[i] = '\0'; - - __kmp_debug_count = 0; - } - K_DIAG( 1, ( "__kmp_debug_buf = %d\n", __kmp_debug_buf ) ); +static void __kmp_stg_parse_debug_buf(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_debug_buf); + // !!! TODO: Move buffer initialization of of this file! It may works + // incorrectly if KMP_DEBUG_BUF is parsed before KMP_DEBUG_BUF_LINES or + // KMP_DEBUG_BUF_CHARS. + if (__kmp_debug_buf) { + int i; + int elements = __kmp_debug_buf_lines * __kmp_debug_buf_chars; + + /* allocate and initialize all entries in debug buffer to empty */ + __kmp_debug_buffer = (char *)__kmp_page_allocate(elements * sizeof(char)); + for (i = 0; i < elements; i += __kmp_debug_buf_chars) + __kmp_debug_buffer[i] = '\0'; + + __kmp_debug_count = 0; + } + K_DIAG(1, ("__kmp_debug_buf = %d\n", __kmp_debug_buf)); } // __kmp_stg_parse_debug_buf -static void -__kmp_stg_print_debug_buf( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_debug_buf ); +static void __kmp_stg_print_debug_buf(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_debug_buf); } // __kmp_stg_print_debug_buf -static void -__kmp_stg_parse_debug_buf_atomic( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_debug_buf_atomic ); +static void __kmp_stg_parse_debug_buf_atomic(char const *name, + char const *value, void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_debug_buf_atomic); } // __kmp_stg_parse_debug_buf_atomic -static void -__kmp_stg_print_debug_buf_atomic( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_debug_buf_atomic ); +static void __kmp_stg_print_debug_buf_atomic(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_debug_buf_atomic); } // __kmp_stg_print_debug_buf_atomic -static void -__kmp_stg_parse_debug_buf_chars( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, - value, - KMP_DEBUG_BUF_CHARS_MIN, - INT_MAX, - & __kmp_debug_buf_chars - ); +static void __kmp_stg_parse_debug_buf_chars(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, KMP_DEBUG_BUF_CHARS_MIN, INT_MAX, + &__kmp_debug_buf_chars); } // __kmp_stg_debug_parse_buf_chars -static void -__kmp_stg_print_debug_buf_chars( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_debug_buf_chars ); +static void __kmp_stg_print_debug_buf_chars(kmp_str_buf_t *buffer, + char const *name, void 
*data) { + __kmp_stg_print_int(buffer, name, __kmp_debug_buf_chars); } // __kmp_stg_print_debug_buf_chars -static void -__kmp_stg_parse_debug_buf_lines( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( - name, - value, - KMP_DEBUG_BUF_LINES_MIN, - INT_MAX, - & __kmp_debug_buf_lines - ); +static void __kmp_stg_parse_debug_buf_lines(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, KMP_DEBUG_BUF_LINES_MIN, INT_MAX, + &__kmp_debug_buf_lines); } // __kmp_stg_parse_debug_buf_lines -static void -__kmp_stg_print_debug_buf_lines( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_debug_buf_lines ); +static void __kmp_stg_print_debug_buf_lines(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_debug_buf_lines); } // __kmp_stg_print_debug_buf_lines -static void -__kmp_stg_parse_diag( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, INT_MAX, & kmp_diag ); +static void __kmp_stg_parse_diag(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, INT_MAX, &kmp_diag); } // __kmp_stg_parse_diag -static void -__kmp_stg_print_diag( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, kmp_diag ); +static void __kmp_stg_print_diag(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, kmp_diag); } // __kmp_stg_print_diag #endif // KMP_DEBUG -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ALIGN_ALLOC -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_align_alloc( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - CACHE_LINE, - INT_MAX, - NULL, - & __kmp_align_alloc, - 1 - ); + +static void __kmp_stg_parse_align_alloc(char const *name, char const *value, + void *data) { + __kmp_stg_parse_size(name, value, CACHE_LINE, INT_MAX, NULL, + &__kmp_align_alloc, 1); } // __kmp_stg_parse_align_alloc -static void -__kmp_stg_print_align_alloc( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_align_alloc ); +static void __kmp_stg_print_align_alloc(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_size(buffer, name, __kmp_align_alloc); } // __kmp_stg_print_align_alloc -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_PLAIN_BARRIER, KMP_FORKJOIN_BARRIER, KMP_REDUCTION_BARRIER -// ------------------------------------------------------------------------------------------------- - -// TODO: Remove __kmp_barrier_branch_bit_env_name varibale, remove loops from parse and print -// functions, pass required info through data argument. 
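// [Editor's note, not part of this patch.] These barrier settings take a value
// of the form "<gather_bits>[,<release_bits>]", parsed below with strchr and
// __kmp_str_to_int: the first number sets the gather (fan-in) branch bits, the
// optional second number the release (fan-out) branch bits, and values above
// KMP_MAX_BRANCH_BITS are rejected with a warning and reset to the defaults.
// Reading the bits as a tree branching factor of 2^bits (so, for example,
// KMP_FORKJOIN_BARRIER="2,2" requesting a fan of 4 in both phases) is the
// editor's gloss, not something stated in this hunk.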
- -static void -__kmp_stg_parse_barrier_branch_bit( char const * name, char const * value, void * data ) { - const char *var; - - /* ---------- Barrier branch bit control ------------ */ - for ( int i=bs_plain_barrier; i KMP_MAX_BRANCH_BITS ) { - __kmp_msg( kmp_ms_warning, KMP_MSG( BarrReleaseValueInvalid, name, comma + 1 ), __kmp_msg_null ); - __kmp_barrier_release_branch_bits[ i ] = __kmp_barrier_release_bb_dflt; - } - } - if ( __kmp_barrier_gather_branch_bits[ i ] > KMP_MAX_BRANCH_BITS ) { - KMP_WARNING( BarrGatherValueInvalid, name, value ); - KMP_INFORM( Using_uint_Value, name, __kmp_barrier_gather_bb_dflt ); - __kmp_barrier_gather_branch_bits[ i ] = __kmp_barrier_gather_bb_dflt; - } + +// TODO: Remove __kmp_barrier_branch_bit_env_name varibale, remove loops from +// parse and print functions, pass required info through data argument. + +static void __kmp_stg_parse_barrier_branch_bit(char const *name, + char const *value, void *data) { + const char *var; + + /* ---------- Barrier branch bit control ------------ */ + for (int i = bs_plain_barrier; i < bs_last_barrier; i++) { + var = __kmp_barrier_branch_bit_env_name[i]; + if ((strcmp(var, name) == 0) && (value != 0)) { + char *comma; + + comma = (char *)strchr(value, ','); + __kmp_barrier_gather_branch_bits[i] = + (kmp_uint32)__kmp_str_to_int(value, ','); + /* is there a specified release parameter? */ + if (comma == NULL) { + __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; + } else { + __kmp_barrier_release_branch_bits[i] = + (kmp_uint32)__kmp_str_to_int(comma + 1, 0); + + if (__kmp_barrier_release_branch_bits[i] > KMP_MAX_BRANCH_BITS) { + __kmp_msg(kmp_ms_warning, + KMP_MSG(BarrReleaseValueInvalid, name, comma + 1), + __kmp_msg_null); + __kmp_barrier_release_branch_bits[i] = __kmp_barrier_release_bb_dflt; } - K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[ i ], \ - __kmp_barrier_gather_branch_bits [ i ], \ - __kmp_barrier_release_branch_bits [ i ])) + } + if (__kmp_barrier_gather_branch_bits[i] > KMP_MAX_BRANCH_BITS) { + KMP_WARNING(BarrGatherValueInvalid, name, value); + KMP_INFORM(Using_uint_Value, name, __kmp_barrier_gather_bb_dflt); + __kmp_barrier_gather_branch_bits[i] = __kmp_barrier_gather_bb_dflt; + } } + K_DIAG(1, ("%s == %d,%d\n", __kmp_barrier_branch_bit_env_name[i], + __kmp_barrier_gather_branch_bits[i], + __kmp_barrier_release_branch_bits[i])) + } } // __kmp_stg_parse_barrier_branch_bit -static void -__kmp_stg_print_barrier_branch_bit( kmp_str_buf_t * buffer, char const * name, void * data ) { - const char *var; - for ( int i=bs_plain_barrier; irivals ); - if ( rc ) { - return; - }; // if - if ( reduction->force ) { - if( value != 0 ) { - if( __kmp_str_match( "critical", 0, value ) ) - __kmp_force_reduction_method = critical_reduce_block; - else if( __kmp_str_match( "atomic", 0, value ) ) - __kmp_force_reduction_method = atomic_reduce_block; - else if( __kmp_str_match( "tree", 0, value ) ) - __kmp_force_reduction_method = tree_reduce_block; - else { - KMP_FATAL( UnknownForceReduction, name, value ); - } - } +static void __kmp_stg_parse_force_reduction(char const *name, char const *value, + void *data) { + kmp_stg_fr_data_t *reduction = (kmp_stg_fr_data_t *)data; + int rc; + + rc = __kmp_stg_check_rivals(name, value, reduction->rivals); + if (rc) { + return; + }; // if + if (reduction->force) { + if (value != 0) { + if (__kmp_str_match("critical", 0, value)) + __kmp_force_reduction_method = critical_reduce_block; + else if (__kmp_str_match("atomic", 0, value)) + 
__kmp_force_reduction_method = atomic_reduce_block; + else if (__kmp_str_match("tree", 0, value)) + __kmp_force_reduction_method = tree_reduce_block; + else { + KMP_FATAL(UnknownForceReduction, name, value); + } + } + } else { + __kmp_stg_parse_bool(name, value, &__kmp_determ_red); + if (__kmp_determ_red) { + __kmp_force_reduction_method = tree_reduce_block; } else { - __kmp_stg_parse_bool( name, value, & __kmp_determ_red ); - if( __kmp_determ_red ) { - __kmp_force_reduction_method = tree_reduce_block; - } else { - __kmp_force_reduction_method = reduction_method_not_defined; - } + __kmp_force_reduction_method = reduction_method_not_defined; } - K_DIAG( 1, ( "__kmp_force_reduction_method == %d\n", __kmp_force_reduction_method ) ); + } + K_DIAG(1, ("__kmp_force_reduction_method == %d\n", + __kmp_force_reduction_method)); } // __kmp_stg_parse_force_reduction -static void -__kmp_stg_print_force_reduction( kmp_str_buf_t * buffer, char const * name, void * data ) { - - kmp_stg_fr_data_t * reduction = (kmp_stg_fr_data_t *) data; - if ( reduction->force ) { - if( __kmp_force_reduction_method == critical_reduce_block) { - __kmp_stg_print_str( buffer, name, "critical"); - } else if ( __kmp_force_reduction_method == atomic_reduce_block ) { - __kmp_stg_print_str( buffer, name, "atomic"); - } else if ( __kmp_force_reduction_method == tree_reduce_block ) { - __kmp_stg_print_str( buffer, name, "tree"); - } else { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } +static void __kmp_stg_print_force_reduction(kmp_str_buf_t *buffer, + char const *name, void *data) { + + kmp_stg_fr_data_t *reduction = (kmp_stg_fr_data_t *)data; + if (reduction->force) { + if (__kmp_force_reduction_method == critical_reduce_block) { + __kmp_stg_print_str(buffer, name, "critical"); + } else if (__kmp_force_reduction_method == atomic_reduce_block) { + __kmp_stg_print_str(buffer, name, "atomic"); + } else if (__kmp_force_reduction_method == tree_reduce_block) { + __kmp_stg_print_str(buffer, name, "tree"); } else { - __kmp_stg_print_bool( buffer, name, __kmp_determ_red ); + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); } - + } else { + __kmp_stg_print_bool(buffer, name, __kmp_determ_red); + } } // __kmp_stg_print_force_reduction -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_STORAGE_MAP -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_storage_map( char const * name, char const * value, void * data ) { - if ( __kmp_str_match( "verbose", 1, value ) ) { - __kmp_storage_map = TRUE; - __kmp_storage_map_verbose = TRUE; - __kmp_storage_map_verbose_specified = TRUE; - } else { - __kmp_storage_map_verbose = FALSE; - __kmp_stg_parse_bool( name, value, & __kmp_storage_map ); // !!! - }; // if +static void __kmp_stg_parse_storage_map(char const *name, char const *value, + void *data) { + if (__kmp_str_match("verbose", 1, value)) { + __kmp_storage_map = TRUE; + __kmp_storage_map_verbose = TRUE; + __kmp_storage_map_verbose_specified = TRUE; + + } else { + __kmp_storage_map_verbose = FALSE; + __kmp_stg_parse_bool(name, value, &__kmp_storage_map); // !!! 
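// [Editor's note, not part of this patch.] In other words, KMP_STORAGE_MAP
// accepts either the keyword "verbose", which enables both the storage map and
// its verbose mode, or an ordinary boolean value handled by
// __kmp_stg_parse_bool.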
+ }; // if } // __kmp_stg_parse_storage_map -static void -__kmp_stg_print_storage_map( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_storage_map_verbose || __kmp_storage_map_verbose_specified ) { - __kmp_stg_print_str( buffer, name, "verbose" ); - } else { - __kmp_stg_print_bool( buffer, name, __kmp_storage_map ); - } +static void __kmp_stg_print_storage_map(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_storage_map_verbose || __kmp_storage_map_verbose_specified) { + __kmp_stg_print_str(buffer, name, "verbose"); + } else { + __kmp_stg_print_bool(buffer, name, __kmp_storage_map); + } } // __kmp_stg_print_storage_map -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ALL_THREADPRIVATE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_all_threadprivate( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, __kmp_allThreadsSpecified ? __kmp_max_nth : 1, __kmp_max_nth, - & __kmp_tp_capacity ); +static void __kmp_stg_parse_all_threadprivate(char const *name, + char const *value, void *data) { + __kmp_stg_parse_int(name, value, + __kmp_allThreadsSpecified ? __kmp_max_nth : 1, + __kmp_max_nth, &__kmp_tp_capacity); } // __kmp_stg_parse_all_threadprivate -static void -__kmp_stg_print_all_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_tp_capacity ); - +static void __kmp_stg_print_all_threadprivate(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_tp_capacity); } -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_FOREIGN_THREADS_THREADPRIVATE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_foreign_threads_threadprivate( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_foreign_tp ); +static void __kmp_stg_parse_foreign_threads_threadprivate(char const *name, + char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_foreign_tp); } // __kmp_stg_parse_foreign_threads_threadprivate -static void -__kmp_stg_print_foreign_threads_threadprivate( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_foreign_tp ); +static void __kmp_stg_print_foreign_threads_threadprivate(kmp_str_buf_t *buffer, + char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_foreign_tp); } // __kmp_stg_print_foreign_threads_threadprivate - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_AFFINITY, GOMP_CPU_AFFINITY, KMP_TOPOLOGY_METHOD -// ------------------------------------------------------------------------------------------------- #if KMP_AFFINITY_SUPPORTED -// // Parse the proc id list. Return TRUE if successful, FALSE otherwise. 
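// [Editor's note -- illustrative sketch, not part of this patch.] The proc id
// list parser below accepts comma-separated entries, each of which is an
// explicit set such as "{0,2,4}", a single id, or a range "start-end" with an
// optional ":stride" (the stride may be negative); a zero stride and ranges
// spanning more than 65536 ids are rejected with warnings. A self-contained
// sketch of the range expansion implied by those checks (expand_range_sketch
// is a hypothetical helper, not part of the runtime):
#include <vector>
static bool expand_range_sketch(int start, int end, int stride,
                                std::vector<int> &out) {
  if (stride == 0)
    return false; // rejected below as AffZeroStride
  if (stride > 0 ? start > end : start < end)
    return false; // rejected below as AffStartGreaterEnd / AffStrideLessZero
  if ((end - start) / stride > 65536)
    return false; // rejected below as AffRangeTooBig
  for (int p = start; stride > 0 ? p <= end : p >= end; p += stride)
    out.push_back(p); // e.g. "0-7:2" -> 0,2,4,6 and "7-0:-2" -> 7,5,3,1
  return true;
}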
-// -static int -__kmp_parse_affinity_proc_id_list( const char *var, const char *env, - const char **nextEnv, char **proclist ) -{ - const char *scan = env; - const char *next = scan; - int empty = TRUE; - - *proclist = NULL; +static int __kmp_parse_affinity_proc_id_list(const char *var, const char *env, + const char **nextEnv, + char **proclist) { + const char *scan = env; + const char *next = scan; + int empty = TRUE; - for (;;) { - int start, end, stride; + *proclist = NULL; - SKIP_WS(scan); - next = scan; - if (*next == '\0') { - break; - } + for (;;) { + int start, end, stride; - if (*next == '{') { - int num; - next++; // skip '{' - SKIP_WS(next); - scan = next; - - // - // Read the first integer in the set. - // - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - - for (;;) { - // - // Check for end of set. - // - SKIP_WS(next); - if (*next == '}') { - next++; // skip '}' - break; - } - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - SKIP_WS(next); - - // - // Read the next integer in the set. - // - scan = next; - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - - SKIP_DIGITS(next); - num = __kmp_str_to_int(scan, *next); - KMP_ASSERT(num >= 0); - } - empty = FALSE; + SKIP_WS(scan); + next = scan; + if (*next == '\0') { + break; + } - SKIP_WS(next); - if (*next == ',') { - next++; - } - scan = next; - continue; - } + if (*next == '{') { + int num; + next++; // skip '{' + SKIP_WS(next); + scan = next; - // - // Next character is not an integer => end of list - // - if ((*next < '0') || (*next > '9')) { - if (empty) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - break; - } + // Read the first integer in the set. + if ((*next < '0') || (*next > '9')) { + KMP_WARNING(AffSyntaxError, var); + return FALSE; + } + SKIP_DIGITS(next); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT(num >= 0); - // - // Read the first integer. - // - SKIP_DIGITS(next); - start = __kmp_str_to_int(scan, *next); - KMP_ASSERT(start >= 0); + for (;;) { + // Check for end of set. SKIP_WS(next); - - // - // If this isn't a range, then go on. - // - if (*next != '-') { - empty = FALSE; - - // - // Skip optional comma. - // - if (*next == ',') { - next++; - } - scan = next; - continue; + if (*next == '}') { + next++; // skip '}' + break; } - // - // This is a range. Skip over the '-' and read in the 2nd int. - // - next++; // skip '-' + // Skip optional comma. + if (*next == ',') { + next++; + } SKIP_WS(next); + + // Read the next integer in the set. scan = next; if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; + KMP_WARNING(AffSyntaxError, var); + return FALSE; } + SKIP_DIGITS(next); - end = __kmp_str_to_int(scan, *next); - KMP_ASSERT(end >= 0); + num = __kmp_str_to_int(scan, *next); + KMP_ASSERT(num >= 0); + } + empty = FALSE; - // - // Check for a stride parameter - // - stride = 1; - SKIP_WS(next); - if (*next == ':') { - // - // A stride is specified. Skip over the ':" and read the 3rd int. 
- // - int sign = +1; - next++; // skip ':' - SKIP_WS(next); - scan = next; - if (*next == '-') { - sign = -1; - next++; - SKIP_WS(next); - scan = next; - } - if ((*next < '0') || (*next > '9')) { - KMP_WARNING( AffSyntaxError, var ); - return FALSE; - } - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - stride *= sign; - } + SKIP_WS(next); + if (*next == ',') { + next++; + } + scan = next; + continue; + } - // - // Do some range checks. - // - if (stride == 0) { - KMP_WARNING( AffZeroStride, var ); - return FALSE; - } - if (stride > 0) { - if (start > end) { - KMP_WARNING( AffStartGreaterEnd, var, start, end ); - return FALSE; - } - } - else { - if (start < end) { - KMP_WARNING( AffStrideLessZero, var, start, end ); - return FALSE; - } - } - if ((end - start) / stride > 65536 ) { - KMP_WARNING( AffRangeTooBig, var, end, start, stride ); - return FALSE; - } + // Next character is not an integer => end of list + if ((*next < '0') || (*next > '9')) { + if (empty) { + KMP_WARNING(AffSyntaxError, var); + return FALSE; + } + break; + } - empty = FALSE; + // Read the first integer. + SKIP_DIGITS(next); + start = __kmp_str_to_int(scan, *next); + KMP_ASSERT(start >= 0); + SKIP_WS(next); - // - // Skip optional comma. - // + // If this isn't a range, then go on. + if (*next != '-') { + empty = FALSE; + + // Skip optional comma. + if (*next == ',') { + next++; + } + scan = next; + continue; + } + + // This is a range. Skip over the '-' and read in the 2nd int. + next++; // skip '-' + SKIP_WS(next); + scan = next; + if ((*next < '0') || (*next > '9')) { + KMP_WARNING(AffSyntaxError, var); + return FALSE; + } + SKIP_DIGITS(next); + end = __kmp_str_to_int(scan, *next); + KMP_ASSERT(end >= 0); + + // Check for a stride parameter + stride = 1; + SKIP_WS(next); + if (*next == ':') { + // A stride is specified. Skip over the ':" and read the 3rd int. + int sign = +1; + next++; // skip ':' + SKIP_WS(next); + scan = next; + if (*next == '-') { + sign = -1; + next++; SKIP_WS(next); - if (*next == ',') { - next++; - } scan = next; + } + if ((*next < '0') || (*next > '9')) { + KMP_WARNING(AffSyntaxError, var); + return FALSE; + } + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT(stride >= 0); + stride *= sign; + } + + // Do some range checks. + if (stride == 0) { + KMP_WARNING(AffZeroStride, var); + return FALSE; + } + if (stride > 0) { + if (start > end) { + KMP_WARNING(AffStartGreaterEnd, var, start, end); + return FALSE; + } + } else { + if (start < end) { + KMP_WARNING(AffStrideLessZero, var, start, end); + return FALSE; + } + } + if ((end - start) / stride > 65536) { + KMP_WARNING(AffRangeTooBig, var, end, start, stride); + return FALSE; } - *nextEnv = next; + empty = FALSE; - { - int len = next - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *proclist = retlist; + // Skip optional comma. + SKIP_WS(next); + if (*next == ',') { + next++; } - return TRUE; -} + scan = next; + } + *nextEnv = next; + + { + int len = next - env; + char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); + retlist[len] = '\0'; + *proclist = retlist; + } + return TRUE; +} -// // If KMP_AFFINITY is specified without a type, then // __kmp_affinity_notype should point to its setting. 
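// [Editor's note, not part of this patch.] __kmp_parse_affinity_env below
// tokenizes a KMP_AFFINITY value made up of optional modifiers (verbose,
// warnings, respect, granularity=<level>, proclist=[...], and so on), an
// affinity type, and up to two bare integers whose meaning depends on the type
// (the code's "permute and offset"). For example,
// KMP_AFFINITY="granularity=fine,compact,1,0" selects fine granularity, the
// compact type, permute 1 and offset 0.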
-// static kmp_setting_t *__kmp_affinity_notype = NULL; -static void -__kmp_parse_affinity_env( char const * name, char const * value, - enum affinity_type * out_type, - char ** out_proclist, - int * out_verbose, - int * out_warn, - int * out_respect, - enum affinity_gran * out_gran, - int * out_gran_levels, - int * out_dups, - int * out_compact, - int * out_offset -) -{ - char * buffer = NULL; // Copy of env var value. - char * buf = NULL; // Buffer for strtok_r() function. - char * next = NULL; // end of token / start of next. - const char * start; // start of current token (for err msgs) - int count = 0; // Counter of parsed integer numbers. - int number[ 2 ]; // Parsed numbers. - - // Guards. - int type = 0; - int proclist = 0; - int max_proclist = 0; - int verbose = 0; - int warnings = 0; - int respect = 0; - int gran = 0; - int dups = 0; - - KMP_ASSERT( value != NULL ); - - if ( TCR_4(__kmp_init_middle) ) { - KMP_WARNING( EnvMiddleWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } - __kmp_env_toPrint( name, 1 ); +static void __kmp_parse_affinity_env(char const *name, char const *value, + enum affinity_type *out_type, + char **out_proclist, int *out_verbose, + int *out_warn, int *out_respect, + enum affinity_gran *out_gran, + int *out_gran_levels, int *out_dups, + int *out_compact, int *out_offset) { + char *buffer = NULL; // Copy of env var value. + char *buf = NULL; // Buffer for strtok_r() function. + char *next = NULL; // end of token / start of next. + const char *start; // start of current token (for err msgs) + int count = 0; // Counter of parsed integer numbers. + int number[2]; // Parsed numbers. + + // Guards. + int type = 0; + int proclist = 0; + int max_proclist = 0; + int verbose = 0; + int warnings = 0; + int respect = 0; + int gran = 0; + int dups = 0; + + KMP_ASSERT(value != NULL); + + if (TCR_4(__kmp_init_middle)) { + KMP_WARNING(EnvMiddleWarn, name); + __kmp_env_toPrint(name, 0); + return; + } + __kmp_env_toPrint(name, 1); - buffer = __kmp_str_format( "%s", value ); // Copy env var to keep original intact. - buf = buffer; - SKIP_WS(buf); + buffer = + __kmp_str_format("%s", value); // Copy env var to keep original intact. + buf = buffer; + SKIP_WS(buf); - // Helper macros. +// Helper macros. - // - // If we see a parse error, emit a warning and scan to the next ",". - // - // FIXME - there's got to be a better way to print an error - // message, hopefully without overwritting peices of buf. - // - #define EMIT_WARN(skip,errlist) \ - { \ - char ch; \ - if (skip) { \ - SKIP_TO(next, ','); \ - } \ - ch = *next; \ - *next = '\0'; \ - KMP_WARNING errlist; \ - *next = ch; \ - if (skip) { \ - if (ch == ',') next++; \ - } \ - buf = next; \ - } +// If we see a parse error, emit a warning and scan to the next ",". +// +// FIXME - there's got to be a better way to print an error +// message, hopefully without overwritting peices of buf. 
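// [Editor's note -- illustrative sketch, not part of this patch.] The
// EMIT_WARN helper below reports just the offending token by temporarily
// writing '\0' at the token's end and restoring the saved character
// afterwards, so scanning can continue in the same buffer. A self-contained
// sketch of that trick (report_token_sketch is a hypothetical helper):
#include <cstdio>
static void report_token_sketch(char *tok_begin, char *tok_end) {
  char saved = *tok_end;
  *tok_end = '\0'; // terminate the buffer at the end of the token
  std::fprintf(stderr, "invalid token: %s\n", tok_begin);
  *tok_end = saved; // restore so the caller can keep scanning
}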
+#define EMIT_WARN(skip, errlist) \ + { \ + char ch; \ + if (skip) { \ + SKIP_TO(next, ','); \ + } \ + ch = *next; \ + *next = '\0'; \ + KMP_WARNING errlist; \ + *next = ch; \ + if (skip) { \ + if (ch == ',') \ + next++; \ + } \ + buf = next; \ + } - #define _set_param(_guard,_var,_val) \ - { \ - if ( _guard == 0 ) { \ - _var = _val; \ - } else { \ - EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ - }; \ - ++ _guard; \ - } +#define _set_param(_guard, _var, _val) \ + { \ + if (_guard == 0) { \ + _var = _val; \ + } else { \ + EMIT_WARN(FALSE, (AffParamDefined, name, start)); \ + }; \ + ++_guard; \ + } - #define set_type(val) _set_param( type, *out_type, val ) - #define set_verbose(val) _set_param( verbose, *out_verbose, val ) - #define set_warnings(val) _set_param( warnings, *out_warn, val ) - #define set_respect(val) _set_param( respect, *out_respect, val ) - #define set_dups(val) _set_param( dups, *out_dups, val ) - #define set_proclist(val) _set_param( proclist, *out_proclist, val ) - - #define set_gran(val,levels) \ - { \ - if ( gran == 0 ) { \ - *out_gran = val; \ - *out_gran_levels = levels; \ - } else { \ - EMIT_WARN( FALSE, ( AffParamDefined, name, start ) ); \ - }; \ - ++ gran; \ - } +#define set_type(val) _set_param(type, *out_type, val) +#define set_verbose(val) _set_param(verbose, *out_verbose, val) +#define set_warnings(val) _set_param(warnings, *out_warn, val) +#define set_respect(val) _set_param(respect, *out_respect, val) +#define set_dups(val) _set_param(dups, *out_dups, val) +#define set_proclist(val) _set_param(proclist, *out_proclist, val) + +#define set_gran(val, levels) \ + { \ + if (gran == 0) { \ + *out_gran = val; \ + *out_gran_levels = levels; \ + } else { \ + EMIT_WARN(FALSE, (AffParamDefined, name, start)); \ + }; \ + ++gran; \ + } -# if OMP_40_ENABLED - KMP_DEBUG_ASSERT( ( __kmp_nested_proc_bind.bind_types != NULL ) - && ( __kmp_nested_proc_bind.used > 0 ) ); -# endif +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT((__kmp_nested_proc_bind.bind_types != NULL) && + (__kmp_nested_proc_bind.used > 0)); +#endif - while ( *buf != '\0' ) { - start = next = buf; + while (*buf != '\0') { + start = next = buf; - if (__kmp_match_str("none", buf, (const char **)&next)) { - set_type( affinity_none ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - buf = next; - } else if (__kmp_match_str("scatter", buf, (const char **)&next)) { - set_type( affinity_scatter ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("compact", buf, (const char **)&next)) { - set_type( affinity_compact ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("logical", buf, (const char **)&next)) { - set_type( affinity_logical ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("physical", buf, (const char **)&next)) { - set_type( affinity_physical ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("explicit", buf, (const char **)&next)) { - set_type( affinity_explicit ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("balanced", buf, (const char **)&next)) { - set_type( affinity_balanced ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = 
proc_bind_intel; -# endif - buf = next; - } else if (__kmp_match_str("disabled", buf, (const char **)&next)) { - set_type( affinity_disabled ); -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - buf = next; - } else if (__kmp_match_str("verbose", buf, (const char **)&next)) { - set_verbose( TRUE ); - buf = next; - } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) { - set_verbose( FALSE ); - buf = next; - } else if (__kmp_match_str("warnings", buf, (const char **)&next)) { - set_warnings( TRUE ); - buf = next; - } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) { - set_warnings( FALSE ); - buf = next; - } else if (__kmp_match_str("respect", buf, (const char **)&next)) { - set_respect( TRUE ); - buf = next; - } else if (__kmp_match_str("norespect", buf, (const char **)&next)) { - set_respect( FALSE ); - buf = next; - } else if (__kmp_match_str("duplicates", buf, (const char **)&next) - || __kmp_match_str("dups", buf, (const char **)&next)) { - set_dups( TRUE ); - buf = next; - } else if (__kmp_match_str("noduplicates", buf, (const char **)&next) - || __kmp_match_str("nodups", buf, (const char **)&next)) { - set_dups( FALSE ); - buf = next; - } else if (__kmp_match_str("granularity", buf, (const char **)&next) - || __kmp_match_str("gran", buf, (const char **)&next)) { - SKIP_WS(next); - if (*next != '=') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '=' - SKIP_WS(next); - - buf = next; - if (__kmp_match_str("fine", buf, (const char **)&next)) { - set_gran( affinity_gran_fine, -1 ); - buf = next; - } else if (__kmp_match_str("thread", buf, (const char **)&next)) { - set_gran( affinity_gran_thread, -1 ); - buf = next; - } else if (__kmp_match_str("core", buf, (const char **)&next)) { - set_gran( affinity_gran_core, -1 ); - buf = next; - } else if (__kmp_match_str("package", buf, (const char **)&next)) { - set_gran( affinity_gran_package, -1 ); - buf = next; - } else if (__kmp_match_str("node", buf, (const char **)&next)) { - set_gran( affinity_gran_node, -1 ); - buf = next; -# if KMP_GROUP_AFFINITY - } else if (__kmp_match_str("group", buf, (const char **)&next)) { - set_gran( affinity_gran_group, -1 ); - buf = next; -# endif /* KMP_GROUP AFFINITY */ - } else if ((*buf >= '0') && (*buf <= '9')) { - int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int( buf, *next ); - KMP_ASSERT(n >= 0); - buf = next; - set_gran( affinity_gran_default, n ); - } else { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - } else if (__kmp_match_str("proclist", buf, (const char **)&next)) { - char *temp_proclist; + if (__kmp_match_str("none", buf, (const char **)&next)) { + set_type(affinity_none); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +#endif + buf = next; + } else if (__kmp_match_str("scatter", buf, (const char **)&next)) { + set_type(affinity_scatter); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("compact", buf, (const char **)&next)) { + set_type(affinity_compact); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("logical", buf, (const char **)&next)) { + set_type(affinity_logical); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("physical", buf, (const char **)&next)) { + 
set_type(affinity_physical); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("explicit", buf, (const char **)&next)) { + set_type(affinity_explicit); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("balanced", buf, (const char **)&next)) { + set_type(affinity_balanced); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + buf = next; + } else if (__kmp_match_str("disabled", buf, (const char **)&next)) { + set_type(affinity_disabled); +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +#endif + buf = next; + } else if (__kmp_match_str("verbose", buf, (const char **)&next)) { + set_verbose(TRUE); + buf = next; + } else if (__kmp_match_str("noverbose", buf, (const char **)&next)) { + set_verbose(FALSE); + buf = next; + } else if (__kmp_match_str("warnings", buf, (const char **)&next)) { + set_warnings(TRUE); + buf = next; + } else if (__kmp_match_str("nowarnings", buf, (const char **)&next)) { + set_warnings(FALSE); + buf = next; + } else if (__kmp_match_str("respect", buf, (const char **)&next)) { + set_respect(TRUE); + buf = next; + } else if (__kmp_match_str("norespect", buf, (const char **)&next)) { + set_respect(FALSE); + buf = next; + } else if (__kmp_match_str("duplicates", buf, (const char **)&next) || + __kmp_match_str("dups", buf, (const char **)&next)) { + set_dups(TRUE); + buf = next; + } else if (__kmp_match_str("noduplicates", buf, (const char **)&next) || + __kmp_match_str("nodups", buf, (const char **)&next)) { + set_dups(FALSE); + buf = next; + } else if (__kmp_match_str("granularity", buf, (const char **)&next) || + __kmp_match_str("gran", buf, (const char **)&next)) { + SKIP_WS(next); + if (*next != '=') { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + next++; // skip '=' + SKIP_WS(next); - SKIP_WS(next); - if (*next != '=') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '=' - SKIP_WS(next); - if (*next != '[') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip '[' - buf = next; - if (! __kmp_parse_affinity_proc_id_list(name, buf, - (const char **)&next, &temp_proclist)) { - // - // warning already emitted. - // - SKIP_TO(next, ']'); - if (*next == ']') next++; - SKIP_TO(next, ','); - if (*next == ',') next++; - buf = next; - continue; - } - if (*next != ']') { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } - next++; // skip ']' - set_proclist( temp_proclist ); - } else if ((*buf >= '0') && (*buf <= '9')) { - // Parse integer numbers -- permute and offset. 
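// [Editor's note, not part of this patch.] Up to two such bare integers are
// collected into number[]; the switch on *out_type near the end of this
// function uses them as the compact/permute level and the offset for the
// compact, scatter and balanced types, and as the offset only for the
// logical and physical types.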
- int n; - next = buf; - SKIP_DIGITS(next); - n = __kmp_str_to_int( buf, *next ); - KMP_ASSERT(n >= 0); - buf = next; - if ( count < 2 ) { - number[ count ] = n; - } else { - KMP_WARNING( AffManyParams, name, start ); - }; // if - ++ count; - } else { - EMIT_WARN( TRUE, ( AffInvalidParam, name, start ) ); - continue; - } + buf = next; + if (__kmp_match_str("fine", buf, (const char **)&next)) { + set_gran(affinity_gran_fine, -1); + buf = next; + } else if (__kmp_match_str("thread", buf, (const char **)&next)) { + set_gran(affinity_gran_thread, -1); + buf = next; + } else if (__kmp_match_str("core", buf, (const char **)&next)) { + set_gran(affinity_gran_core, -1); + buf = next; + } else if (__kmp_match_str("package", buf, (const char **)&next)) { + set_gran(affinity_gran_package, -1); + buf = next; + } else if (__kmp_match_str("node", buf, (const char **)&next)) { + set_gran(affinity_gran_node, -1); + buf = next; +#if KMP_GROUP_AFFINITY + } else if (__kmp_match_str("group", buf, (const char **)&next)) { + set_gran(affinity_gran_group, -1); + buf = next; +#endif /* KMP_GROUP AFFINITY */ + } else if ((*buf >= '0') && (*buf <= '9')) { + int n; + next = buf; + SKIP_DIGITS(next); + n = __kmp_str_to_int(buf, *next); + KMP_ASSERT(n >= 0); + buf = next; + set_gran(affinity_gran_default, n); + } else { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + } else if (__kmp_match_str("proclist", buf, (const char **)&next)) { + char *temp_proclist; - SKIP_WS(next); - if (*next == ',') { - next++; - SKIP_WS(next); - } - else if (*next != '\0') { - const char *temp = next; - EMIT_WARN( TRUE, ( ParseExtraCharsWarn, name, temp ) ); - continue; - } + SKIP_WS(next); + if (*next != '=') { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + next++; // skip '=' + SKIP_WS(next); + if (*next != '[') { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + next++; // skip '[' + buf = next; + if (!__kmp_parse_affinity_proc_id_list(name, buf, (const char **)&next, + &temp_proclist)) { + // warning already emitted. + SKIP_TO(next, ']'); + if (*next == ']') + next++; + SKIP_TO(next, ','); + if (*next == ',') + next++; buf = next; - } // while - - #undef EMIT_WARN - #undef _set_param - #undef set_type - #undef set_verbose - #undef set_warnings - #undef set_respect - #undef set_granularity - - __kmp_str_free((const char **) &buffer); - - if ( proclist ) { - if ( ! type ) { - KMP_WARNING( AffProcListNoType, name ); - *out_type = affinity_explicit; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - } - else if ( *out_type != affinity_explicit ) { - KMP_WARNING( AffProcListNotExplicit, name ); - KMP_ASSERT( *out_proclist != NULL ); - KMP_INTERNAL_FREE( *out_proclist ); - *out_proclist = NULL; - } + continue; + } + if (*next != ']') { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + next++; // skip ']' + set_proclist(temp_proclist); + } else if ((*buf >= '0') && (*buf <= '9')) { + // Parse integer numbers -- permute and offset. 
+ int n; + next = buf; + SKIP_DIGITS(next); + n = __kmp_str_to_int(buf, *next); + KMP_ASSERT(n >= 0); + buf = next; + if (count < 2) { + number[count] = n; + } else { + KMP_WARNING(AffManyParams, name, start); + }; // if + ++count; + } else { + EMIT_WARN(TRUE, (AffInvalidParam, name, start)); + continue; + } + + SKIP_WS(next); + if (*next == ',') { + next++; + SKIP_WS(next); + } else if (*next != '\0') { + const char *temp = next; + EMIT_WARN(TRUE, (ParseExtraCharsWarn, name, temp)); + continue; + } + buf = next; + } // while + +#undef EMIT_WARN +#undef _set_param +#undef set_type +#undef set_verbose +#undef set_warnings +#undef set_respect +#undef set_granularity + + __kmp_str_free((const char **)&buffer); + + if (proclist) { + if (!type) { + KMP_WARNING(AffProcListNoType, name); + *out_type = affinity_explicit; +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + } else if (*out_type != affinity_explicit) { + KMP_WARNING(AffProcListNotExplicit, name); + KMP_ASSERT(*out_proclist != NULL); + KMP_INTERNAL_FREE(*out_proclist); + *out_proclist = NULL; } - switch ( *out_type ) { - case affinity_logical: - case affinity_physical: { - if ( count > 0 ) { - *out_offset = number[ 0 ]; - }; // if - if ( count > 1 ) { - KMP_WARNING( AffManyParamsForLogic, name, number[ 1 ] ); - }; // if - } break; - case affinity_balanced: { - if ( count > 0 ) { - *out_compact = number[ 0 ]; - }; // if - if ( count > 1 ) { - *out_offset = number[ 1 ]; - }; // if - - if ( __kmp_affinity_gran == affinity_gran_default ) { + } + switch (*out_type) { + case affinity_logical: + case affinity_physical: { + if (count > 0) { + *out_offset = number[0]; + }; // if + if (count > 1) { + KMP_WARNING(AffManyParamsForLogic, name, number[1]); + }; // if + } break; + case affinity_balanced: { + if (count > 0) { + *out_compact = number[0]; + }; // if + if (count > 1) { + *out_offset = number[1]; + }; // if + + if (__kmp_affinity_gran == affinity_gran_default) { #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "fine" ); - } - __kmp_affinity_gran = affinity_gran_fine; - } else + if (__kmp_mic_type != non_mic) { + if (__kmp_affinity_verbose || __kmp_affinity_warnings) { + KMP_WARNING(AffGranUsing, "KMP_AFFINITY", "fine"); + } + __kmp_affinity_gran = affinity_gran_fine; + } else #endif - { - if( __kmp_affinity_verbose || __kmp_affinity_warnings ) { - KMP_WARNING( AffGranUsing, "KMP_AFFINITY", "core" ); - } - __kmp_affinity_gran = affinity_gran_core; - } - } - } break; - case affinity_scatter: - case affinity_compact: { - if ( count > 0 ) { - *out_compact = number[ 0 ]; - }; // if - if ( count > 1 ) { - *out_offset = number[ 1 ]; - }; // if - } break; - case affinity_explicit: { - if ( *out_proclist == NULL ) { - KMP_WARNING( AffNoProcList, name ); - __kmp_affinity_type = affinity_none; - } - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "explicit" ); - } - } break; - case affinity_none: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "none" ); - }; // if - } break; - case affinity_disabled: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "disabled" ); - }; // if - } break; - case affinity_default: { - if ( count > 0 ) { - KMP_WARNING( AffNoParam, name, "default" ); - }; // if - } break; - default: { - KMP_ASSERT( 0 ); - }; - }; // switch + { + if (__kmp_affinity_verbose || __kmp_affinity_warnings) { + KMP_WARNING(AffGranUsing, 
"KMP_AFFINITY", "core"); + } + __kmp_affinity_gran = affinity_gran_core; + } + } + } break; + case affinity_scatter: + case affinity_compact: { + if (count > 0) { + *out_compact = number[0]; + }; // if + if (count > 1) { + *out_offset = number[1]; + }; // if + } break; + case affinity_explicit: { + if (*out_proclist == NULL) { + KMP_WARNING(AffNoProcList, name); + __kmp_affinity_type = affinity_none; + } + if (count > 0) { + KMP_WARNING(AffNoParam, name, "explicit"); + } + } break; + case affinity_none: { + if (count > 0) { + KMP_WARNING(AffNoParam, name, "none"); + }; // if + } break; + case affinity_disabled: { + if (count > 0) { + KMP_WARNING(AffNoParam, name, "disabled"); + }; // if + } break; + case affinity_default: { + if (count > 0) { + KMP_WARNING(AffNoParam, name, "default"); + }; // if + } break; + default: { KMP_ASSERT(0); }; + }; // switch } // __kmp_parse_affinity_env -static void -__kmp_stg_parse_affinity( char const * name, char const * value, void * data ) -{ - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; +static void __kmp_stg_parse_affinity(char const *name, char const *value, + void *data) { + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + } - __kmp_parse_affinity_env( name, value, & __kmp_affinity_type, - & __kmp_affinity_proclist, & __kmp_affinity_verbose, - & __kmp_affinity_warnings, & __kmp_affinity_respect_mask, - & __kmp_affinity_gran, & __kmp_affinity_gran_levels, - & __kmp_affinity_dups, & __kmp_affinity_compact, - & __kmp_affinity_offset ); + __kmp_parse_affinity_env(name, value, &__kmp_affinity_type, + &__kmp_affinity_proclist, &__kmp_affinity_verbose, + &__kmp_affinity_warnings, + &__kmp_affinity_respect_mask, &__kmp_affinity_gran, + &__kmp_affinity_gran_levels, &__kmp_affinity_dups, + &__kmp_affinity_compact, &__kmp_affinity_offset); } // __kmp_stg_parse_affinity -static void -__kmp_stg_print_affinity( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); +static void __kmp_stg_print_affinity(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + if (__kmp_affinity_verbose) { + __kmp_str_buf_print(buffer, "%s,", "verbose"); + } else { + __kmp_str_buf_print(buffer, "%s,", "noverbose"); + } + if (__kmp_affinity_warnings) { + __kmp_str_buf_print(buffer, "%s,", "warnings"); + } else { + __kmp_str_buf_print(buffer, "%s,", "nowarnings"); + } + if (KMP_AFFINITY_CAPABLE()) { + if (__kmp_affinity_respect_mask) { + __kmp_str_buf_print(buffer, "%s,", "respect"); } else { - __kmp_str_buf_print( buffer, " %s='", name ); + __kmp_str_buf_print(buffer, "%s,", "norespect"); } - if ( __kmp_affinity_verbose ) { - __kmp_str_buf_print( buffer, "%s,", "verbose"); - } else { - __kmp_str_buf_print( buffer, "%s,", "noverbose"); + switch (__kmp_affinity_gran) { + case affinity_gran_default: + __kmp_str_buf_print(buffer, "%s", "granularity=default,"); + break; + case affinity_gran_fine: + __kmp_str_buf_print(buffer, "%s", "granularity=fine,"); + break; + case affinity_gran_thread: + __kmp_str_buf_print(buffer, "%s", "granularity=thread,"); + break; + case affinity_gran_core: + __kmp_str_buf_print(buffer, "%s", "granularity=core,"); + break; + case affinity_gran_package: + __kmp_str_buf_print(buffer, "%s", 
"granularity=package,"); + break; + case affinity_gran_node: + __kmp_str_buf_print(buffer, "%s", "granularity=node,"); + break; +#if KMP_GROUP_AFFINITY + case affinity_gran_group: + __kmp_str_buf_print(buffer, "%s", "granularity=group,"); + break; +#endif /* KMP_GROUP_AFFINITY */ } - if ( __kmp_affinity_warnings ) { - __kmp_str_buf_print( buffer, "%s,", "warnings"); + if (__kmp_affinity_dups) { + __kmp_str_buf_print(buffer, "%s,", "duplicates"); } else { - __kmp_str_buf_print( buffer, "%s,", "nowarnings"); - } - if ( KMP_AFFINITY_CAPABLE() ) { - if ( __kmp_affinity_respect_mask ) { - __kmp_str_buf_print( buffer, "%s,", "respect"); - } else { - __kmp_str_buf_print( buffer, "%s,", "norespect"); - } - switch ( __kmp_affinity_gran ) { - case affinity_gran_default: - __kmp_str_buf_print( buffer, "%s", "granularity=default,"); - break; - case affinity_gran_fine: - __kmp_str_buf_print( buffer, "%s", "granularity=fine,"); - break; - case affinity_gran_thread: - __kmp_str_buf_print( buffer, "%s", "granularity=thread,"); - break; - case affinity_gran_core: - __kmp_str_buf_print( buffer, "%s", "granularity=core,"); - break; - case affinity_gran_package: - __kmp_str_buf_print( buffer, "%s", "granularity=package,"); - break; - case affinity_gran_node: - __kmp_str_buf_print( buffer, "%s", "granularity=node,"); - break; -# if KMP_GROUP_AFFINITY - case affinity_gran_group: - __kmp_str_buf_print( buffer, "%s", "granularity=group,"); - break; -# endif /* KMP_GROUP_AFFINITY */ - } - if ( __kmp_affinity_dups ) { - __kmp_str_buf_print( buffer, "%s,", "duplicates"); - } else { - __kmp_str_buf_print( buffer, "%s,", "noduplicates"); - } + __kmp_str_buf_print(buffer, "%s,", "noduplicates"); } - if ( ! KMP_AFFINITY_CAPABLE() ) { - __kmp_str_buf_print( buffer, "%s", "disabled" ); - } - else switch ( __kmp_affinity_type ){ - case affinity_none: - __kmp_str_buf_print( buffer, "%s", "none"); - break; - case affinity_physical: - __kmp_str_buf_print( buffer, "%s,%d", "physical", - __kmp_affinity_offset ); - break; - case affinity_logical: - __kmp_str_buf_print( buffer, "%s,%d", "logical", - __kmp_affinity_offset ); - break; - case affinity_compact: - __kmp_str_buf_print( buffer, "%s,%d,%d", "compact", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_scatter: - __kmp_str_buf_print( buffer, "%s,%d,%d", "scatter", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_explicit: - __kmp_str_buf_print( buffer, "%s=[%s],%s", "proclist", - __kmp_affinity_proclist, "explicit" ); - break; - case affinity_balanced: - __kmp_str_buf_print( buffer, "%s,%d,%d", "balanced", - __kmp_affinity_compact, __kmp_affinity_offset ); - break; - case affinity_disabled: - __kmp_str_buf_print( buffer, "%s", "disabled"); - break; - case affinity_default: - __kmp_str_buf_print( buffer, "%s", "default"); - break; - default: - __kmp_str_buf_print( buffer, "%s", ""); - break; + } + if (!KMP_AFFINITY_CAPABLE()) { + __kmp_str_buf_print(buffer, "%s", "disabled"); + } else + switch (__kmp_affinity_type) { + case affinity_none: + __kmp_str_buf_print(buffer, "%s", "none"); + break; + case affinity_physical: + __kmp_str_buf_print(buffer, "%s,%d", "physical", __kmp_affinity_offset); + break; + case affinity_logical: + __kmp_str_buf_print(buffer, "%s,%d", "logical", __kmp_affinity_offset); + break; + case affinity_compact: + __kmp_str_buf_print(buffer, "%s,%d,%d", "compact", __kmp_affinity_compact, + __kmp_affinity_offset); + break; + case affinity_scatter: + __kmp_str_buf_print(buffer, "%s,%d,%d", "scatter", 
__kmp_affinity_compact, + __kmp_affinity_offset); + break; + case affinity_explicit: + __kmp_str_buf_print(buffer, "%s=[%s],%s", "proclist", + __kmp_affinity_proclist, "explicit"); + break; + case affinity_balanced: + __kmp_str_buf_print(buffer, "%s,%d,%d", "balanced", + __kmp_affinity_compact, __kmp_affinity_offset); + break; + case affinity_disabled: + __kmp_str_buf_print(buffer, "%s", "disabled"); + break; + case affinity_default: + __kmp_str_buf_print(buffer, "%s", "default"); + break; + default: + __kmp_str_buf_print(buffer, "%s", ""); + break; } - __kmp_str_buf_print( buffer, "'\n" ); + __kmp_str_buf_print(buffer, "'\n"); } //__kmp_stg_print_affinity -# ifdef KMP_GOMP_COMPAT +#ifdef KMP_GOMP_COMPAT -static void -__kmp_stg_parse_gomp_cpu_affinity( char const * name, char const * value, void * data ) -{ - const char * next = NULL; - char * temp_proclist; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; +static void __kmp_stg_parse_gomp_cpu_affinity(char const *name, + char const *value, void *data) { + const char *next = NULL; + char *temp_proclist; + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + } - if ( TCR_4(__kmp_init_middle) ) { - KMP_WARNING( EnvMiddleWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } + if (TCR_4(__kmp_init_middle)) { + KMP_WARNING(EnvMiddleWarn, name); + __kmp_env_toPrint(name, 0); + return; + } - __kmp_env_toPrint( name, 1 ); + __kmp_env_toPrint(name, 1); - if ( __kmp_parse_affinity_proc_id_list( name, value, &next, - &temp_proclist )) { - SKIP_WS(next); - if (*next == '\0') { - // - // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... - // - __kmp_affinity_proclist = temp_proclist; - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - } - else { - KMP_WARNING( AffSyntaxError, name ); - if (temp_proclist != NULL) { - KMP_INTERNAL_FREE((void *)temp_proclist); - } - } - } - else { - // - // Warning already emitted - // - __kmp_affinity_type = affinity_none; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif + if (__kmp_parse_affinity_proc_id_list(name, value, &next, &temp_proclist)) { + SKIP_WS(next); + if (*next == '\0') { + // GOMP_CPU_AFFINITY => granularity=fine,explicit,proclist=... + __kmp_affinity_proclist = temp_proclist; + __kmp_affinity_type = affinity_explicit; + __kmp_affinity_gran = affinity_gran_fine; +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif + } else { + KMP_WARNING(AffSyntaxError, name); + if (temp_proclist != NULL) { + KMP_INTERNAL_FREE((void *)temp_proclist); + } } + } else { + // Warning already emitted + __kmp_affinity_type = affinity_none; +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +#endif + } } // __kmp_stg_parse_gomp_cpu_affinity -# endif /* KMP_GOMP_COMPAT */ - +#endif /* KMP_GOMP_COMPAT */ -# if OMP_40_ENABLED +#if OMP_40_ENABLED /*----------------------------------------------------------------------------- - The OMP_PLACES proc id list parser. 
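   For illustration, a couple of hypothetical values this explicit place-list
   grammar is meant to accept (example strings only; the keyword forms
   threads|cores|sockets(count) are handled separately in
   __kmp_stg_parse_places below):
     OMP_PLACES="{0,1,2,3},{4,5,6,7}"
     OMP_PLACES="{0:4},{4:4},{8:4},{12:4}"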
Here is the grammar: place_list := place @@ -2524,1775 +2410,1654 @@ subplace := num : num : signed signed := num signed := + signed signed := - signed - -----------------------------------------------------------------------------*/ -static int -__kmp_parse_subplace_list( const char *var, const char **scan ) -{ - const char *next; - - for (;;) { - int start, count, stride; - - // - // Read in the starting proc id - // - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - start = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(start >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(count >= 0); - *scan = next; - - // - // valid follow sets are ',' ':' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - if (**scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(*scan); - if (**scan == '+') { - (*scan)++; // skip '+' - continue; - } - if (**scan == '-') { - sign *= -1; - (*scan)++; // skip '-' - continue; - } - break; - } - SKIP_WS(*scan); - if ((**scan < '0') || (**scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = *scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(stride >= 0); - *scan = next; - stride *= sign; - - // - // valid follow sets are ',' and '}' - // - SKIP_WS(*scan); - if (**scan == '}') { - break; - } - if (**scan == ',') { - (*scan)++; // skip ',' - continue; - } - - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - return TRUE; -} +static int __kmp_parse_subplace_list(const char *var, const char **scan) { + const char *next; -static int -__kmp_parse_place( const char *var, const char ** scan ) -{ - const char *next; + for (;;) { + int start, count, stride; // - // valid follow sets are '{' '!' and num + // Read in the starting proc id // SKIP_WS(*scan); - if (**scan == '{') { - (*scan)++; // skip '{' - if (! __kmp_parse_subplace_list(var, scan)) { - return FALSE; - } - if (**scan != '}') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - (*scan)++; // skip '}' + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } - else if (**scan == '!') { - (*scan)++; // skip '!' - return __kmp_parse_place(var, scan); //'!' 
has lower precedence than ':' + next = *scan; + SKIP_DIGITS(next); + start = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(start >= 0); + *scan = next; + + // valid follow sets are ',' ':' and '}' + SKIP_WS(*scan); + if (**scan == '}') { + break; } - else if ((**scan >= '0') && (**scan <= '9')) { - next = *scan; - SKIP_DIGITS(next); - int proc = __kmp_str_to_int(*scan, *next); - KMP_ASSERT(proc >= 0); - *scan = next; + if (**scan == ',') { + (*scan)++; // skip ',' + continue; } - else { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; + if (**scan != ':') { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } - return TRUE; -} - -static int -__kmp_parse_place_list( const char *var, const char *env, char **place_list ) -{ - const char *scan = env; - const char *next = scan; - - for (;;) { - int start, count, stride; - - if (! __kmp_parse_place(var, &scan)) { - return FALSE; - } - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - scan++; // skip ':' - - // - // Read count parameter - // - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - count = __kmp_str_to_int(scan, *next); - KMP_ASSERT(count >= 0); - scan = next; - - // - // valid follow sets are ',' ':' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } - if (*scan != ':') { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - scan++; // skip ':' - - // - // Read stride parameter - // - int sign = +1; - for (;;) { - SKIP_WS(scan); - if (*scan == '+') { - scan++; // skip '+' - continue; - } - if (*scan == '-') { - sign *= -1; - scan++; // skip '-' - continue; - } - break; - } - SKIP_WS(scan); - if ((*scan < '0') || (*scan > '9')) { - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; - } - next = scan; - SKIP_DIGITS(next); - stride = __kmp_str_to_int(scan, *next); - KMP_ASSERT(stride >= 0); - scan = next; - stride *= sign; - - // - // valid follow sets are ',' and EOL - // - SKIP_WS(scan); - if (*scan == '\0') { - break; - } - if (*scan == ',') { - scan++; // skip ',' - continue; - } + (*scan)++; // skip ':' - KMP_WARNING( SyntaxErrorUsing, var, "\"threads\"" ); - return FALSE; + // Read count parameter + SKIP_WS(*scan); + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } + next = *scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(count >= 0); + *scan = next; - { - int len = scan - env; - char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); - KMP_MEMCPY_S(retlist, (len+1)*sizeof(char), env, len * sizeof(char)); - retlist[len] = '\0'; - *place_list = retlist; + // valid follow sets are ',' ':' and '}' + SKIP_WS(*scan); + if (**scan == '}') { + break; } - return TRUE; -} - -static void -__kmp_stg_parse_places( char const * name, char const * value, void * data ) -{ - int count; - const char *scan = value; - const char *next = scan; - const char *kind = "\"threads\""; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; + if (**scan == ',') { + (*scan)++; // skip ',' + 
continue; } - - // - // If OMP_PROC_BIND is not specified but OMP_PLACES is, - // then let OMP_PROC_BIND default to true. - // - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + if (**scan != ':') { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } + (*scan)++; // skip ':' - //__kmp_affinity_num_places = 0; - - if ( __kmp_match_str( "threads", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_thread; - __kmp_affinity_dups = FALSE; - kind = "\"threads\""; + // Read stride parameter + int sign = +1; + for (;;) { + SKIP_WS(*scan); + if (**scan == '+') { + (*scan)++; // skip '+' + continue; + } + if (**scan == '-') { + sign *= -1; + (*scan)++; // skip '-' + continue; + } + break; } - else if ( __kmp_match_str( "cores", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_core; - __kmp_affinity_dups = FALSE; - kind = "\"cores\""; + SKIP_WS(*scan); + if ((**scan < '0') || (**scan > '9')) { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } - else if ( __kmp_match_str( "sockets", scan, &next ) ) { - scan = next; - __kmp_affinity_type = affinity_compact; - __kmp_affinity_gran = affinity_gran_package; - __kmp_affinity_dups = FALSE; - kind = "\"sockets\""; + next = *scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(stride >= 0); + *scan = next; + stride *= sign; + + // valid follow sets are ',' and '}' + SKIP_WS(*scan); + if (**scan == '}') { + break; } - else { - if ( __kmp_affinity_proclist != NULL ) { - KMP_INTERNAL_FREE( (void *)__kmp_affinity_proclist ); - __kmp_affinity_proclist = NULL; - } - if ( __kmp_parse_place_list( name, value, &__kmp_affinity_proclist ) ) { - __kmp_affinity_type = affinity_explicit; - __kmp_affinity_gran = affinity_gran_fine; - __kmp_affinity_dups = FALSE; - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - } - return; + if (**scan == ',') { + (*scan)++; // skip ',' + continue; } - if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } + return TRUE; +} + +static int __kmp_parse_place(const char *var, const char **scan) { + const char *next; + + // valid follow sets are '{' '!' and num + SKIP_WS(*scan); + if (**scan == '{') { + (*scan)++; // skip '{' + if (!__kmp_parse_subplace_list(var, scan)) { + return FALSE; + } + if (**scan != '}') { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } + (*scan)++; // skip '}' + } else if (**scan == '!') { + (*scan)++; // skip '!' + return __kmp_parse_place(var, scan); //'!' 
has lower precedence than ':' + } else if ((**scan >= '0') && (**scan <= '9')) { + next = *scan; + SKIP_DIGITS(next); + int proc = __kmp_str_to_int(*scan, *next); + KMP_ASSERT(proc >= 0); + *scan = next; + } else { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } + return TRUE; +} + +static int __kmp_parse_place_list(const char *var, const char *env, + char **place_list) { + const char *scan = env; + const char *next = scan; + + for (;;) { + int start, count, stride; + + if (!__kmp_parse_place(var, &scan)) { + return FALSE; } + // valid follow sets are ',' ':' and EOL SKIP_WS(scan); - if ( *scan == '\0' ) { - return; + if (*scan == '\0') { + break; } - - // - // Parse option count parameter in parentheses - // - if ( *scan != '(' ) { - KMP_WARNING( SyntaxErrorUsing, name, kind ); - return; + if (*scan == ',') { + scan++; // skip ',' + continue; } - scan++; // skip '(' + if (*scan != ':') { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } + scan++; // skip ':' + // Read count parameter SKIP_WS(scan); + if ((*scan < '0') || (*scan > '9')) { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } next = scan; SKIP_DIGITS(next); count = __kmp_str_to_int(scan, *next); KMP_ASSERT(count >= 0); scan = next; + // valid follow sets are ',' ':' and EOL SKIP_WS(scan); - if ( *scan != ')' ) { - KMP_WARNING( SyntaxErrorUsing, name, kind ); - return; + if (*scan == '\0') { + break; } - scan++; // skip ')' - - SKIP_WS(scan); - if ( *scan != '\0' ) { - KMP_WARNING( ParseExtraCharsWarn, name, scan ); + if (*scan == ',') { + scan++; // skip ',' + continue; } - __kmp_affinity_num_places = count; -} - -static void -__kmp_stg_print_places( kmp_str_buf_t * buffer, char const * name, - void * data ) -{ - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); + if (*scan != ':') { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } - if ( ( __kmp_nested_proc_bind.used == 0 ) - || ( __kmp_nested_proc_bind.bind_types == NULL ) - || ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_false ) ) { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + scan++; // skip ':' + + // Read stride parameter + int sign = +1; + for (;;) { + SKIP_WS(scan); + if (*scan == '+') { + scan++; // skip '+' + continue; + } + if (*scan == '-') { + sign *= -1; + scan++; // skip '-' + continue; + } + break; } - else if ( __kmp_affinity_type == affinity_explicit ) { - if ( __kmp_affinity_proclist != NULL ) { - __kmp_str_buf_print( buffer, "='%s'\n", __kmp_affinity_proclist ); - } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } + SKIP_WS(scan); + if ((*scan < '0') || (*scan > '9')) { + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; } - else if ( __kmp_affinity_type == affinity_compact ) { - int num; - if ( __kmp_affinity_num_masks > 0 ) { - num = __kmp_affinity_num_masks; - } - else if ( __kmp_affinity_num_places > 0 ) { - num = __kmp_affinity_num_places; - } - else { - num = 0; - } - if ( __kmp_affinity_gran == affinity_gran_thread ) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='threads(%d)'\n", num ); - } - else { - __kmp_str_buf_print( buffer, "='threads'\n" ); - } - } - else if ( __kmp_affinity_gran == affinity_gran_core ) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='cores(%d)' \n", num ); - } - else { - __kmp_str_buf_print( buffer, "='cores'\n" ); - } - } - else if ( __kmp_affinity_gran == affinity_gran_package 
) { - if ( num > 0 ) { - __kmp_str_buf_print( buffer, "='sockets(%d)'\n", num ); - } - else { - __kmp_str_buf_print( buffer, "='sockets'\n" ); - } - } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } + next = scan; + SKIP_DIGITS(next); + stride = __kmp_str_to_int(scan, *next); + KMP_ASSERT(stride >= 0); + scan = next; + stride *= sign; + + // valid follow sets are ',' and EOL + SKIP_WS(scan); + if (*scan == '\0') { + break; } - else { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); + if (*scan == ',') { + scan++; // skip ',' + continue; } + + KMP_WARNING(SyntaxErrorUsing, var, "\"threads\""); + return FALSE; + } + + { + int len = scan - env; + char *retlist = (char *)__kmp_allocate((len + 1) * sizeof(char)); + KMP_MEMCPY_S(retlist, (len + 1) * sizeof(char), env, len * sizeof(char)); + retlist[len] = '\0'; + *place_list = retlist; + } + return TRUE; } -# endif /* OMP_40_ENABLED */ +static void __kmp_stg_parse_places(char const *name, char const *value, + void *data) { + int count; + const char *scan = value; + const char *next = scan; + const char *kind = "\"threads\""; + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; + + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + } -# if (! OMP_40_ENABLED) + // If OMP_PROC_BIND is not specified but OMP_PLACES is, + // then let OMP_PROC_BIND default to true. + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } -static void -__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) -{ - int enabled; - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; + //__kmp_affinity_num_places = 0; - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; + if (__kmp_match_str("threads", scan, &next)) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_thread; + __kmp_affinity_dups = FALSE; + kind = "\"threads\""; + } else if (__kmp_match_str("cores", scan, &next)) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_core; + __kmp_affinity_dups = FALSE; + kind = "\"cores\""; + } else if (__kmp_match_str("sockets", scan, &next)) { + scan = next; + __kmp_affinity_type = affinity_compact; + __kmp_affinity_gran = affinity_gran_package; + __kmp_affinity_dups = FALSE; + kind = "\"sockets\""; + } else { + if (__kmp_affinity_proclist != NULL) { + KMP_INTERNAL_FREE((void *)__kmp_affinity_proclist); + __kmp_affinity_proclist = NULL; + } + if (__kmp_parse_place_list(name, value, &__kmp_affinity_proclist)) { + __kmp_affinity_type = affinity_explicit; + __kmp_affinity_gran = affinity_gran_fine; + __kmp_affinity_dups = FALSE; + if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } } + return; + } - // - // in OMP 3.1, OMP_PROC_BIND is strictly a boolean - // - __kmp_stg_parse_bool( name, value, & enabled ); - if ( enabled ) { - // - // OMP_PROC_BIND => granularity=fine,scatter on MIC - // OMP_PROC_BIND => granularity=core,scatter elsewhere - // - __kmp_affinity_type = affinity_scatter; -# if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) - __kmp_affinity_gran = affinity_gran_fine; - else -# endif - __kmp_affinity_gran = affinity_gran_core; - } - else { - __kmp_affinity_type = affinity_none; - } -} // __kmp_parse_proc_bind + if (__kmp_nested_proc_bind.bind_types[0] 
== proc_bind_default) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } + + SKIP_WS(scan); + if (*scan == '\0') { + return; + } -# endif /* if (! OMP_40_ENABLED) */ + // Parse option count parameter in parentheses + if (*scan != '(') { + KMP_WARNING(SyntaxErrorUsing, name, kind); + return; + } + scan++; // skip '(' + + SKIP_WS(scan); + next = scan; + SKIP_DIGITS(next); + count = __kmp_str_to_int(scan, *next); + KMP_ASSERT(count >= 0); + scan = next; + + SKIP_WS(scan); + if (*scan != ')') { + KMP_WARNING(SyntaxErrorUsing, name, kind); + return; + } + scan++; // skip ')' + SKIP_WS(scan); + if (*scan != '\0') { + KMP_WARNING(ParseExtraCharsWarn, name, scan); + } + __kmp_affinity_num_places = count; +} -static void -__kmp_stg_parse_topology_method( char const * name, char const * value, - void * data ) { - if ( __kmp_str_match( "all", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_all; - } -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - else if ( __kmp_str_match( "x2apic id", 9, value ) - || __kmp_str_match( "x2apic_id", 9, value ) - || __kmp_str_match( "x2apic-id", 9, value ) - || __kmp_str_match( "x2apicid", 8, value ) - || __kmp_str_match( "cpuid leaf 11", 13, value ) - || __kmp_str_match( "cpuid_leaf_11", 13, value ) - || __kmp_str_match( "cpuid-leaf-11", 13, value ) - || __kmp_str_match( "cpuid leaf11", 12, value ) - || __kmp_str_match( "cpuid_leaf11", 12, value ) - || __kmp_str_match( "cpuid-leaf11", 12, value ) - || __kmp_str_match( "cpuidleaf 11", 12, value ) - || __kmp_str_match( "cpuidleaf_11", 12, value ) - || __kmp_str_match( "cpuidleaf-11", 12, value ) - || __kmp_str_match( "cpuidleaf11", 11, value ) - || __kmp_str_match( "cpuid 11", 8, value ) - || __kmp_str_match( "cpuid_11", 8, value ) - || __kmp_str_match( "cpuid-11", 8, value ) - || __kmp_str_match( "cpuid11", 7, value ) - || __kmp_str_match( "leaf 11", 7, value ) - || __kmp_str_match( "leaf_11", 7, value ) - || __kmp_str_match( "leaf-11", 7, value ) - || __kmp_str_match( "leaf11", 6, value ) ) { - __kmp_affinity_top_method = affinity_top_method_x2apicid; - } - else if ( __kmp_str_match( "apic id", 7, value ) - || __kmp_str_match( "apic_id", 7, value ) - || __kmp_str_match( "apic-id", 7, value ) - || __kmp_str_match( "apicid", 6, value ) - || __kmp_str_match( "cpuid leaf 4", 12, value ) - || __kmp_str_match( "cpuid_leaf_4", 12, value ) - || __kmp_str_match( "cpuid-leaf-4", 12, value ) - || __kmp_str_match( "cpuid leaf4", 11, value ) - || __kmp_str_match( "cpuid_leaf4", 11, value ) - || __kmp_str_match( "cpuid-leaf4", 11, value ) - || __kmp_str_match( "cpuidleaf 4", 11, value ) - || __kmp_str_match( "cpuidleaf_4", 11, value ) - || __kmp_str_match( "cpuidleaf-4", 11, value ) - || __kmp_str_match( "cpuidleaf4", 10, value ) - || __kmp_str_match( "cpuid 4", 7, value ) - || __kmp_str_match( "cpuid_4", 7, value ) - || __kmp_str_match( "cpuid-4", 7, value ) - || __kmp_str_match( "cpuid4", 6, value ) - || __kmp_str_match( "leaf 4", 6, value ) - || __kmp_str_match( "leaf_4", 6, value ) - || __kmp_str_match( "leaf-4", 6, value ) - || __kmp_str_match( "leaf4", 5, value ) ) { - __kmp_affinity_top_method = affinity_top_method_apicid; - } -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - else if ( __kmp_str_match( "/proc/cpuinfo", 2, value ) - || __kmp_str_match( "cpuinfo", 5, value )) { - __kmp_affinity_top_method = affinity_top_method_cpuinfo; - } -# if KMP_GROUP_AFFINITY - else if ( __kmp_str_match( "group", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_group; - } -# endif /* KMP_GROUP_AFFINITY */ 
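// For reference, the value strings this topology-method parser matches:
// "all", the "x2apicid"/"cpuid leaf 11" spellings, the "apicid"/"cpuid leaf 4"
// spellings, "cpuinfo" (or "/proc/cpuinfo"), "group" (KMP_GROUP_AFFINITY
// builds), "flat", and "hwloc" (KMP_USE_HWLOC builds). A typical setting
// would be e.g. KMP_TOPOLOGY_METHOD=hwloc; the environment variable name
// itself comes from the settings table, which is outside this hunk.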
- else if ( __kmp_str_match( "flat", 1, value ) ) { - __kmp_affinity_top_method = affinity_top_method_flat; +static void __kmp_stg_print_places(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + if ((__kmp_nested_proc_bind.used == 0) || + (__kmp_nested_proc_bind.bind_types == NULL) || + (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } else if (__kmp_affinity_type == affinity_explicit) { + if (__kmp_affinity_proclist != NULL) { + __kmp_str_buf_print(buffer, "='%s'\n", __kmp_affinity_proclist); + } else { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); } -# if KMP_USE_HWLOC - else if ( __kmp_str_match( "hwloc", 1, value) ) { - __kmp_affinity_top_method = affinity_top_method_hwloc; + } else if (__kmp_affinity_type == affinity_compact) { + int num; + if (__kmp_affinity_num_masks > 0) { + num = __kmp_affinity_num_masks; + } else if (__kmp_affinity_num_places > 0) { + num = __kmp_affinity_num_places; + } else { + num = 0; } -# endif - else { - KMP_WARNING( StgInvalidValue, name, value ); + if (__kmp_affinity_gran == affinity_gran_thread) { + if (num > 0) { + __kmp_str_buf_print(buffer, "='threads(%d)'\n", num); + } else { + __kmp_str_buf_print(buffer, "='threads'\n"); + } + } else if (__kmp_affinity_gran == affinity_gran_core) { + if (num > 0) { + __kmp_str_buf_print(buffer, "='cores(%d)' \n", num); + } else { + __kmp_str_buf_print(buffer, "='cores'\n"); + } + } else if (__kmp_affinity_gran == affinity_gran_package) { + if (num > 0) { + __kmp_str_buf_print(buffer, "='sockets(%d)'\n", num); + } else { + __kmp_str_buf_print(buffer, "='sockets'\n"); + } + } else { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); } + } else { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } +} + +#endif /* OMP_40_ENABLED */ + +#if (!OMP_40_ENABLED) + +static void __kmp_stg_parse_proc_bind(char const *name, char const *value, + void *data) { + int enabled; + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; + + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + } + + // In OMP 3.1, OMP_PROC_BIND is strictly a boolean + __kmp_stg_parse_bool(name, value, &enabled); + if (enabled) { + // OMP_PROC_BIND => granularity=fine,scatter on MIC + // OMP_PROC_BIND => granularity=core,scatter elsewhere + __kmp_affinity_type = affinity_scatter; +#if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) + if (__kmp_mic_type != non_mic) + __kmp_affinity_gran = affinity_gran_fine; + else +#endif + __kmp_affinity_gran = affinity_gran_core; + } else { + __kmp_affinity_type = affinity_none; + } +} // __kmp_parse_proc_bind + +#endif /* if (! 
OMP_40_ENABLED) */ + +static void __kmp_stg_parse_topology_method(char const *name, char const *value, + void *data) { + if (__kmp_str_match("all", 1, value)) { + __kmp_affinity_top_method = affinity_top_method_all; + } +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + else if (__kmp_str_match("x2apic id", 9, value) || + __kmp_str_match("x2apic_id", 9, value) || + __kmp_str_match("x2apic-id", 9, value) || + __kmp_str_match("x2apicid", 8, value) || + __kmp_str_match("cpuid leaf 11", 13, value) || + __kmp_str_match("cpuid_leaf_11", 13, value) || + __kmp_str_match("cpuid-leaf-11", 13, value) || + __kmp_str_match("cpuid leaf11", 12, value) || + __kmp_str_match("cpuid_leaf11", 12, value) || + __kmp_str_match("cpuid-leaf11", 12, value) || + __kmp_str_match("cpuidleaf 11", 12, value) || + __kmp_str_match("cpuidleaf_11", 12, value) || + __kmp_str_match("cpuidleaf-11", 12, value) || + __kmp_str_match("cpuidleaf11", 11, value) || + __kmp_str_match("cpuid 11", 8, value) || + __kmp_str_match("cpuid_11", 8, value) || + __kmp_str_match("cpuid-11", 8, value) || + __kmp_str_match("cpuid11", 7, value) || + __kmp_str_match("leaf 11", 7, value) || + __kmp_str_match("leaf_11", 7, value) || + __kmp_str_match("leaf-11", 7, value) || + __kmp_str_match("leaf11", 6, value)) { + __kmp_affinity_top_method = affinity_top_method_x2apicid; + } else if (__kmp_str_match("apic id", 7, value) || + __kmp_str_match("apic_id", 7, value) || + __kmp_str_match("apic-id", 7, value) || + __kmp_str_match("apicid", 6, value) || + __kmp_str_match("cpuid leaf 4", 12, value) || + __kmp_str_match("cpuid_leaf_4", 12, value) || + __kmp_str_match("cpuid-leaf-4", 12, value) || + __kmp_str_match("cpuid leaf4", 11, value) || + __kmp_str_match("cpuid_leaf4", 11, value) || + __kmp_str_match("cpuid-leaf4", 11, value) || + __kmp_str_match("cpuidleaf 4", 11, value) || + __kmp_str_match("cpuidleaf_4", 11, value) || + __kmp_str_match("cpuidleaf-4", 11, value) || + __kmp_str_match("cpuidleaf4", 10, value) || + __kmp_str_match("cpuid 4", 7, value) || + __kmp_str_match("cpuid_4", 7, value) || + __kmp_str_match("cpuid-4", 7, value) || + __kmp_str_match("cpuid4", 6, value) || + __kmp_str_match("leaf 4", 6, value) || + __kmp_str_match("leaf_4", 6, value) || + __kmp_str_match("leaf-4", 6, value) || + __kmp_str_match("leaf4", 5, value)) { + __kmp_affinity_top_method = affinity_top_method_apicid; + } +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + else if (__kmp_str_match("/proc/cpuinfo", 2, value) || + __kmp_str_match("cpuinfo", 5, value)) { + __kmp_affinity_top_method = affinity_top_method_cpuinfo; + } +#if KMP_GROUP_AFFINITY + else if (__kmp_str_match("group", 1, value)) { + __kmp_affinity_top_method = affinity_top_method_group; + } +#endif /* KMP_GROUP_AFFINITY */ + else if (__kmp_str_match("flat", 1, value)) { + __kmp_affinity_top_method = affinity_top_method_flat; + } +#if KMP_USE_HWLOC + else if (__kmp_str_match("hwloc", 1, value)) { + __kmp_affinity_top_method = affinity_top_method_hwloc; + } +#endif + else { + KMP_WARNING(StgInvalidValue, name, value); + } } // __kmp_stg_parse_topology_method -static void -__kmp_stg_print_topology_method( kmp_str_buf_t * buffer, char const * name, - void * data ) { -# if KMP_DEBUG - char const * value = NULL; +static void __kmp_stg_print_topology_method(kmp_str_buf_t *buffer, + char const *name, void *data) { +#if KMP_DEBUG + char const *value = NULL; - switch ( __kmp_affinity_top_method ) { - case affinity_top_method_default: - value = "default"; - break; + switch (__kmp_affinity_top_method) { + case 
affinity_top_method_default: + value = "default"; + break; - case affinity_top_method_all: - value = "all"; - break; + case affinity_top_method_all: + value = "all"; + break; -# if KMP_ARCH_X86 || KMP_ARCH_X86_64 - case affinity_top_method_x2apicid: - value = "x2APIC id"; - break; +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 + case affinity_top_method_x2apicid: + value = "x2APIC id"; + break; - case affinity_top_method_apicid: - value = "APIC id"; - break; -# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ + case affinity_top_method_apicid: + value = "APIC id"; + break; +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ # if KMP_USE_HWLOC - case affinity_top_method_hwloc: - value = "hwloc"; - break; + case affinity_top_method_hwloc: + value = "hwloc"; + break; # endif - case affinity_top_method_cpuinfo: - value = "cpuinfo"; - break; + case affinity_top_method_cpuinfo: + value = "cpuinfo"; + break; -# if KMP_GROUP_AFFINITY - case affinity_top_method_group: - value = "group"; - break; -# endif /* KMP_GROUP_AFFINITY */ +#if KMP_GROUP_AFFINITY + case affinity_top_method_group: + value = "group"; + break; +#endif /* KMP_GROUP_AFFINITY */ - case affinity_top_method_flat: - value = "flat"; - break; - } + case affinity_top_method_flat: + value = "flat"; + break; + } - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } -# endif /* KMP_DEBUG */ + if (value != NULL) { + __kmp_stg_print_str(buffer, name, value); + } +#endif /* KMP_DEBUG */ } // __kmp_stg_print_topology_method #endif /* KMP_AFFINITY_SUPPORTED */ - #if OMP_40_ENABLED -// // OMP_PROC_BIND / bind-var is functional on all 4.0 builds, including OS X* // OMP_PLACES / place-partition-var is not. -// -static void -__kmp_stg_parse_proc_bind( char const * name, char const * value, void * data ) -{ - kmp_setting_t **rivals = (kmp_setting_t **) data; - int rc; - - rc = __kmp_stg_check_rivals( name, value, rivals ); - if ( rc ) { - return; - } +static void __kmp_stg_parse_proc_bind(char const *name, char const *value, + void *data) { + kmp_setting_t **rivals = (kmp_setting_t **)data; + int rc; + + rc = __kmp_stg_check_rivals(name, value, rivals); + if (rc) { + return; + } - // - // in OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types. - // - KMP_DEBUG_ASSERT( (__kmp_nested_proc_bind.bind_types != NULL) - && ( __kmp_nested_proc_bind.used > 0 ) ); + // In OMP 4.0 OMP_PROC_BIND is a vector of proc_bind types. 
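// For illustration only (hypothetical settings, not from this patch), the
// vector form parsed below accepts keywords or their numeric equivalents,
// one element per nesting level:
//   OMP_PROC_BIND=false
//   OMP_PROC_BIND=true
//   OMP_PROC_BIND=spread,close,master
// Each element is stored into __kmp_nested_proc_bind.bind_types[].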
+ KMP_DEBUG_ASSERT((__kmp_nested_proc_bind.bind_types != NULL) && + (__kmp_nested_proc_bind.used > 0)); - const char *buf = value; - const char *next; - int num; - SKIP_WS( buf ); - if ( (*buf >= '0') && (*buf <= '9') ) { - next = buf; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( buf, *next ); - KMP_ASSERT( num >= 0 ); - buf = next; - SKIP_WS( buf ); + const char *buf = value; + const char *next; + int num; + SKIP_WS(buf); + if ((*buf >= '0') && (*buf <= '9')) { + next = buf; + SKIP_DIGITS(next); + num = __kmp_str_to_int(buf, *next); + KMP_ASSERT(num >= 0); + buf = next; + SKIP_WS(buf); + } else { + num = -1; + } + + next = buf; + if (__kmp_match_str("disabled", buf, &next)) { + buf = next; + SKIP_WS(buf); +#if KMP_AFFINITY_SUPPORTED + __kmp_affinity_type = affinity_disabled; +#endif /* KMP_AFFINITY_SUPPORTED */ + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } else if ((num == (int)proc_bind_false) || + __kmp_match_str("false", buf, &next)) { + buf = next; + SKIP_WS(buf); +#if KMP_AFFINITY_SUPPORTED + __kmp_affinity_type = affinity_none; +#endif /* KMP_AFFINITY_SUPPORTED */ + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } else if ((num == (int)proc_bind_true) || + __kmp_match_str("true", buf, &next)) { + buf = next; + SKIP_WS(buf); + __kmp_nested_proc_bind.used = 1; + __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; + } else { + // Count the number of values in the env var string + const char *scan; + int nelem = 1; + for (scan = buf; *scan != '\0'; scan++) { + if (*scan == ',') { + nelem++; + } } - else { - num = -1; + + // Create / expand the nested proc_bind array as needed + if (__kmp_nested_proc_bind.size < nelem) { + __kmp_nested_proc_bind.bind_types = + (kmp_proc_bind_t *)KMP_INTERNAL_REALLOC( + __kmp_nested_proc_bind.bind_types, + sizeof(kmp_proc_bind_t) * nelem); + if (__kmp_nested_proc_bind.bind_types == NULL) { + KMP_FATAL(MemoryAllocFailed); + } + __kmp_nested_proc_bind.size = nelem; } + __kmp_nested_proc_bind.used = nelem; - next = buf; - if ( __kmp_match_str( "disabled", buf, &next ) ) { + // Save values in the nested proc_bind array + int i = 0; + for (;;) { + enum kmp_proc_bind_t bind; + + if ((num == (int)proc_bind_master) || + __kmp_match_str("master", buf, &next)) { buf = next; - SKIP_WS( buf ); -# if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_disabled; -# endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - else if ( ( num == (int)proc_bind_false ) - || __kmp_match_str( "false", buf, &next ) ) { + SKIP_WS(buf); + bind = proc_bind_master; + } else if ((num == (int)proc_bind_close) || + __kmp_match_str("close", buf, &next)) { buf = next; - SKIP_WS( buf ); -# if KMP_AFFINITY_SUPPORTED - __kmp_affinity_type = affinity_none; -# endif /* KMP_AFFINITY_SUPPORTED */ - __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } - else if ( ( num == (int)proc_bind_true ) - || __kmp_match_str( "true", buf, &next ) ) { + SKIP_WS(buf); + bind = proc_bind_close; + } else if ((num == (int)proc_bind_spread) || + __kmp_match_str("spread", buf, &next)) { buf = next; - SKIP_WS( buf ); + SKIP_WS(buf); + bind = proc_bind_spread; + } else { + KMP_WARNING(StgInvalidValue, name, value); + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; __kmp_nested_proc_bind.used = 1; - __kmp_nested_proc_bind.bind_types[0] = proc_bind_true; - } - else { - // - // Count the 
number of values in the env var string - // - const char *scan; - int nelem = 1; - for ( scan = buf; *scan != '\0'; scan++ ) { - if ( *scan == ',' ) { - nelem++; - } - } + return; + } - // - // Create / expand the nested proc_bind array as needed - // - if ( __kmp_nested_proc_bind.size < nelem ) { - __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) - KMP_INTERNAL_REALLOC( __kmp_nested_proc_bind.bind_types, - sizeof(kmp_proc_bind_t) * nelem ); - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - __kmp_nested_proc_bind.size = nelem; - } - __kmp_nested_proc_bind.used = nelem; - - // - // Save values in the nested proc_bind array - // - int i = 0; - for (;;) { - enum kmp_proc_bind_t bind; - - if ( ( num == (int)proc_bind_master ) - || __kmp_match_str( "master", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_master; - } - else if ( ( num == (int)proc_bind_close ) - || __kmp_match_str( "close", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_close; - } - else if ( ( num == (int)proc_bind_spread ) - || __kmp_match_str( "spread", buf, &next ) ) { - buf = next; - SKIP_WS( buf ); - bind = proc_bind_spread; - } - else { - KMP_WARNING( StgInvalidValue, name, value ); - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - __kmp_nested_proc_bind.used = 1; - return; - } + __kmp_nested_proc_bind.bind_types[i++] = bind; + if (i >= nelem) { + break; + } + KMP_DEBUG_ASSERT(*buf == ','); + buf++; + SKIP_WS(buf); - __kmp_nested_proc_bind.bind_types[i++] = bind; - if ( i >= nelem ) { - break; - } - KMP_DEBUG_ASSERT( *buf == ',' ); - buf++; - SKIP_WS( buf ); - - // - // Read next value if it was specified as an integer - // - if ( (*buf >= '0') && (*buf <= '9') ) { - next = buf; - SKIP_DIGITS( next ); - num = __kmp_str_to_int( buf, *next ); - KMP_ASSERT( num >= 0 ); - buf = next; - SKIP_WS( buf ); - } - else { - num = -1; - } - } - SKIP_WS( buf ); - } - if ( *buf != '\0' ) { - KMP_WARNING( ParseExtraCharsWarn, name, buf ); + // Read next value if it was specified as an integer + if ((*buf >= '0') && (*buf <= '9')) { + next = buf; + SKIP_DIGITS(next); + num = __kmp_str_to_int(buf, *next); + KMP_ASSERT(num >= 0); + buf = next; + SKIP_WS(buf); + } else { + num = -1; + } } + SKIP_WS(buf); + } + if (*buf != '\0') { + KMP_WARNING(ParseExtraCharsWarn, name, buf); + } } +static void __kmp_stg_print_proc_bind(kmp_str_buf_t *buffer, char const *name, + void *data) { + int nelem = __kmp_nested_proc_bind.used; + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME; + } else { + __kmp_str_buf_print(buffer, " %s", name); + } + if (nelem == 0) { + __kmp_str_buf_print(buffer, ": %s\n", KMP_I18N_STR(NotDefined)); + } else { + int i; + __kmp_str_buf_print(buffer, "='", name); + for (i = 0; i < nelem; i++) { + switch (__kmp_nested_proc_bind.bind_types[i]) { + case proc_bind_false: + __kmp_str_buf_print(buffer, "false"); + break; -static void -__kmp_stg_print_proc_bind( kmp_str_buf_t * buffer, char const * name, - void * data ) -{ - int nelem = __kmp_nested_proc_bind.used; - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME; - } else { - __kmp_str_buf_print( buffer, " %s", name ); - } - if ( nelem == 0 ) { - __kmp_str_buf_print( buffer, ": %s\n", KMP_I18N_STR( NotDefined ) ); - } - else { - int i; - __kmp_str_buf_print( buffer, "='", name ); - for ( i = 0; i < nelem; i++ ) { - switch ( __kmp_nested_proc_bind.bind_types[i] ) { - case proc_bind_false: - __kmp_str_buf_print( buffer, "false" ); - break; - - case proc_bind_true: - 
__kmp_str_buf_print( buffer, "true" ); - break; - - case proc_bind_master: - __kmp_str_buf_print( buffer, "master" ); - break; - - case proc_bind_close: - __kmp_str_buf_print( buffer, "close" ); - break; - - case proc_bind_spread: - __kmp_str_buf_print( buffer, "spread" ); - break; - - case proc_bind_intel: - __kmp_str_buf_print( buffer, "intel" ); - break; - - case proc_bind_default: - __kmp_str_buf_print( buffer, "default" ); - break; - } - if ( i < nelem - 1 ) { - __kmp_str_buf_print( buffer, "," ); - } - } - __kmp_str_buf_print( buffer, "'\n" ); + case proc_bind_true: + __kmp_str_buf_print(buffer, "true"); + break; + + case proc_bind_master: + __kmp_str_buf_print(buffer, "master"); + break; + + case proc_bind_close: + __kmp_str_buf_print(buffer, "close"); + break; + + case proc_bind_spread: + __kmp_str_buf_print(buffer, "spread"); + break; + + case proc_bind_intel: + __kmp_str_buf_print(buffer, "intel"); + break; + + case proc_bind_default: + __kmp_str_buf_print(buffer, "default"); + break; + } + if (i < nelem - 1) { + __kmp_str_buf_print(buffer, ","); + } } + __kmp_str_buf_print(buffer, "'\n"); + } } #endif /* OMP_40_ENABLED */ - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OMP_DYNAMIC -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_omp_dynamic( char const * name, char const * value, void * data ) -{ - __kmp_stg_parse_bool( name, value, & (__kmp_global.g.g_dynamic) ); +static void __kmp_stg_parse_omp_dynamic(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &(__kmp_global.g.g_dynamic)); } // __kmp_stg_parse_omp_dynamic -static void -__kmp_stg_print_omp_dynamic( kmp_str_buf_t * buffer, char const * name, void * data ) -{ - __kmp_stg_print_bool( buffer, name, __kmp_global.g.g_dynamic ); +static void __kmp_stg_print_omp_dynamic(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_global.g.g_dynamic); } // __kmp_stg_print_omp_dynamic -static void -__kmp_stg_parse_kmp_dynamic_mode( char const * name, char const * value, void * data ) -{ - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - __kmp_env_toPrint( name, 0 ); - return; - } +static void __kmp_stg_parse_kmp_dynamic_mode(char const *name, + char const *value, void *data) { + if (TCR_4(__kmp_init_parallel)) { + KMP_WARNING(EnvParallelWarn, name); + __kmp_env_toPrint(name, 0); + return; + } #ifdef USE_LOAD_BALANCE - else if ( __kmp_str_match( "load balance", 2, value ) - || __kmp_str_match( "load_balance", 2, value ) - || __kmp_str_match( "load-balance", 2, value ) - || __kmp_str_match( "loadbalance", 2, value ) - || __kmp_str_match( "balance", 1, value ) ) { - __kmp_global.g.g_dynamic_mode = dynamic_load_balance; - } + else if (__kmp_str_match("load balance", 2, value) || + __kmp_str_match("load_balance", 2, value) || + __kmp_str_match("load-balance", 2, value) || + __kmp_str_match("loadbalance", 2, value) || + __kmp_str_match("balance", 1, value)) { + __kmp_global.g.g_dynamic_mode = dynamic_load_balance; + } #endif /* USE_LOAD_BALANCE */ - else if ( __kmp_str_match( "thread limit", 1, value ) - || __kmp_str_match( "thread_limit", 1, value ) - || __kmp_str_match( "thread-limit", 1, value ) - || __kmp_str_match( "threadlimit", 1, value ) - || __kmp_str_match( "limit", 2, value ) ) { - 
__kmp_global.g.g_dynamic_mode = dynamic_thread_limit; - } - else if ( __kmp_str_match( "random", 1, value ) ) { - __kmp_global.g.g_dynamic_mode = dynamic_random; - } - else { - KMP_WARNING( StgInvalidValue, name, value ); - } + else if (__kmp_str_match("thread limit", 1, value) || + __kmp_str_match("thread_limit", 1, value) || + __kmp_str_match("thread-limit", 1, value) || + __kmp_str_match("threadlimit", 1, value) || + __kmp_str_match("limit", 2, value)) { + __kmp_global.g.g_dynamic_mode = dynamic_thread_limit; + } else if (__kmp_str_match("random", 1, value)) { + __kmp_global.g.g_dynamic_mode = dynamic_random; + } else { + KMP_WARNING(StgInvalidValue, name, value); + } } //__kmp_stg_parse_kmp_dynamic_mode -static void -__kmp_stg_print_kmp_dynamic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) -{ +static void __kmp_stg_print_kmp_dynamic_mode(kmp_str_buf_t *buffer, + char const *name, void *data) { #if KMP_DEBUG - if ( __kmp_global.g.g_dynamic_mode == dynamic_default ) { - __kmp_str_buf_print( buffer, " %s: %s \n", name, KMP_I18N_STR( NotDefined ) ); - } -# ifdef USE_LOAD_BALANCE - else if ( __kmp_global.g.g_dynamic_mode == dynamic_load_balance ) { - __kmp_stg_print_str( buffer, name, "load balance" ); - } -# endif /* USE_LOAD_BALANCE */ - else if ( __kmp_global.g.g_dynamic_mode == dynamic_thread_limit ) { - __kmp_stg_print_str( buffer, name, "thread limit" ); - } - else if ( __kmp_global.g.g_dynamic_mode == dynamic_random ) { - __kmp_stg_print_str( buffer, name, "random" ); - } - else { - KMP_ASSERT(0); - } + if (__kmp_global.g.g_dynamic_mode == dynamic_default) { + __kmp_str_buf_print(buffer, " %s: %s \n", name, KMP_I18N_STR(NotDefined)); + } +#ifdef USE_LOAD_BALANCE + else if (__kmp_global.g.g_dynamic_mode == dynamic_load_balance) { + __kmp_stg_print_str(buffer, name, "load balance"); + } +#endif /* USE_LOAD_BALANCE */ + else if (__kmp_global.g.g_dynamic_mode == dynamic_thread_limit) { + __kmp_stg_print_str(buffer, name, "thread limit"); + } else if (__kmp_global.g.g_dynamic_mode == dynamic_random) { + __kmp_stg_print_str(buffer, name, "random"); + } else { + KMP_ASSERT(0); + } #endif /* KMP_DEBUG */ } // __kmp_stg_print_kmp_dynamic_mode - #ifdef USE_LOAD_BALANCE -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_LOAD_BALANCE_INTERVAL -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_ld_balance_interval( char const * name, char const * value, void * data ) -{ - double interval = __kmp_convert_to_double( value ); - if ( interval >= 0 ) { - __kmp_load_balance_interval = interval; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if + +static void __kmp_stg_parse_ld_balance_interval(char const *name, + char const *value, void *data) { + double interval = __kmp_convert_to_double(value); + if (interval >= 0) { + __kmp_load_balance_interval = interval; + } else { + KMP_WARNING(StgInvalidValue, name, value); + }; // if } // __kmp_stg_parse_load_balance_interval -static void -__kmp_stg_print_ld_balance_interval( kmp_str_buf_t * buffer, char const * name, void * data ) { +static void __kmp_stg_print_ld_balance_interval(kmp_str_buf_t *buffer, + char const *name, void *data) { #if KMP_DEBUG - __kmp_str_buf_print( buffer, " %s=%8.6f\n", name, __kmp_load_balance_interval ); + __kmp_str_buf_print(buffer, " %s=%8.6f\n", name, + 
__kmp_load_balance_interval); #endif /* KMP_DEBUG */ } // __kmp_stg_print_load_balance_interval #endif /* USE_LOAD_BALANCE */ -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_INIT_AT_FORK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_init_at_fork( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_need_register_atfork ); - if ( __kmp_need_register_atfork ) { - __kmp_need_register_atfork_specified = TRUE; - }; + +static void __kmp_stg_parse_init_at_fork(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_need_register_atfork); + if (__kmp_need_register_atfork) { + __kmp_need_register_atfork_specified = TRUE; + }; } // __kmp_stg_parse_init_at_fork -static void -__kmp_stg_print_init_at_fork( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_need_register_atfork_specified ); +static void __kmp_stg_print_init_at_fork(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_need_register_atfork_specified); } // __kmp_stg_print_init_at_fork -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_SCHEDULE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_schedule( char const * name, char const * value, void * data ) { +static void __kmp_stg_parse_schedule(char const *name, char const *value, + void *data) { - if ( value != NULL ) { - size_t length = KMP_STRLEN( value ); - if ( length > INT_MAX ) { - KMP_WARNING( LongValue, name ); - } else { - char *semicolon; - if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'' ) - KMP_WARNING( UnbalancedQuotes, name ); - do { - char sentinel; - - semicolon = (char *) strchr( value, ';' ); - if( *value && semicolon != value ) { - char *comma = (char *) strchr( value, ',' ); - - if ( comma ) { - ++comma; - sentinel = ','; - } else - sentinel = ';'; - if ( !__kmp_strcasecmp_with_sentinel( "static", value, sentinel ) ) { - if( !__kmp_strcasecmp_with_sentinel( "greedy", comma, ';' ) ) { - __kmp_static = kmp_sch_static_greedy; - continue; - } else if( !__kmp_strcasecmp_with_sentinel( "balanced", comma, ';' ) ) { - __kmp_static = kmp_sch_static_balanced; - continue; - } - } else if ( !__kmp_strcasecmp_with_sentinel( "guided", value, sentinel ) ) { - if ( !__kmp_strcasecmp_with_sentinel( "iterative", comma, ';' ) ) { - __kmp_guided = kmp_sch_guided_iterative_chunked; - continue; - } else if ( !__kmp_strcasecmp_with_sentinel( "analytical", comma, ';' ) ) { - /* analytical not allowed for too many threads */ - __kmp_guided = kmp_sch_guided_analytical_chunked; - continue; - } - } - KMP_WARNING( InvalidClause, name, value ); - } else - KMP_WARNING( EmptyClause, name ); - } while ( (value = semicolon ? 
semicolon + 1 : NULL) ); - } - }; // if + if (value != NULL) { + size_t length = KMP_STRLEN(value); + if (length > INT_MAX) { + KMP_WARNING(LongValue, name); + } else { + char *semicolon; + if (value[length - 1] == '"' || value[length - 1] == '\'') + KMP_WARNING(UnbalancedQuotes, name); + do { + char sentinel; + + semicolon = (char *)strchr(value, ';'); + if (*value && semicolon != value) { + char *comma = (char *)strchr(value, ','); + + if (comma) { + ++comma; + sentinel = ','; + } else + sentinel = ';'; + if (!__kmp_strcasecmp_with_sentinel("static", value, sentinel)) { + if (!__kmp_strcasecmp_with_sentinel("greedy", comma, ';')) { + __kmp_static = kmp_sch_static_greedy; + continue; + } else if (!__kmp_strcasecmp_with_sentinel("balanced", comma, + ';')) { + __kmp_static = kmp_sch_static_balanced; + continue; + } + } else if (!__kmp_strcasecmp_with_sentinel("guided", value, + sentinel)) { + if (!__kmp_strcasecmp_with_sentinel("iterative", comma, ';')) { + __kmp_guided = kmp_sch_guided_iterative_chunked; + continue; + } else if (!__kmp_strcasecmp_with_sentinel("analytical", comma, + ';')) { + /* analytical not allowed for too many threads */ + __kmp_guided = kmp_sch_guided_analytical_chunked; + continue; + } + } + KMP_WARNING(InvalidClause, name, value); + } else + KMP_WARNING(EmptyClause, name); + } while ((value = semicolon ? semicolon + 1 : NULL)); + } + }; // if } // __kmp_stg_parse__schedule -static void -__kmp_stg_print_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - if ( __kmp_static == kmp_sch_static_greedy ) { - __kmp_str_buf_print( buffer, "%s", "static,greedy"); - } else if ( __kmp_static == kmp_sch_static_balanced ) { - __kmp_str_buf_print ( buffer, "%s", "static,balanced"); - } - if ( __kmp_guided == kmp_sch_guided_iterative_chunked ) { - __kmp_str_buf_print( buffer, ";%s'\n", "guided,iterative"); - } else if ( __kmp_guided == kmp_sch_guided_analytical_chunked ) { - __kmp_str_buf_print( buffer, ";%s'\n", "guided,analytical"); - } +static void __kmp_stg_print_schedule(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + if (__kmp_static == kmp_sch_static_greedy) { + __kmp_str_buf_print(buffer, "%s", "static,greedy"); + } else if (__kmp_static == kmp_sch_static_balanced) { + __kmp_str_buf_print(buffer, "%s", "static,balanced"); + } + if (__kmp_guided == kmp_sch_guided_iterative_chunked) { + __kmp_str_buf_print(buffer, ";%s'\n", "guided,iterative"); + } else if (__kmp_guided == kmp_sch_guided_analytical_chunked) { + __kmp_str_buf_print(buffer, ";%s'\n", "guided,analytical"); + } } // __kmp_stg_print_schedule -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OMP_SCHEDULE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_omp_schedule( char const * name, char const * value, void * data ) -{ - size_t length; - if( value ) { - length = KMP_STRLEN( value ); - if( length ) { - char *comma = (char *) strchr( value, ',' ); - if( value[ length - 1 ] == '"' || value[ length -1 ] == '\'') - KMP_WARNING( UnbalancedQuotes, name ); - /* get the specified scheduling style */ - if 
(!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */ - __kmp_sched = kmp_sch_dynamic_chunked; - else if (!__kmp_strcasecmp_with_sentinel("guided", value, ',')) /* GUIDED */ - __kmp_sched = kmp_sch_guided_chunked; -// AC: TODO: add AUTO schedule, and pprobably remove TRAPEZOIDAL (OMP 3.0 does not allow it) - else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */ - __kmp_sched = kmp_sch_auto; - if( comma ) { - __kmp_msg( kmp_ms_warning, KMP_MSG( IgnoreChunk, name, comma ), __kmp_msg_null ); - comma = NULL; - } - } - else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, ',')) /* TRAPEZOIDAL */ - __kmp_sched = kmp_sch_trapezoidal; - else if (!__kmp_strcasecmp_with_sentinel("static", value, ',')) /* STATIC */ - __kmp_sched = kmp_sch_static; + +static void __kmp_stg_parse_omp_schedule(char const *name, char const *value, + void *data) { + size_t length; + if (value) { + length = KMP_STRLEN(value); + if (length) { + char *comma = (char *)strchr(value, ','); + if (value[length - 1] == '"' || value[length - 1] == '\'') + KMP_WARNING(UnbalancedQuotes, name); + /* get the specified scheduling style */ + if (!__kmp_strcasecmp_with_sentinel("dynamic", value, ',')) /* DYNAMIC */ + __kmp_sched = kmp_sch_dynamic_chunked; + else if (!__kmp_strcasecmp_with_sentinel("guided", value, + ',')) /* GUIDED */ + __kmp_sched = kmp_sch_guided_chunked; + // AC: TODO: add AUTO schedule, and pprobably remove TRAPEZOIDAL (OMP 3.0 + // does not allow it) + else if (!__kmp_strcasecmp_with_sentinel("auto", value, ',')) { /* AUTO */ + __kmp_sched = kmp_sch_auto; + if (comma) { + __kmp_msg(kmp_ms_warning, KMP_MSG(IgnoreChunk, name, comma), + __kmp_msg_null); + comma = NULL; + } + } else if (!__kmp_strcasecmp_with_sentinel("trapezoidal", value, + ',')) /* TRAPEZOIDAL */ + __kmp_sched = kmp_sch_trapezoidal; + else if (!__kmp_strcasecmp_with_sentinel("static", value, + ',')) /* STATIC */ + __kmp_sched = kmp_sch_static; #if KMP_STATIC_STEAL_ENABLED - else if (!__kmp_strcasecmp_with_sentinel("static_steal", value, ',')) - __kmp_sched = kmp_sch_static_steal; + else if (!__kmp_strcasecmp_with_sentinel("static_steal", value, ',')) + __kmp_sched = kmp_sch_static_steal; #endif - else { - KMP_WARNING( StgInvalidValue, name, value ); - value = NULL; /* skip processing of comma */ - } - if( value && comma ) { - __kmp_env_chunk = TRUE; - - if(__kmp_sched == kmp_sch_static) - __kmp_sched = kmp_sch_static_chunked; - ++comma; - __kmp_chunk = __kmp_str_to_int( comma, 0 ); - if ( __kmp_chunk < 1 ) { - __kmp_chunk = KMP_DEFAULT_CHUNK; - __kmp_msg( kmp_ms_warning, KMP_MSG( InvalidChunk, name, comma ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_chunk ); -// AC: next block commented out until KMP_DEFAULT_CHUNK != KMP_MIN_CHUNK (to improve code coverage :) -// The default chunk size is 1 according to standard, thus making KMP_MIN_CHUNK not 1 we would introduce mess: -// wrong chunk becomes 1, but it will be impossible to explicitely set 1, because it becomes KMP_MIN_CHUNK... 
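// Illustrative sketch of the OMP_SCHEDULE value shape this function accepts:
// a schedule kind optionally followed by ",chunk", e.g. OMP_SCHEDULE="guided,100".
// Chunk values below 1 fall back to the default and oversized values are
// clamped; the constants below are placeholders, not the runtime's KMP_* limits.
#include <cstdio>
#include <cstdlib>
#include <cstring>

static void parse_omp_schedule_sketch(const char *value) {
  const int kDefaultChunk = 1;       // placeholder for KMP_DEFAULT_CHUNK
  const int kMaxChunk = 1 << 20;     // placeholder for KMP_MAX_CHUNK
  const char *comma = std::strchr(value, ',');
  size_t kind_len = comma ? (size_t)(comma - value) : std::strlen(value);
  int chunk = 0;                     // 0 means "no chunk given"
  if (comma) {
    chunk = std::atoi(comma + 1);
    if (chunk < 1)
      chunk = kDefaultChunk;         // too small: fall back to the default
    else if (chunk > kMaxChunk)
      chunk = kMaxChunk;             // too large: clamp
  }
  std::printf("kind=%.*s chunk=%d\n", (int)kind_len, value, chunk);
}
// parse_omp_schedule_sketch("guided,100") prints "kind=guided chunk=100".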
-// } else if ( __kmp_chunk < KMP_MIN_CHUNK ) { -// __kmp_chunk = KMP_MIN_CHUNK; - } else if ( __kmp_chunk > KMP_MAX_CHUNK ) { - __kmp_chunk = KMP_MAX_CHUNK; - __kmp_msg( kmp_ms_warning, KMP_MSG( LargeChunk, name, comma ), __kmp_msg_null ); - KMP_INFORM( Using_int_Value, name, __kmp_chunk ); - } - } else - __kmp_env_chunk = FALSE; - } else - KMP_WARNING( EmptyString, name ); - } - K_DIAG(1, ("__kmp_static == %d\n", __kmp_static)) - K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided)) - K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched)) - K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk)) + else { + KMP_WARNING(StgInvalidValue, name, value); + value = NULL; /* skip processing of comma */ + } + if (value && comma) { + __kmp_env_chunk = TRUE; + + if (__kmp_sched == kmp_sch_static) + __kmp_sched = kmp_sch_static_chunked; + ++comma; + __kmp_chunk = __kmp_str_to_int(comma, 0); + if (__kmp_chunk < 1) { + __kmp_chunk = KMP_DEFAULT_CHUNK; + __kmp_msg(kmp_ms_warning, KMP_MSG(InvalidChunk, name, comma), + __kmp_msg_null); + KMP_INFORM(Using_int_Value, name, __kmp_chunk); + // AC: next block commented out until KMP_DEFAULT_CHUNK != + // KMP_MIN_CHUNK (to improve code coverage :) + // The default chunk size is 1 according to standard, thus making + // KMP_MIN_CHUNK not 1 we would introduce mess: + // wrong chunk becomes 1, but it will be impossible to explicitely + // set 1, because it becomes KMP_MIN_CHUNK... + // } else if ( __kmp_chunk < KMP_MIN_CHUNK ) { + // __kmp_chunk = KMP_MIN_CHUNK; + } else if (__kmp_chunk > KMP_MAX_CHUNK) { + __kmp_chunk = KMP_MAX_CHUNK; + __kmp_msg(kmp_ms_warning, KMP_MSG(LargeChunk, name, comma), + __kmp_msg_null); + KMP_INFORM(Using_int_Value, name, __kmp_chunk); + } + } else + __kmp_env_chunk = FALSE; + } else + KMP_WARNING(EmptyString, name); + } + K_DIAG(1, ("__kmp_static == %d\n", __kmp_static)) + K_DIAG(1, ("__kmp_guided == %d\n", __kmp_guided)) + K_DIAG(1, ("__kmp_sched == %d\n", __kmp_sched)) + K_DIAG(1, ("__kmp_chunk == %d\n", __kmp_chunk)) } // __kmp_stg_parse_omp_schedule -static void -__kmp_stg_print_omp_schedule( kmp_str_buf_t * buffer, char const * name, void * data ) { - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); +static void __kmp_stg_print_omp_schedule(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + if (__kmp_chunk) { + switch (__kmp_sched) { + case kmp_sch_dynamic_chunked: + __kmp_str_buf_print(buffer, "%s,%d'\n", "dynamic", __kmp_chunk); + break; + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + __kmp_str_buf_print(buffer, "%s,%d'\n", "guided", __kmp_chunk); + break; + case kmp_sch_trapezoidal: + __kmp_str_buf_print(buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk); + break; + case kmp_sch_static: + case kmp_sch_static_chunked: + case kmp_sch_static_balanced: + case kmp_sch_static_greedy: + __kmp_str_buf_print(buffer, "%s,%d'\n", "static", __kmp_chunk); + break; + case kmp_sch_static_steal: + __kmp_str_buf_print(buffer, "%s,%d'\n", "static_steal", __kmp_chunk); + break; + case kmp_sch_auto: + __kmp_str_buf_print(buffer, "%s,%d'\n", "auto", __kmp_chunk); + break; } - if ( __kmp_chunk ) { - switch ( __kmp_sched ) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print( buffer, "%s,%d'\n", "dynamic", __kmp_chunk); - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print( 
buffer, "%s,%d'\n", "guided", __kmp_chunk); - break; - case kmp_sch_trapezoidal: - __kmp_str_buf_print( buffer, "%s,%d'\n", "trapezoidal", __kmp_chunk); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print( buffer, "%s,%d'\n", "static", __kmp_chunk); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print( buffer, "%s,%d'\n", "static_steal", __kmp_chunk); - break; - case kmp_sch_auto: - __kmp_str_buf_print( buffer, "%s,%d'\n", "auto", __kmp_chunk); - break; - } - } else { - switch ( __kmp_sched ) { - case kmp_sch_dynamic_chunked: - __kmp_str_buf_print( buffer, "%s'\n", "dynamic"); - break; - case kmp_sch_guided_iterative_chunked: - case kmp_sch_guided_analytical_chunked: - __kmp_str_buf_print( buffer, "%s'\n", "guided"); - break; - case kmp_sch_trapezoidal: - __kmp_str_buf_print( buffer, "%s'\n", "trapezoidal"); - break; - case kmp_sch_static: - case kmp_sch_static_chunked: - case kmp_sch_static_balanced: - case kmp_sch_static_greedy: - __kmp_str_buf_print( buffer, "%s'\n", "static"); - break; - case kmp_sch_static_steal: - __kmp_str_buf_print( buffer, "%s'\n", "static_steal"); - break; - case kmp_sch_auto: - __kmp_str_buf_print( buffer, "%s'\n", "auto"); - break; - } + } else { + switch (__kmp_sched) { + case kmp_sch_dynamic_chunked: + __kmp_str_buf_print(buffer, "%s'\n", "dynamic"); + break; + case kmp_sch_guided_iterative_chunked: + case kmp_sch_guided_analytical_chunked: + __kmp_str_buf_print(buffer, "%s'\n", "guided"); + break; + case kmp_sch_trapezoidal: + __kmp_str_buf_print(buffer, "%s'\n", "trapezoidal"); + break; + case kmp_sch_static: + case kmp_sch_static_chunked: + case kmp_sch_static_balanced: + case kmp_sch_static_greedy: + __kmp_str_buf_print(buffer, "%s'\n", "static"); + break; + case kmp_sch_static_steal: + __kmp_str_buf_print(buffer, "%s'\n", "static_steal"); + break; + case kmp_sch_auto: + __kmp_str_buf_print(buffer, "%s'\n", "auto"); + break; } + } } // __kmp_stg_print_omp_schedule -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ATOMIC_MODE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_atomic_mode( char const * name, char const * value, void * data ) { - // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP compatibility mode. - int mode = 0; - int max = 1; - #ifdef KMP_GOMP_COMPAT - max = 2; - #endif /* KMP_GOMP_COMPAT */ - __kmp_stg_parse_int( name, value, 0, max, & mode ); - // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use - // 0 rather that max value. - if ( mode > 0 ) { - __kmp_atomic_mode = mode; - }; // if + +static void __kmp_stg_parse_atomic_mode(char const *name, char const *value, + void *data) { + // Modes: 0 -- do not change default; 1 -- Intel perf mode, 2 -- GOMP + // compatibility mode. + int mode = 0; + int max = 1; +#ifdef KMP_GOMP_COMPAT + max = 2; +#endif /* KMP_GOMP_COMPAT */ + __kmp_stg_parse_int(name, value, 0, max, &mode); + // TODO; parse_int is not very suitable for this case. In case of overflow it + // is better to use + // 0 rather that max value. 
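// The TODO above notes that out-of-range input would better keep the default
// mode (0) than be clamped to the maximum. A minimal sketch of that suggested
// policy, using a hypothetical helper rather than __kmp_stg_parse_int:
#include <cerrno>
#include <cstdlib>

static int parse_mode_or_default(const char *value, int max_mode) {
  errno = 0;
  char *end = nullptr;
  long v = std::strtol(value, &end, 10);
  // Anything unparsable or outside [0, max_mode] leaves the default mode (0).
  if (end == value || errno == ERANGE || v < 0 || v > max_mode)
    return 0;
  return (int)v;
}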
+ if (mode > 0) { + __kmp_atomic_mode = mode; + }; // if } // __kmp_stg_parse_atomic_mode -static void -__kmp_stg_print_atomic_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_atomic_mode ); +static void __kmp_stg_print_atomic_mode(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_atomic_mode); } // __kmp_stg_print_atomic_mode - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_CONSISTENCY_CHECK -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_consistency_check( char const * name, char const * value, void * data ) { - if ( ! __kmp_strcasecmp_with_sentinel( "all", value, 0 ) ) { - // Note, this will not work from kmp_set_defaults because th_cons stack was not allocated - // for existed thread(s) thus the first __kmp_push_ will break with assertion. - // TODO: allocate th_cons if called from kmp_set_defaults. - __kmp_env_consistency_check = TRUE; - } else if ( ! __kmp_strcasecmp_with_sentinel( "none", value, 0 ) ) { - __kmp_env_consistency_check = FALSE; - } else { - KMP_WARNING( StgInvalidValue, name, value ); - }; // if + +static void __kmp_stg_parse_consistency_check(char const *name, + char const *value, void *data) { + if (!__kmp_strcasecmp_with_sentinel("all", value, 0)) { + // Note, this will not work from kmp_set_defaults because th_cons stack was + // not allocated + // for existed thread(s) thus the first __kmp_push_ will break + // with assertion. + // TODO: allocate th_cons if called from kmp_set_defaults. + __kmp_env_consistency_check = TRUE; + } else if (!__kmp_strcasecmp_with_sentinel("none", value, 0)) { + __kmp_env_consistency_check = FALSE; + } else { + KMP_WARNING(StgInvalidValue, name, value); + }; // if } // __kmp_stg_parse_consistency_check -static void -__kmp_stg_print_consistency_check( kmp_str_buf_t * buffer, char const * name, void * data ) { +static void __kmp_stg_print_consistency_check(kmp_str_buf_t *buffer, + char const *name, void *data) { #if KMP_DEBUG - const char *value = NULL; + const char *value = NULL; - if ( __kmp_env_consistency_check ) { - value = "all"; - } else { - value = "none"; - } + if (__kmp_env_consistency_check) { + value = "all"; + } else { + value = "none"; + } - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } + if (value != NULL) { + __kmp_stg_print_str(buffer, name, value); + } #endif /* KMP_DEBUG */ } // __kmp_stg_print_consistency_check - #if USE_ITT_BUILD -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ITT_PREPARE_DELAY -// ------------------------------------------------------------------------------------------------- #if USE_ITT_NOTIFY -static void -__kmp_stg_parse_itt_prepare_delay( char const * name, char const * value, void * data ) -{ - // Experimental code: KMP_ITT_PREPARE_DELAY specifies numbert of loop iterations. - int delay = 0; - __kmp_stg_parse_int( name, value, 0, INT_MAX, & delay ); - __kmp_itt_prepare_delay = delay; +static void __kmp_stg_parse_itt_prepare_delay(char const *name, + char const *value, void *data) { + // Experimental code: KMP_ITT_PREPARE_DELAY specifies numbert of loop + // iterations. 
+ int delay = 0; + __kmp_stg_parse_int(name, value, 0, INT_MAX, &delay); + __kmp_itt_prepare_delay = delay; } // __kmp_str_parse_itt_prepare_delay -static void -__kmp_stg_print_itt_prepare_delay( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_uint64( buffer, name, __kmp_itt_prepare_delay ); +static void __kmp_stg_print_itt_prepare_delay(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_uint64(buffer, name, __kmp_itt_prepare_delay); } // __kmp_str_print_itt_prepare_delay #endif // USE_ITT_NOTIFY #endif /* USE_ITT_BUILD */ -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_MALLOC_POOL_INCR -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_malloc_pool_incr( char const * name, char const * value, void * data ) { - __kmp_stg_parse_size( - name, - value, - KMP_MIN_MALLOC_POOL_INCR, - KMP_MAX_MALLOC_POOL_INCR, - NULL, - & __kmp_malloc_pool_incr, - 1 - ); + +static void __kmp_stg_parse_malloc_pool_incr(char const *name, + char const *value, void *data) { + __kmp_stg_parse_size(name, value, KMP_MIN_MALLOC_POOL_INCR, + KMP_MAX_MALLOC_POOL_INCR, NULL, &__kmp_malloc_pool_incr, + 1); } // __kmp_stg_parse_malloc_pool_incr -static void -__kmp_stg_print_malloc_pool_incr( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_size( buffer, name, __kmp_malloc_pool_incr ); +static void __kmp_stg_print_malloc_pool_incr(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_size(buffer, name, __kmp_malloc_pool_incr); } // _kmp_stg_print_malloc_pool_incr - #ifdef KMP_DEBUG -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_PAR_RANGE -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_par_range_env( char const * name, char const * value, void * data ) { - __kmp_stg_parse_par_range( - name, - value, - & __kmp_par_range, - __kmp_par_range_routine, - __kmp_par_range_filename, - & __kmp_par_range_lb, - & __kmp_par_range_ub - ); + +static void __kmp_stg_parse_par_range_env(char const *name, char const *value, + void *data) { + __kmp_stg_parse_par_range(name, value, &__kmp_par_range, + __kmp_par_range_routine, __kmp_par_range_filename, + &__kmp_par_range_lb, &__kmp_par_range_ub); } // __kmp_stg_parse_par_range_env -static void -__kmp_stg_print_par_range_env( kmp_str_buf_t * buffer, char const * name, void * data ) { - if (__kmp_par_range != 0) { - __kmp_stg_print_str( buffer, name, par_range_to_print ); - } +static void __kmp_stg_print_par_range_env(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_par_range != 0) { + __kmp_stg_print_str(buffer, name, par_range_to_print); + } } // __kmp_stg_print_par_range_env -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_YIELD_CYCLE, KMP_YIELD_ON, KMP_YIELD_OFF -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_yield_cycle( char const * name, char const * value, void * data ) { - int flag = __kmp_yield_cycle; - 
__kmp_stg_parse_bool( name, value, & flag ); - __kmp_yield_cycle = flag; +static void __kmp_stg_parse_yield_cycle(char const *name, char const *value, + void *data) { + int flag = __kmp_yield_cycle; + __kmp_stg_parse_bool(name, value, &flag); + __kmp_yield_cycle = flag; } // __kmp_stg_parse_yield_cycle -static void -__kmp_stg_print_yield_cycle( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_yield_cycle ); +static void __kmp_stg_print_yield_cycle(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_yield_cycle); } // __kmp_stg_print_yield_cycle -static void -__kmp_stg_parse_yield_on( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_on_count ); +static void __kmp_stg_parse_yield_on(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_on_count); } // __kmp_stg_parse_yield_on -static void -__kmp_stg_print_yield_on( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_yield_on_count ); +static void __kmp_stg_print_yield_on(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_yield_on_count); } // __kmp_stg_print_yield_on -static void -__kmp_stg_parse_yield_off( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 2, INT_MAX, & __kmp_yield_off_count ); +static void __kmp_stg_parse_yield_off(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 2, INT_MAX, &__kmp_yield_off_count); } // __kmp_stg_parse_yield_off -static void -__kmp_stg_print_yield_off( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_yield_off_count ); +static void __kmp_stg_print_yield_off(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_yield_off_count); } // __kmp_stg_print_yield_off #endif -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_INIT_WAIT, KMP_NEXT_WAIT -// ------------------------------------------------------------------------------------------------- - -static void -__kmp_stg_parse_init_wait( char const * name, char const * value, void * data ) { - int wait; - KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); - wait = __kmp_init_wait / 2; - __kmp_stg_parse_int( name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, & wait ); - __kmp_init_wait = wait * 2; - KMP_ASSERT( ( __kmp_init_wait & 1 ) == 0 ); - __kmp_yield_init = __kmp_init_wait; + +static void __kmp_stg_parse_init_wait(char const *name, char const *value, + void *data) { + int wait; + KMP_ASSERT((__kmp_init_wait & 1) == 0); + wait = __kmp_init_wait / 2; + __kmp_stg_parse_int(name, value, KMP_MIN_INIT_WAIT, KMP_MAX_INIT_WAIT, &wait); + __kmp_init_wait = wait * 2; + KMP_ASSERT((__kmp_init_wait & 1) == 0); + __kmp_yield_init = __kmp_init_wait; } // __kmp_stg_parse_init_wait -static void -__kmp_stg_print_init_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_init_wait ); +static void __kmp_stg_print_init_wait(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_init_wait); } // __kmp_stg_print_init_wait -static void 
-__kmp_stg_parse_next_wait( char const * name, char const * value, void * data ) { - int wait; - KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); - wait = __kmp_next_wait / 2; - __kmp_stg_parse_int( name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, & wait ); - __kmp_next_wait = wait * 2; - KMP_ASSERT( ( __kmp_next_wait & 1 ) == 0 ); - __kmp_yield_next = __kmp_next_wait; +static void __kmp_stg_parse_next_wait(char const *name, char const *value, + void *data) { + int wait; + KMP_ASSERT((__kmp_next_wait & 1) == 0); + wait = __kmp_next_wait / 2; + __kmp_stg_parse_int(name, value, KMP_MIN_NEXT_WAIT, KMP_MAX_NEXT_WAIT, &wait); + __kmp_next_wait = wait * 2; + KMP_ASSERT((__kmp_next_wait & 1) == 0); + __kmp_yield_next = __kmp_next_wait; } // __kmp_stg_parse_next_wait -static void -__kmp_stg_print_next_wait( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_next_wait ); +static void __kmp_stg_print_next_wait(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_next_wait); } //__kmp_stg_print_next_wait - -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_GTID_MODE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_gtid_mode( char const * name, char const * value, void * data ) { - // - // Modes: - // 0 -- do not change default - // 1 -- sp search - // 2 -- use "keyed" TLS var, i.e. - // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS) - // 3 -- __declspec(thread) TLS var in tdata section - // - int mode = 0; - int max = 2; - #ifdef KMP_TDATA_GTID - max = 3; - #endif /* KMP_TDATA_GTID */ - __kmp_stg_parse_int( name, value, 0, max, & mode ); - // TODO; parse_int is not very suitable for this case. In case of overflow it is better to use - // 0 rather that max value. - if ( mode == 0 ) { - __kmp_adjust_gtid_mode = TRUE; - } - else { - __kmp_gtid_mode = mode; - __kmp_adjust_gtid_mode = FALSE; - }; // if +static void __kmp_stg_parse_gtid_mode(char const *name, char const *value, + void *data) { + // Modes: + // 0 -- do not change default + // 1 -- sp search + // 2 -- use "keyed" TLS var, i.e. + // pthread_getspecific(Linux* OS/OS X*) or TlsGetValue(Windows* OS) + // 3 -- __declspec(thread) TLS var in tdata section + int mode = 0; + int max = 2; +#ifdef KMP_TDATA_GTID + max = 3; +#endif /* KMP_TDATA_GTID */ + __kmp_stg_parse_int(name, value, 0, max, &mode); + // TODO; parse_int is not very suitable for this case. In case of overflow it + // is better to use 0 rather that max value. 
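// For reference, a schematic of what the KMP_GTID_MODE numbers listed in the
// comment above select; the enumerator names are illustrative only, not the
// runtime's identifiers.
enum gtid_mode_sketch {
  gtid_mode_auto = 0,         // 0: do not force a mode; the runtime adjusts it
  gtid_mode_stack_search = 1, // 1: locate the gtid by searching thread stacks
  gtid_mode_keyed_tls = 2,    // 2: pthread_getspecific / TlsGetValue
  gtid_mode_tdata_tls = 3     // 3: __declspec(thread) / __thread TLS variable
};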
+ if (mode == 0) { + __kmp_adjust_gtid_mode = TRUE; + } else { + __kmp_gtid_mode = mode; + __kmp_adjust_gtid_mode = FALSE; + }; // if } // __kmp_str_parse_gtid_mode -static void -__kmp_stg_print_gtid_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_adjust_gtid_mode ) { - __kmp_stg_print_int( buffer, name, 0 ); - } - else { - __kmp_stg_print_int( buffer, name, __kmp_gtid_mode ); - } +static void __kmp_stg_print_gtid_mode(kmp_str_buf_t *buffer, char const *name, + void *data) { + if (__kmp_adjust_gtid_mode) { + __kmp_stg_print_int(buffer, name, 0); + } else { + __kmp_stg_print_int(buffer, name, __kmp_gtid_mode); + } } // __kmp_stg_print_gtid_mode -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_NUM_LOCKS_IN_BLOCK -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_lock_block( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, KMP_INT_MAX, & __kmp_num_locks_in_block ); +static void __kmp_stg_parse_lock_block(char const *name, char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, KMP_INT_MAX, &__kmp_num_locks_in_block); } // __kmp_str_parse_lock_block -static void -__kmp_stg_print_lock_block( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_num_locks_in_block ); +static void __kmp_stg_print_lock_block(kmp_str_buf_t *buffer, char const *name, + void *data) { + __kmp_stg_print_int(buffer, name, __kmp_num_locks_in_block); } // __kmp_stg_print_lock_block -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_LOCK_KIND -// ------------------------------------------------------------------------------------------------- #if KMP_USE_DYNAMIC_LOCK -# define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a) +#define KMP_STORE_LOCK_SEQ(a) (__kmp_user_lock_seq = lockseq_##a) #else -# define KMP_STORE_LOCK_SEQ(a) +#define KMP_STORE_LOCK_SEQ(a) #endif -static void -__kmp_stg_parse_lock_kind( char const * name, char const * value, void * data ) { - if ( __kmp_init_user_locks ) { - KMP_WARNING( EnvLockWarn, name ); - return; - } +static void __kmp_stg_parse_lock_kind(char const *name, char const *value, + void *data) { + if (__kmp_init_user_locks) { + KMP_WARNING(EnvLockWarn, name); + return; + } - if ( __kmp_str_match( "tas", 2, value ) - || __kmp_str_match( "test and set", 2, value ) - || __kmp_str_match( "test_and_set", 2, value ) - || __kmp_str_match( "test-and-set", 2, value ) - || __kmp_str_match( "test andset", 2, value ) - || __kmp_str_match( "test_andset", 2, value ) - || __kmp_str_match( "test-andset", 2, value ) - || __kmp_str_match( "testand set", 2, value ) - || __kmp_str_match( "testand_set", 2, value ) - || __kmp_str_match( "testand-set", 2, value ) - || __kmp_str_match( "testandset", 2, value ) ) { - __kmp_user_lock_kind = lk_tas; - KMP_STORE_LOCK_SEQ(tas); - } + if (__kmp_str_match("tas", 2, value) || + __kmp_str_match("test and set", 2, value) || + __kmp_str_match("test_and_set", 2, value) || + __kmp_str_match("test-and-set", 2, value) || + __kmp_str_match("test andset", 2, value) || + __kmp_str_match("test_andset", 2, value) || + __kmp_str_match("test-andset", 2, value) || + 
__kmp_str_match("testand set", 2, value) || + __kmp_str_match("testand_set", 2, value) || + __kmp_str_match("testand-set", 2, value) || + __kmp_str_match("testandset", 2, value)) { + __kmp_user_lock_kind = lk_tas; + KMP_STORE_LOCK_SEQ(tas); + } #if KMP_USE_FUTEX - else if ( __kmp_str_match( "futex", 1, value ) ) { - if ( __kmp_futex_determine_capable() ) { - __kmp_user_lock_kind = lk_futex; - KMP_STORE_LOCK_SEQ(futex); - } - else { - KMP_WARNING( FutexNotSupported, name, value ); - } + else if (__kmp_str_match("futex", 1, value)) { + if (__kmp_futex_determine_capable()) { + __kmp_user_lock_kind = lk_futex; + KMP_STORE_LOCK_SEQ(futex); + } else { + KMP_WARNING(FutexNotSupported, name, value); } + } #endif - else if ( __kmp_str_match( "ticket", 2, value ) ) { - __kmp_user_lock_kind = lk_ticket; - KMP_STORE_LOCK_SEQ(ticket); - } - else if ( __kmp_str_match( "queuing", 1, value ) - || __kmp_str_match( "queue", 1, value ) ) { - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - else if ( __kmp_str_match( "drdpa ticket", 1, value ) - || __kmp_str_match( "drdpa_ticket", 1, value ) - || __kmp_str_match( "drdpa-ticket", 1, value ) - || __kmp_str_match( "drdpaticket", 1, value ) - || __kmp_str_match( "drdpa", 1, value ) ) { - __kmp_user_lock_kind = lk_drdpa; - KMP_STORE_LOCK_SEQ(drdpa); - } + else if (__kmp_str_match("ticket", 2, value)) { + __kmp_user_lock_kind = lk_ticket; + KMP_STORE_LOCK_SEQ(ticket); + } else if (__kmp_str_match("queuing", 1, value) || + __kmp_str_match("queue", 1, value)) { + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); + } else if (__kmp_str_match("drdpa ticket", 1, value) || + __kmp_str_match("drdpa_ticket", 1, value) || + __kmp_str_match("drdpa-ticket", 1, value) || + __kmp_str_match("drdpaticket", 1, value) || + __kmp_str_match("drdpa", 1, value)) { + __kmp_user_lock_kind = lk_drdpa; + KMP_STORE_LOCK_SEQ(drdpa); + } #if KMP_USE_ADAPTIVE_LOCKS - else if ( __kmp_str_match( "adaptive", 1, value ) ) { - if( __kmp_cpuinfo.rtm ) { // ??? Is cpuinfo available here? - __kmp_user_lock_kind = lk_adaptive; - KMP_STORE_LOCK_SEQ(adaptive); - } else { - KMP_WARNING( AdaptiveNotSupported, name, value ); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } + else if (__kmp_str_match("adaptive", 1, value)) { + if (__kmp_cpuinfo.rtm) { // ??? Is cpuinfo available here? 
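// The adaptive (and rtm/hle) lock kinds are only enabled when
// __kmp_cpuinfo.rtm is set. As a rough illustration of what that flag
// presumably corresponds to on x86 (CPUID leaf 7, subleaf 0, EBX bit 11),
// assuming a reasonably recent GCC/Clang <cpuid.h>:
#include <cpuid.h>

static bool cpu_has_rtm_sketch() {
  unsigned eax = 0, ebx = 0, ecx = 0, edx = 0;
  if (!__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx))
    return false;                 // CPUID leaf 7 not available
  return ((ebx >> 11) & 1u) != 0; // EBX bit 11 is the RTM feature bit
}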
+ __kmp_user_lock_kind = lk_adaptive; + KMP_STORE_LOCK_SEQ(adaptive); + } else { + KMP_WARNING(AdaptiveNotSupported, name, value); + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); } + } #endif // KMP_USE_ADAPTIVE_LOCKS #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - else if ( __kmp_str_match("rtm", 1, value) ) { - if ( __kmp_cpuinfo.rtm ) { - __kmp_user_lock_kind = lk_rtm; - KMP_STORE_LOCK_SEQ(rtm); - } else { - KMP_WARNING( AdaptiveNotSupported, name, value ); - __kmp_user_lock_kind = lk_queuing; - KMP_STORE_LOCK_SEQ(queuing); - } - } - else if ( __kmp_str_match("hle", 1, value) ) { - __kmp_user_lock_kind = lk_hle; - KMP_STORE_LOCK_SEQ(hle); + else if (__kmp_str_match("rtm", 1, value)) { + if (__kmp_cpuinfo.rtm) { + __kmp_user_lock_kind = lk_rtm; + KMP_STORE_LOCK_SEQ(rtm); + } else { + KMP_WARNING(AdaptiveNotSupported, name, value); + __kmp_user_lock_kind = lk_queuing; + KMP_STORE_LOCK_SEQ(queuing); } + } else if (__kmp_str_match("hle", 1, value)) { + __kmp_user_lock_kind = lk_hle; + KMP_STORE_LOCK_SEQ(hle); + } #endif - else { - KMP_WARNING( StgInvalidValue, name, value ); - } + else { + KMP_WARNING(StgInvalidValue, name, value); + } } -static void -__kmp_stg_print_lock_kind( kmp_str_buf_t * buffer, char const * name, void * data ) { - const char *value = NULL; +static void __kmp_stg_print_lock_kind(kmp_str_buf_t *buffer, char const *name, + void *data) { + const char *value = NULL; - switch ( __kmp_user_lock_kind ) { - case lk_default: - value = "default"; - break; + switch (__kmp_user_lock_kind) { + case lk_default: + value = "default"; + break; - case lk_tas: - value = "tas"; - break; + case lk_tas: + value = "tas"; + break; #if KMP_USE_FUTEX - case lk_futex: - value = "futex"; - break; + case lk_futex: + value = "futex"; + break; #endif #if KMP_USE_DYNAMIC_LOCK && KMP_USE_TSX - case lk_rtm: - value = "rtm"; - break; + case lk_rtm: + value = "rtm"; + break; - case lk_hle: - value = "hle"; - break; + case lk_hle: + value = "hle"; + break; #endif - case lk_ticket: - value = "ticket"; - break; + case lk_ticket: + value = "ticket"; + break; - case lk_queuing: - value = "queuing"; - break; + case lk_queuing: + value = "queuing"; + break; - case lk_drdpa: - value = "drdpa"; - break; + case lk_drdpa: + value = "drdpa"; + break; #if KMP_USE_ADAPTIVE_LOCKS - case lk_adaptive: - value = "adaptive"; - break; + case lk_adaptive: + value = "adaptive"; + break; #endif - } + } - if ( value != NULL ) { - __kmp_stg_print_str( buffer, name, value ); - } + if (value != NULL) { + __kmp_stg_print_str(buffer, name, value); + } } -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_SPIN_BACKOFF_PARAMS -// ------------------------------------------------------------------------------------------------- - -// KMP_SPIN_BACKOFF_PARAMS=max_backoff[,min_tick] (max backoff size, min tick for machine pause) -static void -__kmp_stg_parse_spin_backoff_params(const char* name, const char* value, void* data) -{ - const char *next = value; - int total = 0; // Count elements that were set. 
It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas - int i; - - kmp_uint32 max_backoff = __kmp_spin_backoff_params.max_backoff; - kmp_uint32 min_tick = __kmp_spin_backoff_params.min_tick; +// KMP_SPIN_BACKOFF_PARAMS=max_backoff[,min_tick] (max backoff size, min tick +// for machine pause) +static void __kmp_stg_parse_spin_backoff_params(const char *name, + const char *value, void *data) { + const char *next = value; - // Run only 3 iterations because it is enough to read two values or find a syntax error - for ( i = 0; i < 3 ; i++) { - SKIP_WS( next ); + int total = 0; // Count elements that were set. It'll be used as an array size + int prev_comma = FALSE; // For correct processing sequential commas + int i; - if ( *next == '\0' ) { - break; - } - // Next character is not an integer or not a comma OR number of values > 2 => end of list - if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) { - KMP_WARNING( EnvSyntaxError, name, value ); - return; - } - // The next character is ',' - if ( *next == ',' ) { - // ',' is the fisrt character - if ( total == 0 || prev_comma ) { - total++; - } - prev_comma = TRUE; - next++; //skip ',' - SKIP_WS( next ); - } - // Next character is a digit - if ( *next >= '0' && *next <= '9' ) { - int num; - const char *buf = next; - char const * msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS( next ); - total++; - - const char *tmp = next; - SKIP_WS( tmp ); - if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { - KMP_WARNING( EnvSpacesNotAllowed, name, value ); - return; - } + kmp_uint32 max_backoff = __kmp_spin_backoff_params.max_backoff; + kmp_uint32 min_tick = __kmp_spin_backoff_params.min_tick; - num = __kmp_str_to_int( buf, *next ); - if ( num <= 0 ) { // The number of retries should be > 0 - msg = KMP_I18N_STR( ValueTooSmall ); - num = 1; - } else if ( num > KMP_INT_MAX ) { - msg = KMP_I18N_STR( ValueTooLarge ); - num = KMP_INT_MAX; - } - if ( msg != NULL ) { - // Message is not empty. Print warning. 
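// This loop accepts KMP_SPIN_BACKOFF_PARAMS=max_backoff[,min_tick]. A
// much-simplified sketch of that two-value shape; the real code above
// additionally rejects embedded spaces, extra fields and out-of-range values
// with warnings.
#include <cstdlib>

static void parse_backoff_pair_sketch(const char *value, unsigned *max_backoff,
                                      unsigned *min_tick) {
  char *end = nullptr;
  long first = std::strtol(value, &end, 10);
  if (end != value && first > 0)
    *max_backoff = (unsigned)first;       // first field: max backoff size
  if (*end == ',') {
    long second = std::strtol(end + 1, &end, 10);
    if (second > 0)
      *min_tick = (unsigned)second;       // optional second field: min tick
  }
}
// e.g. parse_backoff_pair_sketch("1024,32", &mb, &mt) sets mb=1024, mt=32.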
- KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - KMP_INFORM( Using_int_Value, name, num ); - } - if( total == 1 ) { - max_backoff = num; - } else if( total == 2 ) { - min_tick = num; - } - } + // Run only 3 iterations because it is enough to read two values or find a + // syntax error + for (i = 0; i < 3; i++) { + SKIP_WS(next); + + if (*next == '\0') { + break; } - KMP_DEBUG_ASSERT( total > 0 ); - if( total <= 0 ) { - KMP_WARNING( EnvSyntaxError, name, value ); + // Next character is not an integer or not a comma OR number of values > 2 + // => end of list + if (((*next < '0' || *next > '9') && *next != ',') || total > 2) { + KMP_WARNING(EnvSyntaxError, name, value); + return; + } + // The next character is ',' + if (*next == ',') { + // ',' is the fisrt character + if (total == 0 || prev_comma) { + total++; + } + prev_comma = TRUE; + next++; // skip ',' + SKIP_WS(next); + } + // Next character is a digit + if (*next >= '0' && *next <= '9') { + int num; + const char *buf = next; + char const *msg = NULL; + prev_comma = FALSE; + SKIP_DIGITS(next); + total++; + + const char *tmp = next; + SKIP_WS(tmp); + if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) { + KMP_WARNING(EnvSpacesNotAllowed, name, value); return; + } + + num = __kmp_str_to_int(buf, *next); + if (num <= 0) { // The number of retries should be > 0 + msg = KMP_I18N_STR(ValueTooSmall); + num = 1; + } else if (num > KMP_INT_MAX) { + msg = KMP_I18N_STR(ValueTooLarge); + num = KMP_INT_MAX; + } + if (msg != NULL) { + // Message is not empty. Print warning. + KMP_WARNING(ParseSizeIntWarn, name, value, msg); + KMP_INFORM(Using_int_Value, name, num); + } + if (total == 1) { + max_backoff = num; + } else if (total == 2) { + min_tick = num; + } } - __kmp_spin_backoff_params.max_backoff = max_backoff; - __kmp_spin_backoff_params.min_tick = min_tick; + } + KMP_DEBUG_ASSERT(total > 0); + if (total <= 0) { + KMP_WARNING(EnvSyntaxError, name, value); + return; + } + __kmp_spin_backoff_params.max_backoff = max_backoff; + __kmp_spin_backoff_params.min_tick = min_tick; } -static void -__kmp_stg_print_spin_backoff_params(kmp_str_buf_t *buffer, char const* name, void* data) -{ - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_spin_backoff_params.max_backoff, - __kmp_spin_backoff_params.min_tick ); +static void __kmp_stg_print_spin_backoff_params(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + __kmp_str_buf_print(buffer, "%d,%d'\n", __kmp_spin_backoff_params.max_backoff, + __kmp_spin_backoff_params.min_tick); } #if KMP_USE_ADAPTIVE_LOCKS -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_ADAPTIVE_LOCK_PROPS, KMP_SPECULATIVE_STATSFILE -// ------------------------------------------------------------------------------------------------- // Parse out values for the tunable parameters from a string of the form // KMP_ADAPTIVE_LOCK_PROPS=max_soft_retries[,max_badness] -static void -__kmp_stg_parse_adaptive_lock_props( const char *name, const char *value, void *data ) -{ - int max_retries = 0; - int max_badness = 0; +static void __kmp_stg_parse_adaptive_lock_props(const char *name, + const char *value, void *data) { + int max_retries = 0; + 
int max_badness = 0; - const char *next = value; - - int total = 0; // Count elements that were set. It'll be used as an array size - int prev_comma = FALSE; // For correct processing sequential commas - int i; + const char *next = value; - // Save values in the structure __kmp_speculative_backoff_params - // Run only 3 iterations because it is enough to read two values or find a syntax error - for ( i = 0; i < 3 ; i++) { - SKIP_WS( next ); + int total = 0; // Count elements that were set. It'll be used as an array size + int prev_comma = FALSE; // For correct processing sequential commas + int i; - if ( *next == '\0' ) { - break; - } - // Next character is not an integer or not a comma OR number of values > 2 => end of list - if ( ( ( *next < '0' || *next > '9' ) && *next !=',' ) || total > 2 ) { - KMP_WARNING( EnvSyntaxError, name, value ); - return; - } - // The next character is ',' - if ( *next == ',' ) { - // ',' is the fisrt character - if ( total == 0 || prev_comma ) { - total++; - } - prev_comma = TRUE; - next++; //skip ',' - SKIP_WS( next ); - } - // Next character is a digit - if ( *next >= '0' && *next <= '9' ) { - int num; - const char *buf = next; - char const * msg = NULL; - prev_comma = FALSE; - SKIP_DIGITS( next ); - total++; - - const char *tmp = next; - SKIP_WS( tmp ); - if ( ( *next == ' ' || *next == '\t' ) && ( *tmp >= '0' && *tmp <= '9' ) ) { - KMP_WARNING( EnvSpacesNotAllowed, name, value ); - return; - } + // Save values in the structure __kmp_speculative_backoff_params + // Run only 3 iterations because it is enough to read two values or find a + // syntax error + for (i = 0; i < 3; i++) { + SKIP_WS(next); - num = __kmp_str_to_int( buf, *next ); - if ( num < 0 ) { // The number of retries should be >= 0 - msg = KMP_I18N_STR( ValueTooSmall ); - num = 1; - } else if ( num > KMP_INT_MAX ) { - msg = KMP_I18N_STR( ValueTooLarge ); - num = KMP_INT_MAX; - } - if ( msg != NULL ) { - // Message is not empty. Print warning. - KMP_WARNING( ParseSizeIntWarn, name, value, msg ); - KMP_INFORM( Using_int_Value, name, num ); - } - if( total == 1 ) { - max_retries = num; - } else if( total == 2 ) { - max_badness = num; - } - } + if (*next == '\0') { + break; } - KMP_DEBUG_ASSERT( total > 0 ); - if( total <= 0 ) { - KMP_WARNING( EnvSyntaxError, name, value ); + // Next character is not an integer or not a comma OR number of values > 2 + // => end of list + if (((*next < '0' || *next > '9') && *next != ',') || total > 2) { + KMP_WARNING(EnvSyntaxError, name, value); + return; + } + // The next character is ',' + if (*next == ',') { + // ',' is the fisrt character + if (total == 0 || prev_comma) { + total++; + } + prev_comma = TRUE; + next++; // skip ',' + SKIP_WS(next); + } + // Next character is a digit + if (*next >= '0' && *next <= '9') { + int num; + const char *buf = next; + char const *msg = NULL; + prev_comma = FALSE; + SKIP_DIGITS(next); + total++; + + const char *tmp = next; + SKIP_WS(tmp); + if ((*next == ' ' || *next == '\t') && (*tmp >= '0' && *tmp <= '9')) { + KMP_WARNING(EnvSpacesNotAllowed, name, value); return; + } + + num = __kmp_str_to_int(buf, *next); + if (num < 0) { // The number of retries should be >= 0 + msg = KMP_I18N_STR(ValueTooSmall); + num = 1; + } else if (num > KMP_INT_MAX) { + msg = KMP_I18N_STR(ValueTooLarge); + num = KMP_INT_MAX; + } + if (msg != NULL) { + // Message is not empty. Print warning. 
+ KMP_WARNING(ParseSizeIntWarn, name, value, msg); + KMP_INFORM(Using_int_Value, name, num); + } + if (total == 1) { + max_retries = num; + } else if (total == 2) { + max_badness = num; + } } - __kmp_adaptive_backoff_params.max_soft_retries = max_retries; - __kmp_adaptive_backoff_params.max_badness = max_badness; + } + KMP_DEBUG_ASSERT(total > 0); + if (total <= 0) { + KMP_WARNING(EnvSyntaxError, name, value); + return; + } + __kmp_adaptive_backoff_params.max_soft_retries = max_retries; + __kmp_adaptive_backoff_params.max_badness = max_badness; } - -static void -__kmp_stg_print_adaptive_lock_props(kmp_str_buf_t * buffer, char const * name, void * data ) -{ - if( __kmp_env_format ) { - KMP_STR_BUF_PRINT_NAME_EX(name); - } else { - __kmp_str_buf_print( buffer, " %s='", name ); - } - __kmp_str_buf_print( buffer, "%d,%d'\n", __kmp_adaptive_backoff_params.max_soft_retries, - __kmp_adaptive_backoff_params.max_badness ); +static void __kmp_stg_print_adaptive_lock_props(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + __kmp_str_buf_print(buffer, "%d,%d'\n", + __kmp_adaptive_backoff_params.max_soft_retries, + __kmp_adaptive_backoff_params.max_badness); } // __kmp_stg_print_adaptive_lock_props #if KMP_DEBUG_ADAPTIVE_LOCKS -static void -__kmp_stg_parse_speculative_statsfile( char const * name, char const * value, void * data ) { - __kmp_stg_parse_file( name, value, "", & __kmp_speculative_statsfile ); +static void __kmp_stg_parse_speculative_statsfile(char const *name, + char const *value, + void *data) { + __kmp_stg_parse_file(name, value, "", &__kmp_speculative_statsfile); } // __kmp_stg_parse_speculative_statsfile -static void -__kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name, void * data ) { - if ( __kmp_str_match( "-", 0, __kmp_speculative_statsfile ) ) { - __kmp_stg_print_str( buffer, name, "stdout" ); - } else { - __kmp_stg_print_str( buffer, name, __kmp_speculative_statsfile ); - } +static void __kmp_stg_print_speculative_statsfile(kmp_str_buf_t *buffer, + char const *name, + void *data) { + if (__kmp_str_match("-", 0, __kmp_speculative_statsfile)) { + __kmp_stg_print_str(buffer, name, "stdout"); + } else { + __kmp_stg_print_str(buffer, name, __kmp_speculative_statsfile); + } } // __kmp_stg_print_speculative_statsfile @@ -4300,9 +4065,8 @@ __kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name #endif // KMP_USE_ADAPTIVE_LOCKS -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_HW_SUBSET (was KMP_PLACE_THREADS) -// ------------------------------------------------------------------------------------------------- // The longest observable sequense of items is // Socket-Node-Tile-Core-Thread @@ -4310,8 +4074,8 @@ __kmp_stg_print_speculative_statsfile( kmp_str_buf_t * buffer, char const * name // The input string is usually short enough, let's use 512 limit for now #define MAX_T_LEVEL 5 #define MAX_STR_LEN 512 -static void -__kmp_stg_parse_hw_subset( char const * name, char const * value, void * data ) { +static void __kmp_stg_parse_hw_subset(char const *name, char const *value, + void *data) { // Value example: 1s,5c@3,2T // Which means "use 1 socket, 5 cores with offset 3, 2 threads per core" static int parsed = 0; @@ -4447,1076 +4211,1116 @@ err: return; } -static void 
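// KMP_HW_SUBSET items handled around here have the shape <count><unit>[@<offset>],
// e.g. "5c@3" meaning 5 cores starting at offset 3. A rough sketch of decoding a
// single item with a single-letter unit; the real parser also handles the L2/tile
// level, case folding and error reporting.
#include <cstdlib>

struct hw_item_sketch { int num; char unit; int offset; };

static hw_item_sketch parse_hw_item_sketch(const char *item) {
  hw_item_sketch it = {0, '\0', 0};
  char *pos = nullptr;
  it.num = (int)std::strtol(item, &pos, 10);            // leading count, e.g. 5
  it.unit = *pos;                                       // unit letter: s, n, c or t
  if (it.unit != '\0' && *(pos + 1) == '@')
    it.offset = (int)std::strtol(pos + 2, nullptr, 10); // optional @offset
  return it;
}
// parse_hw_item_sketch("5c@3") -> { num = 5, unit = 'c', offset = 3 }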
-__kmp_stg_print_hw_subset( kmp_str_buf_t * buffer, char const * name, void * data ) { - if (__kmp_hws_requested) { - int comma = 0; - kmp_str_buf_t buf; - __kmp_str_buf_init(&buf); - if(__kmp_env_format) - KMP_STR_BUF_PRINT_NAME_EX(name); - else - __kmp_str_buf_print(buffer, " %s='", name); - if (__kmp_hws_socket.num) { - __kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num); - if (__kmp_hws_socket.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset); - comma = 1; - } - if (__kmp_hws_node.num) { - __kmp_str_buf_print(&buf, "%s%dn", comma?",":"", __kmp_hws_node.num); - if (__kmp_hws_node.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset); - comma = 1; - } - if (__kmp_hws_tile.num) { - __kmp_str_buf_print(&buf, "%s%dL2", comma?",":"", __kmp_hws_tile.num); - if (__kmp_hws_tile.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset); - comma = 1; - } - if (__kmp_hws_core.num) { - __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_hws_core.num); - if (__kmp_hws_core.offset) - __kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset); - comma = 1; - } - if (__kmp_hws_proc.num) - __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_hws_proc.num); - __kmp_str_buf_print(buffer, "%s'\n", buf.str ); - __kmp_str_buf_free(&buf); - } +static void __kmp_stg_print_hw_subset(kmp_str_buf_t *buffer, char const *name, + void *data ) { + if (__kmp_hws_requested) { + int comma = 0; + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + if (__kmp_env_format) + KMP_STR_BUF_PRINT_NAME_EX(name); + else + __kmp_str_buf_print(buffer, " %s='", name); + if (__kmp_hws_socket.num) { + __kmp_str_buf_print(&buf, "%ds", __kmp_hws_socket.num); + if (__kmp_hws_socket.offset) + __kmp_str_buf_print(&buf, "@%d", __kmp_hws_socket.offset); + comma = 1; + } + if (__kmp_hws_node.num) { + __kmp_str_buf_print(&buf, "%s%dn", comma?",":"", __kmp_hws_node.num); + if (__kmp_hws_node.offset) + __kmp_str_buf_print(&buf, "@%d", __kmp_hws_node.offset); + comma = 1; + } + if (__kmp_hws_tile.num) { + __kmp_str_buf_print(&buf, "%s%dL2", comma?",":"", __kmp_hws_tile.num); + if (__kmp_hws_tile.offset) + __kmp_str_buf_print(&buf, "@%d", __kmp_hws_tile.offset); + comma = 1; + } + if (__kmp_hws_core.num) { + __kmp_str_buf_print(&buf, "%s%dc", comma?",":"", __kmp_hws_core.num); + if (__kmp_hws_core.offset) + __kmp_str_buf_print(&buf, "@%d", __kmp_hws_core.offset); + comma = 1; + } + if (__kmp_hws_proc.num) + __kmp_str_buf_print(&buf, "%s%dt", comma?",":"", __kmp_hws_proc.num); + __kmp_str_buf_print(buffer, "%s'\n", buf.str ); + __kmp_str_buf_free(&buf); + } } #if USE_ITT_BUILD -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_FORKJOIN_FRAMES -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_forkjoin_frames( char const * name, char const * value, void * data ) { - __kmp_stg_parse_bool( name, value, & __kmp_forkjoin_frames ); +static void __kmp_stg_parse_forkjoin_frames(char const *name, char const *value, + void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_forkjoin_frames); } // __kmp_stg_parse_forkjoin_frames -static void -__kmp_stg_print_forkjoin_frames( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_forkjoin_frames ); +static void __kmp_stg_print_forkjoin_frames(kmp_str_buf_t *buffer, + char const *name, void *data) { + 
__kmp_stg_print_bool(buffer, name, __kmp_forkjoin_frames); } // __kmp_stg_print_forkjoin_frames -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // KMP_FORKJOIN_FRAMES_MODE -// ------------------------------------------------------------------------------------------------- -static void -__kmp_stg_parse_forkjoin_frames_mode( char const * name, char const * value, void * data ) { - __kmp_stg_parse_int( name, value, 0, 3, & __kmp_forkjoin_frames_mode ); +static void __kmp_stg_parse_forkjoin_frames_mode(char const *name, + char const *value, + void *data) { + __kmp_stg_parse_int(name, value, 0, 3, &__kmp_forkjoin_frames_mode); } // __kmp_stg_parse_forkjoin_frames -static void -__kmp_stg_print_forkjoin_frames_mode( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_int( buffer, name, __kmp_forkjoin_frames_mode ); +static void __kmp_stg_print_forkjoin_frames_mode(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_int(buffer, name, __kmp_forkjoin_frames_mode); } // __kmp_stg_print_forkjoin_frames #endif /* USE_ITT_BUILD */ -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // OMP_DISPLAY_ENV -// ------------------------------------------------------------------------------------------------- #if OMP_40_ENABLED -static void -__kmp_stg_parse_omp_display_env( char const * name, char const * value, void * data ) -{ - if ( __kmp_str_match( "VERBOSE", 1, value ) ) - { - __kmp_display_env_verbose = TRUE; - } else { - __kmp_stg_parse_bool( name, value, & __kmp_display_env ); - } +static void __kmp_stg_parse_omp_display_env(char const *name, char const *value, + void *data) { + if (__kmp_str_match("VERBOSE", 1, value)) { + __kmp_display_env_verbose = TRUE; + } else { + __kmp_stg_parse_bool(name, value, &__kmp_display_env); + } } // __kmp_stg_parse_omp_display_env -static void -__kmp_stg_print_omp_display_env( kmp_str_buf_t * buffer, char const * name, void * data ) -{ - if ( __kmp_display_env_verbose ) - { - __kmp_stg_print_str( buffer, name, "VERBOSE" ); - } else { - __kmp_stg_print_bool( buffer, name, __kmp_display_env ); - } +static void __kmp_stg_print_omp_display_env(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_display_env_verbose) { + __kmp_stg_print_str(buffer, name, "VERBOSE"); + } else { + __kmp_stg_print_bool(buffer, name, __kmp_display_env); + } } // __kmp_stg_print_omp_display_env -static void -__kmp_stg_parse_omp_cancellation( char const * name, char const * value, void * data ) { - if ( TCR_4(__kmp_init_parallel) ) { - KMP_WARNING( EnvParallelWarn, name ); - return; - } // read value before first parallel only - __kmp_stg_parse_bool( name, value, & __kmp_omp_cancellation ); +static void __kmp_stg_parse_omp_cancellation(char const *name, + char const *value, void *data) { + if (TCR_4(__kmp_init_parallel)) { + KMP_WARNING(EnvParallelWarn, name); + return; + } // read value before first parallel only + __kmp_stg_parse_bool(name, value, &__kmp_omp_cancellation); } // __kmp_stg_parse_omp_cancellation -static void -__kmp_stg_print_omp_cancellation( kmp_str_buf_t * buffer, char const * name, void * data ) { - __kmp_stg_print_bool( buffer, name, __kmp_omp_cancellation ); +static void __kmp_stg_print_omp_cancellation(kmp_str_buf_t *buffer, + char const *name, void 
*data) { + __kmp_stg_print_bool(buffer, name, __kmp_omp_cancellation); } // __kmp_stg_print_omp_cancellation #endif -// ------------------------------------------------------------------------------------------------- +// ----------------------------------------------------------------------------- // Table. -// ------------------------------------------------------------------------------------------------- - static kmp_setting_t __kmp_stg_table[] = { - { "KMP_ALL_THREADS", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 }, - { "KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, NULL, 0, 0 }, - { "KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0 }, - { "KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 }, - { "KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, 0 }, // For backward compatibility + {"KMP_ALL_THREADS", __kmp_stg_parse_all_threads, + __kmp_stg_print_all_threads, NULL, 0, 0}, + {"KMP_BLOCKTIME", __kmp_stg_parse_blocktime, __kmp_stg_print_blocktime, + NULL, 0, 0}, + {"KMP_DUPLICATE_LIB_OK", __kmp_stg_parse_duplicate_lib_ok, + __kmp_stg_print_duplicate_lib_ok, NULL, 0, 0}, + {"KMP_LIBRARY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, + NULL, 0, 0}, + {"KMP_MAX_THREADS", __kmp_stg_parse_all_threads, NULL, NULL, 0, + 0}, // For backward compatibility #if KMP_USE_MONITOR - { "KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, __kmp_stg_print_monitor_stacksize, NULL, 0, 0 }, + {"KMP_MONITOR_STACKSIZE", __kmp_stg_parse_monitor_stacksize, + __kmp_stg_print_monitor_stacksize, NULL, 0, 0}, #endif - { "KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, 0, 0 }, - { "KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, __kmp_stg_print_stackoffset, NULL, 0, 0 }, - { "KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 }, - { "KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, 0, 0 }, - { "KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, 0 }, - { "KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, 0, 0 }, - - { "OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0 }, - { "OMP_NUM_THREADS", __kmp_stg_parse_num_threads, __kmp_stg_print_num_threads, NULL, 0, 0 }, - { "OMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, NULL, 0, 0 }, - - { "KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, 0 }, - { "KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, __kmp_stg_print_task_stealing, NULL, 0, 0 }, - { "OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, __kmp_stg_print_max_active_levels, NULL, 0, 0 }, + {"KMP_SETTINGS", __kmp_stg_parse_settings, __kmp_stg_print_settings, NULL, + 0, 0}, + {"KMP_STACKOFFSET", __kmp_stg_parse_stackoffset, + __kmp_stg_print_stackoffset, NULL, 0, 0}, + {"KMP_STACKSIZE", __kmp_stg_parse_stacksize, __kmp_stg_print_stacksize, + NULL, 0, 0}, + {"KMP_STACKPAD", __kmp_stg_parse_stackpad, __kmp_stg_print_stackpad, NULL, + 0, 0}, + {"KMP_VERSION", __kmp_stg_parse_version, __kmp_stg_print_version, NULL, 0, + 0}, + {"KMP_WARNINGS", __kmp_stg_parse_warnings, __kmp_stg_print_warnings, NULL, + 0, 0}, + + {"OMP_NESTED", __kmp_stg_parse_nested, __kmp_stg_print_nested, NULL, 0, 0}, + {"OMP_NUM_THREADS", __kmp_stg_parse_num_threads, + __kmp_stg_print_num_threads, NULL, 0, 0}, + {"OMP_STACKSIZE", __kmp_stg_parse_stacksize, 
__kmp_stg_print_stacksize, + NULL, 0, 0}, + + {"KMP_TASKING", __kmp_stg_parse_tasking, __kmp_stg_print_tasking, NULL, 0, + 0}, + {"KMP_TASK_STEALING_CONSTRAINT", __kmp_stg_parse_task_stealing, + __kmp_stg_print_task_stealing, NULL, 0, 0}, + {"OMP_MAX_ACTIVE_LEVELS", __kmp_stg_parse_max_active_levels, + __kmp_stg_print_max_active_levels, NULL, 0, 0}, #if OMP_40_ENABLED - { "OMP_DEFAULT_DEVICE", __kmp_stg_parse_default_device, __kmp_stg_print_default_device, NULL, 0, 0 }, + {"OMP_DEFAULT_DEVICE", __kmp_stg_parse_default_device, + __kmp_stg_print_default_device, NULL, 0, 0}, #endif #if OMP_45_ENABLED - { "OMP_MAX_TASK_PRIORITY", __kmp_stg_parse_max_task_priority, __kmp_stg_print_max_task_priority, NULL, 0, 0 }, + {"OMP_MAX_TASK_PRIORITY", __kmp_stg_parse_max_task_priority, + __kmp_stg_print_max_task_priority, NULL, 0, 0}, #endif - { "OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, __kmp_stg_print_all_threads, NULL, 0, 0 }, - { "OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, __kmp_stg_print_wait_policy, NULL, 0, 0 }, - { "KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers, __kmp_stg_print_disp_buffers, NULL, 0, 0 }, + {"OMP_THREAD_LIMIT", __kmp_stg_parse_all_threads, + __kmp_stg_print_all_threads, NULL, 0, 0}, + {"OMP_WAIT_POLICY", __kmp_stg_parse_wait_policy, + __kmp_stg_print_wait_policy, NULL, 0, 0}, + {"KMP_DISP_NUM_BUFFERS", __kmp_stg_parse_disp_buffers, + __kmp_stg_print_disp_buffers, NULL, 0, 0}, #if KMP_NESTED_HOT_TEAMS - { "KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, __kmp_stg_print_hot_teams_level, NULL, 0, 0 }, - { "KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, __kmp_stg_print_hot_teams_mode, NULL, 0, 0 }, + {"KMP_HOT_TEAMS_MAX_LEVEL", __kmp_stg_parse_hot_teams_level, + __kmp_stg_print_hot_teams_level, NULL, 0, 0}, + {"KMP_HOT_TEAMS_MODE", __kmp_stg_parse_hot_teams_mode, + __kmp_stg_print_hot_teams_mode, NULL, 0, 0}, #endif // KMP_NESTED_HOT_TEAMS #if KMP_HANDLE_SIGNALS - { "KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, __kmp_stg_print_handle_signals, NULL, 0, 0 }, + {"KMP_HANDLE_SIGNALS", __kmp_stg_parse_handle_signals, + __kmp_stg_print_handle_signals, NULL, 0, 0}, #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - { "KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, __kmp_stg_print_inherit_fp_control, NULL, 0, 0 }, + {"KMP_INHERIT_FP_CONTROL", __kmp_stg_parse_inherit_fp_control, + __kmp_stg_print_inherit_fp_control, NULL, 0, 0}, #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #ifdef KMP_GOMP_COMPAT - { "GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0 }, + {"GOMP_STACKSIZE", __kmp_stg_parse_stacksize, NULL, NULL, 0, 0}, #endif #ifdef KMP_DEBUG - { "KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, 0 }, - { "KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, 0 }, - { "KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, 0 }, - { "KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, 0 }, - { "KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, 0 }, - { "KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, 0 }, - { "KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0 }, - { "KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, __kmp_stg_print_debug_buf_atomic, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, __kmp_stg_print_debug_buf_chars, NULL, 0, 0 }, - { "KMP_DEBUG_BUF_LINES", 
__kmp_stg_parse_debug_buf_lines, __kmp_stg_print_debug_buf_lines, NULL, 0, 0 }, - { "KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0 }, - - { "KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, __kmp_stg_print_par_range_env, NULL, 0, 0 }, - { "KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, __kmp_stg_print_yield_cycle, NULL, 0, 0 }, - { "KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, 0, 0 }, - { "KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, NULL, 0, 0 }, + {"KMP_A_DEBUG", __kmp_stg_parse_a_debug, __kmp_stg_print_a_debug, NULL, 0, + 0}, + {"KMP_B_DEBUG", __kmp_stg_parse_b_debug, __kmp_stg_print_b_debug, NULL, 0, + 0}, + {"KMP_C_DEBUG", __kmp_stg_parse_c_debug, __kmp_stg_print_c_debug, NULL, 0, + 0}, + {"KMP_D_DEBUG", __kmp_stg_parse_d_debug, __kmp_stg_print_d_debug, NULL, 0, + 0}, + {"KMP_E_DEBUG", __kmp_stg_parse_e_debug, __kmp_stg_print_e_debug, NULL, 0, + 0}, + {"KMP_F_DEBUG", __kmp_stg_parse_f_debug, __kmp_stg_print_f_debug, NULL, 0, + 0}, + {"KMP_DEBUG", __kmp_stg_parse_debug, NULL, /* no print */ NULL, 0, 0}, + {"KMP_DEBUG_BUF", __kmp_stg_parse_debug_buf, __kmp_stg_print_debug_buf, + NULL, 0, 0}, + {"KMP_DEBUG_BUF_ATOMIC", __kmp_stg_parse_debug_buf_atomic, + __kmp_stg_print_debug_buf_atomic, NULL, 0, 0}, + {"KMP_DEBUG_BUF_CHARS", __kmp_stg_parse_debug_buf_chars, + __kmp_stg_print_debug_buf_chars, NULL, 0, 0}, + {"KMP_DEBUG_BUF_LINES", __kmp_stg_parse_debug_buf_lines, + __kmp_stg_print_debug_buf_lines, NULL, 0, 0}, + {"KMP_DIAG", __kmp_stg_parse_diag, __kmp_stg_print_diag, NULL, 0, 0}, + + {"KMP_PAR_RANGE", __kmp_stg_parse_par_range_env, + __kmp_stg_print_par_range_env, NULL, 0, 0}, + {"KMP_YIELD_CYCLE", __kmp_stg_parse_yield_cycle, + __kmp_stg_print_yield_cycle, NULL, 0, 0}, + {"KMP_YIELD_ON", __kmp_stg_parse_yield_on, __kmp_stg_print_yield_on, NULL, + 0, 0}, + {"KMP_YIELD_OFF", __kmp_stg_parse_yield_off, __kmp_stg_print_yield_off, + NULL, 0, 0}, #endif // KMP_DEBUG - { "KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, __kmp_stg_print_align_alloc, NULL, 0, 0 }, - - { "KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, - { "KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, + {"KMP_ALIGN_ALLOC", __kmp_stg_parse_align_alloc, + __kmp_stg_print_align_alloc, NULL, 0, 0}, + + {"KMP_PLAIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, + __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, + {"KMP_PLAIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, + __kmp_stg_print_barrier_pattern, NULL, 0, 0}, + {"KMP_FORKJOIN_BARRIER", __kmp_stg_parse_barrier_branch_bit, + __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, + {"KMP_FORKJOIN_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, + __kmp_stg_print_barrier_pattern, NULL, 0, 0}, #if KMP_FAST_REDUCTION_BARRIER - { "KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, __kmp_stg_print_barrier_branch_bit, NULL, 0, 0 }, - { "KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, __kmp_stg_print_barrier_pattern, NULL, 0, 0 }, + {"KMP_REDUCTION_BARRIER", __kmp_stg_parse_barrier_branch_bit, + __kmp_stg_print_barrier_branch_bit, NULL, 0, 0}, + {"KMP_REDUCTION_BARRIER_PATTERN", __kmp_stg_parse_barrier_pattern, + __kmp_stg_print_barrier_pattern, 
NULL, 0, 0}, #endif - { "KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, __kmp_stg_print_abort_delay, NULL, 0, 0 }, - { "KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, __kmp_stg_print_cpuinfo_file, NULL, 0, 0 }, - { "KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 }, - { "KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, __kmp_stg_print_force_reduction, NULL, 0, 0 }, - { "KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, __kmp_stg_print_storage_map, NULL, 0, 0 }, - { "KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, __kmp_stg_print_all_threadprivate, NULL, 0, 0 }, - { "KMP_FOREIGN_THREADS_THREADPRIVATE", __kmp_stg_parse_foreign_threads_threadprivate, __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0 }, + {"KMP_ABORT_DELAY", __kmp_stg_parse_abort_delay, + __kmp_stg_print_abort_delay, NULL, 0, 0}, + {"KMP_CPUINFO_FILE", __kmp_stg_parse_cpuinfo_file, + __kmp_stg_print_cpuinfo_file, NULL, 0, 0}, + {"KMP_FORCE_REDUCTION", __kmp_stg_parse_force_reduction, + __kmp_stg_print_force_reduction, NULL, 0, 0}, + {"KMP_DETERMINISTIC_REDUCTION", __kmp_stg_parse_force_reduction, + __kmp_stg_print_force_reduction, NULL, 0, 0}, + {"KMP_STORAGE_MAP", __kmp_stg_parse_storage_map, + __kmp_stg_print_storage_map, NULL, 0, 0}, + {"KMP_ALL_THREADPRIVATE", __kmp_stg_parse_all_threadprivate, + __kmp_stg_print_all_threadprivate, NULL, 0, 0}, + {"KMP_FOREIGN_THREADS_THREADPRIVATE", + __kmp_stg_parse_foreign_threads_threadprivate, + __kmp_stg_print_foreign_threads_threadprivate, NULL, 0, 0}, #if KMP_AFFINITY_SUPPORTED - { "KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, 0, 0 }, -# ifdef KMP_GOMP_COMPAT - { "GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, /* no print */ NULL, 0, 0 }, -# endif /* KMP_GOMP_COMPAT */ -# if OMP_40_ENABLED - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 }, - { "OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0 }, -# else - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, 0 }, -# endif /* OMP_40_ENABLED */ - - { "KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, __kmp_stg_print_topology_method, NULL, 0, 0 }, + {"KMP_AFFINITY", __kmp_stg_parse_affinity, __kmp_stg_print_affinity, NULL, + 0, 0}, +#ifdef KMP_GOMP_COMPAT + {"GOMP_CPU_AFFINITY", __kmp_stg_parse_gomp_cpu_affinity, NULL, + /* no print */ NULL, 0, 0}, +#endif /* KMP_GOMP_COMPAT */ +#if OMP_40_ENABLED + {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, + NULL, 0, 0}, + {"OMP_PLACES", __kmp_stg_parse_places, __kmp_stg_print_places, NULL, 0, 0}, +#else + {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, NULL, /* no print */ NULL, 0, + 0}, +#endif /* OMP_40_ENABLED */ + + {"KMP_TOPOLOGY_METHOD", __kmp_stg_parse_topology_method, + __kmp_stg_print_topology_method, NULL, 0, 0}, #else - // - // KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES. - // OMP_PROC_BIND and proc-bind-var are supported, however. - // -# if OMP_40_ENABLED - { "OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, NULL, 0, 0 }, -# endif +// KMP_AFFINITY is not supported on OS X*, nor is OMP_PLACES. +// OMP_PROC_BIND and proc-bind-var are supported, however. 
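The __kmp_stg_table rows above and below pair every recognized environment variable with a parse callback, an optional print callback, a per-entry data pointer, and set/defined flags, with an empty-name row terminating the table. The kmp_setting_t definition itself is not part of this hunk, so the field names in the sketch below are assumptions inferred from the initializers and from __kmp_stg_find()/__kmp_stg_parse() later in the file; it is a minimal illustration of the table-driven scheme, not the runtime's declaration.

#include <cstdio>
#include <cstdlib>
#include <cstring>

typedef void (*parse_fn)(const char *name, const char *value, void *data);
typedef void (*print_fn)(const char *name, void *data);

struct setting_t {   // assumed shape of kmp_setting_t
  const char *name;  // env var name; "" terminates the table
  parse_fn parse;    // converts the string value into an internal global
  print_fn print;    // NULL for parse-only rows (e.g. GOMP_STACKSIZE)
  void *data;        // optional shared state, e.g. a rivals list
  int set;           // 1 if the variable was present in the environment
  int defined;       // 1 once parse() accepted a value
};

static int g_blocktime = 200; // stand-in for an internal default

static void parse_blocktime(const char *, const char *value, void *) {
  g_blocktime = atoi(value);
}
static void print_blocktime(const char *name, void *) {
  printf("  %s=%d\n", name, g_blocktime);
}

static setting_t table[] = {
    {"KMP_BLOCKTIME", parse_blocktime, print_blocktime, NULL, 0, 0},
    {"", NULL, NULL, NULL, 0, 0}, // terminator, as in the real table
};

static setting_t *find(const char *name) {
  for (int i = 0; table[i].name[0] != '\0'; ++i)
    if (strcmp(table[i].name, name) == 0)
      return &table[i];
  return NULL;
}

int main() {
  if (setting_t *s = find("KMP_BLOCKTIME")) {
    s->parse(s->name, "0", s->data); // as __kmp_stg_parse() would do
    s->defined = 1;
    s->print(s->name, s->data);
  }
  return 0;
}

Keeping parse and print in the same row is what lets KMP_SETTINGS and OMP_DISPLAY_ENV reuse the very same table for output further down in this file.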
+#if OMP_40_ENABLED + {"OMP_PROC_BIND", __kmp_stg_parse_proc_bind, __kmp_stg_print_proc_bind, + NULL, 0, 0}, +#endif #endif // KMP_AFFINITY_SUPPORTED - { "KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0 }, - { "KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, 0, 0 }, - { "OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, NULL, 0, 0 }, - { "KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, __kmp_stg_print_atomic_mode, NULL, 0, 0 }, - { "KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, __kmp_stg_print_consistency_check, NULL, 0, 0 }, + {"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, + __kmp_stg_print_init_at_fork, NULL, 0, 0}, + {"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, + 0, 0}, + {"OMP_SCHEDULE", __kmp_stg_parse_omp_schedule, __kmp_stg_print_omp_schedule, + NULL, 0, 0}, + {"KMP_ATOMIC_MODE", __kmp_stg_parse_atomic_mode, + __kmp_stg_print_atomic_mode, NULL, 0, 0}, + {"KMP_CONSISTENCY_CHECK", __kmp_stg_parse_consistency_check, + __kmp_stg_print_consistency_check, NULL, 0, 0}, #if USE_ITT_BUILD && USE_ITT_NOTIFY - { "KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, __kmp_stg_print_itt_prepare_delay, NULL, 0, 0 }, + {"KMP_ITT_PREPARE_DELAY", __kmp_stg_parse_itt_prepare_delay, + __kmp_stg_print_itt_prepare_delay, NULL, 0, 0}, #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - { "KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, __kmp_stg_print_malloc_pool_incr, NULL, 0, 0 }, - { "KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, NULL, 0, 0 }, - { "KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, NULL, 0, 0 }, - { "KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, NULL, 0, 0 }, - { "OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, NULL, 0, 0 }, - { "KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0 }, + {"KMP_MALLOC_POOL_INCR", __kmp_stg_parse_malloc_pool_incr, + __kmp_stg_print_malloc_pool_incr, NULL, 0, 0}, + {"KMP_INIT_WAIT", __kmp_stg_parse_init_wait, __kmp_stg_print_init_wait, + NULL, 0, 0}, + {"KMP_NEXT_WAIT", __kmp_stg_parse_next_wait, __kmp_stg_print_next_wait, + NULL, 0, 0}, + {"KMP_GTID_MODE", __kmp_stg_parse_gtid_mode, __kmp_stg_print_gtid_mode, + NULL, 0, 0}, + {"OMP_DYNAMIC", __kmp_stg_parse_omp_dynamic, __kmp_stg_print_omp_dynamic, + NULL, 0, 0}, + {"KMP_DYNAMIC_MODE", __kmp_stg_parse_kmp_dynamic_mode, + __kmp_stg_print_kmp_dynamic_mode, NULL, 0, 0}, #ifdef USE_LOAD_BALANCE - { "KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval,__kmp_stg_print_ld_balance_interval,NULL, 0, 0 }, + {"KMP_LOAD_BALANCE_INTERVAL", __kmp_stg_parse_ld_balance_interval, + __kmp_stg_print_ld_balance_interval, NULL, 0, 0}, #endif - { "KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, __kmp_stg_print_lock_block, NULL, 0, 0 }, - { "KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, NULL, 0, 0 }, - { "KMP_SPIN_BACKOFF_PARAMS", __kmp_stg_parse_spin_backoff_params, __kmp_stg_print_spin_backoff_params, NULL, 0, 0 }, + {"KMP_NUM_LOCKS_IN_BLOCK", __kmp_stg_parse_lock_block, + __kmp_stg_print_lock_block, NULL, 0, 0}, + {"KMP_LOCK_KIND", __kmp_stg_parse_lock_kind, __kmp_stg_print_lock_kind, + NULL, 0, 0}, + {"KMP_SPIN_BACKOFF_PARAMS", __kmp_stg_parse_spin_backoff_params, + __kmp_stg_print_spin_backoff_params, NULL, 0, 0}, #if KMP_USE_ADAPTIVE_LOCKS - { "KMP_ADAPTIVE_LOCK_PROPS", 
__kmp_stg_parse_adaptive_lock_props,__kmp_stg_print_adaptive_lock_props, NULL, 0, 0 }, + {"KMP_ADAPTIVE_LOCK_PROPS", __kmp_stg_parse_adaptive_lock_props, + __kmp_stg_print_adaptive_lock_props, NULL, 0, 0}, #if KMP_DEBUG_ADAPTIVE_LOCKS - { "KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile,__kmp_stg_print_speculative_statsfile, NULL, 0, 0 }, + {"KMP_SPECULATIVE_STATSFILE", __kmp_stg_parse_speculative_statsfile, + __kmp_stg_print_speculative_statsfile, NULL, 0, 0}, #endif #endif // KMP_USE_ADAPTIVE_LOCKS - { "KMP_PLACE_THREADS", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, NULL, 0, 0 }, - { "KMP_HW_SUBSET", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, NULL, 0, 0 }, + {"KMP_PLACE_THREADS", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, + NULL, 0, 0}, + {"KMP_HW_SUBSET", __kmp_stg_parse_hw_subset, __kmp_stg_print_hw_subset, + NULL, 0, 0}, #if USE_ITT_BUILD - { "KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, __kmp_stg_print_forkjoin_frames, NULL, 0, 0 }, - { "KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode,__kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0 }, + {"KMP_FORKJOIN_FRAMES", __kmp_stg_parse_forkjoin_frames, + __kmp_stg_print_forkjoin_frames, NULL, 0, 0}, + {"KMP_FORKJOIN_FRAMES_MODE", __kmp_stg_parse_forkjoin_frames_mode, + __kmp_stg_print_forkjoin_frames_mode, NULL, 0, 0}, #endif -# if OMP_40_ENABLED - { "OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, __kmp_stg_print_omp_display_env, NULL, 0, 0 }, - { "OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, __kmp_stg_print_omp_cancellation, NULL, 0, 0 }, +#if OMP_40_ENABLED + {"OMP_DISPLAY_ENV", __kmp_stg_parse_omp_display_env, + __kmp_stg_print_omp_display_env, NULL, 0, 0}, + {"OMP_CANCELLATION", __kmp_stg_parse_omp_cancellation, + __kmp_stg_print_omp_cancellation, NULL, 0, 0}, #endif - { "", NULL, NULL, NULL, 0, 0 } -}; // settings + {"", NULL, NULL, NULL, 0, 0}}; // settings -static int const __kmp_stg_count = sizeof( __kmp_stg_table ) / sizeof( kmp_setting_t ); +static int const __kmp_stg_count = + sizeof(__kmp_stg_table) / sizeof(kmp_setting_t); -static inline -kmp_setting_t * -__kmp_stg_find( char const * name ) { +static inline kmp_setting_t *__kmp_stg_find(char const *name) { - int i; - if ( name != NULL ) { - for ( i = 0; i < __kmp_stg_count; ++ i ) { - if ( strcmp( __kmp_stg_table[ i ].name, name ) == 0 ) { - return & __kmp_stg_table[ i ]; - }; // if - }; // for - }; // if - return NULL; + int i; + if (name != NULL) { + for (i = 0; i < __kmp_stg_count; ++i) { + if (strcmp(__kmp_stg_table[i].name, name) == 0) { + return &__kmp_stg_table[i]; + }; // if + }; // for + }; // if + return NULL; } // __kmp_stg_find +static int __kmp_stg_cmp(void const *_a, void const *_b) { + kmp_setting_t *a = (kmp_setting_t *)_a; + kmp_setting_t *b = (kmp_setting_t *)_b; -static int -__kmp_stg_cmp( void const * _a, void const * _b ) { - kmp_setting_t * a = (kmp_setting_t *) _a; - kmp_setting_t * b = (kmp_setting_t *) _b; - - // - // Process KMP_AFFINITY last. - // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY. - // - if ( strcmp( a->name, "KMP_AFFINITY" ) == 0 ) { - if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) { - return 0; - } - return 1; - } - else if ( strcmp( b->name, "KMP_AFFINITY" ) == 0 ) { - return -1; + // Process KMP_AFFINITY last. + // It needs to come after OMP_PLACES and GOMP_CPU_AFFINITY. 
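__kmp_stg_cmp() here is an ordinary strcmp() comparator with one special case: KMP_AFFINITY must sort after everything else so that it is processed after OMP_PLACES and GOMP_CPU_AFFINITY. A self-contained sketch of the same ordering rule (the variable list is illustrative, not the full table):

#include <cstdio>
#include <cstdlib>
#include <cstring>

static int cmp(const void *_a, const void *_b) {
  const char *a = *(const char *const *)_a;
  const char *b = *(const char *const *)_b;
  if (strcmp(a, "KMP_AFFINITY") == 0)
    return strcmp(b, "KMP_AFFINITY") == 0 ? 0 : 1; // a sorts after everything
  if (strcmp(b, "KMP_AFFINITY") == 0)
    return -1;                                     // b sorts after a
  return strcmp(a, b);                             // otherwise alphabetical
}

int main() {
  const char *names[] = {"KMP_AFFINITY", "GOMP_CPU_AFFINITY", "OMP_PLACES",
                         "OMP_PROC_BIND"};
  qsort(names, 4, sizeof(names[0]), cmp);
  for (int i = 0; i < 4; ++i)
    printf("%s\n", names[i]); // KMP_AFFINITY prints last
  return 0;
}

In the runtime the qsort() call covers __kmp_stg_count - 1 elements, so the empty terminator row stays put at the end of the table.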
+ if (strcmp(a->name, "KMP_AFFINITY") == 0) { + if (strcmp(b->name, "KMP_AFFINITY") == 0) { + return 0; } - return strcmp( a->name, b->name ); + return 1; + } else if (strcmp(b->name, "KMP_AFFINITY") == 0) { + return -1; + } + return strcmp(a->name, b->name); } // __kmp_stg_cmp +static void __kmp_stg_init(void) { -static void -__kmp_stg_init( void -) { - - static int initialized = 0; + static int initialized = 0; - if ( ! initialized ) { + if (!initialized) { - // Sort table. - qsort( __kmp_stg_table, __kmp_stg_count - 1, sizeof( kmp_setting_t ), __kmp_stg_cmp ); + // Sort table. + qsort(__kmp_stg_table, __kmp_stg_count - 1, sizeof(kmp_setting_t), + __kmp_stg_cmp); - { // Initialize *_STACKSIZE data. - - kmp_setting_t * kmp_stacksize = __kmp_stg_find( "KMP_STACKSIZE" ); // 1st priority. + { // Initialize *_STACKSIZE data. + kmp_setting_t *kmp_stacksize = + __kmp_stg_find("KMP_STACKSIZE"); // 1st priority. #ifdef KMP_GOMP_COMPAT - kmp_setting_t * gomp_stacksize = __kmp_stg_find( "GOMP_STACKSIZE" ); // 2nd priority. + kmp_setting_t *gomp_stacksize = + __kmp_stg_find("GOMP_STACKSIZE"); // 2nd priority. #endif - kmp_setting_t * omp_stacksize = __kmp_stg_find( "OMP_STACKSIZE" ); // 3rd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - // !!! Compiler does not understand rivals is used and optimizes out assignments - // !!! rivals[ i ++ ] = ...; - static kmp_setting_t * volatile rivals[ 4 ]; - static kmp_stg_ss_data_t kmp_data = { 1, (kmp_setting_t **)rivals }; + kmp_setting_t *omp_stacksize = + __kmp_stg_find("OMP_STACKSIZE"); // 3rd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + // !!! Compiler does not understand rivals is used and optimizes out + // assignments + // !!! rivals[ i ++ ] = ...; + static kmp_setting_t *volatile rivals[4]; + static kmp_stg_ss_data_t kmp_data = {1, (kmp_setting_t **)rivals}; #ifdef KMP_GOMP_COMPAT - static kmp_stg_ss_data_t gomp_data = { 1024, (kmp_setting_t **)rivals }; + static kmp_stg_ss_data_t gomp_data = {1024, (kmp_setting_t **)rivals}; #endif - static kmp_stg_ss_data_t omp_data = { 1024, (kmp_setting_t **)rivals }; - int i = 0; + static kmp_stg_ss_data_t omp_data = {1024, (kmp_setting_t **)rivals}; + int i = 0; - rivals[ i ++ ] = kmp_stacksize; + rivals[i++] = kmp_stacksize; #ifdef KMP_GOMP_COMPAT - if ( gomp_stacksize != NULL ) { - rivals[ i ++ ] = gomp_stacksize; - }; // if + if (gomp_stacksize != NULL) { + rivals[i++] = gomp_stacksize; + }; // if #endif - rivals[ i ++ ] = omp_stacksize; - rivals[ i ++ ] = NULL; + rivals[i++] = omp_stacksize; + rivals[i++] = NULL; - kmp_stacksize->data = & kmp_data; + kmp_stacksize->data = &kmp_data; #ifdef KMP_GOMP_COMPAT - if ( gomp_stacksize != NULL ) { - gomp_stacksize->data = & gomp_data; - }; // if + if (gomp_stacksize != NULL) { + gomp_stacksize->data = &gomp_data; + }; // if #endif - omp_stacksize->data = & omp_data; - - } - - { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data. - - kmp_setting_t * kmp_library = __kmp_stg_find( "KMP_LIBRARY" ); // 1st priority. - kmp_setting_t * omp_wait_policy = __kmp_stg_find( "OMP_WAIT_POLICY" ); // 2nd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
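The *_STACKSIZE block above wires three spellings of the same knob -- KMP_STACKSIZE first, then GOMP_STACKSIZE when GOMP compatibility is built, then OMP_STACKSIZE -- to one NULL-terminated "rivals" array stored in each entry's data field; the leading 1/1024 values in kmp_stg_ss_data_t presumably encode the default unit for suffix-less values (bytes for KMP_STACKSIZE, kilobytes for the OMP/GOMP spellings). A reduced sketch of that wiring, with simplified stand-in types:

#include <cstdio>

struct setting_t {
  const char *name;
  int set;    // 1 if the variable appeared in the environment
  void *data; // points at the ss_data_t below
};

struct ss_data_t {
  int factor;         // multiplier for suffix-less values (1 = bytes)
  setting_t **rivals; // NULL-terminated, highest priority first
};

int main() {
  static setting_t kmp_ss = {"KMP_STACKSIZE", 0, NULL};
  static setting_t omp_ss = {"OMP_STACKSIZE", 0, NULL};

  static setting_t *rivals[3];
  static ss_data_t kmp_data = {1, rivals};    // KMP_STACKSIZE: bytes
  static ss_data_t omp_data = {1024, rivals}; // OMP_STACKSIZE: kilobytes

  int i = 0;
  rivals[i++] = &kmp_ss; // 1st priority
  rivals[i++] = &omp_ss; // lowest priority
  rivals[i++] = NULL;

  kmp_ss.data = &kmp_data;
  omp_ss.data = &omp_data;

  // Every rival shares the same list, so whichever variable is parsed can
  // check whether a higher-priority spelling was already set and back off.
  for (setting_t **r = rivals; *r != NULL; ++r)
    printf("%s (factor %d)\n", (*r)->name, ((ss_data_t *)(*r)->data)->factor);
  return 0;
}

The volatile qualifier on the real arrays exists only to work around the Intel compiler issue noted in the comment (CQ49908); it is not part of the design.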
- static kmp_setting_t * volatile rivals[ 3 ]; - static kmp_stg_wp_data_t kmp_data = { 0, (kmp_setting_t **)rivals }; - static kmp_stg_wp_data_t omp_data = { 1, (kmp_setting_t **)rivals }; - int i = 0; - - rivals[ i ++ ] = kmp_library; - if ( omp_wait_policy != NULL ) { - rivals[ i ++ ] = omp_wait_policy; - }; // if - rivals[ i ++ ] = NULL; - - kmp_library->data = & kmp_data; - if ( omp_wait_policy != NULL ) { - omp_wait_policy->data = & omp_data; - }; // if - - } - - { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data. - - kmp_setting_t * kmp_all_threads = __kmp_stg_find( "KMP_ALL_THREADS" ); // 1st priority. - kmp_setting_t * kmp_max_threads = __kmp_stg_find( "KMP_MAX_THREADS" ); // 2nd priority. - kmp_setting_t * omp_thread_limit = __kmp_stg_find( "OMP_THREAD_LIMIT" ); // 3rd priority. - - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - static kmp_setting_t * volatile rivals[ 4 ]; - int i = 0; - - rivals[ i ++ ] = kmp_all_threads; - rivals[ i ++ ] = kmp_max_threads; - if ( omp_thread_limit != NULL ) { - rivals[ i ++ ] = omp_thread_limit; - }; // if - rivals[ i ++ ] = NULL; - - kmp_all_threads->data = (void*)& rivals; - kmp_max_threads->data = (void*)& rivals; - if ( omp_thread_limit != NULL ) { - omp_thread_limit->data = (void*)& rivals; - }; // if - - } + omp_stacksize->data = &omp_data; + } + + { // Initialize KMP_LIBRARY and OMP_WAIT_POLICY data. + kmp_setting_t *kmp_library = + __kmp_stg_find("KMP_LIBRARY"); // 1st priority. + kmp_setting_t *omp_wait_policy = + __kmp_stg_find("OMP_WAIT_POLICY"); // 2nd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + static kmp_setting_t *volatile rivals[3]; + static kmp_stg_wp_data_t kmp_data = {0, (kmp_setting_t **)rivals}; + static kmp_stg_wp_data_t omp_data = {1, (kmp_setting_t **)rivals}; + int i = 0; + + rivals[i++] = kmp_library; + if (omp_wait_policy != NULL) { + rivals[i++] = omp_wait_policy; + }; // if + rivals[i++] = NULL; + + kmp_library->data = &kmp_data; + if (omp_wait_policy != NULL) { + omp_wait_policy->data = &omp_data; + }; // if + } + + { // Initialize KMP_ALL_THREADS, KMP_MAX_THREADS, and OMP_THREAD_LIMIT data. + kmp_setting_t *kmp_all_threads = + __kmp_stg_find("KMP_ALL_THREADS"); // 1st priority. + kmp_setting_t *kmp_max_threads = + __kmp_stg_find("KMP_MAX_THREADS"); // 2nd priority. + kmp_setting_t *omp_thread_limit = + __kmp_stg_find("OMP_THREAD_LIMIT"); // 3rd priority. + + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + static kmp_setting_t *volatile rivals[4]; + int i = 0; + + rivals[i++] = kmp_all_threads; + rivals[i++] = kmp_max_threads; + if (omp_thread_limit != NULL) { + rivals[i++] = omp_thread_limit; + }; // if + rivals[i++] = NULL; + + kmp_all_threads->data = (void *)&rivals; + kmp_max_threads->data = (void *)&rivals; + if (omp_thread_limit != NULL) { + omp_thread_limit->data = (void *)&rivals; + }; // if + } #if KMP_AFFINITY_SUPPORTED - { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data. - - kmp_setting_t * kmp_affinity = __kmp_stg_find( "KMP_AFFINITY" ); // 1st priority. - KMP_DEBUG_ASSERT( kmp_affinity != NULL ); + { // Initialize KMP_AFFINITY, GOMP_CPU_AFFINITY, and OMP_PROC_BIND data. + kmp_setting_t *kmp_affinity = + __kmp_stg_find("KMP_AFFINITY"); // 1st priority. + KMP_DEBUG_ASSERT(kmp_affinity != NULL); -# ifdef KMP_GOMP_COMPAT - kmp_setting_t * gomp_cpu_affinity = __kmp_stg_find( "GOMP_CPU_AFFINITY" ); // 2nd priority. 
- KMP_DEBUG_ASSERT( gomp_cpu_affinity != NULL ); -# endif +#ifdef KMP_GOMP_COMPAT + kmp_setting_t *gomp_cpu_affinity = + __kmp_stg_find("GOMP_CPU_AFFINITY"); // 2nd priority. + KMP_DEBUG_ASSERT(gomp_cpu_affinity != NULL); +#endif - kmp_setting_t * omp_proc_bind = __kmp_stg_find( "OMP_PROC_BIND" ); // 3rd priority. - KMP_DEBUG_ASSERT( omp_proc_bind != NULL ); + kmp_setting_t *omp_proc_bind = + __kmp_stg_find("OMP_PROC_BIND"); // 3rd priority. + KMP_DEBUG_ASSERT(omp_proc_bind != NULL); - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - static kmp_setting_t * volatile rivals[ 4 ]; - int i = 0; + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. + static kmp_setting_t *volatile rivals[4]; + int i = 0; - rivals[ i ++ ] = kmp_affinity; + rivals[i++] = kmp_affinity; -# ifdef KMP_GOMP_COMPAT - rivals[ i ++ ] = gomp_cpu_affinity; - gomp_cpu_affinity->data = (void*)& rivals; -# endif +#ifdef KMP_GOMP_COMPAT + rivals[i++] = gomp_cpu_affinity; + gomp_cpu_affinity->data = (void *)&rivals; +#endif - rivals[ i ++ ] = omp_proc_bind; - omp_proc_bind->data = (void*)& rivals; - rivals[ i ++ ] = NULL; + rivals[i++] = omp_proc_bind; + omp_proc_bind->data = (void *)&rivals; + rivals[i++] = NULL; -# if OMP_40_ENABLED - static kmp_setting_t * volatile places_rivals[ 4 ]; - i = 0; +#if OMP_40_ENABLED + static kmp_setting_t *volatile places_rivals[4]; + i = 0; - kmp_setting_t * omp_places = __kmp_stg_find( "OMP_PLACES" ); // 3rd priority. - KMP_DEBUG_ASSERT( omp_places != NULL ); + kmp_setting_t *omp_places = __kmp_stg_find("OMP_PLACES"); // 3rd priority. + KMP_DEBUG_ASSERT(omp_places != NULL); - places_rivals[ i ++ ] = kmp_affinity; -# ifdef KMP_GOMP_COMPAT - places_rivals[ i ++ ] = gomp_cpu_affinity; -# endif - places_rivals[ i ++ ] = omp_places; - omp_places->data = (void*)& places_rivals; - places_rivals[ i ++ ] = NULL; -# endif - } + places_rivals[i++] = kmp_affinity; +#ifdef KMP_GOMP_COMPAT + places_rivals[i++] = gomp_cpu_affinity; +#endif + places_rivals[i++] = omp_places; + omp_places->data = (void *)&places_rivals; + places_rivals[i++] = NULL; +#endif + } #else - // KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals. - // OMP_PLACES not supported yet. +// KMP_AFFINITY not supported, so OMP_PROC_BIND has no rivals. +// OMP_PLACES not supported yet. #endif // KMP_AFFINITY_SUPPORTED - { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data. - - kmp_setting_t * kmp_force_red = __kmp_stg_find( "KMP_FORCE_REDUCTION" ); // 1st priority. - kmp_setting_t * kmp_determ_red = __kmp_stg_find( "KMP_DETERMINISTIC_REDUCTION" ); // 2nd priority. + { // Initialize KMP_DETERMINISTIC_REDUCTION and KMP_FORCE_REDUCTION data. + kmp_setting_t *kmp_force_red = + __kmp_stg_find("KMP_FORCE_REDUCTION"); // 1st priority. + kmp_setting_t *kmp_determ_red = + __kmp_stg_find("KMP_DETERMINISTIC_REDUCTION"); // 2nd priority. - // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. - static kmp_setting_t * volatile rivals[ 3 ]; - static kmp_stg_fr_data_t force_data = { 1, (kmp_setting_t **)rivals }; - static kmp_stg_fr_data_t determ_data = { 0, (kmp_setting_t **)rivals }; - int i = 0; + // !!! volatile keyword is Intel (R) C Compiler bug CQ49908 workaround. 
+ static kmp_setting_t *volatile rivals[3]; + static kmp_stg_fr_data_t force_data = {1, (kmp_setting_t **)rivals}; + static kmp_stg_fr_data_t determ_data = {0, (kmp_setting_t **)rivals}; + int i = 0; - rivals[ i ++ ] = kmp_force_red; - if ( kmp_determ_red != NULL ) { - rivals[ i ++ ] = kmp_determ_red; - }; // if - rivals[ i ++ ] = NULL; - - kmp_force_red->data = & force_data; - if ( kmp_determ_red != NULL ) { - kmp_determ_red->data = & determ_data; - }; // if - } + rivals[i++] = kmp_force_red; + if (kmp_determ_red != NULL) { + rivals[i++] = kmp_determ_red; + }; // if + rivals[i++] = NULL; - initialized = 1; + kmp_force_red->data = &force_data; + if (kmp_determ_red != NULL) { + kmp_determ_red->data = &determ_data; + }; // if + } - }; // if + initialized = 1; + }; // if - // Reset flags. - int i; - for ( i = 0; i < __kmp_stg_count; ++ i ) { - __kmp_stg_table[ i ].set = 0; - }; // for + // Reset flags. + int i; + for (i = 0; i < __kmp_stg_count; ++i) { + __kmp_stg_table[i].set = 0; + }; // for } // __kmp_stg_init - -static void -__kmp_stg_parse( - char const * name, - char const * value -) { - - // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, really nameless, they are - // presented in environment block as "=C:=C\\\x00=D:=D:\\\x00...", so let us skip them. - if ( name[ 0 ] == 0 ) { - return; - }; // if - - if ( value != NULL ) { - kmp_setting_t * setting = __kmp_stg_find( name ); - if ( setting != NULL ) { - setting->parse( name, value, setting->data ); - setting->defined = 1; - }; // if +static void __kmp_stg_parse(char const *name, char const *value) { + // On Windows* OS there are some nameless variables like "C:=C:\" (yeah, + // really nameless, they are presented in environment block as + // "=C:=C\\\x00=D:=D:\\\x00...", so let us skip them. + if (name[0] == 0) { + return; + }; // if + + if (value != NULL) { + kmp_setting_t *setting = __kmp_stg_find(name); + if (setting != NULL) { + setting->parse(name, value, setting->data); + setting->defined = 1; }; // if + }; // if } // __kmp_stg_parse +static int __kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. + char const *name, // Name of variable. + char const *value, // Value of the variable. + kmp_setting_t **rivals // List of rival settings (must include current one). + ) { -static int -__kmp_stg_check_rivals( // 0 -- Ok, 1 -- errors found. - char const * name, // Name of variable. - char const * value, // Value of the variable. - kmp_setting_t * * rivals // List of rival settings (the list must include current one). -) { - - if ( rivals == NULL ) { - return 0; - } + if (rivals == NULL) { + return 0; + } - // Loop thru higher priority settings (listed before current). - int i = 0; - for ( ; strcmp( rivals[ i ]->name, name ) != 0; i++ ) { - KMP_DEBUG_ASSERT( rivals[ i ] != NULL ); + // Loop thru higher priority settings (listed before current). + int i = 0; + for (; strcmp(rivals[i]->name, name) != 0; i++) { + KMP_DEBUG_ASSERT(rivals[i] != NULL); #if KMP_AFFINITY_SUPPORTED - if ( rivals[ i ] == __kmp_affinity_notype ) { - // - // If KMP_AFFINITY is specified without a type name, - // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY. - // - continue; - } + if (rivals[i] == __kmp_affinity_notype) { + // If KMP_AFFINITY is specified without a type name, + // it does not rival OMP_PROC_BIND or GOMP_CPU_AFFINITY. 
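The rivals check defined in this part of the hunk (__kmp_stg_check_rivals) scans only the entries listed before the current variable, i.e. the higher-priority spellings, and reports StgIgnored when one of them was already set; the lone exception is a KMP_AFFINITY value that names no affinity type (tracked through __kmp_affinity_notype), which is allowed to coexist with OMP_PROC_BIND and GOMP_CPU_AFFINITY. A standalone sketch of the core loop, with simplified types:

#include <cstdio>
#include <cstring>

struct setting_t {
  const char *name;
  int set;
};

// Returns 1 when 'name' should be ignored, 0 when it may be applied.
// Like the real code, this assumes 'name' is present in the rivals list.
static int check_rivals(const char *name, setting_t **rivals) {
  if (rivals == NULL)
    return 0;
  for (int i = 0; strcmp(rivals[i]->name, name) != 0; ++i) {
    if (rivals[i]->set) {
      printf("warning: %s ignored, %s already set\n", name, rivals[i]->name);
      return 1;
    }
  }
  return 0; // no higher-priority rival was set
}

int main() {
  setting_t kmp = {"KMP_STACKSIZE", 1}; // pretend KMP_STACKSIZE was set
  setting_t omp = {"OMP_STACKSIZE", 1};
  setting_t *rivals[] = {&kmp, &omp, NULL};

  // OMP_STACKSIZE is listed after KMP_STACKSIZE, so it loses:
  if (!check_rivals("OMP_STACKSIZE", rivals))
    printf("applying OMP_STACKSIZE\n");
  return 0;
}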
+ continue; + } #endif - if ( rivals[ i ]->set ) { - KMP_WARNING( StgIgnored, name, rivals[ i ]->name ); - return 1; - }; // if - }; // while + if (rivals[i]->set) { + KMP_WARNING(StgIgnored, name, rivals[i]->name); + return 1; + }; // if + }; // while - ++ i; // Skip current setting. - return 0; + ++i; // Skip current setting. + return 0; }; // __kmp_stg_check_rivals - -static int -__kmp_env_toPrint( char const * name, int flag ) { - int rc = 0; - kmp_setting_t * setting = __kmp_stg_find( name ); - if ( setting != NULL ) { - rc = setting->defined; - if ( flag >= 0 ) { - setting->defined = flag; - }; // if +static int __kmp_env_toPrint(char const *name, int flag) { + int rc = 0; + kmp_setting_t *setting = __kmp_stg_find(name); + if (setting != NULL) { + rc = setting->defined; + if (flag >= 0) { + setting->defined = flag; }; // if - return rc; + }; // if + return rc; } +static void __kmp_aux_env_initialize(kmp_env_blk_t *block) { -static void -__kmp_aux_env_initialize( kmp_env_blk_t* block ) { + char const *value; - char const * value; - - /* OMP_NUM_THREADS */ - value = __kmp_env_blk_var( block, "OMP_NUM_THREADS" ); - if ( value ) { - ompc_set_num_threads( __kmp_dflt_team_nth ); - } + /* OMP_NUM_THREADS */ + value = __kmp_env_blk_var(block, "OMP_NUM_THREADS"); + if (value) { + ompc_set_num_threads(__kmp_dflt_team_nth); + } - /* KMP_BLOCKTIME */ - value = __kmp_env_blk_var( block, "KMP_BLOCKTIME" ); - if ( value ) { - kmpc_set_blocktime( __kmp_dflt_blocktime ); - } + /* KMP_BLOCKTIME */ + value = __kmp_env_blk_var(block, "KMP_BLOCKTIME"); + if (value) { + kmpc_set_blocktime(__kmp_dflt_blocktime); + } - /* OMP_NESTED */ - value = __kmp_env_blk_var( block, "OMP_NESTED" ); - if ( value ) { - ompc_set_nested( __kmp_dflt_nested ); - } + /* OMP_NESTED */ + value = __kmp_env_blk_var(block, "OMP_NESTED"); + if (value) { + ompc_set_nested(__kmp_dflt_nested); + } - /* OMP_DYNAMIC */ - value = __kmp_env_blk_var( block, "OMP_DYNAMIC" ); - if ( value ) { - ompc_set_dynamic( __kmp_global.g.g_dynamic ); - } + /* OMP_DYNAMIC */ + value = __kmp_env_blk_var(block, "OMP_DYNAMIC"); + if (value) { + ompc_set_dynamic(__kmp_global.g.g_dynamic); + } } -void -__kmp_env_initialize( char const * string ) { +void __kmp_env_initialize(char const *string) { - kmp_env_blk_t block; - int i; + kmp_env_blk_t block; + int i; - __kmp_stg_init(); + __kmp_stg_init(); - // Hack!!! - if ( string == NULL ) { - // __kmp_max_nth = __kmp_sys_max_nth; - __kmp_threads_capacity = __kmp_initial_threads_capacity( __kmp_dflt_team_nth_ub ); - }; // if - __kmp_env_blk_init( & block, string ); + // Hack!!! 
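The __kmp_env_initialize() entry point that begins in this part of the hunk is reached on two paths: with string == NULL at library startup, when the process environment is read, and with a non-NULL string when user code calls the kmp_set_defaults() extension -- that is what the later "kmp_set_defaults() was called" assertions and the __kmp_aux_env_initialize() post-processing refer to. A minimal usage example of the second path, assuming a runtime that declares kmp_set_defaults() in omp.h as the LLVM/Intel OpenMP runtimes do (compile with -fopenmp or equivalent):

#include <omp.h>
#include <cstdio>

int main() {
  // Must run before the runtime is otherwise initialized, i.e. before the
  // first parallel region or other OpenMP call, so the string is parsed
  // through the same settings machinery as the environment would be.
  kmp_set_defaults("KMP_BLOCKTIME=0");

#pragma omp parallel
  {
#pragma omp single
    printf("running with %d threads, blocktime forced to 0\n",
           omp_get_num_threads());
  }
  return 0;
}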
+ if (string == NULL) { + // __kmp_max_nth = __kmp_sys_max_nth; + __kmp_threads_capacity = + __kmp_initial_threads_capacity(__kmp_dflt_team_nth_ub); + }; // if + __kmp_env_blk_init(&block, string); - // - // update the set flag on all entries that have an env var - // - for ( i = 0; i < block.count; ++ i ) { - if (( block.vars[ i ].name == NULL ) - || ( *block.vars[ i ].name == '\0')) { - continue; - } - if ( block.vars[ i ].value == NULL ) { - continue; - } - kmp_setting_t * setting = __kmp_stg_find( block.vars[ i ].name ); - if ( setting != NULL ) { - setting->set = 1; - } - }; // for i + // update the set flag on all entries that have an env var + for (i = 0; i < block.count; ++i) { + if ((block.vars[i].name == NULL) || (*block.vars[i].name == '\0')) { + continue; + } + if (block.vars[i].value == NULL) { + continue; + } + kmp_setting_t *setting = __kmp_stg_find(block.vars[i].name); + if (setting != NULL) { + setting->set = 1; + } + }; // for i - // We need to know if blocktime was set when processing OMP_WAIT_POLICY - blocktime_str = __kmp_env_blk_var( & block, "KMP_BLOCKTIME" ); + // We need to know if blocktime was set when processing OMP_WAIT_POLICY + blocktime_str = __kmp_env_blk_var(&block, "KMP_BLOCKTIME"); - // Special case. If we parse environment, not a string, process KMP_WARNINGS first. - if ( string == NULL ) { - char const * name = "KMP_WARNINGS"; - char const * value = __kmp_env_blk_var( & block, name ); - __kmp_stg_parse( name, value ); - }; // if + // Special case. If we parse environment, not a string, process KMP_WARNINGS + // first. + if (string == NULL) { + char const *name = "KMP_WARNINGS"; + char const *value = __kmp_env_blk_var(&block, name); + __kmp_stg_parse(name, value); + }; // if #if KMP_AFFINITY_SUPPORTED - // - // Special case. KMP_AFFINITY is not a rival to other affinity env vars - // if no affinity type is specified. We want to allow - // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when - // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0 - // affinity mechanism. - // - __kmp_affinity_notype = NULL; - char const *aff_str = __kmp_env_blk_var( & block, "KMP_AFFINITY" ); - if ( aff_str != NULL ) { - // - // Check if the KMP_AFFINITY type is specified in the string. - // We just search the string for "compact", "scatter", etc. - // without really parsing the string. The syntax of the - // KMP_AFFINITY env var is such that none of the affinity - // type names can appear anywhere other that the type - // specifier, even as substrings. - // - // I can't find a case-insensitive version of strstr on Windows* OS. - // Use the case-sensitive version for now. - // - -# if KMP_OS_WINDOWS -# define FIND strstr -# else -# define FIND strcasestr -# endif + // Special case. KMP_AFFINITY is not a rival to other affinity env vars + // if no affinity type is specified. We want to allow + // KMP_AFFINITY=[no],verbose/[no]warnings/etc. to be enabled when + // specifying the affinity type via GOMP_CPU_AFFINITY or the OMP 4.0 + // affinity mechanism. + __kmp_affinity_notype = NULL; + char const *aff_str = __kmp_env_blk_var(&block, "KMP_AFFINITY"); + if (aff_str != NULL) { +// Check if the KMP_AFFINITY type is specified in the string. +// We just search the string for "compact", "scatter", etc. +// without really parsing the string. The syntax of the +// KMP_AFFINITY env var is such that none of the affinity +// type names can appear anywhere other that the type +// specifier, even as substrings. 
+// +// I can't find a case-insensitive version of strstr on Windows* OS. +// Use the case-sensitive version for now. - if ( ( FIND( aff_str, "none" ) == NULL ) - && ( FIND( aff_str, "physical" ) == NULL ) - && ( FIND( aff_str, "logical" ) == NULL ) - && ( FIND( aff_str, "compact" ) == NULL ) - && ( FIND( aff_str, "scatter" ) == NULL ) - && ( FIND( aff_str, "explicit" ) == NULL ) - && ( FIND( aff_str, "balanced" ) == NULL ) - && ( FIND( aff_str, "disabled" ) == NULL ) ) { - __kmp_affinity_notype = __kmp_stg_find( "KMP_AFFINITY" ); - } - else { - // - // A new affinity type is specified. - // Reset the affinity flags to their default values, - // in case this is called from kmp_set_defaults(). - // - __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = affinity_gran_default; - __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; - } -# undef FIND +#if KMP_OS_WINDOWS +#define FIND strstr +#else +#define FIND strcasestr +#endif + + if ((FIND(aff_str, "none") == NULL) && + (FIND(aff_str, "physical") == NULL) && + (FIND(aff_str, "logical") == NULL) && + (FIND(aff_str, "compact") == NULL) && + (FIND(aff_str, "scatter") == NULL) && + (FIND(aff_str, "explicit") == NULL) && + (FIND(aff_str, "balanced") == NULL) && + (FIND(aff_str, "disabled") == NULL)) { + __kmp_affinity_notype = __kmp_stg_find("KMP_AFFINITY"); + } else { + // A new affinity type is specified. + // Reset the affinity flags to their default values, + // in case this is called from kmp_set_defaults(). + __kmp_affinity_type = affinity_default; + __kmp_affinity_gran = affinity_gran_default; + __kmp_affinity_top_method = affinity_top_method_default; + __kmp_affinity_respect_mask = affinity_respect_mask_default; + } +#undef FIND #if OMP_40_ENABLED - // - // Also reset the affinity flags if OMP_PROC_BIND is specified. - // - aff_str = __kmp_env_blk_var( & block, "OMP_PROC_BIND" ); - if ( aff_str != NULL ) { - __kmp_affinity_type = affinity_default; - __kmp_affinity_gran = affinity_gran_default; - __kmp_affinity_top_method = affinity_top_method_default; - __kmp_affinity_respect_mask = affinity_respect_mask_default; - } -#endif /* OMP_40_ENABLED */ + // Also reset the affinity flags if OMP_PROC_BIND is specified. + aff_str = __kmp_env_blk_var(&block, "OMP_PROC_BIND"); + if (aff_str != NULL) { + __kmp_affinity_type = affinity_default; + __kmp_affinity_gran = affinity_gran_default; + __kmp_affinity_top_method = affinity_top_method_default; + __kmp_affinity_respect_mask = affinity_respect_mask_default; } +#endif /* OMP_40_ENABLED */ + } #endif /* KMP_AFFINITY_SUPPORTED */ #if OMP_40_ENABLED - // - // Set up the nested proc bind type vector. - // - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - __kmp_nested_proc_bind.bind_types = (kmp_proc_bind_t *) - KMP_INTERNAL_MALLOC( sizeof(kmp_proc_bind_t) ); - if ( __kmp_nested_proc_bind.bind_types == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - } - __kmp_nested_proc_bind.size = 1; - __kmp_nested_proc_bind.used = 1; -# if KMP_AFFINITY_SUPPORTED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_default; -# else - // default proc bind is false if affinity not supported - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; -# endif - - } + // Set up the nested proc bind type vector. 
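The KMP_AFFINITY pre-scan above leans on the variable's grammar: type keywords such as compact or scatter can only appear as the type specifier, never as substrings of other tokens, so a plain strstr()/strcasestr() pass is enough to decide whether a type was named at all (Windows uses the case-sensitive strstr() because strcasestr() is unavailable there). A standalone sketch of that check:

#include <cstdio>
#include <cstring>

// Does a KMP_AFFINITY-style value name an affinity type at all?
// Uses case-sensitive strstr() for portability; the runtime prefers
// strcasestr() where available.
static bool names_affinity_type(const char *value) {
  static const char *types[] = {"none",     "physical", "logical",
                                "compact",  "scatter",  "explicit",
                                "balanced", "disabled"};
  for (size_t i = 0; i < sizeof(types) / sizeof(types[0]); ++i)
    if (strstr(value, types[i]) != NULL)
      return true;
  return false;
}

int main() {
  // Modifier-only value: no type named, so it should not rival
  // OMP_PROC_BIND or GOMP_CPU_AFFINITY.
  printf("%d\n", names_affinity_type("verbose,granularity=fine")); // 0
  // Type present: the affinity globals are reset to defaults first.
  printf("%d\n", names_affinity_type("verbose,compact"));          // 1
  return 0;
}

When no type is found, KMP_AFFINITY is treated as modifier-only and registered as __kmp_affinity_notype; when one is found, the affinity globals are reset to their defaults before re-parsing, which matters when this runs again from kmp_set_defaults().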
+ if (__kmp_nested_proc_bind.bind_types == NULL) { + __kmp_nested_proc_bind.bind_types = + (kmp_proc_bind_t *)KMP_INTERNAL_MALLOC(sizeof(kmp_proc_bind_t)); + if (__kmp_nested_proc_bind.bind_types == NULL) { + KMP_FATAL(MemoryAllocFailed); + } + __kmp_nested_proc_bind.size = 1; + __kmp_nested_proc_bind.used = 1; +#if KMP_AFFINITY_SUPPORTED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_default; +#else + // default proc bind is false if affinity not supported + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +#endif + } #endif /* OMP_40_ENABLED */ - // - // Now process all of the settings. - // - for ( i = 0; i < block.count; ++ i ) { - __kmp_stg_parse( block.vars[ i ].name, block.vars[ i ].value ); - }; // for i + // Now process all of the settings. + for (i = 0; i < block.count; ++i) { + __kmp_stg_parse(block.vars[i].name, block.vars[i].value); + }; // for i - // - // If user locks have been allocated yet, don't reset the lock vptr table. - // - if ( ! __kmp_init_user_locks ) { - if ( __kmp_user_lock_kind == lk_default ) { - __kmp_user_lock_kind = lk_queuing; - } + // If user locks have been allocated yet, don't reset the lock vptr table. + if (!__kmp_init_user_locks) { + if (__kmp_user_lock_kind == lk_default) { + __kmp_user_lock_kind = lk_queuing; + } #if KMP_USE_DYNAMIC_LOCK - __kmp_init_dynamic_user_locks(); + __kmp_init_dynamic_user_locks(); #else - __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); + __kmp_set_user_lock_vptrs(__kmp_user_lock_kind); #endif - } - else { - KMP_DEBUG_ASSERT( string != NULL); // kmp_set_defaults() was called - KMP_DEBUG_ASSERT( __kmp_user_lock_kind != lk_default ); - // Binds lock functions again to follow the transition between different - // KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long - // as we do not allow lock kind changes after making a call to any - // user lock functions (true). + } else { + KMP_DEBUG_ASSERT(string != NULL); // kmp_set_defaults() was called + KMP_DEBUG_ASSERT(__kmp_user_lock_kind != lk_default); +// Binds lock functions again to follow the transition between different +// KMP_CONSISTENCY_CHECK values. Calling this again is harmless as long +// as we do not allow lock kind changes after making a call to any +// user lock functions (true). #if KMP_USE_DYNAMIC_LOCK - __kmp_init_dynamic_user_locks(); + __kmp_init_dynamic_user_locks(); #else - __kmp_set_user_lock_vptrs( __kmp_user_lock_kind ); + __kmp_set_user_lock_vptrs(__kmp_user_lock_kind); #endif - } + } #if KMP_AFFINITY_SUPPORTED - if ( ! TCR_4(__kmp_init_middle) ) { - // - // Determine if the machine/OS is actually capable of supporting - // affinity. - // - const char *var = "KMP_AFFINITY"; - KMPAffinity::pick_api(); - if ( __kmp_affinity_type == affinity_disabled ) { - KMP_AFFINITY_DISABLE(); - } - else if ( ! KMP_AFFINITY_CAPABLE() ) { - __kmp_affinity_dispatch->determine_capable(var); - if ( ! KMP_AFFINITY_CAPABLE() ) { - if ( __kmp_affinity_verbose || ( __kmp_affinity_warnings - && ( __kmp_affinity_type != affinity_default ) - && ( __kmp_affinity_type != affinity_none ) - && ( __kmp_affinity_type != affinity_disabled ) ) ) { - KMP_WARNING( AffNotSupported, var ); - } - __kmp_affinity_type = affinity_disabled; - __kmp_affinity_respect_mask = 0; - __kmp_affinity_gran = affinity_gran_fine; - } + if (!TCR_4(__kmp_init_middle)) { + // Determine if the machine/OS is actually capable of supporting + // affinity. 
+ const char *var = "KMP_AFFINITY"; + KMPAffinity::pick_api(); + if (__kmp_affinity_type == affinity_disabled) { + KMP_AFFINITY_DISABLE(); + } else if (!KMP_AFFINITY_CAPABLE()) { + __kmp_affinity_dispatch->determine_capable(var); + if (!KMP_AFFINITY_CAPABLE()) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_disabled))) { + KMP_WARNING(AffNotSupported, var); } + __kmp_affinity_type = affinity_disabled; + __kmp_affinity_respect_mask = 0; + __kmp_affinity_gran = affinity_gran_fine; + } + } -# if OMP_40_ENABLED - if ( __kmp_affinity_type == affinity_disabled ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; +#if OMP_40_ENABLED + if (__kmp_affinity_type == affinity_disabled) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } else if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_true) { + // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. + __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread; + } +#endif /* OMP_40_ENABLED */ + + if (KMP_AFFINITY_CAPABLE()) { + +#if KMP_GROUP_AFFINITY + + // Handle the Win 64 group affinity stuff if there are multiple + // processor groups, or if the user requested it, and OMP 4.0 + // affinity is not in effect. + if (((__kmp_num_proc_groups > 1) && + (__kmp_affinity_type == affinity_default) +#if OMP_40_ENABLED + && (__kmp_nested_proc_bind.bind_types[0] == proc_bind_default)) +#endif + || (__kmp_affinity_top_method == affinity_top_method_group)) { + if (__kmp_affinity_respect_mask == affinity_respect_mask_default) { + __kmp_affinity_respect_mask = FALSE; } - else if ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_true ) { - // - // OMP_PROC_BIND=true maps to OMP_PROC_BIND=spread. - // - __kmp_nested_proc_bind.bind_types[0] = proc_bind_spread; + if (__kmp_affinity_type == affinity_default) { + __kmp_affinity_type = affinity_compact; +#if OMP_40_ENABLED + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; +#endif } -# endif /* OMP_40_ENABLED */ - - if ( KMP_AFFINITY_CAPABLE() ) { - -# if KMP_GROUP_AFFINITY - - // - // Handle the Win 64 group affinity stuff if there are multiple - // processor groups, or if the user requested it, and OMP 4.0 - // affinity is not in effect. 
- // - if ( ( ( __kmp_num_proc_groups > 1 ) - && ( __kmp_affinity_type == affinity_default ) -# if OMP_40_ENABLED - && ( __kmp_nested_proc_bind.bind_types[0] == proc_bind_default ) ) -# endif - || ( __kmp_affinity_top_method == affinity_top_method_group ) ) { - if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { - __kmp_affinity_respect_mask = FALSE; - } - if ( __kmp_affinity_type == affinity_default ) { - __kmp_affinity_type = affinity_compact; -# if OMP_40_ENABLED - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; -# endif - } - if ( __kmp_affinity_top_method == affinity_top_method_default ) { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_top_method = affinity_top_method_group; - __kmp_affinity_gran = affinity_gran_group; - } - else if ( __kmp_affinity_gran == affinity_gran_group ) { - __kmp_affinity_top_method = affinity_top_method_group; - } - else { - __kmp_affinity_top_method = affinity_top_method_all; - } - } - else if ( __kmp_affinity_top_method == affinity_top_method_group ) { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_gran = affinity_gran_group; - } - else if ( ( __kmp_affinity_gran != affinity_gran_group ) - && ( __kmp_affinity_gran != affinity_gran_fine ) - && ( __kmp_affinity_gran != affinity_gran_thread ) ) { - const char *str = NULL; - switch ( __kmp_affinity_gran ) { - case affinity_gran_core: str = "core"; break; - case affinity_gran_package: str = "package"; break; - case affinity_gran_node: str = "node"; break; - default: KMP_DEBUG_ASSERT( 0 ); - } - KMP_WARNING( AffGranTopGroup, var, str ); - __kmp_affinity_gran = affinity_gran_fine; - } - } - else { - if ( __kmp_affinity_gran == affinity_gran_default ) { - __kmp_affinity_gran = affinity_gran_core; - } - else if ( __kmp_affinity_gran == affinity_gran_group ) { - const char *str = NULL; - switch ( __kmp_affinity_type ) { - case affinity_physical: str = "physical"; break; - case affinity_logical: str = "logical"; break; - case affinity_compact: str = "compact"; break; - case affinity_scatter: str = "scatter"; break; - case affinity_explicit: str = "explicit"; break; - // No MIC on windows, so no affinity_balanced case - default: KMP_DEBUG_ASSERT( 0 ); - } - KMP_WARNING( AffGranGroupType, var, str ); - __kmp_affinity_gran = affinity_gran_core; - } - } + if (__kmp_affinity_top_method == affinity_top_method_default) { + if (__kmp_affinity_gran == affinity_gran_default) { + __kmp_affinity_top_method = affinity_top_method_group; + __kmp_affinity_gran = affinity_gran_group; + } else if (__kmp_affinity_gran == affinity_gran_group) { + __kmp_affinity_top_method = affinity_top_method_group; + } else { + __kmp_affinity_top_method = affinity_top_method_all; + } + } else if (__kmp_affinity_top_method == affinity_top_method_group) { + if (__kmp_affinity_gran == affinity_gran_default) { + __kmp_affinity_gran = affinity_gran_group; + } else if ((__kmp_affinity_gran != affinity_gran_group) && + (__kmp_affinity_gran != affinity_gran_fine) && + (__kmp_affinity_gran != affinity_gran_thread)) { + const char *str = NULL; + switch (__kmp_affinity_gran) { + case affinity_gran_core: + str = "core"; + break; + case affinity_gran_package: + str = "package"; + break; + case affinity_gran_node: + str = "node"; + break; + default: + KMP_DEBUG_ASSERT(0); } - else - -# endif /* KMP_GROUP_AFFINITY */ - - { - if ( __kmp_affinity_respect_mask == affinity_respect_mask_default ) { -# if KMP_GROUP_AFFINITY - if ( __kmp_num_proc_groups > 1 ) { - __kmp_affinity_respect_mask = 
FALSE; - } - else -# endif /* KMP_GROUP_AFFINITY */ - { - __kmp_affinity_respect_mask = TRUE; - } - } -# if OMP_40_ENABLED - if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel ) - && ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ) ) { - if ( __kmp_affinity_type == affinity_default ) { - __kmp_affinity_type = affinity_compact; - __kmp_affinity_dups = FALSE; - } - } - else -# endif /* OMP_40_ENABLED */ - if ( __kmp_affinity_type == affinity_default ) { + KMP_WARNING(AffGranTopGroup, var, str); + __kmp_affinity_gran = affinity_gran_fine; + } + } else { + if (__kmp_affinity_gran == affinity_gran_default) { + __kmp_affinity_gran = affinity_gran_core; + } else if (__kmp_affinity_gran == affinity_gran_group) { + const char *str = NULL; + switch (__kmp_affinity_type) { + case affinity_physical: + str = "physical"; + break; + case affinity_logical: + str = "logical"; + break; + case affinity_compact: + str = "compact"; + break; + case affinity_scatter: + str = "scatter"; + break; + case affinity_explicit: + str = "explicit"; + break; + // No MIC on windows, so no affinity_balanced case + default: + KMP_DEBUG_ASSERT(0); + } + KMP_WARNING(AffGranGroupType, var, str); + __kmp_affinity_gran = affinity_gran_core; + } + } + } else + +#endif /* KMP_GROUP_AFFINITY */ + + { + if (__kmp_affinity_respect_mask == affinity_respect_mask_default) { +#if KMP_GROUP_AFFINITY + if (__kmp_num_proc_groups > 1) { + __kmp_affinity_respect_mask = FALSE; + } else +#endif /* KMP_GROUP_AFFINITY */ + { + __kmp_affinity_respect_mask = TRUE; + } + } +#if OMP_40_ENABLED + if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) && + (__kmp_nested_proc_bind.bind_types[0] != proc_bind_default)) { + if (__kmp_affinity_type == affinity_default) { + __kmp_affinity_type = affinity_compact; + __kmp_affinity_dups = FALSE; + } + } else +#endif /* OMP_40_ENABLED */ + if (__kmp_affinity_type == affinity_default) { #if OMP_40_ENABLED #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; - } else + if (__kmp_mic_type != non_mic) { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_intel; + } else #endif - { - __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; - } + { + __kmp_nested_proc_bind.bind_types[0] = proc_bind_false; + } #endif /* OMP_40_ENABLED */ #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_affinity_type = affinity_scatter; - } else + if (__kmp_mic_type != non_mic) { + __kmp_affinity_type = affinity_scatter; + } else #endif - { - __kmp_affinity_type = affinity_none; - } - - } - if ( ( __kmp_affinity_gran == affinity_gran_default ) - && ( __kmp_affinity_gran_levels < 0 ) ) { + { + __kmp_affinity_type = affinity_none; + } + } + if ((__kmp_affinity_gran == affinity_gran_default) && + (__kmp_affinity_gran_levels < 0)) { #if KMP_ARCH_X86_64 && (KMP_OS_LINUX || KMP_OS_WINDOWS) - if( __kmp_mic_type != non_mic ) { - __kmp_affinity_gran = affinity_gran_fine; - } else + if (__kmp_mic_type != non_mic) { + __kmp_affinity_gran = affinity_gran_fine; + } else #endif - { - __kmp_affinity_gran = affinity_gran_core; - } - } - if ( __kmp_affinity_top_method == affinity_top_method_default ) { - __kmp_affinity_top_method = affinity_top_method_all; - } - } + { + __kmp_affinity_gran = affinity_gran_core; + } } - - K_DIAG( 1, ( "__kmp_affinity_type == %d\n", __kmp_affinity_type ) ); - K_DIAG( 1, ( "__kmp_affinity_compact == %d\n", __kmp_affinity_compact ) ); - K_DIAG( 1, ( 
"__kmp_affinity_offset == %d\n", __kmp_affinity_offset ) ); - K_DIAG( 1, ( "__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose ) ); - K_DIAG( 1, ( "__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings ) ); - K_DIAG( 1, ( "__kmp_affinity_respect_mask == %d\n", __kmp_affinity_respect_mask ) ); - K_DIAG( 1, ( "__kmp_affinity_gran == %d\n", __kmp_affinity_gran ) ); - - KMP_DEBUG_ASSERT( __kmp_affinity_type != affinity_default); -# if OMP_40_ENABLED - KMP_DEBUG_ASSERT( __kmp_nested_proc_bind.bind_types[0] != proc_bind_default ); -# endif + if (__kmp_affinity_top_method == affinity_top_method_default) { + __kmp_affinity_top_method = affinity_top_method_all; + } + } } + K_DIAG(1, ("__kmp_affinity_type == %d\n", __kmp_affinity_type)); + K_DIAG(1, ("__kmp_affinity_compact == %d\n", __kmp_affinity_compact)); + K_DIAG(1, ("__kmp_affinity_offset == %d\n", __kmp_affinity_offset)); + K_DIAG(1, ("__kmp_affinity_verbose == %d\n", __kmp_affinity_verbose)); + K_DIAG(1, ("__kmp_affinity_warnings == %d\n", __kmp_affinity_warnings)); + K_DIAG(1, ("__kmp_affinity_respect_mask == %d\n", + __kmp_affinity_respect_mask)); + K_DIAG(1, ("__kmp_affinity_gran == %d\n", __kmp_affinity_gran)); + + KMP_DEBUG_ASSERT(__kmp_affinity_type != affinity_default); +#if OMP_40_ENABLED + KMP_DEBUG_ASSERT(__kmp_nested_proc_bind.bind_types[0] != proc_bind_default); +#endif + } + #endif /* KMP_AFFINITY_SUPPORTED */ - if ( __kmp_version ) { - __kmp_print_version_1(); - }; // if + if (__kmp_version) { + __kmp_print_version_1(); + }; // if - // Post-initialization step: some env. vars need their value's further processing - if ( string != NULL) { // kmp_set_defaults() was called - __kmp_aux_env_initialize( &block ); - } + // Post-initialization step: some env. vars need their value's further + // processing + if (string != NULL) { // kmp_set_defaults() was called + __kmp_aux_env_initialize(&block); + } - __kmp_env_blk_free( & block ); + __kmp_env_blk_free(&block); - KMP_MB(); + KMP_MB(); } // __kmp_env_initialize +void __kmp_env_print() { -void -__kmp_env_print() { + kmp_env_blk_t block; + int i; + kmp_str_buf_t buffer; - kmp_env_blk_t block; - int i; - kmp_str_buf_t buffer; + __kmp_stg_init(); + __kmp_str_buf_init(&buffer); - __kmp_stg_init(); - __kmp_str_buf_init( & buffer ); + __kmp_env_blk_init(&block, NULL); + __kmp_env_blk_sort(&block); - __kmp_env_blk_init( & block, NULL ); - __kmp_env_blk_sort( & block ); - - // Print real environment values. - __kmp_str_buf_print( & buffer, "\n%s\n\n", KMP_I18N_STR( UserSettings ) ); - for ( i = 0; i < block.count; ++ i ) { - char const * name = block.vars[ i ].name; - char const * value = block.vars[ i ].value; - if ( - ( KMP_STRLEN( name ) > 4 && strncmp( name, "KMP_", 4 ) == 0 ) - || strncmp( name, "OMP_", 4 ) == 0 - #ifdef KMP_GOMP_COMPAT - || strncmp( name, "GOMP_", 5 ) == 0 - #endif // KMP_GOMP_COMPAT + // Print real environment values. + __kmp_str_buf_print(&buffer, "\n%s\n\n", KMP_I18N_STR(UserSettings)); + for (i = 0; i < block.count; ++i) { + char const *name = block.vars[i].name; + char const *value = block.vars[i].value; + if ((KMP_STRLEN(name) > 4 && strncmp(name, "KMP_", 4) == 0) || + strncmp(name, "OMP_", 4) == 0 +#ifdef KMP_GOMP_COMPAT + || strncmp(name, "GOMP_", 5) == 0 +#endif // KMP_GOMP_COMPAT ) { - __kmp_str_buf_print( & buffer, " %s=%s\n", name, value ); - }; // if - }; // for - __kmp_str_buf_print( & buffer, "\n" ); - - // Print internal (effective) settings. 
- __kmp_str_buf_print( & buffer, "%s\n\n", KMP_I18N_STR( EffectiveSettings ) ); - for ( int i = 0; i < __kmp_stg_count; ++ i ) { - if ( __kmp_stg_table[ i ].print != NULL ) { - __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); - }; // if - }; // for + __kmp_str_buf_print(&buffer, " %s=%s\n", name, value); + }; // if + }; // for + __kmp_str_buf_print(&buffer, "\n"); + + // Print internal (effective) settings. + __kmp_str_buf_print(&buffer, "%s\n\n", KMP_I18N_STR(EffectiveSettings)); + for (int i = 0; i < __kmp_stg_count; ++i) { + if (__kmp_stg_table[i].print != NULL) { + __kmp_stg_table[i].print(&buffer, __kmp_stg_table[i].name, + __kmp_stg_table[i].data); + }; // if + }; // for - __kmp_printf( "%s", buffer.str ); + __kmp_printf("%s", buffer.str); - __kmp_env_blk_free( & block ); - __kmp_str_buf_free( & buffer ); + __kmp_env_blk_free(&block); + __kmp_str_buf_free(&buffer); - __kmp_printf("\n"); + __kmp_printf("\n"); } // __kmp_env_print - #if OMP_40_ENABLED -void -__kmp_env_print_2() { +void __kmp_env_print_2() { - kmp_env_blk_t block; - kmp_str_buf_t buffer; + kmp_env_blk_t block; + kmp_str_buf_t buffer; - __kmp_env_format = 1; + __kmp_env_format = 1; - __kmp_stg_init(); - __kmp_str_buf_init( & buffer ); + __kmp_stg_init(); + __kmp_str_buf_init(&buffer); - __kmp_env_blk_init( & block, NULL ); - __kmp_env_blk_sort( & block ); + __kmp_env_blk_init(&block, NULL); + __kmp_env_blk_sort(&block); - __kmp_str_buf_print( & buffer, "\n%s\n", KMP_I18N_STR( DisplayEnvBegin ) ); - __kmp_str_buf_print( & buffer, " _OPENMP='%d'\n", __kmp_openmp_version ); + __kmp_str_buf_print(&buffer, "\n%s\n", KMP_I18N_STR(DisplayEnvBegin)); + __kmp_str_buf_print(&buffer, " _OPENMP='%d'\n", __kmp_openmp_version); - for ( int i = 0; i < __kmp_stg_count; ++ i ) { - if ( __kmp_stg_table[ i ].print != NULL && - ( ( __kmp_display_env && strncmp( __kmp_stg_table[ i ].name, "OMP_", 4 ) == 0 ) || __kmp_display_env_verbose ) ) { - __kmp_stg_table[ i ].print( & buffer, __kmp_stg_table[ i ].name, __kmp_stg_table[ i ].data ); - }; // if - }; // for + for (int i = 0; i < __kmp_stg_count; ++i) { + if (__kmp_stg_table[i].print != NULL && + ((__kmp_display_env && + strncmp(__kmp_stg_table[i].name, "OMP_", 4) == 0) || + __kmp_display_env_verbose)) { + __kmp_stg_table[i].print(&buffer, __kmp_stg_table[i].name, + __kmp_stg_table[i].data); + }; // if + }; // for - __kmp_str_buf_print( & buffer, "%s\n", KMP_I18N_STR( DisplayEnvEnd ) ); - __kmp_str_buf_print( & buffer, "\n" ); + __kmp_str_buf_print(&buffer, "%s\n", KMP_I18N_STR(DisplayEnvEnd)); + __kmp_str_buf_print(&buffer, "\n"); - __kmp_printf( "%s", buffer.str ); + __kmp_printf("%s", buffer.str); - __kmp_env_blk_free( & block ); - __kmp_str_buf_free( & buffer ); + __kmp_env_blk_free(&block); + __kmp_str_buf_free(&buffer); - __kmp_printf("\n"); + __kmp_printf("\n"); } // __kmp_env_print_2 #endif // OMP_40_ENABLED // end of file - diff --git a/openmp/runtime/src/kmp_settings.h b/openmp/runtime/src/kmp_settings.h index 7232e61..470c636 100644 --- a/openmp/runtime/src/kmp_settings.h +++ b/openmp/runtime/src/kmp_settings.h @@ -16,35 +16,52 @@ #ifndef KMP_SETTINGS_H #define KMP_SETTINGS_H -void __kmp_reset_global_vars( void ); -void __kmp_env_initialize( char const * ); +void __kmp_reset_global_vars(void); +void __kmp_env_initialize(char const *); void __kmp_env_print(); #if OMP_40_ENABLED void __kmp_env_print_2(); #endif // OMP_40_ENABLED -int __kmp_initial_threads_capacity( int req_nproc ); +int __kmp_initial_threads_capacity(int req_nproc); void 
__kmp_init_dflt_team_nth(); -int __kmp_convert_to_milliseconds( char const * ); -int __kmp_default_tp_capacity( int, int, int); +int __kmp_convert_to_milliseconds(char const *); +int __kmp_default_tp_capacity(int, int, int); #if KMP_MIC -#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Device), name ) -#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Device), x ) -#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ? "TRUE" : "FALSE" ); -#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, value ) -#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Device), name, value ); -#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, value ) +#define KMP_STR_BUF_PRINT_NAME \ + __kmp_str_buf_print(buffer, " %s %s", KMP_I18N_STR(Device), name) +#define KMP_STR_BUF_PRINT_NAME_EX(x) \ + __kmp_str_buf_print(buffer, " %s %s='", KMP_I18N_STR(Device), x) +#define KMP_STR_BUF_PRINT_BOOL \ + __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, \ + value ? "TRUE" : "FALSE"); +#define KMP_STR_BUF_PRINT_INT \ + __kmp_str_buf_print(buffer, " %s %s='%d'\n", KMP_I18N_STR(Device), name, \ + value) +#define KMP_STR_BUF_PRINT_UINT64 \ + __kmp_str_buf_print(buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", \ + KMP_I18N_STR(Device), name, value); +#define KMP_STR_BUF_PRINT_STR \ + __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Device), name, \ + value) #else -#define KMP_STR_BUF_PRINT_NAME __kmp_str_buf_print( buffer, " %s %s", KMP_I18N_STR(Host), name ) -#define KMP_STR_BUF_PRINT_NAME_EX(x) __kmp_str_buf_print( buffer, " %s %s='", KMP_I18N_STR(Host), x ) -#define KMP_STR_BUF_PRINT_BOOL __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ? "TRUE" : "FALSE" ); -#define KMP_STR_BUF_PRINT_INT __kmp_str_buf_print( buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value ) -#define KMP_STR_BUF_PRINT_UINT64 __kmp_str_buf_print( buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", KMP_I18N_STR(Host), name, value ); -#define KMP_STR_BUF_PRINT_STR __kmp_str_buf_print( buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value ) +#define KMP_STR_BUF_PRINT_NAME \ + __kmp_str_buf_print(buffer, " %s %s", KMP_I18N_STR(Host), name) +#define KMP_STR_BUF_PRINT_NAME_EX(x) \ + __kmp_str_buf_print(buffer, " %s %s='", KMP_I18N_STR(Host), x) +#define KMP_STR_BUF_PRINT_BOOL \ + __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, \ + value ? 
"TRUE" : "FALSE"); +#define KMP_STR_BUF_PRINT_INT \ + __kmp_str_buf_print(buffer, " %s %s='%d'\n", KMP_I18N_STR(Host), name, value) +#define KMP_STR_BUF_PRINT_UINT64 \ + __kmp_str_buf_print(buffer, " %s %s='%" KMP_UINT64_SPEC "'\n", \ + KMP_I18N_STR(Host), name, value); +#define KMP_STR_BUF_PRINT_STR \ + __kmp_str_buf_print(buffer, " %s %s='%s'\n", KMP_I18N_STR(Host), name, value) #endif #endif // KMP_SETTINGS_H // end of file // - diff --git a/openmp/runtime/src/kmp_stats.cpp b/openmp/runtime/src/kmp_stats.cpp index 3ae25d5..aeea4de 100644 --- a/openmp/runtime/src/kmp_stats.cpp +++ b/openmp/runtime/src/kmp_stats.cpp @@ -12,196 +12,186 @@ // //===----------------------------------------------------------------------===// + #include "kmp.h" -#include "kmp_str.h" #include "kmp_lock.h" #include "kmp_stats.h" +#include "kmp_str.h" #include -#include -#include -#include // for atexit #include +#include +#include +#include // for atexit #define STRINGIZE2(x) #x #define STRINGIZE(x) STRINGIZE2(x) -#define expandName(name,flags,ignore) {STRINGIZE(name),flags}, +#define expandName(name, flags, ignore) {STRINGIZE(name), flags}, statInfo timeStat::timerInfo[] = { - KMP_FOREACH_TIMER(expandName,0) - {"TIMER_LAST", 0} -}; + KMP_FOREACH_TIMER(expandName, 0){"TIMER_LAST", 0}}; const statInfo counter::counterInfo[] = { - KMP_FOREACH_COUNTER(expandName,0) - {"COUNTER_LAST", 0} -}; + KMP_FOREACH_COUNTER(expandName, 0){"COUNTER_LAST", 0}}; #undef expandName -#define expandName(ignore1,ignore2,ignore3) {0.0,0.0,0.0}, +#define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0}, kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = { - KMP_FOREACH_TIMER(expandName,0) - {0.0,0.0,0.0} -}; + KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}}; #undef expandName -const kmp_stats_output_module::rgb_color kmp_stats_output_module::globalColorArray[] = { - {1.0, 0.0, 0.0}, // red - {1.0, 0.6, 0.0}, // orange - {1.0, 1.0, 0.0}, // yellow - {0.0, 1.0, 0.0}, // green - {0.0, 0.0, 1.0}, // blue - {0.6, 0.2, 0.8}, // purple - {1.0, 0.0, 1.0}, // magenta - {0.0, 0.4, 0.2}, // dark green - {1.0, 1.0, 0.6}, // light yellow - {0.6, 0.4, 0.6}, // dirty purple - {0.0, 1.0, 1.0}, // cyan - {1.0, 0.4, 0.8}, // pink - {0.5, 0.5, 0.5}, // grey - {0.8, 0.7, 0.5}, // brown - {0.6, 0.6, 1.0}, // light blue - {1.0, 0.7, 0.5}, // peach - {0.8, 0.5, 1.0}, // lavender - {0.6, 0.0, 0.0}, // dark red - {0.7, 0.6, 0.0}, // gold - {0.0, 0.0, 0.0} // black +const kmp_stats_output_module::rgb_color + kmp_stats_output_module::globalColorArray[] = { + {1.0, 0.0, 0.0}, // red + {1.0, 0.6, 0.0}, // orange + {1.0, 1.0, 0.0}, // yellow + {0.0, 1.0, 0.0}, // green + {0.0, 0.0, 1.0}, // blue + {0.6, 0.2, 0.8}, // purple + {1.0, 0.0, 1.0}, // magenta + {0.0, 0.4, 0.2}, // dark green + {1.0, 1.0, 0.6}, // light yellow + {0.6, 0.4, 0.6}, // dirty purple + {0.0, 1.0, 1.0}, // cyan + {1.0, 0.4, 0.8}, // pink + {0.5, 0.5, 0.5}, // grey + {0.8, 0.7, 0.5}, // brown + {0.6, 0.6, 1.0}, // light blue + {1.0, 0.7, 0.5}, // peach + {0.8, 0.5, 1.0}, // lavender + {0.6, 0.0, 0.0}, // dark red + {0.7, 0.6, 0.0}, // gold + {0.0, 0.0, 0.0} // black }; // Ensure that the atexit handler only runs once. 
static uint32_t statsPrinted = 0; // output interface -static kmp_stats_output_module* __kmp_stats_global_output = NULL; +static kmp_stats_output_module *__kmp_stats_global_output = NULL; -/* ****************************************************** */ /* ************* statistic member functions ************* */ -void statistic::addSample(double sample) -{ - double delta = sample - meanVal; +void statistic::addSample(double sample) { + double delta = sample - meanVal; - sampleCount = sampleCount + 1; - meanVal = meanVal + delta/sampleCount; - m2 = m2 + delta*(sample - meanVal); + sampleCount = sampleCount + 1; + meanVal = meanVal + delta / sampleCount; + m2 = m2 + delta * (sample - meanVal); - minVal = std::min(minVal, sample); - maxVal = std::max(maxVal, sample); + minVal = std::min(minVal, sample); + maxVal = std::max(maxVal, sample); } -statistic & statistic::operator+= (const statistic & other) -{ - if (sampleCount == 0) - { - *this = other; - return *this; - } - - uint64_t newSampleCount = sampleCount + other.sampleCount; - double dnsc = double(newSampleCount); - double dsc = double(sampleCount); - double dscBydnsc = dsc/dnsc; - double dosc = double(other.sampleCount); - double delta = other.meanVal - meanVal; - - // Try to order these calculations to avoid overflows. - // If this were Fortran, then the compiler would not be able to re-order over brackets. - // In C++ it may be legal to do that (we certainly hope it doesn't, and CC+ Programming Language 2nd edition - // suggests it shouldn't, since it says that exploitation of associativity can only be made if the operation - // really is associative (which floating addition isn't...)). - meanVal = meanVal*dscBydnsc + other.meanVal*(1-dscBydnsc); - m2 = m2 + other.m2 + dscBydnsc*dosc*delta*delta; - minVal = std::min (minVal, other.minVal); - maxVal = std::max (maxVal, other.maxVal); - sampleCount = newSampleCount; - - +statistic &statistic::operator+=(const statistic &other) { + if (sampleCount == 0) { + *this = other; return *this; + } + + uint64_t newSampleCount = sampleCount + other.sampleCount; + double dnsc = double(newSampleCount); + double dsc = double(sampleCount); + double dscBydnsc = dsc / dnsc; + double dosc = double(other.sampleCount); + double delta = other.meanVal - meanVal; + + // Try to order these calculations to avoid overflows. If this were Fortran, + // then the compiler would not be able to re-order over brackets. In C++ it + // may be legal to do that (we certainly hope it doesn't, and CC+ Programming + // Language 2nd edition suggests it shouldn't, since it says that exploitation + // of associativity can only be made if the operation really is associative + // (which floating addition isn't...)). 
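/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): the running-
   statistics update and merge that class statistic implements in this hunk,
   written out so the weighting described in the comment above is easy to
   check.  The struct and function names are hypothetical.
   ------------------------------------------------------------------------- */
#include <cstdint>

struct RunningStat { // same state as class statistic: count, mean, M2
  uint64_t n = 0;
  double mean = 0.0;
  double m2 = 0.0; // sum of squared deviations from the current mean
};

// Welford-style single-sample update, as in statistic::addSample().
static void add_sample(RunningStat &s, double x) {
  double delta = x - s.mean;
  s.n += 1;
  s.mean += delta / s.n;
  s.m2 += delta * (x - s.mean);
}

// Pairwise merge of two partial accumulators (Chan et al.).  The new mean is
// the convex combination (a.n/total)*a.mean + (b.n/total)*b.mean, the same
// ordering operator+= uses to limit overflow and rounding error.
static RunningStat merge(const RunningStat &a, const RunningStat &b) {
  if (a.n == 0)
    return b;
  if (b.n == 0)
    return a;
  RunningStat r;
  r.n = a.n + b.n;
  double wa = double(a.n) / double(r.n);
  double delta = b.mean - a.mean;
  r.mean = a.mean * wa + b.mean * (1.0 - wa);
  r.m2 = a.m2 + b.m2 + wa * double(b.n) * delta * delta;
  return r;
}
// Check: merging samples {1,2} with {3,4,5} gives n=5, mean=3, m2=10,
// identical to feeding all five samples through add_sample() in one pass.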
+ meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc); + m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta; + minVal = std::min(minVal, other.minVal); + maxVal = std::max(maxVal, other.maxVal); + sampleCount = newSampleCount; + + return *this; +} + +void statistic::scale(double factor) { + minVal = minVal * factor; + maxVal = maxVal * factor; + meanVal = meanVal * factor; + m2 = m2 * factor * factor; + return; +} + +std::string statistic::format(char unit, bool total) const { + std::string result = formatSI(sampleCount, 9, ' '); + + if (sampleCount == 0) { + result = result + std::string(", ") + formatSI(0.0, 9, unit); + result = result + std::string(", ") + formatSI(0.0, 9, unit); + result = result + std::string(", ") + formatSI(0.0, 9, unit); + if (total) + result = result + std::string(", ") + formatSI(0.0, 9, unit); + result = result + std::string(", ") + formatSI(0.0, 9, unit); + } else { + result = result + std::string(", ") + formatSI(minVal, 9, unit); + result = result + std::string(", ") + formatSI(meanVal, 9, unit); + result = result + std::string(", ") + formatSI(maxVal, 9, unit); + if (total) + result = + result + std::string(", ") + formatSI(meanVal * sampleCount, 9, unit); + result = result + std::string(", ") + formatSI(getSD(), 9, unit); + } + return result; } -void statistic::scale(double factor) -{ - minVal = minVal*factor; - maxVal = maxVal*factor; - meanVal= meanVal*factor; - m2 = m2*factor*factor; - return; -} - -std::string statistic::format(char unit, bool total) const -{ - std::string result = formatSI(sampleCount,9,' '); - - if (sampleCount == 0) - { - result = result + std::string(", ") + formatSI(0.0, 9, unit); - result = result + std::string(", ") + formatSI(0.0, 9, unit); - result = result + std::string(", ") + formatSI(0.0, 9, unit); - if (total) - result = result + std::string(", ") + formatSI(0.0, 9, unit); - result = result + std::string(", ") + formatSI(0.0, 9, unit); - } - else - { - result = result + std::string(", ") + formatSI(minVal, 9, unit); - result = result + std::string(", ") + formatSI(meanVal, 9, unit); - result = result + std::string(", ") + formatSI(maxVal, 9, unit); - if (total) - result = result + std::string(", ") + formatSI(meanVal*sampleCount, 9, unit); - result = result + std::string(", ") + formatSI(getSD(), 9, unit); - } - return result; -} - -/* ********************************************************** */ /* ************* explicitTimer member functions ************* */ void explicitTimer::start(timer_e timerEnumValue) { - startTime = tsc_tick_count::now(); - totalPauseTime = 0; - if(timeStat::logEvent(timerEnumValue)) { - __kmp_stats_thread_ptr->incrementNestValue(); - } - return; + startTime = tsc_tick_count::now(); + totalPauseTime = 0; + if (timeStat::logEvent(timerEnumValue)) { + __kmp_stats_thread_ptr->incrementNestValue(); + } + return; } -void explicitTimer::stop(timer_e timerEnumValue, kmp_stats_list* stats_ptr /* = nullptr */) { - if (startTime.getValue() == 0) - return; +void explicitTimer::stop(timer_e timerEnumValue, + kmp_stats_list *stats_ptr /* = nullptr */) { + if (startTime.getValue() == 0) + return; - tsc_tick_count finishTime = tsc_tick_count::now(); + tsc_tick_count finishTime = tsc_tick_count::now(); - //stat->addSample ((tsc_tick_count::now() - startTime).ticks()); - stat->addSample(((finishTime - startTime) - totalPauseTime).ticks()); + // stat->addSample ((tsc_tick_count::now() - startTime).ticks()); + stat->addSample(((finishTime - startTime) - totalPauseTime).ticks()); - 
if(timeStat::logEvent(timerEnumValue)) { - if(!stats_ptr) - stats_ptr = __kmp_stats_thread_ptr; - stats_ptr->push_event(startTime.getValue() - __kmp_stats_start_time.getValue(), finishTime.getValue() - __kmp_stats_start_time.getValue(), __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); - stats_ptr->decrementNestValue(); - } + if (timeStat::logEvent(timerEnumValue)) { + if (!stats_ptr) + stats_ptr = __kmp_stats_thread_ptr; + stats_ptr->push_event( + startTime.getValue() - __kmp_stats_start_time.getValue(), + finishTime.getValue() - __kmp_stats_start_time.getValue(), + __kmp_stats_thread_ptr->getNestValue(), timerEnumValue); + stats_ptr->decrementNestValue(); + } - /* We accept the risk that we drop a sample because it really did start at t==0. */ - startTime = 0; - return; + /* We accept the risk that we drop a sample because it really did start at + t==0. */ + startTime = 0; + return; } -/* ************************************************************** */ /* ************* partitionedTimers member functions ************* */ -partitionedTimers::partitionedTimers() { - timer_stack.reserve(8); -} +partitionedTimers::partitionedTimers() { timer_stack.reserve(8); } // add a timer to this collection of partitioned timers. -void partitionedTimers::add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer) { - KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST+1); - timers[timer_index] = timer_pointer; +void partitionedTimers::add_timer(explicit_timer_e timer_index, + explicitTimer *timer_pointer) { + KMP_DEBUG_ASSERT((int)timer_index < (int)EXPLICIT_TIMER_LAST + 1); + timers[timer_index] = timer_pointer; } // initialize the paritioned timers to an initial timer void partitionedTimers::init(timerPair init_timer_pair) { - KMP_DEBUG_ASSERT(this->timer_stack.size() == 0); - timer_stack.push_back(init_timer_pair); - timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer()); + KMP_DEBUG_ASSERT(this->timer_stack.size() == 0); + timer_stack.push_back(init_timer_pair); + timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer()); } // stop/save the current timer, and start the new timer (timer_pair) @@ -209,33 +199,33 @@ void partitionedTimers::init(timerPair init_timer_pair) { // the one you are trying to push, then it only manipulates the stack, // and it won't stop/start the currently running timer. 
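/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): the stack
   discipline described in the comment above and implemented by
   partitionedTimers::push()/pop() below.  Only the timer on top of the stack
   is running; pushing pauses the current timer and starts the new one unless
   the same timer is pushed again, in which case only the stack changes.  The
   toy types and the std::chrono clock are hypothetical simplifications.
   ------------------------------------------------------------------------- */
#include <chrono>
#include <vector>

struct ToyTimer {
  std::chrono::steady_clock::duration total{0};
  std::chrono::steady_clock::time_point started{};
  void start() { started = std::chrono::steady_clock::now(); }
  void pause() { total += std::chrono::steady_clock::now() - started; }
  void resume() { start(); }
  void stop() { pause(); }
};

class ToyPartitionedTimers {
  std::vector<ToyTimer *> stack; // top of stack == currently running timer

public:
  void init(ToyTimer *t) { // must be called once before push()/pop()
    stack.push_back(t);
    t->start();
  }
  void push(ToyTimer *t) {
    ToyTimer *current = stack.back();
    stack.push_back(t);
    if (current != t) { // re-pushing the running timer: stack change only
      current->pause();
      t->start();
    }
  }
  void pop() { // assumes more than one entry, as the real code asserts
    ToyTimer *current = stack.back();
    stack.pop_back();
    ToyTimer *previous = stack.back();
    if (current != previous) {
      current->stop();
      previous->resume();
    }
  }
};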
void partitionedTimers::push(timerPair timer_pair) { - // get the current timer - // stop current timer - // push new timer - // start the new timer - KMP_DEBUG_ASSERT(this->timer_stack.size() > 0); - timerPair current_timer = timer_stack.back(); - timer_stack.push_back(timer_pair); - if(current_timer != timer_pair) { - timers[current_timer.get_index()]->pause(); - timers[timer_pair.get_index()]->start(timer_pair.get_timer()); - } + // get the current timer + // stop current timer + // push new timer + // start the new timer + KMP_DEBUG_ASSERT(this->timer_stack.size() > 0); + timerPair current_timer = timer_stack.back(); + timer_stack.push_back(timer_pair); + if (current_timer != timer_pair) { + timers[current_timer.get_index()]->pause(); + timers[timer_pair.get_index()]->start(timer_pair.get_timer()); + } } // stop/discard the current timer, and start the previously saved timer void partitionedTimers::pop() { - // get the current timer - // stop current timer - // pop current timer - // get the new current timer and start it back up - KMP_DEBUG_ASSERT(this->timer_stack.size() > 1); - timerPair current_timer = timer_stack.back(); - timer_stack.pop_back(); - timerPair new_timer = timer_stack.back(); - if(current_timer != new_timer) { - timers[current_timer.get_index()]->stop(current_timer.get_timer()); - timers[new_timer.get_index()]->resume(); - } + // get the current timer + // stop current timer + // pop current timer + // get the new current timer and start it back up + KMP_DEBUG_ASSERT(this->timer_stack.size() > 1); + timerPair current_timer = timer_stack.back(); + timer_stack.pop_back(); + timerPair new_timer = timer_stack.back(); + if (current_timer != new_timer) { + timers[current_timer.get_index()]->stop(current_timer.get_timer()); + timers[new_timer.get_index()]->resume(); + } } // Wind up all the currently running timers. @@ -243,481 +233,483 @@ void partitionedTimers::pop() { // After this is called, init() must be run again to initialize the // stack of timers void partitionedTimers::windup() { - while(timer_stack.size() > 1) { - this->pop(); - } - if(timer_stack.size() > 0) { - timerPair last_timer = timer_stack.back(); - timer_stack.pop_back(); - timers[last_timer.get_index()]->stop(last_timer.get_timer()); - } + while (timer_stack.size() > 1) { + this->pop(); + } + if (timer_stack.size() > 0) { + timerPair last_timer = timer_stack.back(); + timer_stack.pop_back(); + timers[last_timer.get_index()]->stop(last_timer.get_timer()); + } } -/* ******************************************************************* */ /* ************* kmp_stats_event_vector member functions ************* */ void kmp_stats_event_vector::deallocate() { - __kmp_free(events); - internal_size = 0; - allocated_size = 0; - events = NULL; + __kmp_free(events); + internal_size = 0; + allocated_size = 0; + events = NULL; } // This function is for qsort() which requires the compare function to return -// either a negative number if event1 < event2, a positive number if event1 > event2 -// or zero if event1 == event2. -// This sorts by start time (lowest to highest). -int compare_two_events(const void* event1, const void* event2) { - kmp_stats_event* ev1 = (kmp_stats_event*)event1; - kmp_stats_event* ev2 = (kmp_stats_event*)event2; +// either a negative number if event1 < event2, a positive number if event1 > +// event2 or zero if event1 == event2. This sorts by start time (lowest to +// highest). 
+int compare_two_events(const void *event1, const void *event2) { + kmp_stats_event *ev1 = (kmp_stats_event *)event1; + kmp_stats_event *ev2 = (kmp_stats_event *)event2; - if(ev1->getStart() < ev2->getStart()) return -1; - else if(ev1->getStart() > ev2->getStart()) return 1; - else return 0; + if (ev1->getStart() < ev2->getStart()) + return -1; + else if (ev1->getStart() > ev2->getStart()) + return 1; + else + return 0; } void kmp_stats_event_vector::sort() { - qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); + qsort(events, internal_size, sizeof(kmp_stats_event), compare_two_events); } -/* *********************************************************** */ /* ************* kmp_stats_list member functions ************* */ // returns a pointer to newly created stats node -kmp_stats_list* kmp_stats_list::push_back(int gtid) { - kmp_stats_list* newnode = (kmp_stats_list*)__kmp_allocate(sizeof(kmp_stats_list)); - // placement new, only requires space and pointer and initializes (so __kmp_allocate instead of C++ new[] is used) - new (newnode) kmp_stats_list(); - newnode->setGtid(gtid); - newnode->prev = this->prev; - newnode->next = this; - newnode->prev->next = newnode; - newnode->next->prev = newnode; - return newnode; +kmp_stats_list *kmp_stats_list::push_back(int gtid) { + kmp_stats_list *newnode = + (kmp_stats_list *)__kmp_allocate(sizeof(kmp_stats_list)); + // placement new, only requires space and pointer and initializes (so + // __kmp_allocate instead of C++ new[] is used) + new (newnode) kmp_stats_list(); + newnode->setGtid(gtid); + newnode->prev = this->prev; + newnode->next = this; + newnode->prev->next = newnode; + newnode->next->prev = newnode; + return newnode; } void kmp_stats_list::deallocate() { - kmp_stats_list* ptr = this->next; - kmp_stats_list* delptr = this->next; - while(ptr != this) { - delptr = ptr; - ptr=ptr->next; - // placement new means we have to explicitly call destructor. - delptr->_event_vector.deallocate(); - delptr->~kmp_stats_list(); - __kmp_free(delptr); - } + kmp_stats_list *ptr = this->next; + kmp_stats_list *delptr = this->next; + while (ptr != this) { + delptr = ptr; + ptr = ptr->next; + // placement new means we have to explicitly call destructor. 
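/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): the placement-new
   pattern the list code above relies on.  Because node memory comes from the
   runtime's own allocator rather than operator new, the destructor must be
   called explicitly before the memory is released.  Node, malloc and free
   below are hypothetical stand-ins for kmp_stats_list and
   __kmp_allocate/__kmp_free.
   ------------------------------------------------------------------------- */
#include <cstdlib>
#include <new>
#include <string>

struct Node {
  std::string label; // non-trivial member, so skipping the destructor leaks
  Node *prev = nullptr, *next = nullptr;
};

static Node *make_node() {
  void *raw = std::malloc(sizeof(Node)); // stands in for __kmp_allocate()
  if (!raw)
    return nullptr;
  return new (raw) Node(); // construct the object in pre-allocated memory
}

static void destroy_node(Node *n) {
  n->~Node();   // explicit destructor call, since delete is never used...
  std::free(n); // ...then release the raw memory (stands in for __kmp_free)
}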
+ delptr->_event_vector.deallocate(); + delptr->~kmp_stats_list(); + __kmp_free(delptr); + } } kmp_stats_list::iterator kmp_stats_list::begin() { - kmp_stats_list::iterator it; - it.ptr = this->next; - return it; + kmp_stats_list::iterator it; + it.ptr = this->next; + return it; } kmp_stats_list::iterator kmp_stats_list::end() { - kmp_stats_list::iterator it; - it.ptr = this; - return it; + kmp_stats_list::iterator it; + it.ptr = this; + return it; } int kmp_stats_list::size() { - int retval; - kmp_stats_list::iterator it; - for(retval=0, it=begin(); it!=end(); it++, retval++) {} - return retval; + int retval; + kmp_stats_list::iterator it; + for (retval = 0, it = begin(); it != end(); it++, retval++) { + } + return retval; } -/* ********************************************************************* */ /* ************* kmp_stats_list::iterator member functions ************* */ kmp_stats_list::iterator::iterator() : ptr(NULL) {} kmp_stats_list::iterator::~iterator() {} kmp_stats_list::iterator kmp_stats_list::iterator::operator++() { - this->ptr = this->ptr->next; - return *this; + this->ptr = this->ptr->next; + return *this; } kmp_stats_list::iterator kmp_stats_list::iterator::operator++(int dummy) { - this->ptr = this->ptr->next; - return *this; + this->ptr = this->ptr->next; + return *this; } kmp_stats_list::iterator kmp_stats_list::iterator::operator--() { - this->ptr = this->ptr->prev; - return *this; + this->ptr = this->ptr->prev; + return *this; } kmp_stats_list::iterator kmp_stats_list::iterator::operator--(int dummy) { - this->ptr = this->ptr->prev; - return *this; + this->ptr = this->ptr->prev; + return *this; } -bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator & rhs) { - return this->ptr!=rhs.ptr; +bool kmp_stats_list::iterator::operator!=(const kmp_stats_list::iterator &rhs) { + return this->ptr != rhs.ptr; } -bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator & rhs) { - return this->ptr==rhs.ptr; +bool kmp_stats_list::iterator::operator==(const kmp_stats_list::iterator &rhs) { + return this->ptr == rhs.ptr; } -kmp_stats_list* kmp_stats_list::iterator::operator*() const { - return this->ptr; +kmp_stats_list *kmp_stats_list::iterator::operator*() const { + return this->ptr; } -/* *************************************************************** */ /* ************* kmp_stats_output_module functions ************** */ -const char* kmp_stats_output_module::eventsFileName = NULL; -const char* kmp_stats_output_module::plotFileName = NULL; -int kmp_stats_output_module::printPerThreadFlag = 0; +const char *kmp_stats_output_module::eventsFileName = NULL; +const char *kmp_stats_output_module::plotFileName = NULL; +int kmp_stats_output_module::printPerThreadFlag = 0; int kmp_stats_output_module::printPerThreadEventsFlag = 0; -// init() is called very near the beginning of execution time in the constructor of __kmp_stats_global_output -void kmp_stats_output_module::init() -{ - char * statsFileName = getenv("KMP_STATS_FILE"); - eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); - plotFileName = getenv("KMP_STATS_PLOT_FILE"); - char * threadStats = getenv("KMP_STATS_THREADS"); - char * threadEvents = getenv("KMP_STATS_EVENTS"); - - // set the stats output filenames based on environment variables and defaults - if(statsFileName) { - // append the process id to the output filename - // events.csv --> events-pid.csv - size_t index; - std::string baseFileName, pid, suffix; - std::stringstream ss; - outputFileName = std::string(statsFileName); - index = 
outputFileName.find_last_of('.'); - if(index == std::string::npos) { - baseFileName = outputFileName; - } else { - baseFileName = outputFileName.substr(0, index); - suffix = outputFileName.substr(index); - } - ss << getpid(); - pid = ss.str(); - outputFileName = baseFileName + "-" + pid + suffix; - } - eventsFileName = eventsFileName ? eventsFileName : "events.dat"; - plotFileName = plotFileName ? plotFileName : "events.plt"; - - // set the flags based on environment variables matching: true, on, 1, .true. , .t. , yes - printPerThreadFlag = __kmp_str_match_true(threadStats); - printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); - - if(printPerThreadEventsFlag) { - // assigns a color to each timer for printing - setupEventColors(); +// init() is called very near the beginning of execution time in the constructor +// of __kmp_stats_global_output +void kmp_stats_output_module::init() { + char *statsFileName = getenv("KMP_STATS_FILE"); + eventsFileName = getenv("KMP_STATS_EVENTS_FILE"); + plotFileName = getenv("KMP_STATS_PLOT_FILE"); + char *threadStats = getenv("KMP_STATS_THREADS"); + char *threadEvents = getenv("KMP_STATS_EVENTS"); + + // set the stats output filenames based on environment variables and defaults + if (statsFileName) { + // append the process id to the output filename + // events.csv --> events-pid.csv + size_t index; + std::string baseFileName, pid, suffix; + std::stringstream ss; + outputFileName = std::string(statsFileName); + index = outputFileName.find_last_of('.'); + if (index == std::string::npos) { + baseFileName = outputFileName; } else { - // will clear flag so that no event will be logged - timeStat::clearEventFlags(); + baseFileName = outputFileName.substr(0, index); + suffix = outputFileName.substr(index); } + ss << getpid(); + pid = ss.str(); + outputFileName = baseFileName + "-" + pid + suffix; + } + eventsFileName = eventsFileName ? eventsFileName : "events.dat"; + plotFileName = plotFileName ? plotFileName : "events.plt"; - return; -} + // set the flags based on environment variables matching: true, on, 1, .true. + // , .t. , yes + printPerThreadFlag = __kmp_str_match_true(threadStats); + printPerThreadEventsFlag = __kmp_str_match_true(threadEvents); -void kmp_stats_output_module::setupEventColors() { - int i; - int globalColorIndex = 0; - int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); - for(i=0;iformat(tag, true).c_str()); - } - // Also print the Total_ versions of times. - for (timer_e s = timer_e(0); sformat(' ', true).c_str()); - } -} - -void kmp_stats_output_module::printCounters(FILE * statsOut, counter const * theCounters) -{ - // We print all the counters even if they are zero. - // That makes it easier to slice them into a spreadsheet if you need to. 
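/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): the filename
   rewriting performed in kmp_stats_output_module::init() above, where
   "stats.csv" becomes "stats-<pid>.csv" and a name without an extension just
   gets "-<pid>" appended.  The function name is hypothetical and getpid() is
   the POSIX call; the real code has the same structure.
   ------------------------------------------------------------------------- */
#include <sstream>
#include <string>
#include <unistd.h>

static std::string append_pid(const std::string &fileName) {
  std::string base, suffix;
  size_t index = fileName.find_last_of('.');
  if (index == std::string::npos) {
    base = fileName; // no extension: suffix stays empty
  } else {
    base = fileName.substr(0, index);
    suffix = fileName.substr(index); // ".csv", including the dot
  }
  std::stringstream ss;
  ss << getpid();
  return base + "-" + ss.str() + suffix;
}
// append_pid("stats.csv") -> "stats-12345.csv" for a process with pid 12345.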
- fprintf (statsOut, "\nCounter, Count\n"); - for (int c = 0; cgetValue(), 9, ' ').c_str()); - } + return; } -void kmp_stats_output_module::printEvents(FILE* eventsOut, kmp_stats_event_vector* theEvents, int gtid) { - // sort by start time before printing - theEvents->sort(); - for (int i = 0; i < theEvents->size(); i++) { - kmp_stats_event ev = theEvents->at(i); - rgb_color color = getEventColor(ev.getTimerName()); - fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", - gtid, - ev.getStart(), - ev.getStop(), - 1.2 - (ev.getNestLevel() * 0.2), - color.r, color.g, color.b, - timeStat::name(ev.getTimerName()) - ); +void kmp_stats_output_module::setupEventColors() { + int i; + int globalColorIndex = 0; + int numGlobalColors = sizeof(globalColorArray) / sizeof(rgb_color); + for (i = 0; i < TIMER_LAST; i++) { + if (timeStat::logEvent((timer_e)i)) { + timerColorInfo[i] = globalColorArray[globalColorIndex]; + globalColorIndex = (globalColorIndex + 1) % numGlobalColors; } - return; -} - -void kmp_stats_output_module::windupExplicitTimers() -{ - // Wind up any explicit timers. We assume that it's fair at this point to just walk all the explcit timers in all threads - // and say "it's over". - // If the timer wasn't running, this won't record anything anyway. - kmp_stats_list::iterator it; - for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { - kmp_stats_list* ptr = *it; - ptr->getPartitionedTimers()->windup(); - for (int timer=0; timergetExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr); - } + } + return; +} + +void kmp_stats_output_module::printTimerStats(FILE *statsOut, + statistic const *theStats, + statistic const *totalStats) { + fprintf(statsOut, "Timer, SampleCount, Min, " + "Mean, Max, Total, SD\n"); + for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { + statistic const *stat = &theStats[s]; + char tag = timeStat::noUnits(s) ? ' ' : 'T'; + + fprintf(statsOut, "%-28s, %s\n", timeStat::name(s), + stat->format(tag, true).c_str()); + } + // Also print the Total_ versions of times. + for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { + char tag = timeStat::noUnits(s) ? ' ' : 'T'; + if (totalStats && !timeStat::noTotal(s)) + fprintf(statsOut, "Total_%-22s, %s\n", timeStat::name(s), + totalStats[s].format(tag, true).c_str()); + } +} + +void kmp_stats_output_module::printCounterStats(FILE *statsOut, + statistic const *theStats) { + fprintf(statsOut, "Counter, ThreadCount, Min, Mean, " + " Max, Total, SD\n"); + for (int s = 0; s < COUNTER_LAST; s++) { + statistic const *stat = &theStats[s]; + fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(s)), + stat->format(' ', true).c_str()); + } +} + +void kmp_stats_output_module::printCounters(FILE *statsOut, + counter const *theCounters) { + // We print all the counters even if they are zero. + // That makes it easier to slice them into a spreadsheet if you need to. 
+ fprintf(statsOut, "\nCounter, Count\n"); + for (int c = 0; c < COUNTER_LAST; c++) { + counter const *stat = &theCounters[c]; + fprintf(statsOut, "%-25s, %s\n", counter::name(counter_e(c)), + formatSI(stat->getValue(), 9, ' ').c_str()); + } +} + +void kmp_stats_output_module::printEvents(FILE *eventsOut, + kmp_stats_event_vector *theEvents, + int gtid) { + // sort by start time before printing + theEvents->sort(); + for (int i = 0; i < theEvents->size(); i++) { + kmp_stats_event ev = theEvents->at(i); + rgb_color color = getEventColor(ev.getTimerName()); + fprintf(eventsOut, "%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid, + ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2), + color.r, color.g, color.b, timeStat::name(ev.getTimerName())); + } + return; +} + +void kmp_stats_output_module::windupExplicitTimers() { + // Wind up any explicit timers. We assume that it's fair at this point to just + // walk all the explcit timers in all threads and say "it's over". + // If the timer wasn't running, this won't record anything anyway. + kmp_stats_list::iterator it; + for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { + kmp_stats_list *ptr = *it; + ptr->getPartitionedTimers()->windup(); + for (int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) { + ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr); } + } } void kmp_stats_output_module::printPloticusFile() { - int i; - int size = __kmp_stats_list->size(); - FILE* plotOut = fopen(plotFileName, "w+"); - - fprintf(plotOut, "#proc page\n" - " pagesize: 15 10\n" - " scale: 1.0\n\n"); - - fprintf(plotOut, "#proc getdata\n" - " file: %s\n\n", - eventsFileName); - - fprintf(plotOut, "#proc areadef\n" - " title: OpenMP Sampling Timeline\n" - " titledetails: align=center size=16\n" - " rectangle: 1 1 13 9\n" - " xautorange: datafield=2,3\n" - " yautorange: -1 %d\n\n", - size); - - fprintf(plotOut, "#proc xaxis\n" - " stubs: inc\n" - " stubdetails: size=12\n" - " label: Time (ticks)\n" - " labeldetails: size=14\n\n"); - - fprintf(plotOut, "#proc yaxis\n" - " stubs: inc 1\n" - " stubrange: 0 %d\n" - " stubdetails: size=12\n" - " label: Thread #\n" - " labeldetails: size=14\n\n", - size-1); - - fprintf(plotOut, "#proc bars\n" - " exactcolorfield: 5\n" - " axis: x\n" - " locfield: 1\n" - " segmentfields: 2 3\n" - " barwidthfield: 4\n\n"); - - // create legend entries corresponding to the timer color - for(i=0;isize(); + FILE *plotOut = fopen(plotFileName, "w+"); + + fprintf(plotOut, "#proc page\n" + " pagesize: 15 10\n" + " scale: 1.0\n\n"); + + fprintf(plotOut, "#proc getdata\n" + " file: %s\n\n", + eventsFileName); + + fprintf(plotOut, "#proc areadef\n" + " title: OpenMP Sampling Timeline\n" + " titledetails: align=center size=16\n" + " rectangle: 1 1 13 9\n" + " xautorange: datafield=2,3\n" + " yautorange: -1 %d\n\n", + size); + + fprintf(plotOut, "#proc xaxis\n" + " stubs: inc\n" + " stubdetails: size=12\n" + " label: Time (ticks)\n" + " labeldetails: size=14\n\n"); + + fprintf(plotOut, "#proc yaxis\n" + " stubs: inc 1\n" + " stubrange: 0 %d\n" + " stubdetails: size=12\n" + " label: Thread #\n" + " labeldetails: size=14\n\n", + size - 1); + + fprintf(plotOut, "#proc bars\n" + " exactcolorfield: 5\n" + " axis: x\n" + " locfield: 1\n" + " segmentfields: 2 3\n" + " barwidthfield: 4\n\n"); + + // create legend entries corresponding to the timer color + for (i = 0; i < TIMER_LAST; i++) { + if (timeStat::logEvent((timer_e)i)) { + rgb_color c = getEventColor((timer_e)i); + fprintf(plotOut, "#proc 
legendentry\n" + " sampletype: color\n" + " label: %s\n" + " details: rgb(%1.1f,%1.1f,%1.1f)\n\n", + timeStat::name((timer_e)i), c.r, c.g, c.b); } - - fprintf(plotOut, "#proc legend\n" - " format: down\n" - " location: max max\n\n"); - fclose(plotOut); - return; -} - -/* - * Print some useful information about - * * the date and time this experiment ran. - * * the machine on which it ran. - * We output all of this as stylised comments, though we may decide to parse some of it. - */ -void kmp_stats_output_module::printHeaderInfo(FILE * statsOut) -{ - std::time_t now = std::time(0); - char buffer[40]; - char hostName[80]; - - std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now)); - fprintf (statsOut, "# Time of run: %s\n", &buffer[0]); - if (gethostname(&hostName[0], sizeof(hostName)) == 0) - fprintf (statsOut,"# Hostname: %s\n", &hostName[0]); + } + + fprintf(plotOut, "#proc legend\n" + " format: down\n" + " location: max max\n\n"); + fclose(plotOut); + return; +} + +/* Print some useful information about + * the date and time this experiment ran. + * the machine on which it ran. + We output all of this as stylised comments, though we may decide to parse + some of it. */ +void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) { + std::time_t now = std::time(0); + char buffer[40]; + char hostName[80]; + + std::strftime(&buffer[0], sizeof(buffer), "%c", std::localtime(&now)); + fprintf(statsOut, "# Time of run: %s\n", &buffer[0]); + if (gethostname(&hostName[0], sizeof(hostName)) == 0) + fprintf(statsOut, "# Hostname: %s\n", &hostName[0]); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - fprintf (statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]); - fprintf (statsOut, "# Family: %d, Model: %d, Stepping: %d\n", __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping); - if (__kmp_cpuinfo.frequency == 0) - fprintf (statsOut, "# Nominal frequency: Unknown\n"); - else - fprintf (statsOut, "# Nominal frequency: %sz\n", formatSI(double(__kmp_cpuinfo.frequency),9,'H').c_str()); + fprintf(statsOut, "# CPU: %s\n", &__kmp_cpuinfo.name[0]); + fprintf(statsOut, "# Family: %d, Model: %d, Stepping: %d\n", + __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping); + if (__kmp_cpuinfo.frequency == 0) + fprintf(statsOut, "# Nominal frequency: Unknown\n"); + else + fprintf(statsOut, "# Nominal frequency: %sz\n", + formatSI(double(__kmp_cpuinfo.frequency), 9, 'H').c_str()); #endif } -void kmp_stats_output_module::outputStats(const char* heading) -{ - // Stop all the explicit timers in all threads - // Do this before declaring the local statistics because thay have constructors so will take time to create. - windupExplicitTimers(); - - statistic allStats[TIMER_LAST]; - statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of normal timer stats */ - statistic allCounters[COUNTER_LAST]; - - FILE * statsOut = !outputFileName.empty() ? fopen (outputFileName.c_str(), "a+") : stderr; - if (!statsOut) - statsOut = stderr; - - FILE * eventsOut; +void kmp_stats_output_module::outputStats(const char *heading) { + // Stop all the explicit timers in all threads + // Do this before declaring the local statistics because thay have + // constructors so will take time to create. + windupExplicitTimers(); + + statistic allStats[TIMER_LAST]; + statistic totalStats[TIMER_LAST]; /* Synthesized, cross threads versions of + normal timer stats */ + statistic allCounters[COUNTER_LAST]; + + FILE *statsOut = + !outputFileName.empty() ? 
fopen(outputFileName.c_str(), "a+") : stderr; + if (!statsOut) + statsOut = stderr; + + FILE *eventsOut; + if (eventPrintingEnabled()) { + eventsOut = fopen(eventsFileName, "w+"); + } + + printHeaderInfo(statsOut); + fprintf(statsOut, "%s\n", heading); + // Accumulate across threads. + kmp_stats_list::iterator it; + for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { + int t = (*it)->getGtid(); + // Output per thread stats if requested. + if (printPerThreadFlag) { + fprintf(statsOut, "Thread %d\n", t); + printTimerStats(statsOut, (*it)->getTimers(), 0); + printCounters(statsOut, (*it)->getCounters()); + fprintf(statsOut, "\n"); + } + // Output per thread events if requested. if (eventPrintingEnabled()) { - eventsOut = fopen(eventsFileName, "w+"); + kmp_stats_event_vector events = (*it)->getEventVector(); + printEvents(eventsOut, &events, t); } - printHeaderInfo (statsOut); - fprintf(statsOut, "%s\n",heading); - // Accumulate across threads. - kmp_stats_list::iterator it; - for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { - int t = (*it)->getGtid(); - // Output per thread stats if requested. - if (printPerThreadFlag) { - fprintf (statsOut, "Thread %d\n", t); - printTimerStats (statsOut, (*it)->getTimers(), 0); - printCounters (statsOut, (*it)->getCounters()); - fprintf (statsOut,"\n"); - } - // Output per thread events if requested. - if (eventPrintingEnabled()) { - kmp_stats_event_vector events = (*it)->getEventVector(); - printEvents(eventsOut, &events, t); - } - - // Accumulate timers. - for (timer_e s = timer_e(0); sgetTimer(s); - allStats[s] += *threadStat; - - // Add Total stats for timers that are valid in more than one thread - if (!timeStat::noTotal(s)) - totalStats[s].addSample(threadStat->getTotal()); - } - - // Accumulate counters. - for (counter_e c = counter_e(0); cgetCounter(c)->getValue()); - } + // Accumulate timers. + for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) { + // See if we should ignore this timer when aggregating + if ((timeStat::masterOnly(s) && (t != 0)) || // Timer only valid on master + // and this thread is worker + (timeStat::workerOnly(s) && (t == 0)) // Timer only valid on worker + // and this thread is the master + ) { + continue; + } + + statistic *threadStat = (*it)->getTimer(s); + allStats[s] += *threadStat; + + // Add Total stats for timers that are valid in more than one thread + if (!timeStat::noTotal(s)) + totalStats[s].addSample(threadStat->getTotal()); } - if (eventPrintingEnabled()) { - printPloticusFile(); - fclose(eventsOut); + // Accumulate counters. 
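/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): what accumulating
   a counter across threads means here.  Each thread owns one count per
   counter, and aggregation treats every thread's count as a single sample, so
   the reported min/mean/max describe the spread across threads.  Plain
   arithmetic stands in for the statistic class and the numbers are invented.
   ------------------------------------------------------------------------- */
#include <algorithm>
#include <cstdio>
#include <vector>

static void accumulate_counter_demo() {
  // Hypothetical per-thread values of one counter, e.g. TASK_executed.
  std::vector<double> perThread = {120.0, 80.0, 100.0, 100.0};

  double sum = 0.0, mn = perThread[0], mx = perThread[0];
  for (double v : perThread) { // one sample per thread, as in the loop below
    sum += v;
    mn = std::min(mn, v);
    mx = std::max(mx, v);
  }
  std::printf("min %g, mean %g, max %g, total %g\n", mn,
              sum / perThread.size(), mx, sum); // 80, 100, 120, 400
}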
+ for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) { + if (counter::masterOnly(c) && t != 0) + continue; + allCounters[c].addSample((*it)->getCounter(c)->getValue()); } + } - fprintf (statsOut, "Aggregate for all threads\n"); - printTimerStats (statsOut, &allStats[0], &totalStats[0]); - fprintf (statsOut, "\n"); - printCounterStats (statsOut, &allCounters[0]); + if (eventPrintingEnabled()) { + printPloticusFile(); + fclose(eventsOut); + } - if (statsOut != stderr) - fclose(statsOut); + fprintf(statsOut, "Aggregate for all threads\n"); + printTimerStats(statsOut, &allStats[0], &totalStats[0]); + fprintf(statsOut, "\n"); + printCounterStats(statsOut, &allCounters[0]); + + if (statsOut != stderr) + fclose(statsOut); } -/* ************************************************** */ /* ************* exported C functions ************** */ -// no name mangling for these functions, we want the c files to be able to get at these functions +// no name mangling for these functions, we want the c files to be able to get +// at these functions extern "C" { -void __kmp_reset_stats() -{ - kmp_stats_list::iterator it; - for(it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { - timeStat * timers = (*it)->getTimers(); - counter * counters = (*it)->getCounters(); - explicitTimer * eTimers = (*it)->getExplicitTimers(); +void __kmp_reset_stats() { + kmp_stats_list::iterator it; + for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) { + timeStat *timers = (*it)->getTimers(); + counter *counters = (*it)->getCounters(); + explicitTimer *eTimers = (*it)->getExplicitTimers(); - for (int t = 0; tresetEventVector(); - } + // reset the event vector so all previous events are "erased" + (*it)->resetEventVector(); + } } -// This function will reset all stats and stop all threads' explicit timers if they haven't been stopped already. -void __kmp_output_stats(const char * heading) -{ - __kmp_stats_global_output->outputStats(heading); - __kmp_reset_stats(); +// This function will reset all stats and stop all threads' explicit timers if +// they haven't been stopped already. +void __kmp_output_stats(const char *heading) { + __kmp_stats_global_output->outputStats(heading); + __kmp_reset_stats(); } -void __kmp_accumulate_stats_at_exit(void) -{ - // Only do this once. - if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) - return; +void __kmp_accumulate_stats_at_exit(void) { + // Only do this once. 
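/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.cpp): the run-once guard
   used by __kmp_accumulate_stats_at_exit() in this hunk.  KMP_XCHG_FIXED32 is
   the runtime's own atomic exchange; std::atomic shows the same idea.  The
   names here are hypothetical.
   ------------------------------------------------------------------------- */
#include <atomic>
#include <cstdio>

static std::atomic<int> g_reported{0};

static void report_once() {
  // exchange() returns the previous value, so only the first caller sees 0;
  // later or concurrent calls return without printing a second report.
  if (g_reported.exchange(1) != 0)
    return;
  std::printf("Statistics on exit\n");
}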
+ if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0) + return; - __kmp_output_stats("Statistics on exit"); + __kmp_output_stats("Statistics on exit"); } -void __kmp_stats_init(void) -{ - __kmp_init_tas_lock( & __kmp_stats_lock ); - __kmp_stats_start_time = tsc_tick_count::now(); - __kmp_stats_global_output = new kmp_stats_output_module(); - __kmp_stats_list = new kmp_stats_list(); +void __kmp_stats_init(void) { + __kmp_init_tas_lock(&__kmp_stats_lock); + __kmp_stats_start_time = tsc_tick_count::now(); + __kmp_stats_global_output = new kmp_stats_output_module(); + __kmp_stats_list = new kmp_stats_list(); } -void __kmp_stats_fini(void) -{ - __kmp_accumulate_stats_at_exit(); - __kmp_stats_list->deallocate(); - delete __kmp_stats_global_output; - delete __kmp_stats_list; +void __kmp_stats_fini(void) { + __kmp_accumulate_stats_at_exit(); + __kmp_stats_list->deallocate(); + delete __kmp_stats_global_output; + delete __kmp_stats_list; } } // extern "C" - diff --git a/openmp/runtime/src/kmp_stats.h b/openmp/runtime/src/kmp_stats.h index 40ccb50..50ad257 100644 --- a/openmp/runtime/src/kmp_stats.h +++ b/openmp/runtime/src/kmp_stats.h @@ -15,28 +15,29 @@ // //===----------------------------------------------------------------------===// + #include "kmp_config.h" #if KMP_STATS_ENABLED -/* - * Statistics accumulator. - * Accumulates number of samples and computes min, max, mean, standard deviation on the fly. - * - * Online variance calculation algorithm from http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm +/* Statistics accumulator. + Accumulates number of samples and computes min, max, mean, standard deviation + on the fly. + + Online variance calculation algorithm from + http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm */ +#include "kmp_stats_timing.h" #include #include -#include -#include -#include #include // placement new -#include "kmp_stats_timing.h" +#include +#include +#include -/* - * Enable developer statistics here if you want them. They are more detailed than is useful for application characterisation and - * are intended for the runtime library developer. - */ +/* Enable developer statistics here if you want them. They are more detailed + than is useful for application characterisation and are intended for the + runtime library developer. */ // #define KMP_DEVELOPER_STATS 1 /*! @@ -45,11 +46,13 @@ * */ enum stats_flags_e { - noTotal = 1<<0, //!< do not show a TOTAL_aggregation for this statistic - onlyInMaster = 1<<1, //!< statistic is valid only for master - noUnits = 1<<2, //!< statistic doesn't need units printed next to it in output - notInMaster = 1<<3, //!< statistic is valid only for non-master threads - logEvent = 1<<4 //!< statistic can be logged on the event timeline when KMP_STATS_EVENTS is on (valid only for timers) + noTotal = 1 << 0, //!< do not show a TOTAL_aggregation for this statistic + onlyInMaster = 1 << 1, //!< statistic is valid only for master + noUnits = + 1 << 2, //!< statistic doesn't need units printed next to it in output + notInMaster = 1 << 3, //!< statistic is valid only for non-master threads + logEvent = 1 << 4 //!< statistic can be logged on the event timeline when + //! KMP_STATS_EVENTS is on (valid only for timers) }; /*! 
@@ -58,123 +61,143 @@ enum stats_flags_e { * */ enum stats_state_e { - IDLE, - SERIAL_REGION, - FORK_JOIN_BARRIER, - PLAIN_BARRIER, - TASKWAIT, - TASKYIELD, - TASKGROUP, - IMPLICIT_TASK, - EXPLICIT_TASK + IDLE, + SERIAL_REGION, + FORK_JOIN_BARRIER, + PLAIN_BARRIER, + TASKWAIT, + TASKYIELD, + TASKGROUP, + IMPLICIT_TASK, + EXPLICIT_TASK }; /*! * \brief Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h * - * @param macro a user defined macro that takes three arguments - macro(COUNTER_NAME, flags, arg) + * @param macro a user defined macro that takes three arguments - + * macro(COUNTER_NAME, flags, arg) * @param arg a user defined argument to send to the user defined macro * - * \details A counter counts the occurrence of some event. - * Each thread accumulates its own count, at the end of execution the counts are aggregated treating each thread - * as a separate measurement. (Unless onlyInMaster is set, in which case there's only a single measurement). - * The min,mean,max are therefore the values for the threads. - * Adding the counter here and then putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you need to do. - * All of the tables and printing is generated from this macro. + * \details A counter counts the occurrence of some event. Each thread + * accumulates its own count, at the end of execution the counts are aggregated + * treating each thread as a separate measurement. (Unless onlyInMaster is set, + * in which case there's only a single measurement). The min,mean,max are + * therefore the values for the threads. Adding the counter here and then + * putting a KMP_BLOCK_COUNTER(name) at the point you want to count is all you + * need to do. All of the tables and printing is generated from this macro. * Format is "macro(name, flags, arg)" * * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_COUNTER(macro, arg) \ - macro (OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, arg) \ - macro (OMP_NESTED_PARALLEL, 0, arg) \ - macro (OMP_FOR_static, 0, arg) \ - macro (OMP_FOR_static_steal, 0, arg) \ - macro (OMP_FOR_dynamic, 0, arg) \ - macro (OMP_DISTRIBUTE, 0, arg) \ - macro (OMP_BARRIER, 0, arg) \ - macro (OMP_CRITICAL,0, arg) \ - macro (OMP_SINGLE, 0, arg) \ - macro (OMP_MASTER, 0, arg) \ - macro (OMP_TEAMS, 0, arg) \ - macro (OMP_set_lock, 0, arg) \ - macro (OMP_test_lock, 0, arg) \ - macro (REDUCE_wait, 0, arg) \ - macro (REDUCE_nowait, 0, arg) \ - macro (OMP_TASKYIELD, 0, arg) \ - macro (OMP_TASKLOOP, 0, arg) \ - macro (TASK_executed, 0, arg) \ - macro (TASK_cancelled, 0, arg) \ - macro (TASK_stolen, 0, arg) +// clang-format off +#define KMP_FOREACH_COUNTER(macro, arg) \ + macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal, \ + arg) macro(OMP_NESTED_PARALLEL, 0, arg) macro(OMP_FOR_static, 0, arg) \ + macro(OMP_FOR_static_steal, 0, arg) macro(OMP_FOR_dynamic, 0, arg) \ + macro(OMP_DISTRIBUTE, 0, arg) macro(OMP_BARRIER, 0, arg) \ + macro(OMP_CRITICAL, 0, arg) macro(OMP_SINGLE, 0, arg) \ + macro(OMP_MASTER, 0, arg) macro(OMP_TEAMS, 0, arg) \ + macro(OMP_set_lock, 0, arg) macro(OMP_test_lock, 0, arg) \ + macro(REDUCE_wait, 0, arg) \ + macro(REDUCE_nowait, 0, arg) \ + macro(OMP_TASKYIELD, 0, arg) \ + macro(OMP_TASKLOOP, 0, arg) \ + macro(TASK_executed, 0, arg) \ + macro(TASK_cancelled, 0, arg) \ + macro(TASK_stolen, 0, arg) +// clang-format on /*! 
* \brief Add new timers under KMP_FOREACH_TIMER() macro in kmp_stats.h * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param macro a user defined macro that takes three arguments - + * macro(TIMER_NAME, flags, arg) * @param arg a user defined argument to send to the user defined macro * - * \details A timer collects multiple samples of some count in each thread and then finally aggregates alll of the samples from all of the threads. - * For most timers the printing code also provides an aggregation over the thread totals. These are printed as TOTAL_foo. - * The count is normally a time (in ticks), hence the name "timer". (But can be any value, so we use this for "number of arguments passed to fork" - * as well). - * For timers the threads are not significant, it's the individual observations that count, so the statistics are at that level. - * Format is "macro(name, flags, arg)" + * \details A timer collects multiple samples of some count in each thread and + * then finally aggregates alll of the samples from all of the threads. For most + * timers the printing code also provides an aggregation over the thread totals. + * These are printed as TOTAL_foo. The count is normally a time (in ticks), + * hence the name "timer". (But can be any value, so we use this for "number of + * arguments passed to fork" as well). For timers the threads are not + * significant, it's the individual observations that count, so the statistics + * are at that level. Format is "macro(name, flags, arg)" * * @ingroup STATS_GATHERING2 */ -#define KMP_FOREACH_TIMER(macro, arg) \ - macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ - macro (FOR_static_scheduling, 0, arg) \ - macro (FOR_dynamic_scheduling, 0, arg) \ - macro (OMP_critical, 0, arg) \ - macro (OMP_critical_wait, 0, arg) \ - macro (OMP_single, 0, arg) \ - macro (OMP_master, 0, arg) \ - macro (OMP_idle, stats_flags_e::logEvent, arg) \ - macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_fork_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ - macro (OMP_parallel, stats_flags_e::logEvent, arg) \ - macro (OMP_task_immediate, 0, arg) \ - macro (OMP_task_taskwait, 0, arg) \ - macro (OMP_task_taskyield, 0, arg) \ - macro (OMP_task_taskgroup, 0, arg) \ - macro (OMP_task_join_bar, 0, arg) \ - macro (OMP_task_plain_bar, 0, arg) \ - macro (OMP_serial, stats_flags_e::logEvent, arg) \ - macro (OMP_taskloop_scheduling, 0, arg) \ - macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (FOR_static_iterations, stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (FOR_dynamic_iterations,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (FOR_static_steal_stolen,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ - macro (FOR_static_steal_chunks,stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ +// clang-format off +#define KMP_FOREACH_TIMER(macro, arg) \ + macro (OMP_worker_thread_life, stats_flags_e::logEvent, arg) \ + macro (FOR_static_scheduling, 0, arg) \ + macro (FOR_dynamic_scheduling, 0, arg) \ + macro (OMP_critical, 0, arg) \ + macro (OMP_critical_wait, 0, arg) \ + macro (OMP_single, 0, arg) \ + macro (OMP_master, 0, arg) \ + macro (OMP_idle, stats_flags_e::logEvent, arg) \ + macro (OMP_plain_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_fork_barrier, 
stats_flags_e::logEvent, arg) \ + macro (OMP_join_barrier, stats_flags_e::logEvent, arg) \ + macro (OMP_parallel, stats_flags_e::logEvent, arg) \ + macro (OMP_task_immediate, 0, arg) \ + macro (OMP_task_taskwait, 0, arg) \ + macro (OMP_task_taskyield, 0, arg) \ + macro (OMP_task_taskgroup, 0, arg) \ + macro (OMP_task_join_bar, 0, arg) \ + macro (OMP_task_plain_bar, 0, arg) \ + macro (OMP_serial, stats_flags_e::logEvent, arg) \ + macro (OMP_taskloop_scheduling, 0, arg) \ + macro (OMP_set_numthreads, stats_flags_e::noUnits | stats_flags_e::noTotal,\ + arg) \ + macro (OMP_PARALLEL_args, stats_flags_e::noUnits | stats_flags_e::noTotal, \ + arg) \ + macro (FOR_static_iterations, \ + stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ + macro (FOR_dynamic_iterations, \ + stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ + macro (FOR_static_steal_stolen, \ + stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ + macro (FOR_static_steal_chunks, \ + stats_flags_e::noUnits | stats_flags_e::noTotal, arg) \ KMP_FOREACH_DEVELOPER_TIMER(macro, arg) +// clang-format on - -// OMP_start_end -- Time from when OpenMP is initialized until the stats are printed at exit +// OMP_start_end -- Time from when OpenMP is initialized until the +// stats are printed at exit // OMP_serial -- Thread zero time executing serial code -// OMP_work -- Elapsed time in code dispatched by a fork (measured in the thread) +// OMP_work -- Elapsed time in code dispatched by a fork (measured +// in the thread) // OMP_barrier -- Time at "real" barriers (includes task time) // FOR_static_scheduling -- Time spent doing scheduling for a static "for" // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for" -// OMP_idle -- Worker threads time spent waiting for inclusion in a parallel region +// OMP_idle -- Worker threads time spent waiting for inclusion in +// a parallel region // OMP_plain_barrier -- Time spent in a barrier construct -// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a parallel region +// OMP_fork_join_barrier -- Time spent in a the fork-join barrier surrounding a +// parallel region // OMP_parallel -- Time spent inside a parallel construct // OMP_task_immediate -- Time spent executing non-deferred tasks -// OMP_task_taskwait -- Time spent executing tasks inside a taskwait construct -// OMP_task_taskyield -- Time spent executing tasks inside a taskyield construct -// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup construct +// OMP_task_taskwait -- Time spent executing tasks inside a taskwait +// construct +// OMP_task_taskyield -- Time spent executing tasks inside a taskyield +// construct +// OMP_task_taskgroup -- Time spent executing tasks inside a taskygroup +// construct // OMP_task_join_bar -- Time spent executing tasks inside a join barrier -// OMP_task_plain_bar -- Time spent executing tasks inside a barrier construct +// OMP_task_plain_bar -- Time spent executing tasks inside a barrier +// construct // OMP_single -- Time spent executing a "single" region // OMP_master -- Time spent executing a "master" region // OMP_set_numthreads -- Values passed to omp_set_num_threads // OMP_PARALLEL_args -- Number of arguments passed to a parallel region -// FOR_static_iterations -- Number of available parallel chunks of work in a static for -// FOR_dynamic_iterations -- Number of available parallel chunks of work in a dynamic for -// Both adjust for any chunking, so if there were an iteration count of 20 but a chunk size of 10, we'd record 2. 
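/* ---------------------------------------------------------------------------
   Illustrative standalone sketch (not from kmp_stats.h): the X-macro pattern
   behind KMP_FOREACH_TIMER/KMP_FOREACH_COUNTER documented above.  One list
   macro is expanded with different per-entry macros to generate the enum, the
   name table and the printing loop, so adding an entry to the list is the
   only edit needed.  MY_FOREACH_STAT and statName are hypothetical names.
   ------------------------------------------------------------------------- */
#include <cstdio>

#define MY_FOREACH_STAT(macro, arg)                                           \
  macro(OMP_PARALLEL, 0, arg)                                                 \
  macro(OMP_BARRIER, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)

#define ENUMERATE_STAT(name, flags, prefix) prefix##name,
enum my_stat_e { MY_FOREACH_STAT(ENUMERATE_STAT, STAT_) STAT_LAST };
#undef ENUMERATE_STAT

#define NAME_STAT(name, flags, ignore) #name,
static const char *statName[] = {MY_FOREACH_STAT(NAME_STAT, 0) "STAT_LAST"};
#undef NAME_STAT

static void print_stat_names() {
  for (int s = 0; s < STAT_LAST; s++) // the enum and the table stay in sync
    std::printf("%s\n", statName[s]);
}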
+// FOR_static_iterations -- Number of available parallel chunks of work in a +// static for +// FOR_dynamic_iterations -- Number of available parallel chunks of work in a +// dynamic for +// Both adjust for any chunking, so if there were an +// iteration count of 20 but a chunk size of 10, we'd +// record 2. #if (KMP_DEVELOPER_STATS) // Timers which are of interest to runtime library developers, not end users. @@ -192,227 +215,239 @@ enum stats_state_e { // KMP_tree_release -- time in __kmp_tree_barrier_release // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather // KMP_hyper_release -- time in __kmp_hyper_barrier_release -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ - macro (KMP_fork_call, 0, arg) \ - macro (KMP_join_call, 0, arg) \ - macro (KMP_end_split_barrier, 0, arg) \ - macro (KMP_hier_gather, 0, arg) \ - macro (KMP_hier_release, 0, arg) \ - macro (KMP_hyper_gather, 0, arg) \ - macro (KMP_hyper_release, 0, arg) \ - macro (KMP_linear_gather, 0, arg) \ - macro (KMP_linear_release, 0, arg) \ - macro (KMP_tree_gather, 0, arg) \ - macro (KMP_tree_release, 0, arg) \ - macro (USER_resume, 0, arg) \ - macro (USER_suspend, 0, arg) \ - macro (KMP_allocate_team, 0, arg) \ - macro (KMP_setup_icv_copy, 0, arg) \ - macro (USER_icv_copy, 0, arg) +#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) \ + macro(KMP_fork_call, 0, arg) macro(KMP_join_call, 0, arg) macro( \ + KMP_end_split_barrier, 0, arg) macro(KMP_hier_gather, 0, arg) \ + macro(KMP_hier_release, 0, arg) macro(KMP_hyper_gather, 0, arg) \ + macro(KMP_hyper_release, 0, arg) macro(KMP_linear_gather, 0, arg) \ + macro(KMP_linear_release, 0, arg) macro(KMP_tree_gather, 0, arg) \ + macro(KMP_tree_release, 0, arg) macro(USER_resume, 0, arg) \ + macro(USER_suspend, 0, arg) \ + macro(KMP_allocate_team, 0, arg) \ + macro(KMP_setup_icv_copy, 0, arg) \ + macro(USER_icv_copy, 0, arg) #else -# define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) +#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg) #endif /*! * \brief Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro. * - * @param macro a user defined macro that takes three arguments - macro(TIMER_NAME, flags, arg) + * @param macro a user defined macro that takes three arguments - + * macro(TIMER_NAME, flags, arg) * @param arg a user defined argument to send to the user defined macro * - * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE BAD THINGS WILL HAPPEN! + * \warning YOU MUST HAVE THE SAME NAMED TIMER UNDER KMP_FOREACH_TIMER() OR ELSE + * BAD THINGS WILL HAPPEN! * - * \details Explicit timers are ones where we need to allocate a timer itself (as well as the accumulated timing statistics). - * We allocate these on a per-thread basis, and explicitly start and stop them. - * Block timers just allocate the timer itself on the stack, and use the destructor to notice block exit; they don't - * need to be defined here. - * The name here should be the same as that of a timer above. + * \details Explicit timers are ones where we need to allocate a timer itself + * (as well as the accumulated timing statistics). We allocate these on a + * per-thread basis, and explicitly start and stop them. Block timers just + * allocate the timer itself on the stack, and use the destructor to notice + * block exit; they don't need to be defined here. The name here should be the + * same as that of a timer above. 
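The distinction drawn above, explicit timers that are started and stopped by hand versus block timers that live on the stack and stop themselves in their destructor, is the usual RAII scope-timing pattern. A minimal standalone sketch of the block-timer half, using std::chrono rather than the runtime's tick counter; simple_stat and scoped_timer are illustrative names.

#include <chrono>
#include <cstdio>

// Accumulates elapsed seconds; stands in for the runtime's timeStat here.
struct simple_stat {
  double total = 0.0;
  void add(double s) { total += s; }
};

// Scope-based timer: the constructor records the start time and the
// destructor adds the elapsed time, so leaving the block stops the timer.
class scoped_timer {
  simple_stat &stat;
  std::chrono::steady_clock::time_point start;

public:
  explicit scoped_timer(simple_stat &s)
      : stat(s), start(std::chrono::steady_clock::now()) {}
  ~scoped_timer() {
    std::chrono::duration<double> d = std::chrono::steady_clock::now() - start;
    stat.add(d.count());
  }
};

int main() {
  simple_stat work;
  {
    scoped_timer t(work); // starts timing
    volatile double x = 0.0;
    for (int i = 0; i < 1000000; ++i)
      x = x + i * 0.5;
  } // destructor runs here; the block's duration is now in `work`
  std::printf("elapsed: %f s\n", work.total);
  return 0;
}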
* * @ingroup STATS_GATHERING */ -#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) \ - KMP_FOREACH_TIMER(macro, arg) +#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg) -#define ENUMERATE(name,ignore,prefix) prefix##name, -enum timer_e { - KMP_FOREACH_TIMER(ENUMERATE, TIMER_) - TIMER_LAST -}; +#define ENUMERATE(name, ignore, prefix) prefix##name, +enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST }; enum explicit_timer_e { - KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) - EXPLICIT_TIMER_LAST + KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST }; -enum counter_e { - KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) - COUNTER_LAST -}; +enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST }; #undef ENUMERATE class timerPair { - explicit_timer_e timer_index; - timer_e timer; - public: - timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {} - inline explicit_timer_e get_index() const { return timer_index; } - inline timer_e get_timer() const { return timer; } - bool operator==(const timerPair & rhs) { - return this->get_index() == rhs.get_index(); - } - bool operator!=(const timerPair & rhs) { - return !(*this == rhs); - } + explicit_timer_e timer_index; + timer_e timer; + +public: + timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {} + inline explicit_timer_e get_index() const { return timer_index; } + inline timer_e get_timer() const { return timer; } + bool operator==(const timerPair &rhs) { + return this->get_index() == rhs.get_index(); + } + bool operator!=(const timerPair &rhs) { return !(*this == rhs); } }; -class statistic -{ - double minVal; - double maxVal; - double meanVal; - double m2; - uint64_t sampleCount; - - public: - statistic() { reset(); } - statistic (statistic const &o): minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), sampleCount(o.sampleCount) {} - - double getMin() const { return minVal; } - double getMean() const { return meanVal; } - double getMax() const { return maxVal; } - uint64_t getCount() const { return sampleCount; } - double getSD() const { return sqrt(m2/sampleCount); } - double getTotal() const { return sampleCount*meanVal; } - - void reset() - { - minVal = std::numeric_limits::max(); - maxVal = -std::numeric_limits::max(); - meanVal= 0.0; - m2 = 0.0; - sampleCount = 0; - } - void addSample(double sample); - void scale (double factor); - void scaleDown(double f) { scale (1./f); } - statistic & operator+= (statistic const & other); - - std::string format(char unit, bool total=false) const; +class statistic { + double minVal; + double maxVal; + double meanVal; + double m2; + uint64_t sampleCount; + +public: + statistic() { reset(); } + statistic(statistic const &o) + : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2), + sampleCount(o.sampleCount) {} + + double getMin() const { return minVal; } + double getMean() const { return meanVal; } + double getMax() const { return maxVal; } + uint64_t getCount() const { return sampleCount; } + double getSD() const { return sqrt(m2 / sampleCount); } + double getTotal() const { return sampleCount * meanVal; } + + void reset() { + minVal = std::numeric_limits::max(); + maxVal = -std::numeric_limits::max(); + meanVal = 0.0; + m2 = 0.0; + sampleCount = 0; + } + void addSample(double sample); + void scale(double factor); + void scaleDown(double f) { scale(1. 
/ f); } + statistic &operator+=(statistic const &other); + + std::string format(char unit, bool total = false) const; }; -struct statInfo -{ - const char * name; - uint32_t flags; +struct statInfo { + const char *name; + uint32_t flags; }; -class timeStat : public statistic -{ - static statInfo timerInfo[]; - - public: - timeStat() : statistic() {} - static const char * name(timer_e e) { return timerInfo[e].name; } - static bool noTotal (timer_e e) { return timerInfo[e].flags & stats_flags_e::noTotal; } - static bool masterOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::onlyInMaster; } - static bool workerOnly (timer_e e) { return timerInfo[e].flags & stats_flags_e::notInMaster; } - static bool noUnits (timer_e e) { return timerInfo[e].flags & stats_flags_e::noUnits; } - static bool logEvent (timer_e e) { return timerInfo[e].flags & stats_flags_e::logEvent; } - static void clearEventFlags() { - for(int i=0;i timer_stack; - public: - partitionedTimers(); - void add_timer(explicit_timer_e timer_index, explicitTimer* timer_pointer); - void init(timerPair timer_index); - void push(timerPair timer_index); - void pop(); - void windup(); +// DOING_NOTHING would render these conditions: +// time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive +// No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice +// versa +class partitionedTimers { +private: + explicitTimer *timers[EXPLICIT_TIMER_LAST + 1]; + std::vector timer_stack; + +public: + partitionedTimers(); + void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer); + void init(timerPair timer_index); + void push(timerPair timer_index); + void pop(); + void windup(); }; // Special wrapper around the partioned timers to aid timing code blocks // It avoids the need to have an explicit end, leaving the scope suffices. -class blockPartitionedTimer -{ - partitionedTimers* part_timers; - timerPair timer_pair; - public: - blockPartitionedTimer(partitionedTimers* pt, timerPair tp) : part_timers(pt), timer_pair(tp) { part_timers->push(timer_pair); } - ~blockPartitionedTimer() { part_timers->pop(); } +class blockPartitionedTimer { + partitionedTimers *part_timers; + timerPair timer_pair; + +public: + blockPartitionedTimer(partitionedTimers *pt, timerPair tp) + : part_timers(pt), timer_pair(tp) { + part_timers->push(timer_pair); + } + ~blockPartitionedTimer() { part_timers->pop(); } }; -// Special wrapper around the thread state to aid in keeping state in code blocks -// It avoids the need to have an explicit end, leaving the scope suffices. -class blockThreadState -{ - stats_state_e* state_pointer; - stats_state_e old_state; - public: - blockThreadState(stats_state_e* thread_state_pointer, stats_state_e new_state) : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { - *state_pointer = new_state; - } - ~blockThreadState() { *state_pointer = old_state; } +// Special wrapper around the thread state to aid in keeping state in code +// blocks It avoids the need to have an explicit end, leaving the scope +// suffices. +class blockThreadState { + stats_state_e *state_pointer; + stats_state_e old_state; + +public: + blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state) + : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) { + *state_pointer = new_state; + } + ~blockThreadState() { *state_pointer = old_state; } }; // If all you want is a count, then you can use this... 
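The statistic class above keeps exactly the state needed for a running mean and variance (meanVal, m2, sampleCount) alongside min and max, and getSD() returns sqrt(m2 / sampleCount). addSample() itself is defined elsewhere in the runtime; the sketch below shows a standard Welford-style update that is consistent with those fields, not necessarily the runtime's exact code. running_stat is an illustrative name.

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>
#include <initializer_list>
#include <limits>

// Running min/max/mean/variance with the same fields as `statistic` above.
// The update is the classic Welford recurrence; the real addSample() may
// differ in detail.
struct running_stat {
  double minVal = std::numeric_limits<double>::max();
  double maxVal = -std::numeric_limits<double>::max();
  double meanVal = 0.0;
  double m2 = 0.0; // sum of squared deviations from the running mean
  uint64_t sampleCount = 0;

  void addSample(double x) {
    ++sampleCount;
    minVal = std::min(minVal, x);
    maxVal = std::max(maxVal, x);
    double delta = x - meanVal;
    meanVal += delta / sampleCount;
    m2 += delta * (x - meanVal); // uses the already-updated mean
  }
  double sd() const { return std::sqrt(m2 / sampleCount); }
};

int main() {
  running_stat s;
  for (double v : {2.0, 4.0, 4.0, 4.0, 5.0, 5.0, 7.0, 9.0})
    s.addSample(v);
  // This classic data set has mean 5 and population standard deviation 2.
  std::printf("mean=%g sd=%g min=%g max=%g\n", s.meanVal, s.sd(), s.minVal,
              s.maxVal);
  return 0;
}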
-// The individual per-thread counts will be aggregated into a statistic at program exit. -class counter -{ - uint64_t value; - static const statInfo counterInfo[]; - - public: - counter() : value(0) {} - void increment() { value++; } - uint64_t getValue() const { return value; } - void reset() { value = 0; } - static const char * name(counter_e e) { return counterInfo[e].name; } - static bool masterOnly (counter_e e) { return counterInfo[e].flags & stats_flags_e::onlyInMaster; } +// The individual per-thread counts will be aggregated into a statistic at +// program exit. +class counter { + uint64_t value; + static const statInfo counterInfo[]; + +public: + counter() : value(0) {} + void increment() { value++; } + uint64_t getValue() const { return value; } + void reset() { value = 0; } + static const char *name(counter_e e) { return counterInfo[e].name; } + static bool masterOnly(counter_e e) { + return counterInfo[e].flags & stats_flags_e::onlyInMaster; + } }; /* **************************************************************** @@ -449,17 +484,20 @@ Begin -------------------------------------------------------------> Time **************************************************************** */ class kmp_stats_event { - uint64_t start; - uint64_t stop; - int nest_level; - timer_e timer_name; - public: - kmp_stats_event() : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} - kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} - inline uint64_t getStart() const { return start; } - inline uint64_t getStop() const { return stop; } - inline int getNestLevel() const { return nest_level; } - inline timer_e getTimerName() const { return timer_name; } + uint64_t start; + uint64_t stop; + int nest_level; + timer_e timer_name; + +public: + kmp_stats_event() + : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {} + kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme) + : start(strt), stop(stp), nest_level(nst), timer_name(nme) {} + inline uint64_t getStart() const { return start; } + inline uint64_t getStop() const { return stop; } + inline int getNestLevel() const { return nest_level; } + inline timer_e getTimerName() const { return timer_name; } }; /* **************************************************************** @@ -479,48 +517,54 @@ class kmp_stats_event { to avoid reallocations, then set INIT_SIZE to a large value. 
the interface to this class is through six operations: - 1) reset() -- sets the internal_size back to 0 but does not deallocate any memory + 1) reset() -- sets the internal_size back to 0 but does not deallocate any + memory 2) size() -- returns the number of valid elements in the vector 3) push_back(start, stop, nest, timer_name) -- pushes an event onto - the back of the array + the back of the array 4) deallocate() -- frees all memory associated with the vector 5) sort() -- sorts the vector by start time 6) operator[index] or at(index) -- returns event reference at that index - **************************************************************** */ class kmp_stats_event_vector { - kmp_stats_event* events; - int internal_size; - int allocated_size; - static const int INIT_SIZE = 1024; - public: - kmp_stats_event_vector() { - events = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*INIT_SIZE); - internal_size = 0; - allocated_size = INIT_SIZE; + kmp_stats_event *events; + int internal_size; + int allocated_size; + static const int INIT_SIZE = 1024; + +public: + kmp_stats_event_vector() { + events = + (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE); + internal_size = 0; + allocated_size = INIT_SIZE; + } + ~kmp_stats_event_vector() {} + inline void reset() { internal_size = 0; } + inline int size() const { return internal_size; } + void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, + timer_e name) { + int i; + if (internal_size == allocated_size) { + kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate( + sizeof(kmp_stats_event) * allocated_size * 2); + for (i = 0; i < internal_size; i++) + tmp[i] = events[i]; + __kmp_free(events); + events = tmp; + allocated_size *= 2; } - ~kmp_stats_event_vector() {} - inline void reset() { internal_size = 0; } - inline int size() const { return internal_size; } - void push_back(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { - int i; - if(internal_size == allocated_size) { - kmp_stats_event* tmp = (kmp_stats_event*)__kmp_allocate(sizeof(kmp_stats_event)*allocated_size*2); - for(i=0;isetStat(getTimer(TIMER_##name)); \ - _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, getExplicitTimer(EXPLICIT_TIMER_##name)); - KMP_FOREACH_EXPLICIT_TIMER(doInit,0); + int gtid; + timeStat _timers[TIMER_LAST + 1]; + counter _counters[COUNTER_LAST + 1]; + explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1]; + partitionedTimers _partitionedTimers; + int _nestLevel; // one per thread + kmp_stats_event_vector _event_vector; + kmp_stats_list *next; + kmp_stats_list *prev; + stats_state_e state; + int thread_is_idle_flag; + +public: + kmp_stats_list() + : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE), + thread_is_idle_flag(0) { +#define doInit(name, ignore1, ignore2) \ + getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \ + _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \ + getExplicitTimer(EXPLICIT_TIMER_##name)); + KMP_FOREACH_EXPLICIT_TIMER(doInit, 0); #undef doInit - } - ~kmp_stats_list() { } - inline timeStat * getTimer(timer_e idx) { return &_timers[idx]; } - inline counter * getCounter(counter_e idx) { return &_counters[idx]; } - inline explicitTimer * getExplicitTimer(explicit_timer_e idx) { return &_explicitTimers[idx]; } - inline partitionedTimers * getPartitionedTimers() { return &_partitionedTimers; } - inline timeStat * getTimers() { return _timers; } - inline counter * getCounters() { return _counters; } - inline explicitTimer * 
getExplicitTimers() { return _explicitTimers; } - inline kmp_stats_event_vector & getEventVector() { return _event_vector; } - inline void resetEventVector() { _event_vector.reset(); } - inline void incrementNestValue() { _nestLevel++; } - inline int getNestValue() { return _nestLevel; } - inline void decrementNestValue() { _nestLevel--; } - inline int getGtid() const { return gtid; } - inline void setGtid(int newgtid) { gtid = newgtid; } - inline void setState(stats_state_e newstate) { state = newstate; } - inline stats_state_e getState() const { return state; } - inline stats_state_e * getStatePointer() { return &state; } - inline bool isIdle() { return thread_is_idle_flag==1; } - inline void setIdleFlag() { thread_is_idle_flag = 1; } - inline void resetIdleFlag() { thread_is_idle_flag = 0; } - kmp_stats_list* push_back(int gtid); // returns newly created list node - inline void push_event(uint64_t start_time, uint64_t stop_time, int nest_level, timer_e name) { - _event_vector.push_back(start_time, stop_time, nest_level, name); - } - void deallocate(); - class iterator; - kmp_stats_list::iterator begin(); - kmp_stats_list::iterator end(); - int size(); - class iterator { - kmp_stats_list* ptr; - friend kmp_stats_list::iterator kmp_stats_list::begin(); - friend kmp_stats_list::iterator kmp_stats_list::end(); - public: - iterator(); - ~iterator(); - iterator operator++(); - iterator operator++(int dummy); - iterator operator--(); - iterator operator--(int dummy); - bool operator!=(const iterator & rhs); - bool operator==(const iterator & rhs); - kmp_stats_list* operator*() const; // dereference operator - }; + } + ~kmp_stats_list() {} + inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; } + inline counter *getCounter(counter_e idx) { return &_counters[idx]; } + inline explicitTimer *getExplicitTimer(explicit_timer_e idx) { + return &_explicitTimers[idx]; + } + inline partitionedTimers *getPartitionedTimers() { + return &_partitionedTimers; + } + inline timeStat *getTimers() { return _timers; } + inline counter *getCounters() { return _counters; } + inline explicitTimer *getExplicitTimers() { return _explicitTimers; } + inline kmp_stats_event_vector &getEventVector() { return _event_vector; } + inline void resetEventVector() { _event_vector.reset(); } + inline void incrementNestValue() { _nestLevel++; } + inline int getNestValue() { return _nestLevel; } + inline void decrementNestValue() { _nestLevel--; } + inline int getGtid() const { return gtid; } + inline void setGtid(int newgtid) { gtid = newgtid; } + inline void setState(stats_state_e newstate) { state = newstate; } + inline stats_state_e getState() const { return state; } + inline stats_state_e *getStatePointer() { return &state; } + inline bool isIdle() { return thread_is_idle_flag == 1; } + inline void setIdleFlag() { thread_is_idle_flag = 1; } + inline void resetIdleFlag() { thread_is_idle_flag = 0; } + kmp_stats_list *push_back(int gtid); // returns newly created list node + inline void push_event(uint64_t start_time, uint64_t stop_time, + int nest_level, timer_e name) { + _event_vector.push_back(start_time, stop_time, nest_level, name); + } + void deallocate(); + class iterator; + kmp_stats_list::iterator begin(); + kmp_stats_list::iterator end(); + int size(); + class iterator { + kmp_stats_list *ptr; + friend kmp_stats_list::iterator kmp_stats_list::begin(); + friend kmp_stats_list::iterator kmp_stats_list::end(); + + public: + iterator(); + ~iterator(); + iterator operator++(); + iterator operator++(int dummy); + 
iterator operator--(); + iterator operator--(int dummy); + bool operator!=(const iterator &rhs); + bool operator==(const iterator &rhs); + kmp_stats_list *operator*() const; // dereference operator + }; }; /* **************************************************************** Class to encapsulate all output functions and the environment variables - This module holds filenames for various outputs (normal stats, events, plot file), - as well as coloring information for the plot file. + This module holds filenames for various outputs (normal stats, events, plot + file), as well as coloring information for the plot file. The filenames and flags variables are read from environment variables. - These are read once by the constructor of the global variable __kmp_stats_output - which calls init(). + These are read once by the constructor of the global variable + __kmp_stats_output which calls init(). - During this init() call, event flags for the timeStat::timerInfo[] global array - are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). + During this init() call, event flags for the timeStat::timerInfo[] global + array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes). - The only interface function that is public is outputStats(heading). This function - should print out everything it needs to, either to files or stderr, + The only interface function that is public is outputStats(heading). This + function should print out everything it needs to, either to files or stderr, depending on the environment variables described below ENVIRONMENT VARIABLES: - KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this file, - otherwise, print to stderr - KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to either - KMP_STATS_FILE or stderr + KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this + file, otherwise, print to stderr + KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to + either KMP_STATS_FILE or stderr KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename, otherwise, the plot file is sent to "events.plt" - KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log events + KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log + events KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file, otherwise, output is sent to "events.dat" - **************************************************************** */ class kmp_stats_output_module { - public: - struct rgb_color { - float r; - float g; - float b; - }; - - private: - std::string outputFileName; - static const char* eventsFileName; - static const char* plotFileName; - static int printPerThreadFlag; - static int printPerThreadEventsFlag; - static const rgb_color globalColorArray[]; - static rgb_color timerColorInfo[]; - - void init(); - static void setupEventColors(); - static void printPloticusFile(); - static void printHeaderInfo(FILE *statsOut); - static void printTimerStats(FILE *statsOut, statistic const * theStats, statistic const * totalStats); - static void printCounterStats(FILE *statsOut, statistic const * theStats); - static void printCounters(FILE * statsOut, counter const * theCounters); - static void printEvents(FILE * eventsOut, kmp_stats_event_vector* theEvents, int gtid); - static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } - static void windupExplicitTimers(); - bool eventPrintingEnabled() const { return printPerThreadEventsFlag; } - - 
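The environment handling described above hinges on interpreting values such as "on", "1" or "yes" as true, the KMP_STATS_EVENTS case in particular. A small standalone sketch of that kind of check follows; env_is_true is an illustrative helper that mirrors the truth values listed in the comment, not the runtime's actual parser.

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Treats "on", "1" and "yes" as true, anything else (or an unset variable)
// as false. Case-insensitive matching and other spellings are left out.
static bool env_is_true(const char *name) {
  const char *v = std::getenv(name);
  if (v == nullptr)
    return false;
  return std::strcmp(v, "on") == 0 || std::strcmp(v, "1") == 0 ||
         std::strcmp(v, "yes") == 0;
}

int main() {
  if (env_is_true("KMP_STATS_EVENTS"))
    std::printf("event logging requested\n");
  else
    std::printf("event logging off; the event flags would be cleared\n");
  return 0;
}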
public: - kmp_stats_output_module() { init(); } - void outputStats(const char* heading); +public: + struct rgb_color { + float r; + float g; + float b; + }; + +private: + std::string outputFileName; + static const char *eventsFileName; + static const char *plotFileName; + static int printPerThreadFlag; + static int printPerThreadEventsFlag; + static const rgb_color globalColorArray[]; + static rgb_color timerColorInfo[]; + + void init(); + static void setupEventColors(); + static void printPloticusFile(); + static void printHeaderInfo(FILE *statsOut); + static void printTimerStats(FILE *statsOut, statistic const *theStats, + statistic const *totalStats); + static void printCounterStats(FILE *statsOut, statistic const *theStats); + static void printCounters(FILE *statsOut, counter const *theCounters); + static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents, + int gtid); + static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; } + static void windupExplicitTimers(); + bool eventPrintingEnabled() const { return printPerThreadEventsFlag; } + +public: + kmp_stats_output_module() { init(); } + void outputStats(const char *heading); }; #ifdef __cplusplus @@ -693,11 +746,11 @@ void __kmp_reset_stats(); void __kmp_output_stats(const char *); void __kmp_accumulate_stats_at_exit(void); // thread local pointer to stats node within list -extern __thread kmp_stats_list* __kmp_stats_thread_ptr; +extern __thread kmp_stats_list *__kmp_stats_thread_ptr; // head to stats list. -extern kmp_stats_list* __kmp_stats_list; +extern kmp_stats_list *__kmp_stats_list; // lock for __kmp_stats_list -extern kmp_tas_lock_t __kmp_stats_lock; +extern kmp_tas_lock_t __kmp_stats_lock; // reference start time extern tsc_tick_count __kmp_stats_start_time; // interface to output @@ -709,21 +762,21 @@ extern kmp_stats_output_module __kmp_stats_output; // Simple, standard interfaces that drop out completely if stats aren't enabled - /*! * \brief Uses specified timer (name) to time code block. * * @param name timer name as specified under the KMP_FOREACH_TIMER() macro * - * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will record the time taken in the block - * and use the destructor to stop the timer. Convenient! - * With this definition you can't have more than one KMP_TIME_BLOCK in the same code block. - * I don't think that's a problem. + * \details Use KMP_TIME_BLOCK(name) macro to time a code block. This will + * record the time taken in the block and use the destructor to stop the timer. + * Convenient! With this definition you can't have more than one KMP_TIME_BLOCK + * in the same code block. I don't think that's a problem. * * @ingroup STATS_GATHERING */ -#define KMP_TIME_BLOCK(name) \ - blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name) +#define KMP_TIME_BLOCK(name) \ + blockTimer __BLOCKTIME__(__kmp_stats_thread_ptr->getTimer(TIMER_##name), \ + TIMER_##name) /*! * \brief Adds value to specified timer (name). @@ -731,69 +784,83 @@ extern kmp_stats_output_module __kmp_stats_output; * @param name timer name as specified under the KMP_FOREACH_TIMER() macro * @param value double precision sample value to add to statistics for the timer * - * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to a timer statistics. + * \details Use KMP_COUNT_VALUE(name, value) macro to add a particular value to + * a timer statistics. 
* * @ingroup STATS_GATHERING */ -#define KMP_COUNT_VALUE(name, value) \ - __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) +#define KMP_COUNT_VALUE(name, value) \ + __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value) /*! * \brief Increments specified counter (name). * * @param name counter name as specified under the KMP_FOREACH_COUNTER() macro * - * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics counter for the executing thread. + * \details Use KMP_COUNT_BLOCK(name, value) macro to increment a statistics + * counter for the executing thread. * * @ingroup STATS_GATHERING */ -#define KMP_COUNT_BLOCK(name) \ - __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() +#define KMP_COUNT_BLOCK(name) \ + __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment() /*! - * \brief "Starts" an explicit timer which will need a corresponding KMP_STOP_EXPLICIT_TIMER() macro. + * \brief "Starts" an explicit timer which will need a corresponding + * KMP_STOP_EXPLICIT_TIMER() macro. * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * @param name explicit timer name as specified under the + * KMP_FOREACH_EXPLICIT_TIMER() macro * - * \details Use to start a timer. This will need a corresponding KMP_STOP_EXPLICIT_TIMER() - * macro to stop the timer unlike the KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end - * of the code block. All explicit timers are stopped at library exit time before the final statistics are outputted. + * \details Use to start a timer. This will need a corresponding + * KMP_STOP_EXPLICIT_TIMER() macro to stop the timer unlike the + * KMP_TIME_BLOCK(name) macro which has an implicit stopping macro at the end + * of the code block. All explicit timers are stopped at library exit time + * before the final statistics are outputted. * * @ingroup STATS_GATHERING */ -#define KMP_START_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->start(TIMER_##name) +#define KMP_START_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ + ->start(TIMER_##name) /*! * \brief "Stops" an explicit timer. * - * @param name explicit timer name as specified under the KMP_FOREACH_EXPLICIT_TIMER() macro + * @param name explicit timer name as specified under the + * KMP_FOREACH_EXPLICIT_TIMER() macro * - * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is done, the time between the last KMP_START_EXPLICIT_TIMER(name) - * and this KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The timer will then be reset. - * After the KMP_STOP_EXPLICIT_TIMER(name) macro is called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer once again. + * \details Use KMP_STOP_EXPLICIT_TIMER(name) to stop a timer. When this is + * done, the time between the last KMP_START_EXPLICIT_TIMER(name) and this + * KMP_STOP_EXPLICIT_TIMER(name) will be added to the timer's stat value. The + * timer will then be reset. After the KMP_STOP_EXPLICIT_TIMER(name) macro is + * called, another call to KMP_START_EXPLICIT_TIMER(name) will start the timer + * once again. * * @ingroup STATS_GATHERING */ -#define KMP_STOP_EXPLICIT_TIMER(name) \ - __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name)->stop(TIMER_##name) +#define KMP_STOP_EXPLICIT_TIMER(name) \ + __kmp_stats_thread_ptr->getExplicitTimer(EXPLICIT_TIMER_##name) \ + ->stop(TIMER_##name) /*! 
* \brief Outputs the current thread statistics and reset them. * * @param heading_string heading put above the final stats output * - * \details Explicitly stops all timers and outputs all stats. - * Environment variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a filename instead of stderr - * Environment variable, `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific stats - * For now the `OMPTB_STATSTHREADS` environment variable can either be defined with any value, which will print out thread - * specific stats, or it can be undefined (not specified in the environment) and thread specific stats won't be printed - * It should be noted that all statistics are reset when this macro is called. + * \details Explicitly stops all timers and outputs all stats. Environment + * variable, `OMPTB_STATSFILE=filename`, can be used to output the stats to a + * filename instead of stderr. Environment variable, + * `OMPTB_STATSTHREADS=true|undefined`, can be used to output thread specific + * stats. For now the `OMPTB_STATSTHREADS` environment variable can either be + * defined with any value, which will print out thread specific stats, or it can + * be undefined (not specified in the environment) and thread specific stats + * won't be printed. It should be noted that all statistics are reset when this + * macro is called. * * @ingroup STATS_GATHERING */ -#define KMP_OUTPUT_STATS(heading_string) \ - __kmp_output_stats(heading_string) +#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string) /*! * \brief Initializes the paritioned timers to begin with name. @@ -802,27 +869,30 @@ extern kmp_stats_output_module __kmp_stats_output; * * @ingroup STATS_GATHERING */ -#define KMP_INIT_PARTITIONED_TIMERS(name) \ - __kmp_stats_thread_ptr->getPartitionedTimers()->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) +#define KMP_INIT_PARTITIONED_TIMERS(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->init( \ + timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) -#define KMP_TIME_PARTITIONED_BLOCK(name) \ - blockPartitionedTimer __PBLOCKTIME__(__kmp_stats_thread_ptr->getPartitionedTimers(), \ - timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) +#define KMP_TIME_PARTITIONED_BLOCK(name) \ + blockPartitionedTimer __PBLOCKTIME__( \ + __kmp_stats_thread_ptr->getPartitionedTimers(), \ + timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) -#define KMP_PUSH_PARTITIONED_TIMER(name) \ - __kmp_stats_thread_ptr->getPartitionedTimers()->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) +#define KMP_PUSH_PARTITIONED_TIMER(name) \ + __kmp_stats_thread_ptr->getPartitionedTimers()->push( \ + timerPair(EXPLICIT_TIMER_##name, TIMER_##name)) -#define KMP_POP_PARTITIONED_TIMER() \ - __kmp_stats_thread_ptr->getPartitionedTimers()->pop() +#define KMP_POP_PARTITIONED_TIMER() \ + __kmp_stats_thread_ptr->getPartitionedTimers()->pop() -#define KMP_SET_THREAD_STATE(state_name) \ - __kmp_stats_thread_ptr->setState(state_name) +#define KMP_SET_THREAD_STATE(state_name) \ + __kmp_stats_thread_ptr->setState(state_name) -#define KMP_GET_THREAD_STATE() \ - __kmp_stats_thread_ptr->getState() +#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState() -#define KMP_SET_THREAD_STATE_BLOCK(state_name) \ - blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), state_name) +#define KMP_SET_THREAD_STATE_BLOCK(state_name) \ + blockThreadState __BTHREADSTATE__(__kmp_stats_thread_ptr->getStatePointer(), \ + state_name) /*! 
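Taken together, the macros above are meant to be dropped straight into runtime code. The fragment below is an illustration only: it assumes a build with KMP_STATS_ENABLED and kmp_stats.h available, with the per-thread stats pointer already set up, and it borrows timer names (FOR_static_scheduling, FOR_static_iterations, OMP_single, OMP_parallel) from the KMP_FOREACH_TIMER list above; the example_* functions are hypothetical.

// Illustrative only: relies on kmp_stats.h and KMP_STATS_ENABLED.
static void example_static_loop_scheduling(int nchunks) {
  // Scope timer: stopped automatically when the block is left.
  KMP_TIME_BLOCK(FOR_static_scheduling);
  // Feed a raw value into the chunk-count statistic.
  KMP_COUNT_VALUE(FOR_static_iterations, nchunks);
  // ... compute the chunk bounds here ...
}

static void example_single_region() {
  // Explicit timer: must be stopped by hand, unlike KMP_TIME_BLOCK.
  KMP_START_EXPLICIT_TIMER(OMP_single);
  // ... body of the single construct ...
  KMP_STOP_EXPLICIT_TIMER(OMP_single);
}

static void example_parallel_region() {
  // Partitioned block timer: pushed on entry, popped automatically on exit.
  KMP_TIME_PARTITIONED_BLOCK(OMP_parallel);
  // ... fork, run the region, join ...
}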
* \brief resets all stats (counters to 0, timers to 0 elapsed ticks) @@ -831,50 +901,50 @@ extern kmp_stats_output_module __kmp_stats_output; * * @ingroup STATS_GATHERING */ -#define KMP_RESET_STATS() __kmp_reset_stats() +#define KMP_RESET_STATS() __kmp_reset_stats() #if (KMP_DEVELOPER_STATS) -# define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) KMP_COUNT_VALUE(n,v) -# define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) -# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) +#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n) +#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v) +#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n) +#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n) +#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n) +#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n) #else // Null definitions -# define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -# define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -# define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -# define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -# define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) +#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) +#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) #endif #else // KMP_STATS_ENABLED // Null definitions -#define KMP_TIME_BLOCK(n) ((void)0) -#define KMP_COUNT_VALUE(n,v) ((void)0) -#define KMP_COUNT_BLOCK(n) ((void)0) -#define KMP_START_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) +#define KMP_TIME_BLOCK(n) ((void)0) +#define KMP_COUNT_VALUE(n, v) ((void)0) +#define KMP_COUNT_BLOCK(n) ((void)0) +#define KMP_START_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0) #define KMP_OUTPUT_STATS(heading_string) ((void)0) -#define KMP_RESET_STATS() ((void)0) - -#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_COUNT_DEVELOPER_VALUE(n,v) ((void)0) -#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) -#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) -#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) -#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) +#define KMP_RESET_STATS() ((void)0) + +#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0) +#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0) +#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0) +#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0) +#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0) #define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0) -#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) -#define KMP_POP_PARTITIONED_TIMER() ((void)0) -#define KMP_SET_THREAD_STATE(state_name) ((void)0) -#define KMP_GET_THREAD_STATE() ((void)0) -#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) -#endif // KMP_STATS_ENABLED +#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0) +#define KMP_POP_PARTITIONED_TIMER() ((void)0) +#define 
KMP_SET_THREAD_STATE(state_name) ((void)0) +#define KMP_GET_THREAD_STATE() ((void)0) +#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0) +#endif // KMP_STATS_ENABLED #endif // KMP_STATS_H diff --git a/openmp/runtime/src/kmp_stats_timing.cpp b/openmp/runtime/src/kmp_stats_timing.cpp index 62cecc8..52d70fb 100644 --- a/openmp/runtime/src/kmp_stats_timing.cpp +++ b/openmp/runtime/src/kmp_stats_timing.cpp @@ -16,8 +16,8 @@ #include #include -#include #include +#include #include #include "kmp.h" @@ -26,119 +26,107 @@ using namespace std; #if KMP_HAVE_TICK_TIME -# if KMP_MIC -double tsc_tick_count::tick_time() -{ - // pretty bad assumption of 1GHz clock for MIC - return 1/((double)1000*1.e6); +#if KMP_MIC +double tsc_tick_count::tick_time() { + // pretty bad assumption of 1GHz clock for MIC + return 1 / ((double)1000 * 1.e6); } -# elif KMP_ARCH_X86 || KMP_ARCH_X86_64 -# include +#elif KMP_ARCH_X86 || KMP_ARCH_X86_64 +#include // Extract the value from the CPUID information -double tsc_tick_count::tick_time() -{ - static double result = 0.0; - - if (result == 0.0) - { - kmp_cpuid_t cpuinfo; - char brand[256]; - - __kmp_x86_cpuid(0x80000000, 0, &cpuinfo); - memset(brand, 0, sizeof(brand)); - int ids = cpuinfo.eax; - - for (unsigned int i=2; i<(ids^0x80000000)+2; i++) - __kmp_x86_cpuid(i | 0x80000000, 0, (kmp_cpuid_t*)(brand+(i-2)*sizeof(kmp_cpuid_t))); - - char * start = &brand[0]; - for (;*start == ' '; start++) - ; - - char * end = brand + KMP_STRLEN(brand) - 3; - uint64_t multiplier; - - if (*end == 'M') multiplier = 1000LL*1000LL; - else if (*end == 'G') multiplier = 1000LL*1000LL*1000LL; - else if (*end == 'T') multiplier = 1000LL*1000LL*1000LL*1000LL; - else - { - cout << "Error determining multiplier '" << *end << "'\n"; - exit (-1); - } - *end = 0; - while (*end != ' ') end--; - end++; - - double freq = strtod(end, &start); - if (freq == 0.0) - { - cout << "Error calculating frequency " << end << "\n"; - exit (-1); - } - - result = ((double)1.0)/(freq * multiplier); +double tsc_tick_count::tick_time() { + static double result = 0.0; + + if (result == 0.0) { + kmp_cpuid_t cpuinfo; + char brand[256]; + + __kmp_x86_cpuid(0x80000000, 0, &cpuinfo); + memset(brand, 0, sizeof(brand)); + int ids = cpuinfo.eax; + + for (unsigned int i = 2; i < (ids ^ 0x80000000) + 2; i++) + __kmp_x86_cpuid(i | 0x80000000, 0, + (kmp_cpuid_t *)(brand + (i - 2) * sizeof(kmp_cpuid_t))); + + char *start = &brand[0]; + for (; *start == ' '; start++) + ; + + char *end = brand + KMP_STRLEN(brand) - 3; + uint64_t multiplier; + + if (*end == 'M') + multiplier = 1000LL * 1000LL; + else if (*end == 'G') + multiplier = 1000LL * 1000LL * 1000LL; + else if (*end == 'T') + multiplier = 1000LL * 1000LL * 1000LL * 1000LL; + else { + cout << "Error determining multiplier '" << *end << "'\n"; + exit(-1); + } + *end = 0; + while (*end != ' ') + end--; + end++; + + double freq = strtod(end, &start); + if (freq == 0.0) { + cout << "Error calculating frequency " << end << "\n"; + exit(-1); } - return result; + + result = ((double)1.0) / (freq * multiplier); + } + return result; } -# endif +#endif #endif static bool useSI = true; // Return a formatted string after normalising the value into // engineering style and using a suitable unit prefix (e.g. ms, us, ns). -std::string formatSI(double interval, int width, char unit) -{ - std::stringstream os; - - if (useSI) - { - // Preserve accuracy for small numbers, since we only multiply and the positive powers - // of ten are precisely representable. 
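tick_time() above returns the period of the timestamp counter in seconds per tick; on the MIC path it is simply hard-coded from an assumed 1 GHz clock, and on x86 it is derived from the CPUID brand string. Converting an elapsed tick count to seconds is then a single multiplication, which is all tsc_interval_t::seconds() in kmp_stats_timing.h below does. A tiny standalone illustration, with the 1 GHz figure assumed purely for the example:

#include <cstdint>
#include <cstdio>

int main() {
  const double seconds_per_tick = 1.0 / 1.0e9; // assumed 1 GHz clock
  const int64_t elapsed_ticks = 2500000;       // example measurement
  // 2.5 million ticks at 1 GHz is 2.5 ms; formatSeconds() in this file
  // would render that as roughly "2.50 mS".
  std::printf("%.6f seconds\n", (double)elapsed_ticks * seconds_per_tick);
  return 0;
}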
- static struct { double scale; char prefix; } ranges[] = { - {1.e12,'f'}, - {1.e9, 'p'}, - {1.e6, 'n'}, - {1.e3, 'u'}, - {1.0, 'm'}, - {1.e-3,' '}, - {1.e-6,'k'}, - {1.e-9,'M'}, - {1.e-12,'G'}, - {1.e-15,'T'}, - {1.e-18,'P'}, - {1.e-21,'E'}, - {1.e-24,'Z'}, - {1.e-27,'Y'} - }; - - if (interval == 0.0) - { - os << std::setw(width-3) << std::right << "0.00" << std::setw(3) << unit; - return os.str(); - } - - bool negative = false; - if (interval < 0.0) - { - negative = true; - interval = -interval; - } - - for (int i=0; i<(int)(sizeof(ranges)/sizeof(ranges[0])); i++) - { - if (interval*ranges[i].scale < 1.e0) - { - interval = interval * 1000.e0 * ranges[i].scale; - os << std::fixed << std::setprecision(2) << std::setw(width-3) << std::right << - (negative ? -interval : interval) << std::setw(2) << ranges[i].prefix << std::setw(1) << unit; - - return os.str(); - } - } +std::string formatSI(double interval, int width, char unit) { + std::stringstream os; + + if (useSI) { + // Preserve accuracy for small numbers, since we only multiply and the + // positive powers of ten are precisely representable. + static struct { + double scale; + char prefix; + } ranges[] = {{1.e12, 'f'}, {1.e9, 'p'}, {1.e6, 'n'}, {1.e3, 'u'}, + {1.0, 'm'}, {1.e-3, ' '}, {1.e-6, 'k'}, {1.e-9, 'M'}, + {1.e-12, 'G'}, {1.e-15, 'T'}, {1.e-18, 'P'}, {1.e-21, 'E'}, + {1.e-24, 'Z'}, {1.e-27, 'Y'}}; + + if (interval == 0.0) { + os << std::setw(width - 3) << std::right << "0.00" << std::setw(3) + << unit; + return os.str(); + } + + bool negative = false; + if (interval < 0.0) { + negative = true; + interval = -interval; + } + + for (int i = 0; i < (int)(sizeof(ranges) / sizeof(ranges[0])); i++) { + if (interval * ranges[i].scale < 1.e0) { + interval = interval * 1000.e0 * ranges[i].scale; + os << std::fixed << std::setprecision(2) << std::setw(width - 3) + << std::right << (negative ? 
-interval : interval) << std::setw(2) + << ranges[i].prefix << std::setw(1) << unit; + + return os.str(); + } } - os << std::setprecision(2) << std::fixed << std::right << std::setw(width-3) << interval << std::setw(3) << unit; + } + os << std::setprecision(2) << std::fixed << std::right << std::setw(width - 3) + << interval << std::setw(3) << unit; - return os.str(); + return os.str(); } diff --git a/openmp/runtime/src/kmp_stats_timing.h b/openmp/runtime/src/kmp_stats_timing.h index 0605d94..69195b9 100644 --- a/openmp/runtime/src/kmp_stats_timing.h +++ b/openmp/runtime/src/kmp_stats_timing.h @@ -16,97 +16,103 @@ //===----------------------------------------------------------------------===// - +#include "kmp_os.h" +#include #include #include -#include -#include "kmp_os.h" #if KMP_HAVE_X86INTRIN_H -# include +#include #endif class tsc_tick_count { - private: - int64_t my_count; +private: + int64_t my_count; + +public: + class tsc_interval_t { + int64_t value; + explicit tsc_interval_t(int64_t _value) : value(_value) {} public: - class tsc_interval_t { - int64_t value; - explicit tsc_interval_t(int64_t _value) : value(_value) {} - public: - tsc_interval_t() : value(0) {}; // Construct 0 time duration + tsc_interval_t() : value(0){}; // Construct 0 time duration #if KMP_HAVE_TICK_TIME - double seconds() const; // Return the length of a time interval in seconds + double seconds() const; // Return the length of a time interval in seconds #endif - double ticks() const { return double(value); } - int64_t getValue() const { return value; } - tsc_interval_t& operator=(int64_t nvalue) { value = nvalue; return *this; } + double ticks() const { return double(value); } + int64_t getValue() const { return value; } + tsc_interval_t &operator=(int64_t nvalue) { + value = nvalue; + return *this; + } - friend class tsc_tick_count; + friend class tsc_tick_count; - friend tsc_interval_t operator-(const tsc_tick_count& t1, - const tsc_tick_count& t0); - friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, - const tsc_tick_count::tsc_interval_t& i0); - friend tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, - const tsc_tick_count::tsc_interval_t& i0); - }; + friend tsc_interval_t operator-(const tsc_tick_count &t1, + const tsc_tick_count &t0); + friend tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t &i1, + const tsc_tick_count::tsc_interval_t &i0); + friend tsc_interval_t &operator+=(tsc_tick_count::tsc_interval_t &i1, + const tsc_tick_count::tsc_interval_t &i0); + }; #if KMP_HAVE___BUILTIN_READCYCLECOUNTER - tsc_tick_count() : my_count(static_cast(__builtin_readcyclecounter())) {} + tsc_tick_count() + : my_count(static_cast(__builtin_readcyclecounter())) {} #elif KMP_HAVE___RDTSC - tsc_tick_count() : my_count(static_cast(__rdtsc())) {}; + tsc_tick_count() : my_count(static_cast(__rdtsc())){}; #else -# error Must have high resolution timer defined +#error Must have high resolution timer defined #endif - tsc_tick_count(int64_t value) : my_count(value) {}; - int64_t getValue() const { return my_count; } - tsc_tick_count later (tsc_tick_count const other) const { - return my_count > other.my_count ? (*this) : other; - } - tsc_tick_count earlier(tsc_tick_count const other) const { - return my_count < other.my_count ? (*this) : other; - } + tsc_tick_count(int64_t value) : my_count(value){}; + int64_t getValue() const { return my_count; } + tsc_tick_count later(tsc_tick_count const other) const { + return my_count > other.my_count ? 
(*this) : other; + } + tsc_tick_count earlier(tsc_tick_count const other) const { + return my_count < other.my_count ? (*this) : other; + } #if KMP_HAVE_TICK_TIME - static double tick_time(); // returns seconds per cycle (period) of clock + static double tick_time(); // returns seconds per cycle (period) of clock #endif - static tsc_tick_count now() { return tsc_tick_count(); } // returns the rdtsc register value - friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0); + static tsc_tick_count now() { + return tsc_tick_count(); + } // returns the rdtsc register value + friend tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1, + const tsc_tick_count &t0); }; -inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count& t1, const tsc_tick_count& t0) -{ - return tsc_tick_count::tsc_interval_t( t1.my_count-t0.my_count ); +inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count &t1, + const tsc_tick_count &t0) { + return tsc_tick_count::tsc_interval_t(t1.my_count - t0.my_count); } -inline tsc_tick_count::tsc_interval_t operator-(const tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0) -{ - return tsc_tick_count::tsc_interval_t( i1.value-i0.value ); +inline tsc_tick_count::tsc_interval_t +operator-(const tsc_tick_count::tsc_interval_t &i1, + const tsc_tick_count::tsc_interval_t &i0) { + return tsc_tick_count::tsc_interval_t(i1.value - i0.value); } -inline tsc_tick_count::tsc_interval_t& operator+=(tsc_tick_count::tsc_interval_t& i1, const tsc_tick_count::tsc_interval_t& i0) -{ - i1.value += i0.value; - return i1; +inline tsc_tick_count::tsc_interval_t & +operator+=(tsc_tick_count::tsc_interval_t &i1, + const tsc_tick_count::tsc_interval_t &i0) { + i1.value += i0.value; + return i1; } #if KMP_HAVE_TICK_TIME -inline double tsc_tick_count::tsc_interval_t::seconds() const -{ - return value*tick_time(); +inline double tsc_tick_count::tsc_interval_t::seconds() const { + return value * tick_time(); } #endif extern std::string formatSI(double interval, int width, char unit); -inline std::string formatSeconds(double interval, int width) -{ - return formatSI(interval, width, 'S'); +inline std::string formatSeconds(double interval, int width) { + return formatSI(interval, width, 'S'); } -inline std::string formatTicks(double interval, int width) -{ - return formatSI(interval, width, 'T'); +inline std::string formatTicks(double interval, int width) { + return formatSI(interval, width, 'T'); } #endif // KMP_STATS_TIMING_H diff --git a/openmp/runtime/src/kmp_str.cpp b/openmp/runtime/src/kmp_str.cpp index 8d633ad..f795807 100644 --- a/openmp/runtime/src/kmp_str.cpp +++ b/openmp/runtime/src/kmp_str.cpp @@ -15,866 +15,721 @@ #include "kmp_str.h" -#include // va_* -#include // vsnprintf() -#include // malloc(), realloc() +#include // va_* +#include // vsnprintf() +#include // malloc(), realloc() #include "kmp.h" #include "kmp_i18n.h" -/* - ------------------------------------------------------------------------------------------------ - String buffer. - ------------------------------------------------------------------------------------------------ - - Usage: - - // Declare buffer and initialize it. - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - - // Print to buffer. - __kmp_str_buf_print( & buffer, "Error in file \"%s\" line %d\n", "foo.c", 12 ); - __kmp_str_buf_print( & buffer, " <%s>\n", line ); +/* String buffer. - // Use buffer contents. 
buffer.str is a pointer to data, buffer.used is a number of printed - // characters (not including terminating zero). - write( fd, buffer.str, buffer.used ); + Usage: - // Free buffer. - __kmp_str_buf_free( & buffer ); + // Declare buffer and initialize it. + kmp_str_buf_t buffer; + __kmp_str_buf_init( & buffer ); - // Alternatively, you can detach allocated memory from buffer: - __kmp_str_buf_detach( & buffer ); - return buffer.str; // That memory should be freed eventually. + // Print to buffer. + __kmp_str_buf_print(& buffer, "Error in file \"%s\" line %d\n", "foo.c", 12); + __kmp_str_buf_print(& buffer, " <%s>\n", line); + // Use buffer contents. buffer.str is a pointer to data, buffer.used is a + // number of printed characters (not including terminating zero). + write( fd, buffer.str, buffer.used ); - Notes: + // Free buffer. + __kmp_str_buf_free( & buffer ); - * Buffer users may use buffer.str and buffer.used. Users should not change any fields of - buffer directly. + // Alternatively, you can detach allocated memory from buffer: + __kmp_str_buf_detach( & buffer ); + return buffer.str; // That memory should be freed eventually. - * buffer.str is never NULL. If buffer is empty, buffer.str points to empty string (""). + Notes: - * For performance reasons, buffer uses stack memory (buffer.bulk) first. If stack memory is - exhausted, buffer allocates memory on heap by malloc(), and reallocates it by realloc() - as amount of used memory grows. - - * Buffer doubles amount of allocated memory each time it is exhausted. - - ------------------------------------------------------------------------------------------------ + * Buffer users may use buffer.str and buffer.used. Users should not change + any fields of buffer directly. + * buffer.str is never NULL. If buffer is empty, buffer.str points to empty + string (""). + * For performance reasons, buffer uses stack memory (buffer.bulk) first. If + stack memory is exhausted, buffer allocates memory on heap by malloc(), and + reallocates it by realloc() as amount of used memory grows. + * Buffer doubles amount of allocated memory each time it is exhausted. */ // TODO: __kmp_str_buf_print() can use thread local memory allocator. -#define KMP_STR_BUF_INVARIANT( b ) \ - { \ - KMP_DEBUG_ASSERT( (b)->str != NULL ); \ - KMP_DEBUG_ASSERT( (b)->size >= sizeof( (b)->bulk ) ); \ - KMP_DEBUG_ASSERT( (b)->size % sizeof( (b)->bulk ) == 0 ); \ - KMP_DEBUG_ASSERT( (unsigned)(b)->used < (b)->size ); \ - KMP_DEBUG_ASSERT( (b)->size == sizeof( (b)->bulk ) ? (b)->str == & (b)->bulk[ 0 ] : 1 ); \ - KMP_DEBUG_ASSERT( (b)->size > sizeof( (b)->bulk ) ? (b)->str != & (b)->bulk[ 0 ] : 1 ); \ - } - -void - __kmp_str_buf_clear( - kmp_str_buf_t * buffer -) { - KMP_STR_BUF_INVARIANT( buffer ); - if ( buffer->used > 0 ) { - buffer->used = 0; - buffer->str[ 0 ] = 0; - }; // if - KMP_STR_BUF_INVARIANT( buffer ); +#define KMP_STR_BUF_INVARIANT(b) \ + { \ + KMP_DEBUG_ASSERT((b)->str != NULL); \ + KMP_DEBUG_ASSERT((b)->size >= sizeof((b)->bulk)); \ + KMP_DEBUG_ASSERT((b)->size % sizeof((b)->bulk) == 0); \ + KMP_DEBUG_ASSERT((unsigned)(b)->used < (b)->size); \ + KMP_DEBUG_ASSERT( \ + (b)->size == sizeof((b)->bulk) ? (b)->str == &(b)->bulk[0] : 1); \ + KMP_DEBUG_ASSERT((b)->size > sizeof((b)->bulk) ? 
(b)->str != &(b)->bulk[0] \ + : 1); \ + } + +void __kmp_str_buf_clear(kmp_str_buf_t *buffer) { + KMP_STR_BUF_INVARIANT(buffer); + if (buffer->used > 0) { + buffer->used = 0; + buffer->str[0] = 0; + }; // if + KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_clear +void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size) { + KMP_STR_BUF_INVARIANT(buffer); + KMP_DEBUG_ASSERT(size >= 0); -void -__kmp_str_buf_reserve( - kmp_str_buf_t * buffer, - int size -) { - - KMP_STR_BUF_INVARIANT( buffer ); - KMP_DEBUG_ASSERT( size >= 0 ); - - if ( buffer->size < (unsigned int)size ) { - - // Calculate buffer size. - do { - buffer->size *= 2; - } while ( buffer->size < (unsigned int)size ); - - // Enlarge buffer. - if ( buffer->str == & buffer->bulk[ 0 ] ) { - buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); - } else { - buffer->str = (char *) KMP_INTERNAL_REALLOC( buffer->str, buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - }; // if - + if (buffer->size < (unsigned int)size) { + // Calculate buffer size. + do { + buffer->size *= 2; + } while (buffer->size < (unsigned int)size); + + // Enlarge buffer. + if (buffer->str == &buffer->bulk[0]) { + buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); + if (buffer->str == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); + } else { + buffer->str = (char *)KMP_INTERNAL_REALLOC(buffer->str, buffer->size); + if (buffer->str == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if }; // if - KMP_DEBUG_ASSERT( buffer->size > 0 ); - KMP_DEBUG_ASSERT( buffer->size >= (unsigned)size ); - KMP_STR_BUF_INVARIANT( buffer ); + }; // if + KMP_DEBUG_ASSERT(buffer->size > 0); + KMP_DEBUG_ASSERT(buffer->size >= (unsigned)size); + KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_reserve +void __kmp_str_buf_detach(kmp_str_buf_t *buffer) { + KMP_STR_BUF_INVARIANT(buffer); -void -__kmp_str_buf_detach( - kmp_str_buf_t * buffer -) { - - KMP_STR_BUF_INVARIANT( buffer ); - - // If internal bulk is used, allocate memory and copy it. - if ( buffer->size <= sizeof( buffer->bulk ) ) { - buffer->str = (char *) KMP_INTERNAL_MALLOC( buffer->size ); - if ( buffer->str == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - KMP_MEMCPY_S( buffer->str, buffer->size, buffer->bulk, buffer->used + 1 ); + // If internal bulk is used, allocate memory and copy it. 
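__kmp_str_buf_reserve() above implements the small-buffer scheme the header comment describes: the buffer starts out pointing at an embedded array (bulk) and only moves to the heap, doubling its capacity each time, once that array is outgrown. A standalone sketch of the same idea with plain malloc/realloc; small_buf and small_buf_reserve are illustrative names, not the runtime's types.

#include <cstdio>
#include <cstdlib>
#include <cstring>

// Starts on a small embedded array; switches to the heap and doubles when the
// requested size no longer fits. Mirrors the bulk/str/size scheme above.
struct small_buf {
  char bulk[64];
  char *str = bulk;
  size_t size = sizeof(bulk);
  size_t used = 0;
};

static void small_buf_reserve(small_buf *b, size_t need) {
  if (need <= b->size)
    return;
  size_t nsize = b->size;
  do {
    nsize *= 2;
  } while (nsize < need);
  if (b->str == b->bulk) {
    // First spill to the heap: copy what is already in the embedded array.
    char *p = (char *)std::malloc(nsize);
    if (!p)
      std::abort();
    std::memcpy(p, b->bulk, b->used);
    b->str = p;
  } else {
    char *p = (char *)std::realloc(b->str, nsize);
    if (!p)
      std::abort();
    b->str = p;
  }
  b->size = nsize;
}

int main() {
  small_buf b;
  small_buf_reserve(&b, 200); // forces the switch from bulk to the heap
  std::printf("capacity now %zu bytes, heap: %s\n", b.size,
              b.str == b.bulk ? "no" : "yes");
  if (b.str != b.bulk)
    std::free(b.str);
  return 0;
}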
+ if (buffer->size <= sizeof(buffer->bulk)) { + buffer->str = (char *)KMP_INTERNAL_MALLOC(buffer->size); + if (buffer->str == NULL) { + KMP_FATAL(MemoryAllocFailed); }; // if - + KMP_MEMCPY_S(buffer->str, buffer->size, buffer->bulk, buffer->used + 1); + }; // if } // __kmp_str_buf_detach - -void -__kmp_str_buf_free( - kmp_str_buf_t * buffer -) { - KMP_STR_BUF_INVARIANT( buffer ); - if ( buffer->size > sizeof( buffer->bulk ) ) { - KMP_INTERNAL_FREE( buffer->str ); - }; // if - buffer->str = buffer->bulk; - buffer->size = sizeof( buffer->bulk ); - buffer->used = 0; - KMP_STR_BUF_INVARIANT( buffer ); +void __kmp_str_buf_free(kmp_str_buf_t *buffer) { + KMP_STR_BUF_INVARIANT(buffer); + if (buffer->size > sizeof(buffer->bulk)) { + KMP_INTERNAL_FREE(buffer->str); + }; // if + buffer->str = buffer->bulk; + buffer->size = sizeof(buffer->bulk); + buffer->used = 0; + KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_free - -void -__kmp_str_buf_cat( - kmp_str_buf_t * buffer, - char const * str, - int len -) { - KMP_STR_BUF_INVARIANT( buffer ); - KMP_DEBUG_ASSERT( str != NULL ); - KMP_DEBUG_ASSERT( len >= 0 ); - __kmp_str_buf_reserve( buffer, buffer->used + len + 1 ); - KMP_MEMCPY( buffer->str + buffer->used, str, len ); - buffer->str[ buffer->used + len ] = 0; - buffer->used += len; - KMP_STR_BUF_INVARIANT( buffer ); +void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) { + KMP_STR_BUF_INVARIANT(buffer); + KMP_DEBUG_ASSERT(str != NULL); + KMP_DEBUG_ASSERT(len >= 0); + __kmp_str_buf_reserve(buffer, buffer->used + len + 1); + KMP_MEMCPY(buffer->str + buffer->used, str, len); + buffer->str[buffer->used + len] = 0; + buffer->used += len; + KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_cat +void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args) { + KMP_STR_BUF_INVARIANT(buffer); -void -__kmp_str_buf_vprint( - kmp_str_buf_t * buffer, - char const * format, - va_list args -) { - - KMP_STR_BUF_INVARIANT( buffer ); - - for ( ; ; ) { - - int const free = buffer->size - buffer->used; - int rc; - int size; - - // Try to format string. - { - /* - On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() crashes if it - is called for the second time with the same args. To prevent the crash, we have to - pass a fresh intact copy of args to vsnprintf() on each iteration. - - Unfortunately, standard va_copy() macro is not available on Windows* OS. However, it - seems vsnprintf() does not modify args argument on Windows* OS. - */ - - #if ! KMP_OS_WINDOWS - va_list _args; - __va_copy( _args, args ); // Make copy of args. - #define args _args // Substitute args with its copy, _args. - #endif // KMP_OS_WINDOWS - rc = KMP_VSNPRINTF( buffer->str + buffer->used, free, format, args ); - #if ! KMP_OS_WINDOWS - #undef args // Remove substitution. - va_end( _args ); - #endif // KMP_OS_WINDOWS - } - - // No errors, string has been formatted. - if ( rc >= 0 && rc < free ) { - buffer->used += rc; - break; - }; // if - - // Error occurred, buffer is too small. - if ( rc >= 0 ) { - // C99-conforming implementation of vsnprintf returns required buffer size. - size = buffer->used + rc + 1; - } else { - // Older implementations just return -1. Double buffer size. - size = buffer->size * 2; - }; // if - - // Enlarge buffer. - __kmp_str_buf_reserve( buffer, size ); - - // And try again. 
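__kmp_str_buf_vprint() above is the classic vsnprintf grow-and-retry loop: format into whatever space is free, and if the return value reports truncation (or an old-style -1), enlarge the buffer and try again with a fresh copy of the argument list. A standalone sketch of the same loop built on std::vector and std::string rather than the runtime's buffer type; format_string is an illustrative name.

#include <cstdarg>
#include <cstdio>
#include <string>
#include <vector>

// Formats like printf into a std::string, growing until vsnprintf fits.
static std::string format_string(const char *fmt, ...) {
  std::vector<char> buf(64);
  for (;;) {
    va_list args;
    va_start(args, fmt); // fresh copy of the arguments on every attempt
    int rc = std::vsnprintf(buf.data(), buf.size(), fmt, args);
    va_end(args);
    if (rc >= 0 && (size_t)rc < buf.size())
      return std::string(buf.data(), (size_t)rc); // it fit
    if (rc >= 0)
      buf.resize((size_t)rc + 1); // C99: rc is the size actually needed
    else
      buf.resize(buf.size() * 2); // older libraries return -1: just double
  }
}

int main() {
  std::string s = format_string("Error in file \"%s\" line %d\n", "foo.c", 12);
  std::printf("%s", s.c_str());
  return 0;
}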
- - }; // forever - - KMP_DEBUG_ASSERT( buffer->size > 0 ); - KMP_STR_BUF_INVARIANT( buffer ); + for (;;) { + int const free = buffer->size - buffer->used; + int rc; + int size; -} // __kmp_str_buf_vprint + // Try to format string. + { +/* On Linux* OS Intel(R) 64, vsnprintf() modifies args argument, so vsnprintf() + crashes if it is called for the second time with the same args. To prevent + the crash, we have to pass a fresh intact copy of args to vsnprintf() on each + iteration. + Unfortunately, standard va_copy() macro is not available on Windows* OS. + However, it seems vsnprintf() does not modify args argument on Windows* OS. +*/ -void -__kmp_str_buf_print( - kmp_str_buf_t * buffer, - char const * format, - ... -) { +#if !KMP_OS_WINDOWS + va_list _args; + __va_copy(_args, args); // Make copy of args. +#define args _args // Substitute args with its copy, _args. +#endif // KMP_OS_WINDOWS + rc = KMP_VSNPRINTF(buffer->str + buffer->used, free, format, args); +#if !KMP_OS_WINDOWS +#undef args // Remove substitution. + va_end(_args); +#endif // KMP_OS_WINDOWS + } - va_list args; - va_start( args, format ); - __kmp_str_buf_vprint( buffer, format, args ); - va_end( args ); + // No errors, string has been formatted. + if (rc >= 0 && rc < free) { + buffer->used += rc; + break; + }; // if -} // __kmp_str_buf_print + // Error occurred, buffer is too small. + if (rc >= 0) { + // C99-conforming implementation of vsnprintf returns required buffer size + size = buffer->used + rc + 1; + } else { + // Older implementations just return -1. Double buffer size. + size = buffer->size * 2; + }; // if + // Enlarge buffer. + __kmp_str_buf_reserve(buffer, size); -/* - The function prints specified size to buffer. Size is expressed using biggest possible unit, for - example 1024 is printed as "1k". -*/ + // And try again. + }; // forever -void -__kmp_str_buf_print_size( - kmp_str_buf_t * buf, - size_t size -) { - - char const * names[] = { "", "k", "M", "G", "T", "P", "E", "Z", "Y" }; - int const units = sizeof( names ) / sizeof( char const * ); - int u = 0; - if ( size > 0 ) { - while ( ( size % 1024 == 0 ) && ( u + 1 < units ) ) { - size = size / 1024; - ++ u; - }; // while - }; // if + KMP_DEBUG_ASSERT(buffer->size > 0); + KMP_STR_BUF_INVARIANT(buffer); +} // __kmp_str_buf_vprint + +void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { + va_list args; + va_start(args, format); + __kmp_str_buf_vprint(buffer, format, args); + va_end(args); +} // __kmp_str_buf_print - __kmp_str_buf_print( buf, "%" KMP_SIZE_T_SPEC "%s", size, names[ u ] ); +/* The function prints specified size to buffer. Size is expressed using biggest + possible unit, for example 1024 is printed as "1k". */ +void __kmp_str_buf_print_size(kmp_str_buf_t *buf, size_t size) { + char const *names[] = {"", "k", "M", "G", "T", "P", "E", "Z", "Y"}; + int const units = sizeof(names) / sizeof(char const *); + int u = 0; + if (size > 0) { + while ((size % 1024 == 0) && (u + 1 < units)) { + size = size / 1024; + ++u; + }; // while + }; // if + __kmp_str_buf_print(buf, "%" KMP_SIZE_T_SPEC "%s", size, names[u]); } // __kmp_str_buf_print_size - -void -__kmp_str_fname_init( - kmp_str_fname_t * fname, - char const * path -) { - - fname->path = NULL; - fname->dir = NULL; - fname->base = NULL; - - if ( path != NULL ) { - char * slash = NULL; // Pointer to the last character of dir. - char * base = NULL; // Pointer to the beginning of basename. 
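/* Worked examples for __kmp_str_buf_print_size(): the value is divided by 1024
   while it stays an exact multiple, so (given some kmp_str_buf_t buf from a
   sketch like the one further up):

       __kmp_str_buf_print_size(&buf, 1024);     // appends "1k"
       __kmp_str_buf_print_size(&buf, 16777216); // 16 * 1024 * 1024 -> "16M"
       __kmp_str_buf_print_size(&buf, 1536);     // not a multiple of 1024 -> "1536"
       __kmp_str_buf_print_size(&buf, 0);        // printed as plain "0"
*/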
fname->path = __kmp_str_format( "%s", path );
-        // Original code used strdup() function to copy a string, but on Windows* OS Intel(R) 64 it
-        // causes assertioon id debug heap, so I had to replace strdup with __kmp_str_format().
-        if ( KMP_OS_WINDOWS ) {
-            __kmp_str_replace( fname->path, '\\', '/' );
-        }; // if
-        fname->dir = __kmp_str_format( "%s", fname->path );
-        slash = strrchr( fname->dir, '/' );
-        if ( KMP_OS_WINDOWS && slash == NULL ) {    // On Windows* OS, if slash not found,
-            char first = TOLOWER( fname->dir[ 0 ] );     // look for drive.
-            if ( 'a' <= first && first <= 'z' && fname->dir[ 1 ] == ':' ) {
-                slash = & fname->dir[ 1 ];
-            }; // if
-        }; // if
-        base = ( slash == NULL ? fname->dir : slash + 1 );
-        fname->base = __kmp_str_format( "%s", base );  // Copy basename
-        * base = 0;                    // and truncate dir.
+void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path) {
+  fname->path = NULL;
+  fname->dir = NULL;
+  fname->base = NULL;
+
+  if (path != NULL) {
+    char *slash = NULL; // Pointer to the last character of dir.
+    char *base = NULL; // Pointer to the beginning of basename.
+    fname->path = __kmp_str_format("%s", path);
+    // Original code used strdup() function to copy a string, but on Windows* OS
+    // Intel(R) 64 it causes an assertion in the debug heap, so I had to replace
+    // strdup with __kmp_str_format().
+    if (KMP_OS_WINDOWS) {
+      __kmp_str_replace(fname->path, '\\', '/');
+    }; // if
+    fname->dir = __kmp_str_format("%s", fname->path);
+    slash = strrchr(fname->dir, '/');
+    if (KMP_OS_WINDOWS &&
+        slash == NULL) { // On Windows* OS, if slash not found,
+      char first = TOLOWER(fname->dir[0]); // look for drive.
+      if ('a' <= first && first <= 'z' && fname->dir[1] == ':') {
+        slash = &fname->dir[1];
+      }; // if
+    }; // if
+    base = (slash == NULL ? fname->dir : slash + 1);
+    fname->base = __kmp_str_format("%s", base); // Copy basename
+    *base = 0; // and truncate dir.
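/* Illustration of the resulting split, derived from the logic above
   (hypothetical inputs; note that dir keeps its trailing separator):

       kmp_str_fname_t fn;
       __kmp_str_fname_init(&fn, "/usr/lib/libomp.so");
       // fn.dir == "/usr/lib/", fn.base == "libomp.so", fn.path == the full copy
       __kmp_str_fname_free(&fn);

       __kmp_str_fname_init(&fn, "libomp.so");
       // no separator: fn.dir == "", fn.base == "libomp.so"
       __kmp_str_fname_free(&fn);
*/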
+ }; // if } // kmp_str_fname_init - -void -__kmp_str_fname_free( - kmp_str_fname_t * fname -) { - __kmp_str_free( (char const **)( & fname->path ) ); - __kmp_str_free( (char const **)( & fname->dir ) ); - __kmp_str_free( (char const **)( & fname->base ) ); +void __kmp_str_fname_free(kmp_str_fname_t *fname) { + __kmp_str_free((char const **)(&fname->path)); + __kmp_str_free((char const **)(&fname->dir)); + __kmp_str_free((char const **)(&fname->base)); } // kmp_str_fname_free - -int -__kmp_str_fname_match( - kmp_str_fname_t const * fname, - char const * pattern -) { - - int dir_match = 1; - int base_match = 1; - - if ( pattern != NULL ) { - kmp_str_fname_t ptrn; - __kmp_str_fname_init( & ptrn, pattern ); - dir_match = - strcmp( ptrn.dir, "*/" ) == 0 - || - ( fname->dir != NULL && __kmp_str_eqf( fname->dir, ptrn.dir ) ); - base_match = - strcmp( ptrn.base, "*" ) == 0 - || - ( fname->base != NULL && __kmp_str_eqf( fname->base, ptrn.base ) ); - __kmp_str_fname_free( & ptrn ); - }; // if - - return dir_match && base_match; - +int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern) { + int dir_match = 1; + int base_match = 1; + + if (pattern != NULL) { + kmp_str_fname_t ptrn; + __kmp_str_fname_init(&ptrn, pattern); + dir_match = strcmp(ptrn.dir, "*/") == 0 || + (fname->dir != NULL && __kmp_str_eqf(fname->dir, ptrn.dir)); + base_match = strcmp(ptrn.base, "*") == 0 || + (fname->base != NULL && __kmp_str_eqf(fname->base, ptrn.base)); + __kmp_str_fname_free(&ptrn); + }; // if + + return dir_match && base_match; } // __kmp_str_fname_match - -kmp_str_loc_t -__kmp_str_loc_init( - char const * psource, - int init_fname -) { - - kmp_str_loc_t loc; - - loc._bulk = NULL; - loc.file = NULL; - loc.func = NULL; - loc.line = 0; - loc.col = 0; - - if ( psource != NULL ) { - - char * str = NULL; - char * dummy = NULL; - char * line = NULL; - char * col = NULL; - - // Copy psource to keep it intact. - loc._bulk = __kmp_str_format( "%s", psource ); - - // Parse psource string: ";file;func;line;col;;" - str = loc._bulk; - __kmp_str_split( str, ';', & dummy, & str ); - __kmp_str_split( str, ';', & loc.file, & str ); - __kmp_str_split( str, ';', & loc.func, & str ); - __kmp_str_split( str, ';', & line, & str ); - __kmp_str_split( str, ';', & col, & str ); - - // Convert line and col into numberic values. - if ( line != NULL ) { - loc.line = atoi( line ); - if ( loc.line < 0 ) { - loc.line = 0; - }; // if - }; // if - if ( col != NULL ) { - loc.col = atoi( col ); - if ( loc.col < 0 ) { - loc.col = 0; - }; // if - }; // if - +kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname) { + kmp_str_loc_t loc; + + loc._bulk = NULL; + loc.file = NULL; + loc.func = NULL; + loc.line = 0; + loc.col = 0; + + if (psource != NULL) { + char *str = NULL; + char *dummy = NULL; + char *line = NULL; + char *col = NULL; + + // Copy psource to keep it intact. + loc._bulk = __kmp_str_format("%s", psource); + + // Parse psource string: ";file;func;line;col;;" + str = loc._bulk; + __kmp_str_split(str, ';', &dummy, &str); + __kmp_str_split(str, ';', &loc.file, &str); + __kmp_str_split(str, ';', &loc.func, &str); + __kmp_str_split(str, ';', &line, &str); + __kmp_str_split(str, ';', &col, &str); + + // Convert line and col into numberic values. + if (line != NULL) { + loc.line = atoi(line); + if (loc.line < 0) { + loc.line = 0; + }; // if + }; // if + if (col != NULL) { + loc.col = atoi(col); + if (loc.col < 0) { + loc.col = 0; + }; // if }; // if - __kmp_str_fname_init( & loc.fname, init_fname ? 
loc.file : NULL ); + }; // if - return loc; + __kmp_str_fname_init(&loc.fname, init_fname ? loc.file : NULL); + return loc; } // kmp_str_loc_init - -void -__kmp_str_loc_free( - kmp_str_loc_t * loc -) { - __kmp_str_fname_free( & loc->fname ); - __kmp_str_free((const char **) &(loc->_bulk)); - loc->file = NULL; - loc->func = NULL; +void __kmp_str_loc_free(kmp_str_loc_t *loc) { + __kmp_str_fname_free(&loc->fname); + __kmp_str_free((const char **)&(loc->_bulk)); + loc->file = NULL; + loc->func = NULL; } // kmp_str_loc_free - - -/* - This function is intended to compare file names. On Windows* OS file names are case-insensitive, - so functions performs case-insensitive comparison. On Linux* OS it performs case-sensitive - comparison. - Note: The function returns *true* if strings are *equal*. -*/ - -int -__kmp_str_eqf( // True, if strings are equal, false otherwise. - char const * lhs, // First string. - char const * rhs // Second string. -) { - int result; - #if KMP_OS_WINDOWS - result = ( _stricmp( lhs, rhs ) == 0 ); - #else - result = ( strcmp( lhs, rhs ) == 0 ); - #endif - return result; +/* This function is intended to compare file names. On Windows* OS file names + are case-insensitive, so functions performs case-insensitive comparison. On + Linux* OS it performs case-sensitive comparison. Note: The function returns + *true* if strings are *equal*. */ +int __kmp_str_eqf( // True, if strings are equal, false otherwise. + char const *lhs, // First string. + char const *rhs // Second string. + ) { + int result; +#if KMP_OS_WINDOWS + result = (_stricmp(lhs, rhs) == 0); +#else + result = (strcmp(lhs, rhs) == 0); +#endif + return result; } // __kmp_str_eqf - -/* - This function is like sprintf, but it *allocates* new buffer, which must be freed eventually by - __kmp_str_free(). The function is very convenient for constructing strings, it successfully - replaces strdup(), strcat(), it frees programmer from buffer allocations and helps to avoid - buffer overflows. Examples: - - str = __kmp_str_format( "%s", orig ); // strdup(), do not care about buffer size. - __kmp_str_free( & str ); - str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), do not care about buffer size. - __kmp_str_free( & str ); - str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. - __kmp_str_free( & str ); - - Performance note: - This function allocates memory with malloc() calls, so do not call it from - performance-critical code. In performance-critical code consider using kmp_str_buf_t - instead, since it uses stack-allocated buffer for short strings. - - Why does this function use malloc()? - 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). There are no - reasons in using __kmp_allocate() for strings due to extra overhead while cache-aligned - memory is not necessary. - 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread structure. - We need to perform string operations during library startup (for example, in - __kmp_register_library_startup()) when no thread structures are allocated yet. - So standard malloc() is the only available option. +/* This function is like sprintf, but it *allocates* new buffer, which must be + freed eventually by __kmp_str_free(). The function is very convenient for + constructing strings, it successfully replaces strdup(), strcat(), it frees + programmer from buffer allocations and helps to avoid buffer overflows. 
+ Examples: + + str = __kmp_str_format("%s", orig); //strdup() doesn't care about buffer size + __kmp_str_free( & str ); + str = __kmp_str_format( "%s%s", orig1, orig2 ); // strcat(), doesn't care + // about buffer size. + __kmp_str_free( & str ); + str = __kmp_str_format( "%s/%s.txt", path, file ); // constructing string. + __kmp_str_free( & str ); + + Performance note: + This function allocates memory with malloc() calls, so do not call it from + performance-critical code. In performance-critical code consider using + kmp_str_buf_t instead, since it uses stack-allocated buffer for short + strings. + + Why does this function use malloc()? + 1. __kmp_allocate() returns cache-aligned memory allocated with malloc(). + There are no reasons in using __kmp_allocate() for strings due to extra + overhead while cache-aligned memory is not necessary. + 2. __kmp_thread_malloc() cannot be used because it requires pointer to thread + structure. We need to perform string operations during library startup + (for example, in __kmp_register_library_startup()) when no thread + structures are allocated yet. + So standard malloc() is the only available option. */ -char * -__kmp_str_format( // Allocated string. - char const * format, // Format string. - ... // Other parameters. -) { - - va_list args; - int size = 512; - char * buffer = NULL; - int rc; - - // Allocate buffer. - buffer = (char *) KMP_INTERNAL_MALLOC( size ); - if ( buffer == NULL ) { - KMP_FATAL( MemoryAllocFailed ); +char *__kmp_str_format( // Allocated string. + char const *format, // Format string. + ... // Other parameters. + ) { + va_list args; + int size = 512; + char *buffer = NULL; + int rc; + + // Allocate buffer. + buffer = (char *)KMP_INTERNAL_MALLOC(size); + if (buffer == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + + for (;;) { + // Try to format string. + va_start(args, format); + rc = KMP_VSNPRINTF(buffer, size, format, args); + va_end(args); + + // No errors, string has been formatted. + if (rc >= 0 && rc < size) { + break; }; // if - for ( ; ; ) { - - // Try to format string. - va_start( args, format ); - rc = KMP_VSNPRINTF( buffer, size, format, args ); - va_end( args ); - - // No errors, string has been formatted. - if ( rc >= 0 && rc < size ) { - break; - }; // if - - // Error occurred, buffer is too small. - if ( rc >= 0 ) { - // C99-conforming implementation of vsnprintf returns required buffer size. - size = rc + 1; - } else { - // Older implementations just return -1. - size = size * 2; - }; // if - - // Enlarge buffer and try again. - buffer = (char *) KMP_INTERNAL_REALLOC( buffer, size ); - if ( buffer == NULL ) { - KMP_FATAL( MemoryAllocFailed ); - }; // if - - }; // forever + // Error occurred, buffer is too small. + if (rc >= 0) { + // C99-conforming implementation of vsnprintf returns required buffer + // size. + size = rc + 1; + } else { + // Older implementations just return -1. + size = size * 2; + }; // if - return buffer; + // Enlarge buffer and try again. 
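// For instance (illustrative numbers only): a 600-character result against the
// initial 512-byte buffer makes a C99-conforming vsnprintf() return 600, so the
// next pass reallocates to exactly 601 bytes; an older vsnprintf() returning -1
// gives no hint, so the buffer is doubled 512 -> 1024 and the format is retried.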
+ buffer = (char *)KMP_INTERNAL_REALLOC(buffer, size); + if (buffer == NULL) { + KMP_FATAL(MemoryAllocFailed); + }; // if + }; // forever + return buffer; } // func __kmp_str_format - -void -__kmp_str_free( - char const * * str -) { - KMP_DEBUG_ASSERT( str != NULL ); - KMP_INTERNAL_FREE( (void *) * str ); - * str = NULL; +void __kmp_str_free(char const **str) { + KMP_DEBUG_ASSERT(str != NULL); + KMP_INTERNAL_FREE((void *)*str); + *str = NULL; } // func __kmp_str_free - -/* If len is zero, returns true iff target and data have exact case-insensitive match. - If len is negative, returns true iff target is a case-insensitive substring of data. - If len is positive, returns true iff target is a case-insensitive substring of data or - vice versa, and neither is shorter than len. -*/ -int -__kmp_str_match( - char const * target, - int len, - char const * data -) { - int i; - if ( target == NULL || data == NULL ) { - return FALSE; +/* If len is zero, returns true iff target and data have exact case-insensitive + match. If len is negative, returns true iff target is a case-insensitive + substring of data. If len is positive, returns true iff target is a + case-insensitive substring of data or vice versa, and neither is shorter than + len. */ +int __kmp_str_match(char const *target, int len, char const *data) { + int i; + if (target == NULL || data == NULL) { + return FALSE; + }; // if + for (i = 0; target[i] && data[i]; ++i) { + if (TOLOWER(target[i]) != TOLOWER(data[i])) { + return FALSE; }; // if - for ( i = 0; target[i] && data[i]; ++ i ) { - if ( TOLOWER( target[i] ) != TOLOWER( data[i] ) ) { - return FALSE; - }; // if - }; // for i - return ( ( len > 0 ) ? i >= len : ( ! target[i] && ( len || ! data[i] ) ) ); + }; // for i + return ((len > 0) ? i >= len : (!target[i] && (len || !data[i]))); } // __kmp_str_match - -int -__kmp_str_match_false( char const * data ) { - int result = - __kmp_str_match( "false", 1, data ) || - __kmp_str_match( "off", 2, data ) || - __kmp_str_match( "0", 1, data ) || - __kmp_str_match( ".false.", 2, data ) || - __kmp_str_match( ".f.", 2, data ) || - __kmp_str_match( "no", 1, data ); - return result; +int __kmp_str_match_false(char const *data) { + int result = + __kmp_str_match("false", 1, data) || __kmp_str_match("off", 2, data) || + __kmp_str_match("0", 1, data) || __kmp_str_match(".false.", 2, data) || + __kmp_str_match(".f.", 2, data) || __kmp_str_match("no", 1, data); + return result; } // __kmp_str_match_false - -int -__kmp_str_match_true( char const * data ) { - int result = - __kmp_str_match( "true", 1, data ) || - __kmp_str_match( "on", 2, data ) || - __kmp_str_match( "1", 1, data ) || - __kmp_str_match( ".true.", 2, data ) || - __kmp_str_match( ".t.", 2, data ) || - __kmp_str_match( "yes", 1, data ); - return result; +int __kmp_str_match_true(char const *data) { + int result = + __kmp_str_match("true", 1, data) || __kmp_str_match("on", 2, data) || + __kmp_str_match("1", 1, data) || __kmp_str_match(".true.", 2, data) || + __kmp_str_match(".t.", 2, data) || __kmp_str_match("yes", 1, data); + return result; } // __kmp_str_match_true -void -__kmp_str_replace( - char * str, - char search_for, - char replace_with -) { - - char * found = NULL; - - found = strchr( str, search_for ); - while ( found ) { - * found = replace_with; - found = strchr( found + 1, search_for ); - }; // while +void __kmp_str_replace(char *str, char search_for, char replace_with) { + char *found = NULL; + found = strchr(str, search_for); + while (found) { + *found = replace_with; + found = 
strchr(found + 1, search_for); + }; // while } // __kmp_str_replace - -void -__kmp_str_split( - char * str, // I: String to split. - char delim, // I: Character to split on. - char ** head, // O: Pointer to head (may be NULL). - char ** tail // O: Pointer to tail (may be NULL). -) { - char * h = str; - char * t = NULL; - if ( str != NULL ) { - char * ptr = strchr( str, delim ); - if ( ptr != NULL ) { - * ptr = 0; - t = ptr + 1; - }; // if - }; // if - if ( head != NULL ) { - * head = h; - }; // if - if ( tail != NULL ) { - * tail = t; +void __kmp_str_split(char *str, // I: String to split. + char delim, // I: Character to split on. + char **head, // O: Pointer to head (may be NULL). + char **tail // O: Pointer to tail (may be NULL). + ) { + char *h = str; + char *t = NULL; + if (str != NULL) { + char *ptr = strchr(str, delim); + if (ptr != NULL) { + *ptr = 0; + t = ptr + 1; }; // if + }; // if + if (head != NULL) { + *head = h; + }; // if + if (tail != NULL) { + *tail = t; + }; // if } // __kmp_str_split -/* - strtok_r() is not available on Windows* OS. This function reimplements strtok_r(). -*/ -char * -__kmp_str_token( - char * str, // String to split into tokens. Note: String *is* modified! - char const * delim, // Delimiters. - char ** buf // Internal buffer. -) { - char * token = NULL; - #if KMP_OS_WINDOWS - // On Windows* OS there is no strtok_r() function. Let us implement it. - if ( str != NULL ) { - * buf = str; // First call, initialize buf. - }; // if - * buf += strspn( * buf, delim ); // Skip leading delimiters. - if ( ** buf != 0 ) { // Rest of the string is not yet empty. - token = * buf; // Use it as result. - * buf += strcspn( * buf, delim ); // Skip non-delimiters. - if ( ** buf != 0 ) { // Rest of the string is not yet empty. - ** buf = 0; // Terminate token here. - * buf += 1; // Advance buf to start with the next token next time. - }; // if - }; // if - #else - // On Linux* OS and OS X*, strtok_r() is available. Let us use it. - token = strtok_r( str, delim, buf ); - #endif - return token; +/* strtok_r() is not available on Windows* OS. This function reimplements + strtok_r(). */ +char *__kmp_str_token( + char *str, // String to split into tokens. Note: String *is* modified! + char const *delim, // Delimiters. + char **buf // Internal buffer. + ) { + char *token = NULL; +#if KMP_OS_WINDOWS + // On Windows* OS there is no strtok_r() function. Let us implement it. + if (str != NULL) { + *buf = str; // First call, initialize buf. + }; // if + *buf += strspn(*buf, delim); // Skip leading delimiters. + if (**buf != 0) { // Rest of the string is not yet empty. + token = *buf; // Use it as result. + *buf += strcspn(*buf, delim); // Skip non-delimiters. + if (**buf != 0) { // Rest of the string is not yet empty. + **buf = 0; // Terminate token here. + *buf += 1; // Advance buf to start with the next token next time. + }; // if + }; // if +#else + // On Linux* OS and OS X*, strtok_r() is available. Let us use it. 
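/* A small usage sketch for __kmp_str_split() and __kmp_str_token(), assuming the
   declarations in kmp_str.h (hypothetical caller code; both helpers modify the
   string they are given):

       char pair[] = "4,8";
       char *head, *tail;
       __kmp_str_split(pair, ',', &head, &tail); // head == "4", tail == "8"

       char list[] = "passive throughput";
       char *buf, *tok;
       for (tok = __kmp_str_token(list, " ", &buf); tok != NULL;
            tok = __kmp_str_token(NULL, " ", &buf)) {
         // 1st iteration: tok == "passive"; 2nd: tok == "throughput"; then NULL
       }
*/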
+ token = strtok_r(str, delim, buf); +#endif + return token; }; // __kmp_str_token - -int -__kmp_str_to_int( - char const * str, - char sentinel -) { - int result, factor; - char const * t; - - result = 0; - - for (t = str; *t != '\0'; ++t) { - if (*t < '0' || *t > '9') - break; - result = (result * 10) + (*t - '0'); - } - - switch (*t) { - case '\0': /* the current default for no suffix is bytes */ - factor = 1; - break; - case 'b': case 'B': /* bytes */ - ++t; - factor = 1; - break; - case 'k': case 'K': /* kilo-bytes */ - ++t; - factor = 1024; - break; - case 'm': case 'M': /* mega-bytes */ - ++t; - factor = (1024 * 1024); - break; - default: - if(*t != sentinel) - return (-1); - t = ""; - factor = 1; - } - - if (result > (INT_MAX / factor)) - result = INT_MAX; - else - result *= factor; - - return (*t != 0 ? 0 : result); - +int __kmp_str_to_int(char const *str, char sentinel) { + int result, factor; + char const *t; + + result = 0; + + for (t = str; *t != '\0'; ++t) { + if (*t < '0' || *t > '9') + break; + result = (result * 10) + (*t - '0'); + } + + switch (*t) { + case '\0': /* the current default for no suffix is bytes */ + factor = 1; + break; + case 'b': + case 'B': /* bytes */ + ++t; + factor = 1; + break; + case 'k': + case 'K': /* kilo-bytes */ + ++t; + factor = 1024; + break; + case 'm': + case 'M': /* mega-bytes */ + ++t; + factor = (1024 * 1024); + break; + default: + if (*t != sentinel) + return (-1); + t = ""; + factor = 1; + } + + if (result > (INT_MAX / factor)) + result = INT_MAX; + else + result *= factor; + + return (*t != 0 ? 0 : result); } // __kmp_str_to_int - -/* - The routine parses input string. It is expected it is a unsigned integer with optional unit. - Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" or "m" for megabytes, ..., "yb" - or "y" for yottabytes. :-) Unit name is case-insensitive. The routine returns 0 if everything is - ok, or error code: -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed - value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown unit *size is set - to zero. -*/ -void -__kmp_str_to_size( // R: Error code. - char const * str, // I: String of characters, unsigned number and unit ("b", "kb", etc). - size_t * out, // O: Parsed number. - size_t dfactor, // I: The factor if none of the letters specified. - char const * * error // O: Null if everything is ok, error message otherwise. -) { - - size_t value = 0; - size_t factor = 0; - int overflow = 0; - int i = 0; - int digit; - - - KMP_DEBUG_ASSERT( str != NULL ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t') { - ++ i; - }; // while - - // Parse number. - if ( str[ i ] < '0' || str[ i ] > '9' ) { - * error = KMP_I18N_STR( NotANumber ); - return; - }; // if - do { - digit = str[ i ] - '0'; - overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); - value = ( value * 10 ) + digit; - ++ i; - } while ( str[ i ] >= '0' && str[ i ] <= '9' ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - // Parse unit. 
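/* Worked examples for the two numeric parsers, derived from the code above and
   below (suffixes are case-insensitive; values shown only as illustration):

       __kmp_str_to_int("16k", 0);  // -> 16 * 1024 = 16384
       __kmp_str_to_int("16m", 0);  // -> 16 * 1024 * 1024 = 16777216
       __kmp_str_to_int("16x", 0);  // unknown suffix and not the sentinel -> -1

       size_t sz; char const *err;
       __kmp_str_to_size("16m", &sz, 1, &err);    // err == NULL, sz == 16777216
       __kmp_str_to_size("1gb", &sz, 1, &err);    // err == NULL, sz == 1 << 30
       __kmp_str_to_size("16",  &sz, 1024, &err); // no unit -> dfactor: sz == 16384
       __kmp_str_to_size("16q", &sz, 1, &err);    // err == KMP_I18N_STR(BadUnit)
*/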
- #define _case( ch, exp ) \ - case ch : \ - case ch - ( 'a' - 'A' ) : { \ - size_t shift = (exp) * 10; \ - ++ i; \ - if ( shift < sizeof( size_t ) * 8 ) { \ - factor = (size_t)( 1 ) << shift; \ - } else { \ - overflow = 1; \ - }; \ - } break; - switch ( str[ i ] ) { - _case( 'k', 1 ); // Kilo - _case( 'm', 2 ); // Mega - _case( 'g', 3 ); // Giga - _case( 't', 4 ); // Tera - _case( 'p', 5 ); // Peta - _case( 'e', 6 ); // Exa - _case( 'z', 7 ); // Zetta - _case( 'y', 8 ); // Yotta - // Oops. No more units... - }; // switch - #undef _case - if ( str[ i ] == 'b' || str[ i ] == 'B' ) { // Skip optional "b". - if ( factor == 0 ) { - factor = 1; - } - ++ i; - }; // if - if ( ! ( str[ i ] == ' ' || str[ i ] == '\t' || str[ i ] == 0 ) ) { // Bad unit - * error = KMP_I18N_STR( BadUnit ); - return; - }; // if - - if ( factor == 0 ) { - factor = dfactor; +/* The routine parses input string. It is expected it is a unsigned integer with + optional unit. Units are: "b" for bytes, "kb" or just "k" for kilobytes, "mb" + or "m" for megabytes, ..., "yb" or "y" for yottabytes. :-) Unit name is + case-insensitive. The routine returns 0 if everything is ok, or error code: + -1 in case of overflow, -2 in case of unknown unit. *size is set to parsed + value. In case of overflow *size is set to KMP_SIZE_T_MAX, in case of unknown + unit *size is set to zero. */ +void __kmp_str_to_size( // R: Error code. + char const *str, // I: String of characters, unsigned number and unit ("b", + // "kb", etc). + size_t *out, // O: Parsed number. + size_t dfactor, // I: The factor if none of the letters specified. + char const **error // O: Null if everything is ok, error message otherwise. + ) { + + size_t value = 0; + size_t factor = 0; + int overflow = 0; + int i = 0; + int digit; + + KMP_DEBUG_ASSERT(str != NULL); + + // Skip spaces. + while (str[i] == ' ' || str[i] == '\t') { + ++i; + }; // while + + // Parse number. + if (str[i] < '0' || str[i] > '9') { + *error = KMP_I18N_STR(NotANumber); + return; + }; // if + do { + digit = str[i] - '0'; + overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); + value = (value * 10) + digit; + ++i; + } while (str[i] >= '0' && str[i] <= '9'); + + // Skip spaces. + while (str[i] == ' ' || str[i] == '\t') { + ++i; + }; // while + +// Parse unit. +#define _case(ch, exp) \ + case ch: \ + case ch - ('a' - 'A'): { \ + size_t shift = (exp)*10; \ + ++i; \ + if (shift < sizeof(size_t) * 8) { \ + factor = (size_t)(1) << shift; \ + } else { \ + overflow = 1; \ + }; \ + } break; + switch (str[i]) { + _case('k', 1); // Kilo + _case('m', 2); // Mega + _case('g', 3); // Giga + _case('t', 4); // Tera + _case('p', 5); // Peta + _case('e', 6); // Exa + _case('z', 7); // Zetta + _case('y', 8); // Yotta + // Oops. No more units... + }; // switch +#undef _case + if (str[i] == 'b' || str[i] == 'B') { // Skip optional "b". + if (factor == 0) { + factor = 1; } - - // Apply factor. - overflow = overflow || ( value > ( KMP_SIZE_T_MAX / factor ) ); - value *= factor; - - // Skip spaces. 
- while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - if ( str[ i ] != 0 ) { - * error = KMP_I18N_STR( IllegalCharacters ); - return; - }; // if - - if ( overflow ) { - * error = KMP_I18N_STR( ValueTooLarge ); - * out = KMP_SIZE_T_MAX; - return; - }; // if - - * error = NULL; - * out = value; - + ++i; + }; // if + if (!(str[i] == ' ' || str[i] == '\t' || str[i] == 0)) { // Bad unit + *error = KMP_I18N_STR(BadUnit); + return; + }; // if + + if (factor == 0) { + factor = dfactor; + } + + // Apply factor. + overflow = overflow || (value > (KMP_SIZE_T_MAX / factor)); + value *= factor; + + // Skip spaces. + while (str[i] == ' ' || str[i] == '\t') { + ++i; + }; // while + + if (str[i] != 0) { + *error = KMP_I18N_STR(IllegalCharacters); + return; + }; // if + + if (overflow) { + *error = KMP_I18N_STR(ValueTooLarge); + *out = KMP_SIZE_T_MAX; + return; + }; // if + + *error = NULL; + *out = value; } // __kmp_str_to_size - -void -__kmp_str_to_uint( // R: Error code. - char const * str, // I: String of characters, unsigned number. - kmp_uint64 * out, // O: Parsed number. - char const * * error // O: Null if everything is ok, error message otherwise. -) { - - size_t value = 0; - int overflow = 0; - int i = 0; - int digit; - - - KMP_DEBUG_ASSERT( str != NULL ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - // Parse number. - if ( str[ i ] < '0' || str[ i ] > '9' ) { - * error = KMP_I18N_STR( NotANumber ); - return; - }; // if - do { - digit = str[ i ] - '0'; - overflow = overflow || ( value > ( KMP_SIZE_T_MAX - digit ) / 10 ); - value = ( value * 10 ) + digit; - ++ i; - } while ( str[ i ] >= '0' && str[ i ] <= '9' ); - - // Skip spaces. - while ( str[ i ] == ' ' || str[ i ] == '\t' ) { - ++ i; - }; // while - - if ( str[ i ] != 0 ) { - * error = KMP_I18N_STR( IllegalCharacters ); - return; - }; // if - - if ( overflow ) { - * error = KMP_I18N_STR( ValueTooLarge ); - * out = (kmp_uint64) -1; - return; - }; // if - - * error = NULL; - * out = value; - +void __kmp_str_to_uint( // R: Error code. + char const *str, // I: String of characters, unsigned number. + kmp_uint64 *out, // O: Parsed number. + char const **error // O: Null if everything is ok, error message otherwise. + ) { + size_t value = 0; + int overflow = 0; + int i = 0; + int digit; + + KMP_DEBUG_ASSERT(str != NULL); + + // Skip spaces. + while (str[i] == ' ' || str[i] == '\t') { + ++i; + }; // while + + // Parse number. + if (str[i] < '0' || str[i] > '9') { + *error = KMP_I18N_STR(NotANumber); + return; + }; // if + do { + digit = str[i] - '0'; + overflow = overflow || (value > (KMP_SIZE_T_MAX - digit) / 10); + value = (value * 10) + digit; + ++i; + } while (str[i] >= '0' && str[i] <= '9'); + + // Skip spaces. 
+ while (str[i] == ' ' || str[i] == '\t') { + ++i; + }; // while + + if (str[i] != 0) { + *error = KMP_I18N_STR(IllegalCharacters); + return; + }; // if + + if (overflow) { + *error = KMP_I18N_STR(ValueTooLarge); + *out = (kmp_uint64)-1; + return; + }; // if + + *error = NULL; + *out = value; } // __kmp_str_to_unit - - // end of file // diff --git a/openmp/runtime/src/kmp_str.h b/openmp/runtime/src/kmp_str.h index ba71bba..0a9830a 100644 --- a/openmp/runtime/src/kmp_str.h +++ b/openmp/runtime/src/kmp_str.h @@ -16,104 +16,112 @@ #ifndef KMP_STR_H #define KMP_STR_H -#include #include +#include #include "kmp_os.h" #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus #if KMP_OS_WINDOWS -# define strdup _strdup +#define strdup _strdup #endif /* some macros to replace ctype.h functions */ -#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c)) +#define TOLOWER(c) ((((c) >= 'A') && ((c) <= 'Z')) ? ((c) + 'a' - 'A') : (c)) struct kmp_str_buf { - char * str; // Pointer to buffer content, read only. - unsigned int size; // Do not change this field! - int used; // Number of characters printed to buffer, read only. - char bulk[ 512 ]; // Do not use this field! + char *str; // Pointer to buffer content, read only. + unsigned int size; // Do not change this field! + int used; // Number of characters printed to buffer, read only. + char bulk[512]; // Do not use this field! }; // struct kmp_str_buf -typedef struct kmp_str_buf kmp_str_buf_t; - -#define __kmp_str_buf_init( b ) { (b)->str = (b)->bulk; (b)->size = sizeof( (b)->bulk ); (b)->used = 0; (b)->bulk[ 0 ] = 0; } - -void __kmp_str_buf_clear( kmp_str_buf_t * buffer ); -void __kmp_str_buf_reserve( kmp_str_buf_t * buffer, int size ); -void __kmp_str_buf_detach( kmp_str_buf_t * buffer ); -void __kmp_str_buf_free( kmp_str_buf_t * buffer ); -void __kmp_str_buf_cat( kmp_str_buf_t * buffer, char const * str, int len ); -void __kmp_str_buf_vprint( kmp_str_buf_t * buffer, char const * format, va_list args ); -void __kmp_str_buf_print( kmp_str_buf_t * buffer, char const * format, ... ); -void __kmp_str_buf_print_size( kmp_str_buf_t * buffer, size_t size ); - -/* - File name parser. Usage: - - kmp_str_fname_t fname = __kmp_str_fname_init( path ); - // Use fname.path (copy of original path ), fname.dir, fname.base. - // Note fname.dir concatenated with fname.base gives exact copy of path. - __kmp_str_fname_free( & fname ); - +typedef struct kmp_str_buf kmp_str_buf_t; + +#define __kmp_str_buf_init(b) \ + { \ + (b)->str = (b)->bulk; \ + (b)->size = sizeof((b)->bulk); \ + (b)->used = 0; \ + (b)->bulk[0] = 0; \ + } + +void __kmp_str_buf_clear(kmp_str_buf_t *buffer); +void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size); +void __kmp_str_buf_detach(kmp_str_buf_t *buffer); +void __kmp_str_buf_free(kmp_str_buf_t *buffer); +void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len); +void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args); +void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...); +void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size); + +/* File name parser. + Usage: + + kmp_str_fname_t fname = __kmp_str_fname_init( path ); + // Use fname.path (copy of original path ), fname.dir, fname.base. + // Note fname.dir concatenated with fname.base gives exact copy of path. 
+ __kmp_str_fname_free( & fname ); */ struct kmp_str_fname { - char * path; - char * dir; - char * base; + char *path; + char *dir; + char *base; }; // struct kmp_str_fname typedef struct kmp_str_fname kmp_str_fname_t; -void __kmp_str_fname_init( kmp_str_fname_t * fname, char const * path ); -void __kmp_str_fname_free( kmp_str_fname_t * fname ); -// Compares file name with specified patern. If pattern is NULL, any fname matched. -int __kmp_str_fname_match( kmp_str_fname_t const * fname, char const * pattern ); - -/* - The compiler provides source locations in string form ";file;func;line;col;;". It not not - convenient for manupulation. These structure keeps source location in more convenient form. - Usage: - - kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 ); - // use loc.file, loc.func, loc.line, loc.col. - // loc.fname is available if the second argument of __kmp_str_loc_init is true. - __kmp_str_loc_free( & loc ); - - If psource is NULL or does not follow format above, file and/or func may be NULL pointers. +void __kmp_str_fname_init(kmp_str_fname_t *fname, char const *path); +void __kmp_str_fname_free(kmp_str_fname_t *fname); +// Compares file name with specified patern. If pattern is NULL, any fname +// matched. +int __kmp_str_fname_match(kmp_str_fname_t const *fname, char const *pattern); + +/* The compiler provides source locations in string form + ";file;func;line;col;;". It is not convenient for manupulation. This + structure keeps source location in more convenient form. + Usage: + + kmp_str_loc_t loc = __kmp_str_loc_init( ident->psource, 0 ); + // use loc.file, loc.func, loc.line, loc.col. + // loc.fname is available if second argument of __kmp_str_loc_init is true. + __kmp_str_loc_free( & loc ); + + If psource is NULL or does not follow format above, file and/or func may be + NULL pointers. */ struct kmp_str_loc { - char * _bulk; // Do not use thid field. - kmp_str_fname_t fname; // Will be initialized if init_fname is true. - char * file; - char * func; - int line; - int col; + char *_bulk; // Do not use thid field. + kmp_str_fname_t fname; // Will be initialized if init_fname is true. + char *file; + char *func; + int line; + int col; }; // struct kmp_str_loc typedef struct kmp_str_loc kmp_str_loc_t; -kmp_str_loc_t __kmp_str_loc_init( char const * psource, int init_fname ); -void __kmp_str_loc_free( kmp_str_loc_t * loc ); - -int __kmp_str_eqf( char const * lhs, char const * rhs ); -char * __kmp_str_format( char const * format, ... 
); -void __kmp_str_free( char const * * str ); -int __kmp_str_match( char const * target, int len, char const * data ); -int __kmp_str_match_false( char const * data ); -int __kmp_str_match_true( char const * data ); -void __kmp_str_replace( char * str, char search_for, char replace_with ); -void __kmp_str_split( char * str, char delim, char ** head, char ** tail ); -char * __kmp_str_token( char * str, char const * delim, char ** buf ); -int __kmp_str_to_int( char const * str, char sentinel ); - -void __kmp_str_to_size( char const * str, size_t * out, size_t dfactor, char const * * error ); -void __kmp_str_to_uint( char const * str, kmp_uint64 * out, char const * * error ); +kmp_str_loc_t __kmp_str_loc_init(char const *psource, int init_fname); +void __kmp_str_loc_free(kmp_str_loc_t *loc); + +int __kmp_str_eqf(char const *lhs, char const *rhs); +char *__kmp_str_format(char const *format, ...); +void __kmp_str_free(char const **str); +int __kmp_str_match(char const *target, int len, char const *data); +int __kmp_str_match_false(char const *data); +int __kmp_str_match_true(char const *data); +void __kmp_str_replace(char *str, char search_for, char replace_with); +void __kmp_str_split(char *str, char delim, char **head, char **tail); +char *__kmp_str_token(char *str, char const *delim, char **buf); +int __kmp_str_to_int(char const *str, char sentinel); + +void __kmp_str_to_size(char const *str, size_t *out, size_t dfactor, + char const **error); +void __kmp_str_to_uint(char const *str, kmp_uint64 *out, char const **error); #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif // __cplusplus #endif // KMP_STR_H // end of file // - diff --git a/openmp/runtime/src/kmp_stub.cpp b/openmp/runtime/src/kmp_stub.cpp index 11b7cb6..a0912ab 100644 --- a/openmp/runtime/src/kmp_stub.cpp +++ b/openmp/runtime/src/kmp_stub.cpp @@ -13,258 +13,304 @@ //===----------------------------------------------------------------------===// -#include -#include #include +#include +#include -#include "omp.h" // Function renamings. -#include "kmp.h" // KMP_DEFAULT_STKSIZE +#include "kmp.h" // KMP_DEFAULT_STKSIZE #include "kmp_stub.h" +#include "omp.h" // Function renamings. 
#if KMP_OS_WINDOWS - #include +#include #else - #include +#include #endif // Moved from omp.h -#define omp_set_max_active_levels ompc_set_max_active_levels -#define omp_set_schedule ompc_set_schedule -#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num -#define omp_get_team_size ompc_get_team_size - -#define omp_set_num_threads ompc_set_num_threads -#define omp_set_dynamic ompc_set_dynamic -#define omp_set_nested ompc_set_nested -#define kmp_set_stacksize kmpc_set_stacksize -#define kmp_set_stacksize_s kmpc_set_stacksize_s -#define kmp_set_blocktime kmpc_set_blocktime -#define kmp_set_library kmpc_set_library -#define kmp_set_defaults kmpc_set_defaults -#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers -#define kmp_malloc kmpc_malloc -#define kmp_aligned_malloc kmpc_aligned_malloc -#define kmp_calloc kmpc_calloc -#define kmp_realloc kmpc_realloc -#define kmp_free kmpc_free +#define omp_set_max_active_levels ompc_set_max_active_levels +#define omp_set_schedule ompc_set_schedule +#define omp_get_ancestor_thread_num ompc_get_ancestor_thread_num +#define omp_get_team_size ompc_get_team_size + +#define omp_set_num_threads ompc_set_num_threads +#define omp_set_dynamic ompc_set_dynamic +#define omp_set_nested ompc_set_nested +#define kmp_set_stacksize kmpc_set_stacksize +#define kmp_set_stacksize_s kmpc_set_stacksize_s +#define kmp_set_blocktime kmpc_set_blocktime +#define kmp_set_library kmpc_set_library +#define kmp_set_defaults kmpc_set_defaults +#define kmp_set_disp_num_buffers kmpc_set_disp_num_buffers +#define kmp_malloc kmpc_malloc +#define kmp_aligned_malloc kmpc_aligned_malloc +#define kmp_calloc kmpc_calloc +#define kmp_realloc kmpc_realloc +#define kmp_free kmpc_free static double frequency = 0.0; // Helper functions. static size_t __kmps_init() { - static int initialized = 0; - static size_t dummy = 0; - if ( ! initialized ) { - - // TODO: Analyze KMP_VERSION environment variable, print - // __kmp_version_copyright and __kmp_version_build_time. - // WARNING: Do not use "fprintf( stderr, ... )" because it will cause - // unresolved "__iob" symbol (see C70080). We need to extract - // __kmp_printf() stuff from kmp_runtime.cpp and use it. - - // Trick with dummy variable forces linker to keep __kmp_version_copyright - // and __kmp_version_build_time strings in executable file (in case of - // static linkage). When KMP_VERSION analysis is implemented, dummy - // variable should be deleted, function should return void. - dummy = __kmp_version_copyright - __kmp_version_build_time; - - #if KMP_OS_WINDOWS - LARGE_INTEGER freq; - BOOL status = QueryPerformanceFrequency( & freq ); - if ( status ) { - frequency = double( freq.QuadPart ); - }; // if - #endif - - initialized = 1; + static int initialized = 0; + static size_t dummy = 0; + if (!initialized) { + // TODO: Analyze KMP_VERSION environment variable, print + // __kmp_version_copyright and __kmp_version_build_time. + // WARNING: Do not use "fprintf(stderr, ...)" because it will cause + // unresolved "__iob" symbol (see C70080). We need to extract __kmp_printf() + // stuff from kmp_runtime.cpp and use it. + + // Trick with dummy variable forces linker to keep __kmp_version_copyright + // and __kmp_version_build_time strings in executable file (in case of + // static linkage). When KMP_VERSION analysis is implemented, dummy + // variable should be deleted, function should return void. 
+ dummy = __kmp_version_copyright - __kmp_version_build_time; + +#if KMP_OS_WINDOWS + LARGE_INTEGER freq; + BOOL status = QueryPerformanceFrequency(&freq); + if (status) { + frequency = double(freq.QuadPart); }; // if - return dummy; +#endif + + initialized = 1; + }; // if + return dummy; }; // __kmps_init #define i __kmps_init(); /* set API functions */ -void omp_set_num_threads( omp_int_t num_threads ) { i; } -void omp_set_dynamic( omp_int_t dynamic ) { i; __kmps_set_dynamic( dynamic ); } -void omp_set_nested( omp_int_t nested ) { i; __kmps_set_nested( nested ); } -void omp_set_max_active_levels( omp_int_t max_active_levels ) { i; } -void omp_set_schedule( omp_sched_t kind, omp_int_t modifier ) { i; __kmps_set_schedule( (kmp_sched_t)kind, modifier ); } -int omp_get_ancestor_thread_num( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 0 ); } -int omp_get_team_size( omp_int_t level ) { i; return ( level ) ? ( -1 ) : ( 1 ); } -int kmpc_set_affinity_mask_proc( int proc, void **mask ) { i; return -1; } -int kmpc_unset_affinity_mask_proc( int proc, void **mask ) { i; return -1; } -int kmpc_get_affinity_mask_proc( int proc, void **mask ) { i; return -1; } +void omp_set_num_threads(omp_int_t num_threads) { i; } +void omp_set_dynamic(omp_int_t dynamic) { + i; + __kmps_set_dynamic(dynamic); +} +void omp_set_nested(omp_int_t nested) { + i; + __kmps_set_nested(nested); +} +void omp_set_max_active_levels(omp_int_t max_active_levels) { i; } +void omp_set_schedule(omp_sched_t kind, omp_int_t modifier) { + i; + __kmps_set_schedule((kmp_sched_t)kind, modifier); +} +int omp_get_ancestor_thread_num(omp_int_t level) { + i; + return (level) ? (-1) : (0); +} +int omp_get_team_size(omp_int_t level) { + i; + return (level) ? (-1) : (1); +} +int kmpc_set_affinity_mask_proc(int proc, void **mask) { + i; + return -1; +} +int kmpc_unset_affinity_mask_proc(int proc, void **mask) { + i; + return -1; +} +int kmpc_get_affinity_mask_proc(int proc, void **mask) { + i; + return -1; +} /* kmp API functions */ -void kmp_set_stacksize( omp_int_t arg ) { i; __kmps_set_stacksize( arg ); } -void kmp_set_stacksize_s( size_t arg ) { i; __kmps_set_stacksize( arg ); } -void kmp_set_blocktime( omp_int_t arg ) { i; __kmps_set_blocktime( arg ); } -void kmp_set_library( omp_int_t arg ) { i; __kmps_set_library( arg ); } -void kmp_set_defaults( char const * str ) { i; } -void kmp_set_disp_num_buffers( omp_int_t arg ) { i; } +void kmp_set_stacksize(omp_int_t arg) { + i; + __kmps_set_stacksize(arg); +} +void kmp_set_stacksize_s(size_t arg) { + i; + __kmps_set_stacksize(arg); +} +void kmp_set_blocktime(omp_int_t arg) { + i; + __kmps_set_blocktime(arg); +} +void kmp_set_library(omp_int_t arg) { + i; + __kmps_set_library(arg); +} +void kmp_set_defaults(char const *str) { i; } +void kmp_set_disp_num_buffers(omp_int_t arg) { i; } /* KMP memory management functions. 
*/ -void * kmp_malloc( size_t size ) { i; return malloc( size ); } -void * kmp_aligned_malloc( size_t sz, size_t a ) { - i; +void *kmp_malloc(size_t size) { + i; + return malloc(size); +} +void *kmp_aligned_malloc(size_t sz, size_t a) { + i; #if KMP_OS_WINDOWS - errno = ENOSYS; // not supported - return NULL; // no standard aligned allocator on Windows (pre - C11) + errno = ENOSYS; // not supported + return NULL; // no standard aligned allocator on Windows (pre - C11) #else - void *res; - int err; - if( err = posix_memalign( &res, a, sz ) ) { - errno = err; // can be EINVAL or ENOMEM - return NULL; - } - return res; + void *res; + int err; + if (err = posix_memalign(&res, a, sz)) { + errno = err; // can be EINVAL or ENOMEM + return NULL; + } + return res; #endif } -void * kmp_calloc( size_t nelem, size_t elsize ) { i; return calloc( nelem, elsize ); } -void * kmp_realloc( void *ptr, size_t size ) { i; return realloc( ptr, size ); } -void kmp_free( void * ptr ) { i; free( ptr ); } +void *kmp_calloc(size_t nelem, size_t elsize) { + i; + return calloc(nelem, elsize); +} +void *kmp_realloc(void *ptr, size_t size) { + i; + return realloc(ptr, size); +} +void kmp_free(void *ptr) { + i; + free(ptr); +} static int __kmps_blocktime = INT_MAX; -void __kmps_set_blocktime( int arg ) { - i; - __kmps_blocktime = arg; +void __kmps_set_blocktime(int arg) { + i; + __kmps_blocktime = arg; } // __kmps_set_blocktime -int __kmps_get_blocktime( void ) { - i; - return __kmps_blocktime; +int __kmps_get_blocktime(void) { + i; + return __kmps_blocktime; } // __kmps_get_blocktime static int __kmps_dynamic = 0; -void __kmps_set_dynamic( int arg ) { - i; - __kmps_dynamic = arg; +void __kmps_set_dynamic(int arg) { + i; + __kmps_dynamic = arg; } // __kmps_set_dynamic -int __kmps_get_dynamic( void ) { - i; - return __kmps_dynamic; +int __kmps_get_dynamic(void) { + i; + return __kmps_dynamic; } // __kmps_get_dynamic static int __kmps_library = 1000; -void __kmps_set_library( int arg ) { - i; - __kmps_library = arg; +void __kmps_set_library(int arg) { + i; + __kmps_library = arg; } // __kmps_set_library -int __kmps_get_library( void ) { - i; - return __kmps_library; +int __kmps_get_library(void) { + i; + return __kmps_library; } // __kmps_get_library static int __kmps_nested = 0; -void __kmps_set_nested( int arg ) { - i; - __kmps_nested = arg; +void __kmps_set_nested(int arg) { + i; + __kmps_nested = arg; } // __kmps_set_nested -int __kmps_get_nested( void ) { - i; - return __kmps_nested; +int __kmps_get_nested(void) { + i; + return __kmps_nested; } // __kmps_get_nested static size_t __kmps_stacksize = KMP_DEFAULT_STKSIZE; -void __kmps_set_stacksize( int arg ) { - i; - __kmps_stacksize = arg; +void __kmps_set_stacksize(int arg) { + i; + __kmps_stacksize = arg; } // __kmps_set_stacksize -int __kmps_get_stacksize( void ) { - i; - return __kmps_stacksize; +int __kmps_get_stacksize(void) { + i; + return __kmps_stacksize; } // __kmps_get_stacksize -static kmp_sched_t __kmps_sched_kind = kmp_sched_default; -static int __kmps_sched_modifier = 0; +static kmp_sched_t __kmps_sched_kind = kmp_sched_default; +static int __kmps_sched_modifier = 0; - void __kmps_set_schedule( kmp_sched_t kind, int modifier ) { - i; - __kmps_sched_kind = kind; - __kmps_sched_modifier = modifier; - } // __kmps_set_schedule +void __kmps_set_schedule(kmp_sched_t kind, int modifier) { + i; + __kmps_sched_kind = kind; + __kmps_sched_modifier = modifier; +} // __kmps_set_schedule - void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ) { - i; - *kind = 
__kmps_sched_kind; - *modifier = __kmps_sched_modifier; - } // __kmps_get_schedule +void __kmps_get_schedule(kmp_sched_t *kind, int *modifier) { + i; + *kind = __kmps_sched_kind; + *modifier = __kmps_sched_modifier; +} // __kmps_get_schedule #if OMP_40_ENABLED static kmp_proc_bind_t __kmps_proc_bind = proc_bind_false; -void __kmps_set_proc_bind( kmp_proc_bind_t arg ) { - i; - __kmps_proc_bind = arg; +void __kmps_set_proc_bind(kmp_proc_bind_t arg) { + i; + __kmps_proc_bind = arg; } // __kmps_set_proc_bind -kmp_proc_bind_t __kmps_get_proc_bind( void ) { - i; - return __kmps_proc_bind; +kmp_proc_bind_t __kmps_get_proc_bind(void) { + i; + return __kmps_proc_bind; } // __kmps_get_proc_bind #endif /* OMP_40_ENABLED */ -double __kmps_get_wtime( void ) { - // Elapsed wall clock time (in second) from "sometime in the past". - double wtime = 0.0; - i; - #if KMP_OS_WINDOWS - if ( frequency > 0.0 ) { - LARGE_INTEGER now; - BOOL status = QueryPerformanceCounter( & now ); - if ( status ) { - wtime = double( now.QuadPart ) / frequency; - }; // if - }; // if - #else - // gettimeofday() returns seconds and microseconds since the Epoch. - struct timeval tval; - int rc; - rc = gettimeofday( & tval, NULL ); - if ( rc == 0 ) { - wtime = (double)( tval.tv_sec ) + 1.0E-06 * (double)( tval.tv_usec ); - } else { - // TODO: Assert or abort here. - }; // if - #endif - return wtime; +double __kmps_get_wtime(void) { + // Elapsed wall clock time (in second) from "sometime in the past". + double wtime = 0.0; + i; +#if KMP_OS_WINDOWS + if (frequency > 0.0) { + LARGE_INTEGER now; + BOOL status = QueryPerformanceCounter(&now); + if (status) { + wtime = double(now.QuadPart) / frequency; + }; // if + }; // if +#else + // gettimeofday() returns seconds and microseconds since the Epoch. + struct timeval tval; + int rc; + rc = gettimeofday(&tval, NULL); + if (rc == 0) { + wtime = (double)(tval.tv_sec) + 1.0E-06 * (double)(tval.tv_usec); + } else { + // TODO: Assert or abort here. + }; // if +#endif + return wtime; }; // __kmps_get_wtime -double __kmps_get_wtick( void ) { - // Number of seconds between successive clock ticks. - double wtick = 0.0; - i; - #if KMP_OS_WINDOWS - { - DWORD increment; - DWORD adjustment; - BOOL disabled; - BOOL rc; - rc = GetSystemTimeAdjustment( & adjustment, & increment, & disabled ); - if ( rc ) { - wtick = 1.0E-07 * (double)( disabled ? increment : adjustment ); - } else { - // TODO: Assert or abort here. - wtick = 1.0E-03; - }; // if - } - #else - // TODO: gettimeofday() returns in microseconds, but what the precision? - wtick = 1.0E-06; - #endif - return wtick; +double __kmps_get_wtick(void) { + // Number of seconds between successive clock ticks. + double wtick = 0.0; + i; +#if KMP_OS_WINDOWS + { + DWORD increment; + DWORD adjustment; + BOOL disabled; + BOOL rc; + rc = GetSystemTimeAdjustment(&adjustment, &increment, &disabled); + if (rc) { + wtick = 1.0E-07 * (double)(disabled ? increment : adjustment); + } else { + // TODO: Assert or abort here. + wtick = 1.0E-03; + }; // if + } +#else + // TODO: gettimeofday() returns in microseconds, but what the precision? 
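// One possible way to answer the TODO above, sketched under the assumption that
// the POSIX timer API is available (clock_getres() from <time.h>; older glibc
// may need -lrt). It reports the actual tick of the realtime clock instead of
// hard-coding one microsecond:
//
//     struct timespec res;
//     if (clock_getres(CLOCK_REALTIME, &res) == 0)
//       wtick = (double)res.tv_sec + 1.0E-09 * (double)res.tv_nsec;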
+ wtick = 1.0E-06; +#endif + return wtick; }; // __kmps_get_wtick // end of file // - diff --git a/openmp/runtime/src/kmp_stub.h b/openmp/runtime/src/kmp_stub.h index cdcffa3..9d1efda 100644 --- a/openmp/runtime/src/kmp_stub.h +++ b/openmp/runtime/src/kmp_stub.h @@ -17,43 +17,43 @@ #define KMP_STUB_H #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus -void __kmps_set_blocktime( int arg ); -int __kmps_get_blocktime( void ); -void __kmps_set_dynamic( int arg ); -int __kmps_get_dynamic( void ); -void __kmps_set_library( int arg ); -int __kmps_get_library( void ); -void __kmps_set_nested( int arg ); -int __kmps_get_nested( void ); -void __kmps_set_stacksize( int arg ); -int __kmps_get_stacksize(); +void __kmps_set_blocktime(int arg); +int __kmps_get_blocktime(void); +void __kmps_set_dynamic(int arg); +int __kmps_get_dynamic(void); +void __kmps_set_library(int arg); +int __kmps_get_library(void); +void __kmps_set_nested(int arg); +int __kmps_get_nested(void); +void __kmps_set_stacksize(int arg); +int __kmps_get_stacksize(); #ifndef KMP_SCHED_TYPE_DEFINED #define KMP_SCHED_TYPE_DEFINED typedef enum kmp_sched { - kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) - kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) - kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) - kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) - kmp_sched_default = kmp_sched_static // default scheduling + kmp_sched_static = 1, // mapped to kmp_sch_static_chunked (33) + kmp_sched_dynamic = 2, // mapped to kmp_sch_dynamic_chunked (35) + kmp_sched_guided = 3, // mapped to kmp_sch_guided_chunked (36) + kmp_sched_auto = 4, // mapped to kmp_sch_auto (38) + kmp_sched_default = kmp_sched_static // default scheduling } kmp_sched_t; #endif -void __kmps_set_schedule( kmp_sched_t kind, int modifier ); -void __kmps_get_schedule( kmp_sched_t *kind, int *modifier ); +void __kmps_set_schedule(kmp_sched_t kind, int modifier); +void __kmps_get_schedule(kmp_sched_t *kind, int *modifier); #if OMP_40_ENABLED -void __kmps_set_proc_bind( kmp_proc_bind_t arg ); -kmp_proc_bind_t __kmps_get_proc_bind( void ); +void __kmps_set_proc_bind(kmp_proc_bind_t arg); +kmp_proc_bind_t __kmps_get_proc_bind(void); #endif /* OMP_40_ENABLED */ double __kmps_get_wtime(); double __kmps_get_wtick(); #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif // __cplusplus #endif // KMP_STUB_H diff --git a/openmp/runtime/src/kmp_taskdeps.cpp b/openmp/runtime/src/kmp_taskdeps.cpp index 6fbf0b0..1b4e869 100644 --- a/openmp/runtime/src/kmp_taskdeps.cpp +++ b/openmp/runtime/src/kmp_taskdeps.cpp @@ -21,511 +21,543 @@ #if OMP_40_ENABLED -//TODO: Improve memory allocation? keep a list of pre-allocated structures? allocate in blocks? re-use list finished list entries? -//TODO: don't use atomic ref counters for stack-allocated nodes. -//TODO: find an alternate to atomic refs for heap-allocated nodes? -//TODO: Finish graph output support -//TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other runtime locks -//TODO: Any ITT support needed? +// TODO: Improve memory allocation? keep a list of pre-allocated structures? +// allocate in blocks? re-use list finished list entries? +// TODO: don't use atomic ref counters for stack-allocated nodes. +// TODO: find an alternate to atomic refs for heap-allocated nodes? +// TODO: Finish graph output support +// TODO: kmp_lock_t seems a tad to big (and heavy weight) for this. Check other +// runtime locks +// TODO: Any ITT support needed? 
#ifdef KMP_SUPPORT_GRAPH_OUTPUT static kmp_int32 kmp_node_id_seed = 0; #endif -static void -__kmp_init_node ( kmp_depnode_t *node ) -{ - node->dn.task = NULL; // set to null initially, it will point to the right task once dependences have been processed - node->dn.successors = NULL; - __kmp_init_lock(&node->dn.lock); - node->dn.nrefs = 1; // init creates the first reference to the node +static void __kmp_init_node(kmp_depnode_t *node) { + node->dn.task = NULL; // set to null initially, it will point to the right + // task once dependences have been processed + node->dn.successors = NULL; + __kmp_init_lock(&node->dn.lock); + node->dn.nrefs = 1; // init creates the first reference to the node #ifdef KMP_SUPPORT_GRAPH_OUTPUT - node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed); + node->dn.id = KMP_TEST_THEN_INC32(&kmp_node_id_seed); #endif } -static inline kmp_depnode_t * -__kmp_node_ref ( kmp_depnode_t *node ) -{ - KMP_TEST_THEN_INC32(&node->dn.nrefs); - return node; +static inline kmp_depnode_t *__kmp_node_ref(kmp_depnode_t *node) { + KMP_TEST_THEN_INC32(&node->dn.nrefs); + return node; } -static inline void -__kmp_node_deref ( kmp_info_t *thread, kmp_depnode_t *node ) -{ - if (!node) return; +static inline void __kmp_node_deref(kmp_info_t *thread, kmp_depnode_t *node) { + if (!node) + return; - kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1; - if ( n == 0 ) { - KMP_ASSERT(node->dn.nrefs == 0); + kmp_int32 n = KMP_TEST_THEN_DEC32(&node->dn.nrefs) - 1; + if (n == 0) { + KMP_ASSERT(node->dn.nrefs == 0); #if USE_FAST_MEMORY - __kmp_fast_free(thread,node); + __kmp_fast_free(thread, node); #else - __kmp_thread_free(thread,node); + __kmp_thread_free(thread, node); #endif - } + } } -#define KMP_ACQUIRE_DEPNODE(gtid,n) __kmp_acquire_lock(&(n)->dn.lock,(gtid)) -#define KMP_RELEASE_DEPNODE(gtid,n) __kmp_release_lock(&(n)->dn.lock,(gtid)) +#define KMP_ACQUIRE_DEPNODE(gtid, n) __kmp_acquire_lock(&(n)->dn.lock, (gtid)) +#define KMP_RELEASE_DEPNODE(gtid, n) __kmp_release_lock(&(n)->dn.lock, (gtid)) -static void -__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ); +static void __kmp_depnode_list_free(kmp_info_t *thread, kmp_depnode_list *list); -enum { - KMP_DEPHASH_OTHER_SIZE = 97, - KMP_DEPHASH_MASTER_SIZE = 997 -}; +enum { KMP_DEPHASH_OTHER_SIZE = 97, KMP_DEPHASH_MASTER_SIZE = 997 }; -static inline kmp_int32 -__kmp_dephash_hash ( kmp_intptr_t addr, size_t hsize ) -{ - //TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % m_num_sets ); - return ((addr >> 6) ^ (addr >> 2)) % hsize; +static inline kmp_int32 __kmp_dephash_hash(kmp_intptr_t addr, size_t hsize) { + // TODO alternate to try: set = (((Addr64)(addrUsefulBits * 9.618)) % + // m_num_sets ); + return ((addr >> 6) ^ (addr >> 2)) % hsize; } -static kmp_dephash_t * -__kmp_dephash_create ( kmp_info_t *thread, kmp_taskdata_t *current_task ) -{ - kmp_dephash_t *h; +static kmp_dephash_t *__kmp_dephash_create(kmp_info_t *thread, + kmp_taskdata_t *current_task) { + kmp_dephash_t *h; - size_t h_size; + size_t h_size; - if ( current_task->td_flags.tasktype == TASK_IMPLICIT ) - h_size = KMP_DEPHASH_MASTER_SIZE; - else - h_size = KMP_DEPHASH_OTHER_SIZE; + if (current_task->td_flags.tasktype == TASK_IMPLICIT) + h_size = KMP_DEPHASH_MASTER_SIZE; + else + h_size = KMP_DEPHASH_OTHER_SIZE; - kmp_int32 size = - h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t); + kmp_int32 size = + h_size * sizeof(kmp_dephash_entry_t *) + sizeof(kmp_dephash_t); #if USE_FAST_MEMORY - h = (kmp_dephash_t *) __kmp_fast_allocate( 
thread, size ); + h = (kmp_dephash_t *)__kmp_fast_allocate(thread, size); #else - h = (kmp_dephash_t *) __kmp_thread_malloc( thread, size ); + h = (kmp_dephash_t *)__kmp_thread_malloc(thread, size); #endif - h->size = h_size; + h->size = h_size; #ifdef KMP_DEBUG - h->nelements = 0; - h->nconflicts = 0; + h->nelements = 0; + h->nconflicts = 0; #endif - h->buckets = (kmp_dephash_entry **)(h+1); + h->buckets = (kmp_dephash_entry **)(h + 1); - for ( size_t i = 0; i < h_size; i++ ) - h->buckets[i] = 0; + for (size_t i = 0; i < h_size; i++) + h->buckets[i] = 0; - return h; + return h; } -void -__kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) -{ - for (size_t i = 0; i < h->size; i++) { - if (h->buckets[i]) { - kmp_dephash_entry_t *next; - for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) { - next = entry->next_in_bucket; - __kmp_depnode_list_free(thread,entry->last_ins); - __kmp_node_deref(thread,entry->last_out); +void __kmp_dephash_free_entries(kmp_info_t *thread, kmp_dephash_t *h) { + for (size_t i = 0; i < h->size; i++) { + if (h->buckets[i]) { + kmp_dephash_entry_t *next; + for (kmp_dephash_entry_t *entry = h->buckets[i]; entry; entry = next) { + next = entry->next_in_bucket; + __kmp_depnode_list_free(thread, entry->last_ins); + __kmp_node_deref(thread, entry->last_out); #if USE_FAST_MEMORY - __kmp_fast_free(thread,entry); + __kmp_fast_free(thread, entry); #else - __kmp_thread_free(thread,entry); + __kmp_thread_free(thread, entry); #endif - } - h->buckets[i] = 0; - } + } + h->buckets[i] = 0; } + } } -void -__kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) -{ - __kmp_dephash_free_entries(thread, h); +void __kmp_dephash_free(kmp_info_t *thread, kmp_dephash_t *h) { + __kmp_dephash_free_entries(thread, h); #if USE_FAST_MEMORY - __kmp_fast_free(thread,h); + __kmp_fast_free(thread, h); #else - __kmp_thread_free(thread,h); + __kmp_thread_free(thread, h); #endif } static kmp_dephash_entry * -__kmp_dephash_find ( kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr ) -{ - kmp_int32 bucket = __kmp_dephash_hash(addr,h->size); +__kmp_dephash_find(kmp_info_t *thread, kmp_dephash_t *h, kmp_intptr_t addr) { + kmp_int32 bucket = __kmp_dephash_hash(addr, h->size); - kmp_dephash_entry_t *entry; - for ( entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket ) - if ( entry->addr == addr ) break; + kmp_dephash_entry_t *entry; + for (entry = h->buckets[bucket]; entry; entry = entry->next_in_bucket) + if (entry->addr == addr) + break; - if ( entry == NULL ) { - // create entry. This is only done by one thread so no locking required + if (entry == NULL) { +// create entry. 
This is only done by one thread so no locking required #if USE_FAST_MEMORY - entry = (kmp_dephash_entry_t *) __kmp_fast_allocate( thread, sizeof(kmp_dephash_entry_t) ); + entry = (kmp_dephash_entry_t *)__kmp_fast_allocate( + thread, sizeof(kmp_dephash_entry_t)); #else - entry = (kmp_dephash_entry_t *) __kmp_thread_malloc( thread, sizeof(kmp_dephash_entry_t) ); + entry = (kmp_dephash_entry_t *)__kmp_thread_malloc( + thread, sizeof(kmp_dephash_entry_t)); #endif - entry->addr = addr; - entry->last_out = NULL; - entry->last_ins = NULL; - entry->next_in_bucket = h->buckets[bucket]; - h->buckets[bucket] = entry; + entry->addr = addr; + entry->last_out = NULL; + entry->last_ins = NULL; + entry->next_in_bucket = h->buckets[bucket]; + h->buckets[bucket] = entry; #ifdef KMP_DEBUG - h->nelements++; - if ( entry->next_in_bucket ) h->nconflicts++; + h->nelements++; + if (entry->next_in_bucket) + h->nconflicts++; #endif - } - return entry; + } + return entry; } -static kmp_depnode_list_t * -__kmp_add_node ( kmp_info_t *thread, kmp_depnode_list_t *list, kmp_depnode_t *node ) -{ - kmp_depnode_list_t *new_head; +static kmp_depnode_list_t *__kmp_add_node(kmp_info_t *thread, + kmp_depnode_list_t *list, + kmp_depnode_t *node) { + kmp_depnode_list_t *new_head; #if USE_FAST_MEMORY - new_head = (kmp_depnode_list_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_list_t)); + new_head = (kmp_depnode_list_t *)__kmp_fast_allocate( + thread, sizeof(kmp_depnode_list_t)); #else - new_head = (kmp_depnode_list_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_list_t)); + new_head = (kmp_depnode_list_t *)__kmp_thread_malloc( + thread, sizeof(kmp_depnode_list_t)); #endif - new_head->node = __kmp_node_ref(node); - new_head->next = list; + new_head->node = __kmp_node_ref(node); + new_head->next = list; - return new_head; + return new_head; } -static void -__kmp_depnode_list_free ( kmp_info_t *thread, kmp_depnode_list *list ) -{ - kmp_depnode_list *next; +static void __kmp_depnode_list_free(kmp_info_t *thread, + kmp_depnode_list *list) { + kmp_depnode_list *next; - for ( ; list ; list = next ) { - next = list->next; + for (; list; list = next) { + next = list->next; - __kmp_node_deref(thread,list->node); + __kmp_node_deref(thread, list->node); #if USE_FAST_MEMORY - __kmp_fast_free(thread,list); + __kmp_fast_free(thread, list); #else - __kmp_thread_free(thread,list); + __kmp_thread_free(thread, list); #endif - } + } } -static inline void -__kmp_track_dependence ( kmp_depnode_t *source, kmp_depnode_t *sink, - kmp_task_t *sink_task ) -{ +static inline void __kmp_track_dependence(kmp_depnode_t *source, + kmp_depnode_t *sink, + kmp_task_t *sink_task) { #ifdef KMP_SUPPORT_GRAPH_OUTPUT - kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); - // do not use sink->dn.task as that is only filled after the dependencies - // are already processed! - kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task); - - __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, task_source->td_ident->psource, sink->dn.id, task_sink->td_ident->psource); + kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); + // do not use sink->dn.task as that is only filled after the dependencies + // are already processed! 
+ kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); + + __kmp_printf("%d(%s) -> %d(%s)\n", source->dn.id, + task_source->td_ident->psource, sink->dn.id, + task_sink->td_ident->psource); #endif #if OMPT_SUPPORT && OMPT_TRACE - /* OMPT tracks dependences between task (a=source, b=sink) in which - task a blocks the execution of b through the ompt_new_dependence_callback */ - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) - { - kmp_taskdata_t * task_source = KMP_TASK_TO_TASKDATA(source->dn.task); - kmp_taskdata_t * task_sink = KMP_TASK_TO_TASKDATA(sink_task); - - ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( - task_source->ompt_task_info.task_id, - task_sink->ompt_task_info.task_id); - } + // OMPT tracks dependences between task (a=source, b=sink) in which + // task a blocks the execution of b through the ompt_new_dependence_callback + if (ompt_enabled && + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)) { + kmp_taskdata_t *task_source = KMP_TASK_TO_TASKDATA(source->dn.task); + kmp_taskdata_t *task_sink = KMP_TASK_TO_TASKDATA(sink_task); + + ompt_callbacks.ompt_callback(ompt_event_task_dependence_pair)( + task_source->ompt_task_info.task_id, task_sink->ompt_task_info.task_id); + } #endif /* OMPT_SUPPORT && OMPT_TRACE */ } -template< bool filter > +template static inline kmp_int32 -__kmp_process_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, - bool dep_barrier,kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_task_t *task ) -{ - KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : dep_barrier = %d\n", filter, gtid, ndeps, dep_barrier ) ); - - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_int32 npredecessors=0; - for ( kmp_int32 i = 0; i < ndeps ; i++ ) { - const kmp_depend_info_t * dep = &dep_list[i]; - - KMP_DEBUG_ASSERT(dep->flags.in); - - if ( filter && dep->base_addr == 0 ) continue; // skip filtered entries - - kmp_dephash_entry_t *info = __kmp_dephash_find(thread,hash,dep->base_addr); - kmp_depnode_t *last_out = info->last_out; - - if ( dep->flags.out && info->last_ins ) { - for ( kmp_depnode_list_t * p = info->last_ins; p; p = p->next ) { - kmp_depnode_t * indep = p->node; - if ( indep->dn.task ) { - KMP_ACQUIRE_DEPNODE(gtid,indep); - if ( indep->dn.task ) { - __kmp_track_dependence(indep,node,task); - indep->dn.successors = __kmp_add_node(thread, indep->dn.successors, node); - KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", - filter,gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), KMP_TASK_TO_TASKDATA(task))); - npredecessors++; - } - KMP_RELEASE_DEPNODE(gtid,indep); - } - } - - __kmp_depnode_list_free(thread,info->last_ins); - info->last_ins = NULL; - - } else if ( last_out && last_out->dn.task ) { - KMP_ACQUIRE_DEPNODE(gtid,last_out); - if ( last_out->dn.task ) { - __kmp_track_dependence(last_out,node,task); - last_out->dn.successors = __kmp_add_node(thread, last_out->dn.successors, node); - KA_TRACE(40,("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", - filter,gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), KMP_TASK_TO_TASKDATA(task))); - - npredecessors++; - } - KMP_RELEASE_DEPNODE(gtid,last_out); - } - - if ( dep_barrier ) { - // if this is a sync point in the serial sequence, then the previous outputs are guaranteed to be completed after - // the execution of this task so the previous output nodes can be cleared. 
- __kmp_node_deref(thread,last_out); - info->last_out = NULL; - } else { - if ( dep->flags.out ) { - __kmp_node_deref(thread,last_out); - info->last_out = __kmp_node_ref(node); - } else - info->last_ins = __kmp_add_node(thread, info->last_ins, node); +__kmp_process_deps(kmp_int32 gtid, kmp_depnode_t *node, kmp_dephash_t *hash, + bool dep_barrier, kmp_int32 ndeps, + kmp_depend_info_t *dep_list, kmp_task_t *task) { + KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d processing %d dependencies : " + "dep_barrier = %d\n", + filter, gtid, ndeps, dep_barrier)); + + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_int32 npredecessors = 0; + for (kmp_int32 i = 0; i < ndeps; i++) { + const kmp_depend_info_t *dep = &dep_list[i]; + + KMP_DEBUG_ASSERT(dep->flags.in); + + if (filter && dep->base_addr == 0) + continue; // skip filtered entries + + kmp_dephash_entry_t *info = + __kmp_dephash_find(thread, hash, dep->base_addr); + kmp_depnode_t *last_out = info->last_out; + + if (dep->flags.out && info->last_ins) { + for (kmp_depnode_list_t *p = info->last_ins; p; p = p->next) { + kmp_depnode_t *indep = p->node; + if (indep->dn.task) { + KMP_ACQUIRE_DEPNODE(gtid, indep); + if (indep->dn.task) { + __kmp_track_dependence(indep, node, task); + indep->dn.successors = + __kmp_add_node(thread, indep->dn.successors, node); + KA_TRACE(40, ("__kmp_process_deps<%d>: T#%d adding dependence from " + "%p to %p\n", + filter, gtid, KMP_TASK_TO_TASKDATA(indep->dn.task), + KMP_TASK_TO_TASKDATA(task))); + npredecessors++; + } + KMP_RELEASE_DEPNODE(gtid, indep); } + } + + __kmp_depnode_list_free(thread, info->last_ins); + info->last_ins = NULL; + + } else if (last_out && last_out->dn.task) { + KMP_ACQUIRE_DEPNODE(gtid, last_out); + if (last_out->dn.task) { + __kmp_track_dependence(last_out, node, task); + last_out->dn.successors = + __kmp_add_node(thread, last_out->dn.successors, node); + KA_TRACE( + 40, + ("__kmp_process_deps<%d>: T#%d adding dependence from %p to %p\n", + filter, gtid, KMP_TASK_TO_TASKDATA(last_out->dn.task), + KMP_TASK_TO_TASKDATA(task))); + + npredecessors++; + } + KMP_RELEASE_DEPNODE(gtid, last_out); + } + if (dep_barrier) { + // if this is a sync point in the serial sequence, then the previous + // outputs are guaranteed to be completed after + // the execution of this task so the previous output nodes can be cleared. 
+      __kmp_node_deref(thread, last_out);
+      info->last_out = NULL;
+    } else {
+      if (dep->flags.out) {
+        __kmp_node_deref(thread, last_out);
+        info->last_out = __kmp_node_ref(node);
+      } else
+        info->last_ins = __kmp_add_node(thread, info->last_ins, node);
    }
  }
-    KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter, gtid, npredecessors ) );
+  KA_TRACE(30, ("__kmp_process_deps<%d>: T#%d found %d predecessors\n", filter,
+                gtid, npredecessors));
-    return npredecessors;
+  return npredecessors;
}
#define NO_DEP_BARRIER (false)
#define DEP_BARRIER (true)
// returns true if the task has any outstanding dependence
-static bool
-__kmp_check_deps ( kmp_int32 gtid, kmp_depnode_t *node, kmp_task_t *task, kmp_dephash_t *hash, bool dep_barrier,
-                   kmp_int32 ndeps, kmp_depend_info_t *dep_list,
-                   kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list )
-{
-    int i;
+static bool __kmp_check_deps(kmp_int32 gtid, kmp_depnode_t *node,
+                             kmp_task_t *task, kmp_dephash_t *hash,
+                             bool dep_barrier, kmp_int32 ndeps,
+                             kmp_depend_info_t *dep_list,
+                             kmp_int32 ndeps_noalias,
+                             kmp_depend_info_t *noalias_dep_list) {
+  int i;
#if KMP_DEBUG
-    kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task);
+  kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
#endif
-    KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d possibly aliased dependencies, %d non-aliased depedencies : dep_barrier=%d .\n", gtid, taskdata, ndeps, ndeps_noalias, dep_barrier ) );
-
-    // Filter deps in dep_list
-    // TODO: Different algorithm for large dep_list ( > 10 ? )
-    for ( i = 0; i < ndeps; i ++ ) {
-        if ( dep_list[i].base_addr != 0 )
-            for ( int j = i+1; j < ndeps; j++ )
-                if ( dep_list[i].base_addr == dep_list[j].base_addr ) {
-                    dep_list[i].flags.in |= dep_list[j].flags.in;
-                    dep_list[i].flags.out |= dep_list[j].flags.out;
-                    dep_list[j].base_addr = 0; // Mark j element as void
-                }
-    }
+  KA_TRACE(20, ("__kmp_check_deps: T#%d checking dependencies for task %p : %d "
+                "possibly aliased dependencies, %d non-aliased dependencies : "
+                "dep_barrier=%d .\n",
+                gtid, taskdata, ndeps, ndeps_noalias, dep_barrier));
+
+  // Filter deps in dep_list
+  // TODO: Different algorithm for large dep_list ( > 10 ? )
+  for (i = 0; i < ndeps; i++) {
+    if (dep_list[i].base_addr != 0)
+      for (int j = i + 1; j < ndeps; j++)
+        if (dep_list[i].base_addr == dep_list[j].base_addr) {
+          dep_list[i].flags.in |= dep_list[j].flags.in;
+          dep_list[i].flags.out |= dep_list[j].flags.out;
+          dep_list[j].base_addr = 0; // Mark j element as void
+        }
+  }
-    // doesn't need to be atomic as no other thread is going to be accessing this node just yet
-    // npredecessors is set -1 to ensure that none of the releasing tasks queues this task before we have finished processing all the dependencies
-    node->dn.npredecessors = -1;
+  // doesn't need to be atomic as no other thread is going to be accessing this
+  // node just yet.
+  // npredecessors is set -1 to ensure that none of the releasing tasks queues
+  // this task before we have finished processing all the dependencies
+  node->dn.npredecessors = -1;
-    // used to pack all npredecessors additions into a single atomic operation at the end
-    int npredecessors;
+  // used to pack all npredecessors additions into a single atomic operation at
+  // the end
+  int npredecessors;
-    npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier,
-                                             ndeps, dep_list, task);
-    npredecessors += __kmp_process_deps<false>(gtid, node, hash, dep_barrier,
-                                               ndeps_noalias, noalias_dep_list, task);
+  npredecessors = __kmp_process_deps<true>(gtid, node, hash, dep_barrier, ndeps,
+                                           dep_list, task);
+  npredecessors += __kmp_process_deps<false>(
+      gtid, node, hash, dep_barrier, ndeps_noalias, noalias_dep_list, task);
-    node->dn.task = task;
-    KMP_MB();
+  node->dn.task = task;
+  KMP_MB();
-    // Account for our initial fake value
-    npredecessors++;
+  // Account for our initial fake value
+  npredecessors++;
-    // Update predecessors and obtain current value to check if there are still any outstandig dependences (some tasks may have finished while we processed the dependences)
-    npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) + npredecessors;
+  // Update predecessors and obtain current value to check if there are still
+  // any outstanding dependences (some tasks may have finished while we
+  // processed the dependences)
+  npredecessors = KMP_TEST_THEN_ADD32(&node->dn.npredecessors, npredecessors) +
+                  npredecessors;
-    KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n", gtid, npredecessors, taskdata ) );
+  KA_TRACE(20, ("__kmp_check_deps: T#%d found %d predecessors for task %p \n",
+                gtid, npredecessors, taskdata));
-    // beyond this point the task could be queued (and executed) by a releasing task...
-    return npredecessors > 0 ? true : false;
+  // beyond this point the task could be queued (and executed) by a releasing
+  // task...
+  return npredecessors > 0 ?
true : false; } -void -__kmp_release_deps ( kmp_int32 gtid, kmp_taskdata_t *task ) -{ - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_depnode_t *node = task->td_depnode; - - if ( task->td_dephash ) { - KA_TRACE(40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", gtid, task ) ); - __kmp_dephash_free(thread,task->td_dephash); - task->td_dephash = NULL; +void __kmp_release_deps(kmp_int32 gtid, kmp_taskdata_t *task) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_depnode_t *node = task->td_depnode; + + if (task->td_dephash) { + KA_TRACE( + 40, ("__kmp_release_deps: T#%d freeing dependencies hash of task %p.\n", + gtid, task)); + __kmp_dephash_free(thread, task->td_dephash); + task->td_dephash = NULL; + } + + if (!node) + return; + + KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", + gtid, task)); + + KMP_ACQUIRE_DEPNODE(gtid, node); + node->dn.task = + NULL; // mark this task as finished, so no new dependencies are generated + KMP_RELEASE_DEPNODE(gtid, node); + + kmp_depnode_list_t *next; + for (kmp_depnode_list_t *p = node->dn.successors; p; p = next) { + kmp_depnode_t *successor = p->node; + kmp_int32 npredecessors = + KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1; + + // successor task can be NULL for wait_depends or because deps are still + // being processed + if (npredecessors == 0) { + KMP_MB(); + if (successor->dn.task) { + KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled " + "for execution.\n", + gtid, successor->dn.task, task)); + __kmp_omp_task(gtid, successor->dn.task, false); + } } - if ( !node ) return; - - KA_TRACE(20, ("__kmp_release_deps: T#%d notifying successors of task %p.\n", gtid, task ) ); - - KMP_ACQUIRE_DEPNODE(gtid,node); - node->dn.task = NULL; // mark this task as finished, so no new dependencies are generated - KMP_RELEASE_DEPNODE(gtid,node); - - kmp_depnode_list_t *next; - for ( kmp_depnode_list_t *p = node->dn.successors; p; p = next ) { - kmp_depnode_t *successor = p->node; - kmp_int32 npredecessors = KMP_TEST_THEN_DEC32(&successor->dn.npredecessors) - 1; - - // successor task can be NULL for wait_depends or because deps are still being processed - if ( npredecessors == 0 ) { - KMP_MB(); - if ( successor->dn.task ) { - KA_TRACE(20, ("__kmp_release_deps: T#%d successor %p of %p scheduled for execution.\n", gtid, successor->dn.task, task ) ); - __kmp_omp_task(gtid,successor->dn.task,false); - } - } - - next = p->next; - __kmp_node_deref(thread,p->node); + next = p->next; + __kmp_node_deref(thread, p->node); #if USE_FAST_MEMORY - __kmp_fast_free(thread,p); + __kmp_fast_free(thread, p); #else - __kmp_thread_free(thread,p); + __kmp_thread_free(thread, p); #endif - } + } - __kmp_node_deref(thread,node); + __kmp_node_deref(thread, node); - KA_TRACE(20, ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", gtid, task ) ); + KA_TRACE( + 20, + ("__kmp_release_deps: T#%d all successors of %p notified of completion\n", + gtid, task)); } /*! 
@ingroup TASKING @param loc_ref location of the original task directive @param gtid Global Thread ID of encountering thread -@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' +@param new_task task thunk allocated by __kmp_omp_task_alloc() for the ''new +task'' @param ndeps Number of depend items with possible aliasing @param dep_list List of depend items with possible aliasing @param ndeps_noalias Number of depend items with no aliasing @param noalias_dep_list List of depend items with no aliasing -@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued +@return Returns either TASK_CURRENT_NOT_QUEUED if the current task was not +suspendend and queued, or TASK_CURRENT_QUEUED if it was suspended and queued Schedule a non-thread-switchable task with dependences for execution */ -kmp_int32 -__kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task, - kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) -{ +kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task, kmp_int32 ndeps, + kmp_depend_info_t *dep_list, + kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list) { - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); + kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + KA_TRACE(10, ("__kmpc_omp_task_with_deps(enter): T#%d loc=%p task=%p\n", gtid, + loc_ref, new_taskdata)); - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * current_task = thread->th.th_current_task; + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *current_task = thread->th.th_current_task; #if OMPT_SUPPORT && OMPT_TRACE - /* OMPT grab all dependences if requested by the tool */ - if (ompt_enabled && ndeps+ndeps_noalias > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) - { - kmp_int32 i; - - new_taskdata->ompt_task_info.ndeps = ndeps+ndeps_noalias; - new_taskdata->ompt_task_info.deps = (ompt_task_dependence_t *) - KMP_OMPT_DEPS_ALLOC(thread, - (ndeps+ndeps_noalias)*sizeof(ompt_task_dependence_t)); - - KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); - - for (i = 0; i < ndeps; i++) - { - new_taskdata->ompt_task_info.deps[i].variable_addr = - (void*) dep_list[i].base_addr; - if (dep_list[i].flags.in && dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[i].dependence_flags = - ompt_task_dependence_type_inout; - else if (dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[i].dependence_flags = - ompt_task_dependence_type_out; - else if (dep_list[i].flags.in) - new_taskdata->ompt_task_info.deps[i].dependence_flags = - ompt_task_dependence_type_in; - } - for (i = 0; i < ndeps_noalias; i++) - { - new_taskdata->ompt_task_info.deps[ndeps+i].variable_addr = - (void*) noalias_dep_list[i].base_addr; - if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = - ompt_task_dependence_type_inout; - else if (noalias_dep_list[i].flags.out) - new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = - ompt_task_dependence_type_out; - else if (noalias_dep_list[i].flags.in) - new_taskdata->ompt_task_info.deps[ndeps+i].dependence_flags = - ompt_task_dependence_type_in; - } + /* OMPT grab all dependences if requested by 
the tool */ + if (ompt_enabled && ndeps + ndeps_noalias > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { + kmp_int32 i; + + new_taskdata->ompt_task_info.ndeps = ndeps + ndeps_noalias; + new_taskdata->ompt_task_info.deps = + (ompt_task_dependence_t *)KMP_OMPT_DEPS_ALLOC( + thread, (ndeps + ndeps_noalias) * sizeof(ompt_task_dependence_t)); + + KMP_ASSERT(new_taskdata->ompt_task_info.deps != NULL); + + for (i = 0; i < ndeps; i++) { + new_taskdata->ompt_task_info.deps[i].variable_addr = + (void *)dep_list[i].base_addr; + if (dep_list[i].flags.in && dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_inout; + else if (dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_out; + else if (dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[i].dependence_flags = + ompt_task_dependence_type_in; } + for (i = 0; i < ndeps_noalias; i++) { + new_taskdata->ompt_task_info.deps[ndeps + i].variable_addr = + (void *)noalias_dep_list[i].base_addr; + if (noalias_dep_list[i].flags.in && noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags = + ompt_task_dependence_type_inout; + else if (noalias_dep_list[i].flags.out) + new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags = + ompt_task_dependence_type_out; + else if (noalias_dep_list[i].flags.in) + new_taskdata->ompt_task_info.deps[ndeps + i].dependence_flags = + ompt_task_dependence_type_in; + } + } #endif /* OMPT_SUPPORT && OMPT_TRACE */ - bool serial = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; + bool serial = current_task->td_flags.team_serial || + current_task->td_flags.tasking_ser || + current_task->td_flags.final; #if OMP_45_ENABLED - kmp_task_team_t * task_team = thread->th.th_task_team; - serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks); + kmp_task_team_t *task_team = thread->th.th_task_team; + serial = serial && !(task_team && task_team->tt.tt_found_proxy_tasks); #endif - if ( !serial && ( ndeps > 0 || ndeps_noalias > 0 )) { - /* if no dependencies have been tracked yet, create the dependence hash */ - if ( current_task->td_dephash == NULL ) - current_task->td_dephash = __kmp_dephash_create(thread, current_task); + if (!serial && (ndeps > 0 || ndeps_noalias > 0)) { + /* if no dependencies have been tracked yet, create the dependence hash */ + if (current_task->td_dephash == NULL) + current_task->td_dephash = __kmp_dephash_create(thread, current_task); #if USE_FAST_MEMORY - kmp_depnode_t *node = (kmp_depnode_t *) __kmp_fast_allocate(thread,sizeof(kmp_depnode_t)); + kmp_depnode_t *node = + (kmp_depnode_t *)__kmp_fast_allocate(thread, sizeof(kmp_depnode_t)); #else - kmp_depnode_t *node = (kmp_depnode_t *) __kmp_thread_malloc(thread,sizeof(kmp_depnode_t)); + kmp_depnode_t *node = + (kmp_depnode_t *)__kmp_thread_malloc(thread, sizeof(kmp_depnode_t)); #endif - __kmp_init_node(node); - new_taskdata->td_depnode = node; - - if ( __kmp_check_deps( gtid, node, new_task, current_task->td_dephash, NO_DEP_BARRIER, - ndeps, dep_list, ndeps_noalias,noalias_dep_list ) ) { - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking dependencies: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, - new_taskdata ) ); - return TASK_CURRENT_NOT_QUEUED; - } - } else { - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies for task (serialized)" - 
"loc=%p task=%p\n", gtid, loc_ref, new_taskdata ) ); + __kmp_init_node(node); + new_taskdata->td_depnode = node; + + if (__kmp_check_deps(gtid, node, new_task, current_task->td_dephash, + NO_DEP_BARRIER, ndeps, dep_list, ndeps_noalias, + noalias_dep_list)) { + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had blocking " + "dependencies: " + "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", + gtid, loc_ref, new_taskdata)); + return TASK_CURRENT_NOT_QUEUED; } - - KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking dependencies : " - "loc=%p task=%p, transferring to __kmpc_omp_task\n", gtid, loc_ref, - new_taskdata ) ); - - return __kmpc_omp_task(loc_ref,gtid,new_task); + } else { + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d ignored dependencies " + "for task (serialized)" + "loc=%p task=%p\n", + gtid, loc_ref, new_taskdata)); + } + + KA_TRACE(10, ("__kmpc_omp_task_with_deps(exit): T#%d task had no blocking " + "dependencies : " + "loc=%p task=%p, transferring to __kmpc_omp_task\n", + gtid, loc_ref, new_taskdata)); + + return __kmpc_omp_task(loc_ref, gtid, new_task); } /*! @@ -539,55 +571,64 @@ __kmpc_omp_task_with_deps( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_ta Blocks the current task until all specifies dependencies have been fulfilled. */ -void -__kmpc_omp_wait_deps ( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, kmp_depend_info_t *dep_list, - kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list ) -{ - KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref) ); - - if ( ndeps == 0 && ndeps_noalias == 0 ) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to wait upon : loc=%p\n", gtid, loc_ref) ); - return; - } - - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * current_task = thread->th.th_current_task; - - // We can return immediately as: - // - dependences are not computed in serial teams (except if we have proxy tasks) - // - if the dephash is not yet created it means we have nothing to wait for - bool ignore = current_task->td_flags.team_serial || current_task->td_flags.tasking_ser || current_task->td_flags.final; +void __kmpc_omp_wait_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ndeps, + kmp_depend_info_t *dep_list, kmp_int32 ndeps_noalias, + kmp_depend_info_t *noalias_dep_list) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(enter): T#%d loc=%p\n", gtid, loc_ref)); + + if (ndeps == 0 && ndeps_noalias == 0) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no dependencies to " + "wait upon : loc=%p\n", + gtid, loc_ref)); + return; + } + + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *current_task = thread->th.th_current_task; + + // We can return immediately as: + // - dependences are not computed in serial teams (except with proxy tasks) + // - if the dephash is not yet created it means we have nothing to wait for + bool ignore = current_task->td_flags.team_serial || + current_task->td_flags.tasking_ser || + current_task->td_flags.final; #if OMP_45_ENABLED - ignore = ignore && thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; + ignore = ignore && thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks == FALSE; #endif - ignore = ignore || current_task->td_dephash == NULL; - - if ( ignore ) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); - return; - } - - kmp_depnode_t node; - __kmp_init_node(&node); - - if 
(!__kmp_check_deps( gtid, &node, NULL, current_task->td_dephash, DEP_BARRIER, - ndeps, dep_list, ndeps_noalias, noalias_dep_list )) { - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking dependencies : loc=%p\n", gtid, loc_ref) ); - return; - } - - int thread_finished = FALSE; - kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U); - while ( node.dn.npredecessors > 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished, + ignore = ignore || current_task->td_dephash == NULL; + + if (ignore) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " + "dependencies : loc=%p\n", + gtid, loc_ref)); + return; + } + + kmp_depnode_t node; + __kmp_init_node(&node); + + if (!__kmp_check_deps(gtid, &node, NULL, current_task->td_dephash, + DEP_BARRIER, ndeps, dep_list, ndeps_noalias, + noalias_dep_list)) { + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d has no blocking " + "dependencies : loc=%p\n", + gtid, loc_ref)); + return; + } + + int thread_finished = FALSE; + kmp_flag_32 flag((volatile kmp_uint32 *)&(node.dn.npredecessors), 0U); + while (node.dn.npredecessors > 0) { + flag.execute_tasks(thread, gtid, FALSE, &thread_finished, #if USE_ITT_BUILD - NULL, + NULL, #endif - __kmp_task_stealing_constraint ); - } + __kmp_task_stealing_constraint); + } - KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", gtid, loc_ref) ); + KA_TRACE(10, ("__kmpc_omp_wait_deps(exit): T#%d finished waiting : loc=%p\n", + gtid, loc_ref)); } #endif /* OMP_40_ENABLED */ - diff --git a/openmp/runtime/src/kmp_tasking.cpp b/openmp/runtime/src/kmp_tasking.cpp index d3cf1cc..4be322f 100644 --- a/openmp/runtime/src/kmp_tasking.cpp +++ b/openmp/runtime/src/kmp_tasking.cpp @@ -16,8 +16,8 @@ #include "kmp.h" #include "kmp_i18n.h" #include "kmp_itt.h" -#include "kmp_wait_release.h" #include "kmp_stats.h" +#include "kmp_wait_release.h" #if OMPT_SUPPORT #include "ompt-specific.h" @@ -25,1608 +25,1625 @@ #include "tsan_annotations.h" -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - /* forward declaration */ -static void __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ); -static void __kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ); -static int __kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ); +static void __kmp_enable_tasking(kmp_task_team_t *task_team, + kmp_info_t *this_thr); +static void __kmp_alloc_task_deque(kmp_info_t *thread, + kmp_thread_data_t *thread_data); +static int __kmp_realloc_task_threads_data(kmp_info_t *thread, + kmp_task_team_t *task_team); #ifdef OMP_45_ENABLED -static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ); +static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask); #endif #ifdef BUILD_TIED_TASK_STACK -//--------------------------------------------------------------------------- // __kmp_trace_task_stack: print the tied tasks from the task stack in order -// from top do bottom +// from top do bottom // // gtid: global thread identifier for thread containing stack // thread_data: thread data for task team thread containing stack // threshold: value above which the trace statement triggers // location: string identifying call site of this function (for trace) +static void __kmp_trace_task_stack(kmp_int32 gtid, + kmp_thread_data_t *thread_data, + int threshold, char *location) { + 
kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; + kmp_taskdata_t **stack_top = task_stack->ts_top; + kmp_int32 entries = task_stack->ts_entries; + kmp_taskdata_t *tied_task; + + KA_TRACE( + threshold, + ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " + "first_block = %p, stack_top = %p \n", + location, gtid, entries, task_stack->ts_first_block, stack_top)); + + KMP_DEBUG_ASSERT(stack_top != NULL); + KMP_DEBUG_ASSERT(entries > 0); + + while (entries != 0) { + KMP_DEBUG_ASSERT(stack_top != &task_stack->ts_first_block.sb_block[0]); + // fix up ts_top if we need to pop from previous block + if (entries & TASK_STACK_INDEX_MASK == 0) { + kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(stack_top); -static void -__kmp_trace_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data, int threshold, char *location ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_taskdata_t **stack_top = task_stack -> ts_top; - kmp_int32 entries = task_stack -> ts_entries; - kmp_taskdata_t *tied_task; - - KA_TRACE(threshold, ("__kmp_trace_task_stack(start): location = %s, gtid = %d, entries = %d, " - "first_block = %p, stack_top = %p \n", - location, gtid, entries, task_stack->ts_first_block, stack_top ) ); - - KMP_DEBUG_ASSERT( stack_top != NULL ); - KMP_DEBUG_ASSERT( entries > 0 ); - - while ( entries != 0 ) - { - KMP_DEBUG_ASSERT( stack_top != & task_stack->ts_first_block.sb_block[0] ); - // fix up ts_top if we need to pop from previous block - if ( entries & TASK_STACK_INDEX_MASK == 0 ) - { - kmp_stack_block_t *stack_block = (kmp_stack_block_t *) (stack_top) ; - - stack_block = stack_block -> sb_prev; - stack_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; - } + stack_block = stack_block->sb_prev; + stack_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; + } - // finish bookkeeping - stack_top--; - entries--; + // finish bookkeeping + stack_top--; + entries--; - tied_task = * stack_top; + tied_task = *stack_top; - KMP_DEBUG_ASSERT( tied_task != NULL ); - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); + KMP_DEBUG_ASSERT(tied_task != NULL); + KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); - KA_TRACE(threshold, ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " - "stack_top=%p, tied_task=%p\n", - location, gtid, entries, stack_top, tied_task ) ); - } - KMP_DEBUG_ASSERT( stack_top == & task_stack->ts_first_block.sb_block[0] ); + KA_TRACE(threshold, + ("__kmp_trace_task_stack(%s): gtid=%d, entry=%d, " + "stack_top=%p, tied_task=%p\n", + location, gtid, entries, stack_top, tied_task)); + } + KMP_DEBUG_ASSERT(stack_top == &task_stack->ts_first_block.sb_block[0]); - KA_TRACE(threshold, ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", - location, gtid ) ); + KA_TRACE(threshold, + ("__kmp_trace_task_stack(exit): location = %s, gtid = %d\n", + location, gtid)); } -//--------------------------------------------------------------------------- // __kmp_init_task_stack: initialize the task stack for the first time -// after a thread_data structure is created. -// It should not be necessary to do this again (assuming the stack works). +// after a thread_data structure is created. +// It should not be necessary to do this again (assuming the stack works). 
// // gtid: global thread identifier of calling thread // thread_data: thread data for task team thread containing stack - -static void -__kmp_init_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *first_block; - - // set up the first block of the stack - first_block = & task_stack -> ts_first_block; - task_stack -> ts_top = (kmp_taskdata_t **) first_block; - memset( (void *) first_block, '\0', TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); - - // initialize the stack to be empty - task_stack -> ts_entries = TASK_STACK_EMPTY; - first_block -> sb_next = NULL; - first_block -> sb_prev = NULL; +static void __kmp_init_task_stack(kmp_int32 gtid, + kmp_thread_data_t *thread_data) { + kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; + kmp_stack_block_t *first_block; + + // set up the first block of the stack + first_block = &task_stack->ts_first_block; + task_stack->ts_top = (kmp_taskdata_t **)first_block; + memset((void *)first_block, '\0', + TASK_STACK_BLOCK_SIZE * sizeof(kmp_taskdata_t *)); + + // initialize the stack to be empty + task_stack->ts_entries = TASK_STACK_EMPTY; + first_block->sb_next = NULL; + first_block->sb_prev = NULL; } - -//--------------------------------------------------------------------------- // __kmp_free_task_stack: free the task stack when thread_data is destroyed. // // gtid: global thread identifier for calling thread // thread_data: thread info for thread containing stack - -static void -__kmp_free_task_stack( kmp_int32 gtid, kmp_thread_data_t *thread_data ) -{ - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks; - kmp_stack_block_t *stack_block = & task_stack -> ts_first_block; - - KMP_DEBUG_ASSERT( task_stack -> ts_entries == TASK_STACK_EMPTY ); - // free from the second block of the stack - while ( stack_block != NULL ) { - kmp_stack_block_t *next_block = (stack_block) ? stack_block -> sb_next : NULL; - - stack_block -> sb_next = NULL; - stack_block -> sb_prev = NULL; - if (stack_block != & task_stack -> ts_first_block) { - __kmp_thread_free( thread, stack_block ); // free the block, if not the first - } - stack_block = next_block; - } - // initialize the stack to be empty - task_stack -> ts_entries = 0; - task_stack -> ts_top = NULL; +static void __kmp_free_task_stack(kmp_int32 gtid, + kmp_thread_data_t *thread_data) { + kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; + kmp_stack_block_t *stack_block = &task_stack->ts_first_block; + + KMP_DEBUG_ASSERT(task_stack->ts_entries == TASK_STACK_EMPTY); + // free from the second block of the stack + while (stack_block != NULL) { + kmp_stack_block_t *next_block = (stack_block) ? stack_block->sb_next : NULL; + + stack_block->sb_next = NULL; + stack_block->sb_prev = NULL; + if (stack_block != &task_stack->ts_first_block) { + __kmp_thread_free(thread, + stack_block); // free the block, if not the first + } + stack_block = next_block; + } + // initialize the stack to be empty + task_stack->ts_entries = 0; + task_stack->ts_top = NULL; } - -//--------------------------------------------------------------------------- // __kmp_push_task_stack: Push the tied task onto the task stack. // Grow the stack if necessary by allocating another block. 
// // gtid: global thread identifier for calling thread // thread: thread info for thread containing stack // tied_task: the task to push on the stack - -static void -__kmp_push_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t * tied_task ) -{ - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> - tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; - - if ( tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser ) { - return; // Don't push anything on stack if team or team tasks are serialized - } - - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); - KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); - - KA_TRACE(20, ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", - gtid, thread, tied_task ) ); - // Store entry - * (task_stack -> ts_top) = tied_task; - - // Do bookkeeping for next push - task_stack -> ts_top++; - task_stack -> ts_entries++; - - if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) - { - // Find beginning of this task block - kmp_stack_block_t *stack_block = - (kmp_stack_block_t *) (task_stack -> ts_top - TASK_STACK_BLOCK_SIZE); - - // Check if we already have a block - if ( stack_block -> sb_next != NULL ) - { // reset ts_top to beginning of next block - task_stack -> ts_top = & stack_block -> sb_next -> sb_block[0]; - } - else - { // Alloc new block and link it up - kmp_stack_block_t *new_block = (kmp_stack_block_t *) - __kmp_thread_calloc(thread, sizeof(kmp_stack_block_t)); - - task_stack -> ts_top = & new_block -> sb_block[0]; - stack_block -> sb_next = new_block; - new_block -> sb_prev = stack_block; - new_block -> sb_next = NULL; - - KA_TRACE(30, ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", - gtid, tied_task, new_block ) ); - } - } - KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); +static void __kmp_push_task_stack(kmp_int32 gtid, kmp_info_t *thread, + kmp_taskdata_t *tied_task) { + // GEH - need to consider what to do if tt_threads_data not allocated yet + kmp_thread_data_t *thread_data = + &thread->th.th_task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; + kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; + + if (tied_task->td_flags.team_serial || tied_task->td_flags.tasking_ser) { + return; // Don't push anything on stack if team or team tasks are serialized + } + + KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); + KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); + + KA_TRACE(20, + ("__kmp_push_task_stack(enter): GTID: %d; THREAD: %p; TASK: %p\n", + gtid, thread, tied_task)); + // Store entry + *(task_stack->ts_top) = tied_task; + + // Do bookkeeping for next push + task_stack->ts_top++; + task_stack->ts_entries++; + + if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) { + // Find beginning of this task block + kmp_stack_block_t *stack_block = + (kmp_stack_block_t *)(task_stack->ts_top - TASK_STACK_BLOCK_SIZE); + + // Check if we already have a block + if (stack_block->sb_next != + NULL) { // reset ts_top to beginning of next block + task_stack->ts_top = &stack_block->sb_next->sb_block[0]; + } else { // Alloc new block and link it up + kmp_stack_block_t *new_block = (kmp_stack_block_t *)__kmp_thread_calloc( + thread, sizeof(kmp_stack_block_t)); + + task_stack->ts_top = &new_block->sb_block[0]; + stack_block->sb_next = new_block; + 
new_block->sb_prev = stack_block; + new_block->sb_next = NULL; + + KA_TRACE( + 30, + ("__kmp_push_task_stack(): GTID: %d; TASK: %p; Alloc new block: %p\n", + gtid, tied_task, new_block)); + } + } + KA_TRACE(20, ("__kmp_push_task_stack(exit): GTID: %d; TASK: %p\n", gtid, + tied_task)); } -//--------------------------------------------------------------------------- // __kmp_pop_task_stack: Pop the tied task from the task stack. Don't return -// the task, just check to make sure it matches the ending task passed in. +// the task, just check to make sure it matches the ending task passed in. // // gtid: global thread identifier for the calling thread // thread: thread info structure containing stack // tied_task: the task popped off the stack // ending_task: the task that is ending (should match popped task) +static void __kmp_pop_task_stack(kmp_int32 gtid, kmp_info_t *thread, + kmp_taskdata_t *ending_task) { + // GEH - need to consider what to do if tt_threads_data not allocated yet + kmp_thread_data_t *thread_data = + &thread->th.th_task_team->tt_threads_data[__kmp_tid_from_gtid(gtid)]; + kmp_task_stack_t *task_stack = &thread_data->td.td_susp_tied_tasks; + kmp_taskdata_t *tied_task; + + if (ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser) { + // Don't pop anything from stack if team or team tasks are serialized + return; + } -static void -__kmp_pop_task_stack( kmp_int32 gtid, kmp_info_t *thread, kmp_taskdata_t *ending_task ) -{ - // GEH - need to consider what to do if tt_threads_data not allocated yet - kmp_thread_data_t *thread_data = & thread -> th.th_task_team -> tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - kmp_task_stack_t *task_stack = & thread_data->td.td_susp_tied_tasks ; - kmp_taskdata_t *tied_task; - - if ( ending_task->td_flags.team_serial || ending_task->td_flags.tasking_ser ) { - return; // Don't pop anything from stack if team or team tasks are serialized - } - - KMP_DEBUG_ASSERT( task_stack -> ts_top != NULL ); - KMP_DEBUG_ASSERT( task_stack -> ts_entries > 0 ); + KMP_DEBUG_ASSERT(task_stack->ts_top != NULL); + KMP_DEBUG_ASSERT(task_stack->ts_entries > 0); - KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, thread ) ); + KA_TRACE(20, ("__kmp_pop_task_stack(enter): GTID: %d; THREAD: %p\n", gtid, + thread)); - // fix up ts_top if we need to pop from previous block - if ( task_stack -> ts_entries & TASK_STACK_INDEX_MASK == 0 ) - { - kmp_stack_block_t *stack_block = - (kmp_stack_block_t *) (task_stack -> ts_top) ; + // fix up ts_top if we need to pop from previous block + if (task_stack->ts_entries & TASK_STACK_INDEX_MASK == 0) { + kmp_stack_block_t *stack_block = (kmp_stack_block_t *)(task_stack->ts_top); - stack_block = stack_block -> sb_prev; - task_stack -> ts_top = & stack_block -> sb_block[TASK_STACK_BLOCK_SIZE]; - } + stack_block = stack_block->sb_prev; + task_stack->ts_top = &stack_block->sb_block[TASK_STACK_BLOCK_SIZE]; + } - // finish bookkeeping - task_stack -> ts_top--; - task_stack -> ts_entries--; + // finish bookkeeping + task_stack->ts_top--; + task_stack->ts_entries--; - tied_task = * (task_stack -> ts_top ); + tied_task = *(task_stack->ts_top); - KMP_DEBUG_ASSERT( tied_task != NULL ); - KMP_DEBUG_ASSERT( tied_task -> td_flags.tasktype == TASK_TIED ); - KMP_DEBUG_ASSERT( tied_task == ending_task ); // If we built the stack correctly + KMP_DEBUG_ASSERT(tied_task != NULL); + KMP_DEBUG_ASSERT(tied_task->td_flags.tasktype == TASK_TIED); + KMP_DEBUG_ASSERT(tied_task == ending_task); // If we built the stack 
correctly - KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, tied_task ) ); - return; + KA_TRACE(20, ("__kmp_pop_task_stack(exit): GTID: %d; TASK: %p\n", gtid, + tied_task)); + return; } #endif /* BUILD_TIED_TASK_STACK */ -//--------------------------------------------------- // __kmp_push_task: Add a task to the thread's deque - -static kmp_int32 -__kmp_push_task(kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_task_team_t * task_team = thread->th.th_task_team; - kmp_int32 tid = __kmp_tid_from_gtid( gtid ); - kmp_thread_data_t * thread_data; - - KA_TRACE(20, ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata ) ); - - if ( taskdata->td_flags.tiedness == TASK_UNTIED ) { - // untied task needs to increment counter so that the task structure is not freed prematurely - kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count); - KA_TRACE(20, ( "__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n", - gtid, counter, taskdata ) ); - } - - // The first check avoids building task_team thread data if serialized - if ( taskdata->td_flags.task_serial ) { - KA_TRACE(20, ( "__kmp_push_task: T#%d team serialized; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } - - // Now that serialized tasks have returned, we can assume that we are not in immediate exec mode - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - if ( ! KMP_TASKING_ENABLED(task_team) ) { - __kmp_enable_tasking( task_team, thread ); - } - KMP_DEBUG_ASSERT( TCR_4(task_team -> tt.tt_found_tasks) == TRUE ); - KMP_DEBUG_ASSERT( TCR_PTR(task_team -> tt.tt_threads_data) != NULL ); - - // Find tasking deque specific to encountering thread - thread_data = & task_team -> tt.tt_threads_data[ tid ]; - - // No lock needed since only owner can allocate - if (thread_data -> td.td_deque == NULL ) { - __kmp_alloc_task_deque( thread, thread_data ); - } - - // Check if deque is full - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) ) - { - KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } - - // Lock the deque for the task push operation - __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); +static kmp_int32 __kmp_push_task(kmp_int32 gtid, kmp_task_t *task) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_task_team_t *task_team = thread->th.th_task_team; + kmp_int32 tid = __kmp_tid_from_gtid(gtid); + kmp_thread_data_t *thread_data; + + KA_TRACE(20, + ("__kmp_push_task: T#%d trying to push task %p.\n", gtid, taskdata)); + + if (taskdata->td_flags.tiedness == TASK_UNTIED) { + // untied task needs to increment counter so that the task structure is not + // freed prematurely + kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count); + KA_TRACE( + 20, + ("__kmp_push_task: T#%d untied_count (%d) incremented for task %p\n", + gtid, counter, taskdata)); + } + + // The first check avoids building task_team thread data if serialized + if (taskdata->td_flags.task_serial) { + KA_TRACE(20, ("__kmp_push_task: T#%d team serialized; returning " + "TASK_NOT_PUSHED for task %p\n", + gtid, taskdata)); + return TASK_NOT_PUSHED; + } + + // Now that serialized tasks have returned, we can assume that we are not in + // immediate exec mode 
+ KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + if (!KMP_TASKING_ENABLED(task_team)) { + __kmp_enable_tasking(task_team, thread); + } + KMP_DEBUG_ASSERT(TCR_4(task_team->tt.tt_found_tasks) == TRUE); + KMP_DEBUG_ASSERT(TCR_PTR(task_team->tt.tt_threads_data) != NULL); + + // Find tasking deque specific to encountering thread + thread_data = &task_team->tt.tt_threads_data[tid]; + + // No lock needed since only owner can allocate + if (thread_data->td.td_deque == NULL) { + __kmp_alloc_task_deque(thread, thread_data); + } + + // Check if deque is full + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + KA_TRACE(20, ("__kmp_push_task: T#%d deque is full; returning " + "TASK_NOT_PUSHED for task %p\n", + gtid, taskdata)); + return TASK_NOT_PUSHED; + } + + // Lock the deque for the task push operation + __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); #if OMP_45_ENABLED - // Need to recheck as we can get a proxy task from a thread outside of OpenMP - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) ) - { - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(20, ( "__kmp_push_task: T#%d deque is full on 2nd check; returning TASK_NOT_PUSHED for task %p\n", - gtid, taskdata ) ); - return TASK_NOT_PUSHED; - } + // Need to recheck as we can get a proxy task from a thread outside of OpenMP + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); + KA_TRACE(20, ("__kmp_push_task: T#%d deque is full on 2nd check; returning " + "TASK_NOT_PUSHED for task %p\n", + gtid, taskdata)); + return TASK_NOT_PUSHED; + } #else - // Must have room since no thread can add tasks but calling thread - KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) < TASK_DEQUE_SIZE(thread_data->td) ); + // Must have room since no thread can add tasks but calling thread + KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) < + TASK_DEQUE_SIZE(thread_data->td)); #endif - thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; // Push taskdata - // Wrap index. - thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td); - TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); // Adjust task count + thread_data->td.td_deque[thread_data->td.td_deque_tail] = + taskdata; // Push taskdata + // Wrap index. 
+ thread_data->td.td_deque_tail = + (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td); + TCW_4(thread_data->td.td_deque_ntasks, + TCR_4(thread_data->td.td_deque_ntasks) + 1); // Adjust task count - KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: " - "task=%p ntasks=%d head=%u tail=%u\n", - gtid, taskdata, thread_data->td.td_deque_ntasks, - thread_data->td.td_deque_head, thread_data->td.td_deque_tail) ); + KA_TRACE(20, ("__kmp_push_task: T#%d returning TASK_SUCCESSFULLY_PUSHED: " + "task=%p ntasks=%d head=%u tail=%u\n", + gtid, taskdata, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); - __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock ); + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - return TASK_SUCCESSFULLY_PUSHED; + return TASK_SUCCESSFULLY_PUSHED; } - -//----------------------------------------------------------------------------------------- -// __kmp_pop_current_task_from_thread: set up current task from called thread when team ends +// __kmp_pop_current_task_from_thread: set up current task from called thread +// when team ends +// // this_thr: thread structure to set current_task in. - -void -__kmp_pop_current_task_from_thread( kmp_info_t *this_thr ) -{ - KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(enter): T#%d this_thread=%p, curtask=%p, " - "curtask_parent=%p\n", - 0, this_thr, this_thr -> th.th_current_task, - this_thr -> th.th_current_task -> td_parent ) ); - - this_thr -> th.th_current_task = this_thr -> th.th_current_task -> td_parent; - - KF_TRACE( 10, ("__kmp_pop_current_task_from_thread(exit): T#%d this_thread=%p, curtask=%p, " - "curtask_parent=%p\n", - 0, this_thr, this_thr -> th.th_current_task, - this_thr -> th.th_current_task -> td_parent ) ); +void __kmp_pop_current_task_from_thread(kmp_info_t *this_thr) { + KF_TRACE(10, ("__kmp_pop_current_task_from_thread(enter): T#%d " + "this_thread=%p, curtask=%p, " + "curtask_parent=%p\n", + 0, this_thr, this_thr->th.th_current_task, + this_thr->th.th_current_task->td_parent)); + + this_thr->th.th_current_task = this_thr->th.th_current_task->td_parent; + + KF_TRACE(10, ("__kmp_pop_current_task_from_thread(exit): T#%d " + "this_thread=%p, curtask=%p, " + "curtask_parent=%p\n", + 0, this_thr, this_thr->th.th_current_task, + this_thr->th.th_current_task->td_parent)); } - -//--------------------------------------------------------------------------------------- -// __kmp_push_current_task_to_thread: set up current task in called thread for a new team +// __kmp_push_current_task_to_thread: set up current task in called thread for a +// new team +// // this_thr: thread structure to set up // team: team for implicit task data // tid: thread within team to set up - -void -__kmp_push_current_task_to_thread( kmp_info_t *this_thr, kmp_team_t *team, int tid ) -{ - // current task of the thread is a parent of the new just created implicit tasks of new team - KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p curtask=%p " - "parent_task=%p\n", - tid, this_thr, this_thr->th.th_current_task, - team->t.t_implicit_task_taskdata[tid].td_parent ) ); - - KMP_DEBUG_ASSERT (this_thr != NULL); - - if( tid == 0 ) { - if( this_thr->th.th_current_task != & team -> t.t_implicit_task_taskdata[ 0 ] ) { - team -> t.t_implicit_task_taskdata[ 0 ].td_parent = this_thr->th.th_current_task; - this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ 0 ]; - } - } else { - team -> 
t.t_implicit_task_taskdata[ tid ].td_parent = team -> t.t_implicit_task_taskdata[ 0 ].td_parent; - this_thr->th.th_current_task = & team -> t.t_implicit_task_taskdata[ tid ]; - } - - KF_TRACE( 10, ( "__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p curtask=%p " - "parent_task=%p\n", - tid, this_thr, this_thr->th.th_current_task, - team->t.t_implicit_task_taskdata[tid].td_parent ) ); +void __kmp_push_current_task_to_thread(kmp_info_t *this_thr, kmp_team_t *team, + int tid) { + // current task of the thread is a parent of the new just created implicit + // tasks of new team + KF_TRACE(10, ("__kmp_push_current_task_to_thread(enter): T#%d this_thread=%p " + "curtask=%p " + "parent_task=%p\n", + tid, this_thr, this_thr->th.th_current_task, + team->t.t_implicit_task_taskdata[tid].td_parent)); + + KMP_DEBUG_ASSERT(this_thr != NULL); + + if (tid == 0) { + if (this_thr->th.th_current_task != &team->t.t_implicit_task_taskdata[0]) { + team->t.t_implicit_task_taskdata[0].td_parent = + this_thr->th.th_current_task; + this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[0]; + } + } else { + team->t.t_implicit_task_taskdata[tid].td_parent = + team->t.t_implicit_task_taskdata[0].td_parent; + this_thr->th.th_current_task = &team->t.t_implicit_task_taskdata[tid]; + } + + KF_TRACE(10, ("__kmp_push_current_task_to_thread(exit): T#%d this_thread=%p " + "curtask=%p " + "parent_task=%p\n", + tid, this_thr, this_thr->th.th_current_task, + team->t.t_implicit_task_taskdata[tid].td_parent)); } - -//---------------------------------------------------------------------- // __kmp_task_start: bookkeeping for a task starting execution +// // GTID: global thread id of calling thread // task: task starting execution // current_task: task suspending +static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, + kmp_taskdata_t *current_task) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_info_t *thread = __kmp_threads[gtid]; -static void -__kmp_task_start( kmp_int32 gtid, kmp_task_t * task, kmp_taskdata_t * current_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t * thread = __kmp_threads[ gtid ]; - - KA_TRACE(10, ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", - gtid, taskdata, current_task) ); + KA_TRACE(10, + ("__kmp_task_start(enter): T#%d starting task %p: current_task=%p\n", + gtid, taskdata, current_task)); - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - // mark currently executing task as suspended - // TODO: GEH - make sure root team implicit task is initialized properly. - // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); - current_task -> td_flags.executing = 0; + // mark currently executing task as suspended + // TODO: GEH - make sure root team implicit task is initialized properly. 
+ // KMP_DEBUG_ASSERT( current_task -> td_flags.executing == 1 ); + current_task->td_flags.executing = 0; - // Add task to stack if tied +// Add task to stack if tied #ifdef BUILD_TIED_TASK_STACK - if ( taskdata -> td_flags.tiedness == TASK_TIED ) - { - __kmp_push_task_stack( gtid, thread, taskdata ); - } + if (taskdata->td_flags.tiedness == TASK_TIED) { + __kmp_push_task_stack(gtid, thread, taskdata); + } #endif /* BUILD_TIED_TASK_STACK */ - // mark starting task as executing and as current task - thread -> th.th_current_task = taskdata; + // mark starting task as executing and as current task + thread->th.th_current_task = taskdata; - KMP_DEBUG_ASSERT( taskdata->td_flags.started == 0 || taskdata->td_flags.tiedness == TASK_UNTIED ); - KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 || taskdata->td_flags.tiedness == TASK_UNTIED ); - taskdata -> td_flags.started = 1; - taskdata -> td_flags.executing = 1; - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); + KMP_DEBUG_ASSERT(taskdata->td_flags.started == 0 || + taskdata->td_flags.tiedness == TASK_UNTIED); + KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0 || + taskdata->td_flags.tiedness == TASK_UNTIED); + taskdata->td_flags.started = 1; + taskdata->td_flags.executing = 1; + KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); + KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); - // GEH TODO: shouldn't we pass some sort of location identifier here? - // APT: yes, we will pass location here. - // need to store current thread state (in a thread or taskdata structure) - // before setting work_state, otherwise wrong state is set after end of task + // GEH TODO: shouldn't we pass some sort of location identifier here? + // APT: yes, we will pass location here. + // need to store current thread state (in a thread or taskdata structure) + // before setting work_state, otherwise wrong state is set after end of task - KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", - gtid, taskdata ) ); + KA_TRACE(10, ("__kmp_task_start(exit): T#%d task=%p\n", gtid, taskdata)); #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_begin)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_begin)( - parent ? parent->ompt_task_info.task_id : ompt_task_id_none, - parent ? &(parent->ompt_task_info.frame) : NULL, - taskdata->ompt_task_info.task_id, - taskdata->ompt_task_info.function); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_begin)) { + kmp_taskdata_t *parent = taskdata->td_parent; + ompt_callbacks.ompt_callback(ompt_event_task_begin)( + parent ? parent->ompt_task_info.task_id : ompt_task_id_none, + parent ? 
&(parent->ompt_task_info.frame) : NULL, + taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.function); + } #endif #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE - /* OMPT emit all dependences if requested by the tool */ - if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && - ompt_callbacks.ompt_callback(ompt_event_task_dependences)) - { - ompt_callbacks.ompt_callback(ompt_event_task_dependences)( - taskdata->ompt_task_info.task_id, - taskdata->ompt_task_info.deps, - taskdata->ompt_task_info.ndeps - ); - /* We can now free the allocated memory for the dependencies */ - KMP_OMPT_DEPS_FREE (thread, taskdata->ompt_task_info.deps); - taskdata->ompt_task_info.deps = NULL; - taskdata->ompt_task_info.ndeps = 0; - } + /* OMPT emit all dependences if requested by the tool */ + if (ompt_enabled && taskdata->ompt_task_info.ndeps > 0 && + ompt_callbacks.ompt_callback(ompt_event_task_dependences)) { + ompt_callbacks.ompt_callback(ompt_event_task_dependences)( + taskdata->ompt_task_info.task_id, taskdata->ompt_task_info.deps, + taskdata->ompt_task_info.ndeps); + /* We can now free the allocated memory for the dependencies */ + KMP_OMPT_DEPS_FREE(thread, taskdata->ompt_task_info.deps); + taskdata->ompt_task_info.deps = NULL; + taskdata->ompt_task_info.ndeps = 0; + } #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ - return; + return; } - -//---------------------------------------------------------------------- -// __kmpc_omp_task_begin_if0: report that a given serialized task has started execution +// __kmpc_omp_task_begin_if0: report that a given serialized task has started +// execution +// // loc_ref: source location information; points to beginning of task block. // gtid: global thread number. // task: task thunk for the started task. - -void -__kmpc_omp_task_begin_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p current_task=%p\n", - gtid, loc_ref, taskdata, current_task ) ); - - if ( taskdata->td_flags.tiedness == TASK_UNTIED ) { - // untied task needs to increment counter so that the task structure is not freed prematurely - kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count); - KA_TRACE(20, ( "__kmpc_omp_task_begin_if0: T#%d untied_count (%d) incremented for task %p\n", - gtid, counter, taskdata ) ); - } - - taskdata -> td_flags.task_serial = 1; // Execute this task immediately, not deferred. 
- __kmp_task_start( gtid, task, current_task ); - - KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", - gtid, loc_ref, taskdata ) ); - - return; +void __kmpc_omp_task_begin_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(enter): T#%d loc=%p task=%p " + "current_task=%p\n", + gtid, loc_ref, taskdata, current_task)); + + if (taskdata->td_flags.tiedness == TASK_UNTIED) { + // untied task needs to increment counter so that the task structure is not + // freed prematurely + kmp_int32 counter = 1 + KMP_TEST_THEN_INC32(&taskdata->td_untied_count); + KA_TRACE(20, ("__kmpc_omp_task_begin_if0: T#%d untied_count (%d) " + "incremented for task %p\n", + gtid, counter, taskdata)); + } + + taskdata->td_flags.task_serial = + 1; // Execute this task immediately, not deferred. + __kmp_task_start(gtid, task, current_task); + + KA_TRACE(10, ("__kmpc_omp_task_begin_if0(exit): T#%d loc=%p task=%p,\n", gtid, + loc_ref, taskdata)); + + return; } #ifdef TASK_UNUSED -//---------------------------------------------------------------------- // __kmpc_omp_task_begin: report that a given task has started execution // NEVER GENERATED BY COMPILER, DEPRECATED!!! +void __kmpc_omp_task_begin(ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task) { + kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; -void -__kmpc_omp_task_begin( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * task ) -{ - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - - KA_TRACE(10, ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task ) ); + KA_TRACE( + 10, + ("__kmpc_omp_task_begin(enter): T#%d loc=%p task=%p current_task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task), current_task)); - __kmp_task_start( gtid, task, current_task ); + __kmp_task_start(gtid, task, current_task); - KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - return; + KA_TRACE(10, ("__kmpc_omp_task_begin(exit): T#%d loc=%p task=%p,\n", gtid, + loc_ref, KMP_TASK_TO_TASKDATA(task))); + return; } #endif // TASK_UNUSED - -//------------------------------------------------------------------------------------- // __kmp_free_task: free the current task space and the space for shareds +// // gtid: Global thread ID of calling thread // taskdata: task to free // thread: thread data structure of caller +static void __kmp_free_task(kmp_int32 gtid, kmp_taskdata_t *taskdata, + kmp_info_t *thread) { + KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", gtid, + taskdata)); + + // Check to make sure all flags and counters have the correct values + KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); + KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 0); + KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 1); + KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); + KMP_DEBUG_ASSERT(TCR_4(taskdata->td_allocated_child_tasks) == 0 || + taskdata->td_flags.task_serial == 1); + KMP_DEBUG_ASSERT(TCR_4(taskdata->td_incomplete_child_tasks) == 0); + + taskdata->td_flags.freed = 1; + ANNOTATE_HAPPENS_BEFORE(taskdata); +// deallocate the taskdata and shared variable blocks associated with this task +#if USE_FAST_MEMORY + __kmp_fast_free(thread, taskdata); +#else /* ! 
USE_FAST_MEMORY */ + __kmp_thread_free(thread, taskdata); +#endif -static void -__kmp_free_task( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) -{ - KA_TRACE(30, ("__kmp_free_task: T#%d freeing data from task %p\n", - gtid, taskdata) ); - - // Check to make sure all flags and counters have the correct values - KMP_DEBUG_ASSERT( taskdata->td_flags.tasktype == TASK_EXPLICIT ); - KMP_DEBUG_ASSERT( taskdata->td_flags.executing == 0 ); - KMP_DEBUG_ASSERT( taskdata->td_flags.complete == 1 ); - KMP_DEBUG_ASSERT( taskdata->td_flags.freed == 0 ); - KMP_DEBUG_ASSERT( TCR_4(taskdata->td_allocated_child_tasks) == 0 || taskdata->td_flags.task_serial == 1); - KMP_DEBUG_ASSERT( TCR_4(taskdata->td_incomplete_child_tasks) == 0 ); - - taskdata->td_flags.freed = 1; - ANNOTATE_HAPPENS_BEFORE(taskdata); - // deallocate the taskdata and shared variable blocks associated with this task - #if USE_FAST_MEMORY - __kmp_fast_free( thread, taskdata ); - #else /* ! USE_FAST_MEMORY */ - __kmp_thread_free( thread, taskdata ); - #endif - - KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", - gtid, taskdata) ); + KA_TRACE(20, ("__kmp_free_task: T#%d freed task %p\n", gtid, taskdata)); } -//------------------------------------------------------------------------------------- -// __kmp_free_task_and_ancestors: free the current task and ancestors without children +// __kmp_free_task_and_ancestors: free the current task and ancestors without +// children // // gtid: Global thread ID of calling thread // taskdata: task to free // thread: thread data structure of caller - -static void -__kmp_free_task_and_ancestors( kmp_int32 gtid, kmp_taskdata_t * taskdata, kmp_info_t * thread ) -{ +static void __kmp_free_task_and_ancestors(kmp_int32 gtid, + kmp_taskdata_t *taskdata, + kmp_info_t *thread) { #if OMP_45_ENABLED - // Proxy tasks must always be allowed to free their parents - // because they can be run in background even in serial mode. - kmp_int32 team_serial = ( taskdata->td_flags.team_serial || - taskdata->td_flags.tasking_ser ) && !taskdata->td_flags.proxy; + // Proxy tasks must always be allowed to free their parents + // because they can be run in background even in serial mode. + kmp_int32 team_serial = + (taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) && + !taskdata->td_flags.proxy; #else - kmp_int32 team_serial = taskdata->td_flags.team_serial || - taskdata->td_flags.tasking_ser; + kmp_int32 team_serial = + taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser; #endif - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - kmp_int32 children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); + kmp_int32 children = + KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_allocated_child_tasks)) - + 1; + KMP_DEBUG_ASSERT(children >= 0); - // Now, go up the ancestor tree to see if any ancestors can now be freed. - while ( children == 0 ) - { - kmp_taskdata_t * parent_taskdata = taskdata -> td_parent; - - KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " - "and freeing itself\n", gtid, taskdata) ); + // Now, go up the ancestor tree to see if any ancestors can now be freed. 
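/* Illustrative sketch (not part of the patch): a simplified, single-threaded
   model of the ancestor walk below.  Each task keeps an allocated-children
   count that includes itself; freeing a task drops its parent's count, and a
   parent whose count reaches zero is freed in turn.  Hypothetical types, no
   atomics, and the implicit-task / serial-team stop conditions are reduced to
   a NULL parent. */
#include <stdlib.h>

typedef struct toy_task {
  struct toy_task *parent;
  int allocated_children; /* self + still-allocated children */
} toy_task;

static void toy_free_task_and_ancestors(toy_task *t) {
  int children = --t->allocated_children; /* release the "self" reference */
  while (children == 0) {
    toy_task *parent = t->parent;
    free(t); /* nothing references this task any more */
    if (parent == NULL)
      return; /* stand-in for the implicit-task / serial-team stop condition */
    t = parent;
    children = --t->allocated_children; /* release the freed child's reference */
  }
}

int main(void) {
  toy_task *root = malloc(sizeof *root);
  toy_task *child = malloc(sizeof *child);
  root->parent = NULL;
  root->allocated_children = 1; /* only the child's reference remains */
  child->parent = root;
  child->allocated_children = 1; /* just itself */
  toy_free_task_and_ancestors(child); /* frees child, then root */
  return 0;
}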
+ while (children == 0) { + kmp_taskdata_t *parent_taskdata = taskdata->td_parent; - // --- Deallocate my ancestor task --- - __kmp_free_task( gtid, taskdata, thread ); + KA_TRACE(20, ("__kmp_free_task_and_ancestors(enter): T#%d task %p complete " + "and freeing itself\n", + gtid, taskdata)); - taskdata = parent_taskdata; + // --- Deallocate my ancestor task --- + __kmp_free_task(gtid, taskdata, thread); - // Stop checking ancestors at implicit task - // instead of walking up ancestor tree to avoid premature deallocation of ancestors. - if ( team_serial || taskdata -> td_flags.tasktype == TASK_IMPLICIT ) - return; + taskdata = parent_taskdata; - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_allocated_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); - } + // Stop checking ancestors at implicit task instead of walking up ancestor + // tree to avoid premature deallocation of ancestors. + if (team_serial || taskdata->td_flags.tasktype == TASK_IMPLICIT) + return; - KA_TRACE(20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " - "not freeing it yet\n", gtid, taskdata, children) ); + // Predecrement simulated by "- 1" calculation + children = KMP_TEST_THEN_DEC32( + (kmp_int32 *)(&taskdata->td_allocated_child_tasks)) - + 1; + KMP_DEBUG_ASSERT(children >= 0); + } + + KA_TRACE( + 20, ("__kmp_free_task_and_ancestors(exit): T#%d task %p has %d children; " + "not freeing it yet\n", + gtid, taskdata, children)); } -//--------------------------------------------------------------------- // __kmp_task_finish: bookkeeping to do when a task finishes execution +// // gtid: global thread ID for calling thread // task: task to be finished // resumed_task: task to be resumed. (may be NULL if task is serialized) - -static void -__kmp_task_finish( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_task_team_t * task_team = thread->th.th_task_team; // might be NULL for serial teams... - kmp_int32 children = 0; +static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, + kmp_taskdata_t *resumed_task) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_task_team_t *task_team = + thread->th.th_task_team; // might be NULL for serial teams... 
+ kmp_int32 children = 0; #if OMPT_SUPPORT - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_end)) { - kmp_taskdata_t *parent = taskdata->td_parent; - ompt_callbacks.ompt_callback(ompt_event_task_end)( - taskdata->ompt_task_info.task_id); - } + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_end)) { + kmp_taskdata_t *parent = taskdata->td_parent; + ompt_callbacks.ompt_callback(ompt_event_task_end)( + taskdata->ompt_task_info.task_id); + } #endif - KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming task %p\n", - gtid, taskdata, resumed_task) ); + KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " + "task %p\n", + gtid, taskdata, resumed_task)); - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); + KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); - // Pop task from stack if tied +// Pop task from stack if tied #ifdef BUILD_TIED_TASK_STACK - if ( taskdata -> td_flags.tiedness == TASK_TIED ) - { - __kmp_pop_task_stack( gtid, thread, taskdata ); - } + if (taskdata->td_flags.tiedness == TASK_TIED) { + __kmp_pop_task_stack(gtid, thread, taskdata); + } #endif /* BUILD_TIED_TASK_STACK */ - if ( taskdata->td_flags.tiedness == TASK_UNTIED ) { - // untied task needs to check the counter so that the task structure is not freed prematurely - kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1; - KA_TRACE(20, ( "__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n", - gtid, counter, taskdata ) ); - if ( counter > 0 ) { - // untied task is not done, to be continued possibly by other thread, do not free it now - if (resumed_task == NULL) { - KMP_DEBUG_ASSERT( taskdata->td_flags.task_serial ); - resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent - } - thread->th.th_current_task = resumed_task; // restore current_task - resumed_task->td_flags.executing = 1; // resume previous task - KA_TRACE(10, ("__kmp_task_finish(exit): T#%d partially done task %p, resuming task %p\n", - gtid, taskdata, resumed_task) ); - return; - } - } - - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - taskdata -> td_flags.complete = 1; // mark the task as completed - KMP_DEBUG_ASSERT( taskdata -> td_flags.started == 1 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); - - // Only need to keep track of count if team parallel and tasking not serialized - if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) { - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); + if (taskdata->td_flags.tiedness == TASK_UNTIED) { + // untied task needs to check the counter so that the task structure is not + // freed prematurely + kmp_int32 counter = KMP_TEST_THEN_DEC32(&taskdata->td_untied_count) - 1; + KA_TRACE( + 20, + ("__kmp_task_finish: T#%d untied_count (%d) decremented for task %p\n", + gtid, counter, taskdata)); + if (counter > 0) { + // untied task is not done, to be continued possibly by other thread, do + // not free it now + if (resumed_task == NULL) { + KMP_DEBUG_ASSERT(taskdata->td_flags.task_serial); + resumed_task = taskdata->td_parent; // In a serialized task, the resumed + // task is the parent + } + thread->th.th_current_task = resumed_task; // restore current_task + resumed_task->td_flags.executing = 1; // resume previous task + KA_TRACE(10, 
("__kmp_task_finish(exit): T#%d partially done task %p, " + "resuming task %p\n", + gtid, taskdata, resumed_task)); + return; + } + } + + KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); + taskdata->td_flags.complete = 1; // mark the task as completed + KMP_DEBUG_ASSERT(taskdata->td_flags.started == 1); + KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); + + // Only need to keep track of count if team parallel and tasking not + // serialized + if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { + // Predecrement simulated by "- 1" calculation + children = + KMP_TEST_THEN_DEC32( + (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) - + 1; + KMP_DEBUG_ASSERT(children >= 0); #if OMP_40_ENABLED - if ( taskdata->td_taskgroup ) - KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); + if (taskdata->td_taskgroup) + KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count)); #if OMP_45_ENABLED - } - // if we found proxy tasks there could exist a dependency chain - // with the proxy task as origin - if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) || (task_team && task_team->tt.tt_found_proxy_tasks) ) { + } + // if we found proxy tasks there could exist a dependency chain + // with the proxy task as origin + if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser) || + (task_team && task_team->tt.tt_found_proxy_tasks)) { #endif - __kmp_release_deps(gtid,taskdata); + __kmp_release_deps(gtid, taskdata); #endif - } + } - // td_flags.executing must be marked as 0 after __kmp_release_deps has been called - // Othertwise, if a task is executed immediately from the release_deps code - // the flag will be reset to 1 again by this same function - KMP_DEBUG_ASSERT( taskdata -> td_flags.executing == 1 ); - taskdata -> td_flags.executing = 0; // suspend the finishing task + // td_flags.executing must be marked as 0 after __kmp_release_deps has been + // called. Othertwise, if a task is executed immediately from the release_deps + // code, the flag will be reset to 1 again by this same function + KMP_DEBUG_ASSERT(taskdata->td_flags.executing == 1); + taskdata->td_flags.executing = 0; // suspend the finishing task - KA_TRACE(20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", - gtid, taskdata, children) ); + KA_TRACE( + 20, ("__kmp_task_finish: T#%d finished task %p, %d incomplete children\n", + gtid, taskdata, children)); #if OMP_40_ENABLED - /* If the tasks' destructor thunk flag has been set, we need to invoke the - destructor thunk that has been generated by the compiler. - The code is placed here, since at this point other tasks might have been released - hence overlapping the destructor invokations with some other work in the - released tasks. The OpenMP spec is not specific on when the destructors are - invoked, so we should be free to choose. - */ - if (taskdata->td_flags.destructors_thunk) { - kmp_routine_entry_t destr_thunk = task->data1.destructors; - KMP_ASSERT(destr_thunk); - destr_thunk(gtid, task); - } + /* If the tasks' destructor thunk flag has been set, we need to invoke the + destructor thunk that has been generated by the compiler. The code is + placed here, since at this point other tasks might have been released + hence overlapping the destructor invokations with some other work in the + released tasks. The OpenMP spec is not specific on when the destructors + are invoked, so we should be free to choose. 
*/ + if (taskdata->td_flags.destructors_thunk) { + kmp_routine_entry_t destr_thunk = task->data1.destructors; + KMP_ASSERT(destr_thunk); + destr_thunk(gtid, task); + } #endif // OMP_40_ENABLED - // bookkeeping for resuming task: - // GEH - note tasking_ser => task_serial - KMP_DEBUG_ASSERT( (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == - taskdata->td_flags.task_serial); - if ( taskdata->td_flags.task_serial ) - { - if (resumed_task == NULL) { - resumed_task = taskdata->td_parent; // In a serialized task, the resumed task is the parent - } - else + // bookkeeping for resuming task: + // GEH - note tasking_ser => task_serial + KMP_DEBUG_ASSERT( + (taskdata->td_flags.tasking_ser || taskdata->td_flags.task_serial) == + taskdata->td_flags.task_serial); + if (taskdata->td_flags.task_serial) { + if (resumed_task == NULL) { + resumed_task = taskdata->td_parent; // In a serialized task, the resumed + // task is the parent + } else #if OMP_45_ENABLED - if ( !(task_team && task_team->tt.tt_found_proxy_tasks) ) + if (!(task_team && task_team->tt.tt_found_proxy_tasks)) #endif - { - // verify resumed task passed in points to parent - KMP_DEBUG_ASSERT( resumed_task == taskdata->td_parent ); - } - } - else { - KMP_DEBUG_ASSERT( resumed_task != NULL ); // verify that resumed task is passed as arguemnt - } - - // Free this task and then ancestor tasks if they have no children. - // Restore th_current_task first as suggested by John: - // johnmc: if an asynchronous inquiry peers into the runtime system - // it doesn't see the freed task as the current task. - thread->th.th_current_task = resumed_task; - __kmp_free_task_and_ancestors(gtid, taskdata, thread); - - // TODO: GEH - make sure root team implicit task is initialized properly. - // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); - resumed_task->td_flags.executing = 1; // resume previous task - - KA_TRACE(10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", - gtid, taskdata, resumed_task) ); - - return; + { + // verify resumed task passed in points to parent + KMP_DEBUG_ASSERT(resumed_task == taskdata->td_parent); + } + } else { + KMP_DEBUG_ASSERT(resumed_task != + NULL); // verify that resumed task is passed as arguemnt + } + + // Free this task and then ancestor tasks if they have no children. + // Restore th_current_task first as suggested by John: + // johnmc: if an asynchronous inquiry peers into the runtime system + // it doesn't see the freed task as the current task. + thread->th.th_current_task = resumed_task; + __kmp_free_task_and_ancestors(gtid, taskdata, thread); + + // TODO: GEH - make sure root team implicit task is initialized properly. + // KMP_DEBUG_ASSERT( resumed_task->td_flags.executing == 0 ); + resumed_task->td_flags.executing = 1; // resume previous task + + KA_TRACE( + 10, ("__kmp_task_finish(exit): T#%d finished task %p, resuming task %p\n", + gtid, taskdata, resumed_task)); + + return; } -//--------------------------------------------------------------------- // __kmpc_omp_task_complete_if0: report that a task has completed execution +// // loc_ref: source location information; points to end of task block. // gtid: global thread number. // task: task thunk for the completed task. 
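/* Illustrative sketch (not part of the patch): __kmpc_omp_task_begin_if0 and
   __kmpc_omp_task_complete_if0 bracket an undeferred (serialized) task.  For
   source like the following, a compiler targeting this runtime would
   typically allocate the task, call the begin_if0 entry point, run the task
   body inline in the encountering thread, and then call complete_if0.
   Hedged example; compile with -fopenmp. */
#include <stdio.h>

int main(void) {
  int x = 0;
#pragma omp task if (0) shared(x)
  { x = 42; } /* if(0): executed immediately, not deferred */
#pragma omp taskwait
  printf("x = %d\n", x);
  return 0;
}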
- -void -__kmpc_omp_task_complete_if0( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) -{ - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - __kmp_task_finish( gtid, task, NULL ); // this routine will provide task to resume - - KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - return; +void __kmpc_omp_task_complete_if0(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(enter): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + // this routine will provide task to resume + __kmp_task_finish(gtid, task, NULL); + + KA_TRACE(10, ("__kmpc_omp_task_complete_if0(exit): T#%d loc=%p task=%p\n", + gtid, loc_ref, KMP_TASK_TO_TASKDATA(task))); + return; } #ifdef TASK_UNUSED -//--------------------------------------------------------------------- // __kmpc_omp_task_complete: report that a task has completed execution // NEVER GENERATED BY COMPILER, DEPRECATED!!! +void __kmpc_omp_task_complete(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *task) { + KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", gtid, + loc_ref, KMP_TASK_TO_TASKDATA(task))); -void -__kmpc_omp_task_complete( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *task ) -{ - KA_TRACE(10, ("__kmpc_omp_task_complete(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - - __kmp_task_finish( gtid, task, NULL ); // Not sure how to find task to resume + __kmp_task_finish(gtid, task, NULL); // Not sure how to find task to resume - KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", - gtid, loc_ref, KMP_TASK_TO_TASKDATA(task) ) ); - return; + KA_TRACE(10, ("__kmpc_omp_task_complete(exit): T#%d loc=%p task=%p\n", gtid, + loc_ref, KMP_TASK_TO_TASKDATA(task))); + return; } #endif // TASK_UNUSED - #if OMPT_SUPPORT -//---------------------------------------------------------------------------------------------------- -// __kmp_task_init_ompt: -// Initialize OMPT fields maintained by a task. This will only be called after -// ompt_tool, so we already know whether ompt is enabled or not. - -static inline void -__kmp_task_init_ompt( kmp_taskdata_t * task, int tid, void * function ) -{ - if (ompt_enabled) { - task->ompt_task_info.task_id = __ompt_task_id_new(tid); - task->ompt_task_info.function = function; - task->ompt_task_info.frame.exit_runtime_frame = NULL; - task->ompt_task_info.frame.reenter_runtime_frame = NULL; +// __kmp_task_init_ompt: Initialize OMPT fields maintained by a task. This will +// only be called after ompt_tool, so we already know whether ompt is enabled +// or not. 
+static inline void __kmp_task_init_ompt(kmp_taskdata_t *task, int tid, + void *function) { + if (ompt_enabled) { + task->ompt_task_info.task_id = __ompt_task_id_new(tid); + task->ompt_task_info.function = function; + task->ompt_task_info.frame.exit_runtime_frame = NULL; + task->ompt_task_info.frame.reenter_runtime_frame = NULL; #if OMP_40_ENABLED - task->ompt_task_info.ndeps = 0; - task->ompt_task_info.deps = NULL; + task->ompt_task_info.ndeps = 0; + task->ompt_task_info.deps = NULL; #endif /* OMP_40_ENABLED */ - } + } } #endif - -//---------------------------------------------------------------------------------------------------- -// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit task for a given thread +// __kmp_init_implicit_task: Initialize the appropriate fields in the implicit +// task for a given thread // // loc_ref: reference to source location of parallel region // this_thr: thread data structure corresponding to implicit task // team: team for this_thr // tid: thread id of given thread within team // set_curr_task: TRUE if need to push current task to thread -// NOTE: Routine does not set up the implicit task ICVS. This is assumed to have already been done elsewhere. +// NOTE: Routine does not set up the implicit task ICVS. This is assumed to +// have already been done elsewhere. // TODO: Get better loc_ref. Value passed in may be NULL - -void -__kmp_init_implicit_task( ident_t *loc_ref, kmp_info_t *this_thr, kmp_team_t *team, int tid, int set_curr_task ) -{ - kmp_taskdata_t * task = & team->t.t_implicit_task_taskdata[ tid ]; - - KF_TRACE(10, ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", - tid, team, task, set_curr_task ? "TRUE" : "FALSE" ) ); - - task->td_task_id = KMP_GEN_TASK_ID(); - task->td_team = team; -// task->td_parent = NULL; // fix for CQ230101 (broken parent task info in debugger) - task->td_ident = loc_ref; - task->td_taskwait_ident = NULL; - task->td_taskwait_counter = 0; - task->td_taskwait_thread = 0; - - task->td_flags.tiedness = TASK_TIED; - task->td_flags.tasktype = TASK_IMPLICIT; +void __kmp_init_implicit_task(ident_t *loc_ref, kmp_info_t *this_thr, + kmp_team_t *team, int tid, int set_curr_task) { + kmp_taskdata_t *task = &team->t.t_implicit_task_taskdata[tid]; + + KF_TRACE( + 10, + ("__kmp_init_implicit_task(enter): T#:%d team=%p task=%p, reinit=%s\n", + tid, team, task, set_curr_task ? "TRUE" : "FALSE")); + + task->td_task_id = KMP_GEN_TASK_ID(); + task->td_team = team; + // task->td_parent = NULL; // fix for CQ230101 (broken parent task info + // in debugger) + task->td_ident = loc_ref; + task->td_taskwait_ident = NULL; + task->td_taskwait_counter = 0; + task->td_taskwait_thread = 0; + + task->td_flags.tiedness = TASK_TIED; + task->td_flags.tasktype = TASK_IMPLICIT; #if OMP_45_ENABLED - task->td_flags.proxy = TASK_FULL; + task->td_flags.proxy = TASK_FULL; #endif - // All implicit tasks are executed immediately, not deferred - task->td_flags.task_serial = 1; - task->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); - task->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; + // All implicit tasks are executed immediately, not deferred + task->td_flags.task_serial = 1; + task->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); + task->td_flags.team_serial = (team->t.t_serialized) ? 
1 : 0; - task->td_flags.started = 1; - task->td_flags.executing = 1; - task->td_flags.complete = 0; - task->td_flags.freed = 0; + task->td_flags.started = 1; + task->td_flags.executing = 1; + task->td_flags.complete = 0; + task->td_flags.freed = 0; #if OMP_40_ENABLED - task->td_depnode = NULL; + task->td_depnode = NULL; #endif - if (set_curr_task) { // only do this initialization the first time a thread is created - task->td_incomplete_child_tasks = 0; - task->td_allocated_child_tasks = 0; // Not used because do not need to deallocate implicit task + if (set_curr_task) { // only do this init first time thread is created + task->td_incomplete_child_tasks = 0; + task->td_allocated_child_tasks = 0; // Not used: don't need to +// deallocate implicit task #if OMP_40_ENABLED - task->td_taskgroup = NULL; // An implicit task does not have taskgroup - task->td_dephash = NULL; + task->td_taskgroup = NULL; // An implicit task does not have taskgroup + task->td_dephash = NULL; #endif - __kmp_push_current_task_to_thread( this_thr, team, tid ); - } else { - KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); - KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); - } + __kmp_push_current_task_to_thread(this_thr, team, tid); + } else { + KMP_DEBUG_ASSERT(task->td_incomplete_child_tasks == 0); + KMP_DEBUG_ASSERT(task->td_allocated_child_tasks == 0); + } #if OMPT_SUPPORT - __kmp_task_init_ompt(task, tid, NULL); + __kmp_task_init_ompt(task, tid, NULL); #endif - KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", - tid, team, task ) ); + KF_TRACE(10, ("__kmp_init_implicit_task(exit): T#:%d team=%p task=%p\n", tid, + team, task)); } - -//----------------------------------------------------------------------------- -//// __kmp_finish_implicit_task: Release resources associated to implicit tasks -//// at the end of parallel regions. Some resources are kept for reuse in the -//// next parallel region. -//// -//// thread: thread data structure corresponding to implicit task +// __kmp_finish_implicit_task: Release resources associated to implicit tasks +// at the end of parallel regions. Some resources are kept for reuse in the next +// parallel region. 
// -void -__kmp_finish_implicit_task(kmp_info_t *thread) -{ - kmp_taskdata_t *task = thread->th.th_current_task; - if (task->td_dephash) - __kmp_dephash_free_entries(thread, task->td_dephash); +// thread: thread data structure corresponding to implicit task +void __kmp_finish_implicit_task(kmp_info_t *thread) { + kmp_taskdata_t *task = thread->th.th_current_task; + if (task->td_dephash) + __kmp_dephash_free_entries(thread, task->td_dephash); } - -//----------------------------------------------------------------------------- -//// __kmp_free_implicit_task: Release resources associated to implicit tasks -//// when these are destroyed regions -//// -//// thread: thread data structure corresponding to implicit task +// __kmp_free_implicit_task: Release resources associated to implicit tasks +// when these are destroyed regions // -void -__kmp_free_implicit_task(kmp_info_t *thread) -{ - kmp_taskdata_t *task = thread->th.th_current_task; - if (task->td_dephash) - __kmp_dephash_free(thread, task->td_dephash); - task->td_dephash = NULL; +// thread: thread data structure corresponding to implicit task +void __kmp_free_implicit_task(kmp_info_t *thread) { + kmp_taskdata_t *task = thread->th.th_current_task; + if (task->td_dephash) + __kmp_dephash_free(thread, task->td_dephash); + task->td_dephash = NULL; } - -// Round up a size to a power of two specified by val -// Used to insert padding between structures co-allocated using a single malloc() call -static size_t -__kmp_round_up_to_val( size_t size, size_t val ) { - if ( size & ( val - 1 ) ) { - size &= ~ ( val - 1 ); - if ( size <= KMP_SIZE_T_MAX - val ) { - size += val; // Round up if there is no overflow. - }; // if +// Round up a size to a power of two specified by val: Used to insert padding +// between structures co-allocated using a single malloc() call +static size_t __kmp_round_up_to_val(size_t size, size_t val) { + if (size & (val - 1)) { + size &= ~(val - 1); + if (size <= KMP_SIZE_T_MAX - val) { + size += val; // Round up if there is no overflow. }; // if - return size; + }; // if + return size; } // __kmp_round_up_to_va - -//--------------------------------------------------------------------------------- // __kmp_task_alloc: Allocate the taskdata and task data structures for a task // // loc_ref: source location information // gtid: global thread number. -// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' task encountered. -// Converted from kmp_int32 to kmp_tasking_flags_t in routine. -// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including private vars accessed in task. -// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed in task. +// flags: include tiedness & task type (explicit vs. implicit) of the ''new'' +// task encountered. Converted from kmp_int32 to kmp_tasking_flags_t in routine. +// sizeof_kmp_task_t: Size in bytes of kmp_task_t data structure including +// private vars accessed in task. +// sizeof_shareds: Size in bytes of array of pointers to shared vars accessed +// in task. // task_entry: Pointer to task code entry point generated by compiler. // returns: a pointer to the allocated kmp_task_t structure (task). 
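/* Illustrative sketch (not part of the patch): the allocator below places the
   task descriptor, the kmp_task_t block (with its private data), and the
   shareds pointer block in a single allocation, rounding the shareds offset
   up to a multiple of sizeof(void *) so the pointers stay aligned.
   Hypothetical sizes; the overflow guard of the real __kmp_round_up_to_val is
   omitted here. */
#include <stdio.h>

static size_t round_up_to_val(size_t size, size_t val) {
  if (size & (val - 1))
    size = (size & ~(val - 1)) + val; /* val is assumed to be a power of two */
  return size;
}

int main(void) {
  size_t sizeof_taskdata = 192;  /* hypothetical sizeof(kmp_taskdata_t) */
  size_t sizeof_kmp_task_t = 44; /* hypothetical, not pointer-aligned */
  size_t sizeof_shareds = 2 * sizeof(void *);
  size_t shareds_offset =
      round_up_to_val(sizeof_taskdata + sizeof_kmp_task_t, sizeof(void *));
  printf("single block of %zu bytes, shareds at offset %zu\n",
         shareds_offset + sizeof_shareds, shareds_offset);
  return 0;
}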
- -kmp_task_t * -__kmp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_tasking_flags_t *flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ) -{ - kmp_task_t *task; - kmp_taskdata_t *taskdata; - kmp_info_t *thread = __kmp_threads[ gtid ]; - kmp_team_t *team = thread->th.th_team; - kmp_taskdata_t *parent_task = thread->th.th_current_task; - size_t shareds_offset; - - KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, - sizeof_shareds, task_entry) ); - - if ( parent_task->td_flags.final ) { - if (flags->merged_if0) { - } - flags->final = 1; - } +kmp_task_t *__kmp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_tasking_flags_t *flags, + size_t sizeof_kmp_task_t, size_t sizeof_shareds, + kmp_routine_entry_t task_entry) { + kmp_task_t *task; + kmp_taskdata_t *taskdata; + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_team_t *team = thread->th.th_team; + kmp_taskdata_t *parent_task = thread->th.th_current_task; + size_t shareds_offset; + + KA_TRACE(10, ("__kmp_task_alloc(enter): T#%d loc=%p, flags=(0x%x) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, *((kmp_int32 *)flags), sizeof_kmp_task_t, + sizeof_shareds, task_entry)); + + if (parent_task->td_flags.final) { + if (flags->merged_if0) { + } + flags->final = 1; + } #if OMP_45_ENABLED - if ( flags->proxy == TASK_PROXY ) { - flags->tiedness = TASK_UNTIED; - flags->merged_if0 = 1; - - /* are we running in a sequential parallel or tskm_immediate_exec... we need tasking support enabled */ - if ( (thread->th.th_task_team) == NULL ) { - /* This should only happen if the team is serialized - setup a task team and propagate it to the thread - */ - KMP_DEBUG_ASSERT(team->t.t_serialized); - KA_TRACE(30,("T#%d creating task team in __kmp_task_alloc for proxy task\n", gtid)); - __kmp_task_team_setup(thread,team,1); // 1 indicates setup the current team regardless of nthreads - thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; - } - kmp_task_team_t * task_team = thread->th.th_task_team; - - /* tasking must be enabled now as the task might not be pushed */ - if ( !KMP_TASKING_ENABLED( task_team ) ) { - KA_TRACE(30,("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid)); - __kmp_enable_tasking( task_team, thread ); - kmp_int32 tid = thread->th.th_info.ds.ds_tid; - kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; - // No lock needed since only owner can allocate - if (thread_data -> td.td_deque == NULL ) { - __kmp_alloc_task_deque( thread, thread_data ); - } - } - - if ( task_team->tt.tt_found_proxy_tasks == FALSE ) - TCW_4(task_team -> tt.tt_found_proxy_tasks, TRUE); + if (flags->proxy == TASK_PROXY) { + flags->tiedness = TASK_UNTIED; + flags->merged_if0 = 1; + + /* are we running in a sequential parallel or tskm_immediate_exec... 
we need + tasking support enabled */ + if ((thread->th.th_task_team) == NULL) { + /* This should only happen if the team is serialized + setup a task team and propagate it to the thread */ + KMP_DEBUG_ASSERT(team->t.t_serialized); + KA_TRACE(30, + ("T#%d creating task team in __kmp_task_alloc for proxy task\n", + gtid)); + __kmp_task_team_setup( + thread, team, + 1); // 1 indicates setup the current team regardless of nthreads + thread->th.th_task_team = team->t.t_task_team[thread->th.th_task_state]; + } + kmp_task_team_t *task_team = thread->th.th_task_team; + + /* tasking must be enabled now as the task might not be pushed */ + if (!KMP_TASKING_ENABLED(task_team)) { + KA_TRACE( + 30, + ("T#%d enabling tasking in __kmp_task_alloc for proxy task\n", gtid)); + __kmp_enable_tasking(task_team, thread); + kmp_int32 tid = thread->th.th_info.ds.ds_tid; + kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid]; + // No lock needed since only owner can allocate + if (thread_data->td.td_deque == NULL) { + __kmp_alloc_task_deque(thread, thread_data); + } } -#endif - - // Calculate shared structure offset including padding after kmp_task_t struct - // to align pointers in shared struct - shareds_offset = sizeof( kmp_taskdata_t ) + sizeof_kmp_task_t; - shareds_offset = __kmp_round_up_to_val( shareds_offset, sizeof( void * )); - - // Allocate a kmp_taskdata_t block and a kmp_task_t block. - KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", - gtid, shareds_offset) ); - KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", - gtid, sizeof_shareds) ); - - // Avoid double allocation here by combining shareds with taskdata - #if USE_FAST_MEMORY - taskdata = (kmp_taskdata_t *) __kmp_fast_allocate( thread, shareds_offset + sizeof_shareds ); - #else /* ! USE_FAST_MEMORY */ - taskdata = (kmp_taskdata_t *) __kmp_thread_malloc( thread, shareds_offset + sizeof_shareds ); - #endif /* USE_FAST_MEMORY */ - ANNOTATE_HAPPENS_AFTER(taskdata); - task = KMP_TASKDATA_TO_TASK(taskdata); + if (task_team->tt.tt_found_proxy_tasks == FALSE) + TCW_4(task_team->tt.tt_found_proxy_tasks, TRUE); + } +#endif - // Make sure task & taskdata are aligned appropriately + // Calculate shared structure offset including padding after kmp_task_t struct + // to align pointers in shared struct + shareds_offset = sizeof(kmp_taskdata_t) + sizeof_kmp_task_t; + shareds_offset = __kmp_round_up_to_val(shareds_offset, sizeof(void *)); + + // Allocate a kmp_taskdata_t block and a kmp_task_t block. + KA_TRACE(30, ("__kmp_task_alloc: T#%d First malloc size: %ld\n", gtid, + shareds_offset)); + KA_TRACE(30, ("__kmp_task_alloc: T#%d Second malloc size: %ld\n", gtid, + sizeof_shareds)); + +// Avoid double allocation here by combining shareds with taskdata +#if USE_FAST_MEMORY + taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, shareds_offset + + sizeof_shareds); +#else /* ! 
USE_FAST_MEMORY */ + taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, shareds_offset + + sizeof_shareds); +#endif /* USE_FAST_MEMORY */ + ANNOTATE_HAPPENS_AFTER(taskdata); + + task = KMP_TASKDATA_TO_TASK(taskdata); + +// Make sure task & taskdata are aligned appropriately #if KMP_ARCH_X86 || KMP_ARCH_PPC64 || !KMP_HAVE_QUAD - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(double)-1) ) == 0 ); - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(double)-1) ) == 0 ); + KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(double) - 1)) == 0); + KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(double) - 1)) == 0); #else - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)taskdata) & (sizeof(_Quad)-1) ) == 0 ); - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task) & (sizeof(_Quad)-1) ) == 0 ); + KMP_DEBUG_ASSERT((((kmp_uintptr_t)taskdata) & (sizeof(_Quad) - 1)) == 0); + KMP_DEBUG_ASSERT((((kmp_uintptr_t)task) & (sizeof(_Quad) - 1)) == 0); #endif - if (sizeof_shareds > 0) { - // Avoid double allocation here by combining shareds with taskdata - task->shareds = & ((char *) taskdata)[ shareds_offset ]; - // Make sure shareds struct is aligned to pointer size - KMP_DEBUG_ASSERT( ( ((kmp_uintptr_t)task->shareds) & (sizeof(void *)-1) ) == 0 ); - } else { - task->shareds = NULL; - } - task->routine = task_entry; - task->part_id = 0; // AC: Always start with 0 part id - - taskdata->td_task_id = KMP_GEN_TASK_ID(); - taskdata->td_team = team; - taskdata->td_alloc_thread = thread; - taskdata->td_parent = parent_task; - taskdata->td_level = parent_task->td_level + 1; // increment nesting level - taskdata->td_untied_count = 0; - taskdata->td_ident = loc_ref; - taskdata->td_taskwait_ident = NULL; - taskdata->td_taskwait_counter = 0; - taskdata->td_taskwait_thread = 0; - KMP_DEBUG_ASSERT( taskdata->td_parent != NULL ); + if (sizeof_shareds > 0) { + // Avoid double allocation here by combining shareds with taskdata + task->shareds = &((char *)taskdata)[shareds_offset]; + // Make sure shareds struct is aligned to pointer size + KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == + 0); + } else { + task->shareds = NULL; + } + task->routine = task_entry; + task->part_id = 0; // AC: Always start with 0 part id + + taskdata->td_task_id = KMP_GEN_TASK_ID(); + taskdata->td_team = team; + taskdata->td_alloc_thread = thread; + taskdata->td_parent = parent_task; + taskdata->td_level = parent_task->td_level + 1; // increment nesting level + taskdata->td_untied_count = 0; + taskdata->td_ident = loc_ref; + taskdata->td_taskwait_ident = NULL; + taskdata->td_taskwait_counter = 0; + taskdata->td_taskwait_thread = 0; + KMP_DEBUG_ASSERT(taskdata->td_parent != NULL); #if OMP_45_ENABLED - // avoid copying icvs for proxy tasks - if ( flags->proxy == TASK_FULL ) + // avoid copying icvs for proxy tasks + if (flags->proxy == TASK_FULL) #endif - copy_icvs( &taskdata->td_icvs, &taskdata->td_parent->td_icvs ); + copy_icvs(&taskdata->td_icvs, &taskdata->td_parent->td_icvs); - taskdata->td_flags.tiedness = flags->tiedness; - taskdata->td_flags.final = flags->final; - taskdata->td_flags.merged_if0 = flags->merged_if0; + taskdata->td_flags.tiedness = flags->tiedness; + taskdata->td_flags.final = flags->final; + taskdata->td_flags.merged_if0 = flags->merged_if0; #if OMP_40_ENABLED - taskdata->td_flags.destructors_thunk = flags->destructors_thunk; + taskdata->td_flags.destructors_thunk = flags->destructors_thunk; #endif // OMP_40_ENABLED #if OMP_45_ENABLED - taskdata->td_flags.proxy = flags->proxy; - taskdata->td_task_team 
= thread->th.th_task_team; - taskdata->td_size_alloc = shareds_offset + sizeof_shareds; + taskdata->td_flags.proxy = flags->proxy; + taskdata->td_task_team = thread->th.th_task_team; + taskdata->td_size_alloc = shareds_offset + sizeof_shareds; #endif - taskdata->td_flags.tasktype = TASK_EXPLICIT; + taskdata->td_flags.tasktype = TASK_EXPLICIT; - // GEH - TODO: fix this to copy parent task's value of tasking_ser flag - taskdata->td_flags.tasking_ser = ( __kmp_tasking_mode == tskm_immediate_exec ); + // GEH - TODO: fix this to copy parent task's value of tasking_ser flag + taskdata->td_flags.tasking_ser = (__kmp_tasking_mode == tskm_immediate_exec); - // GEH - TODO: fix this to copy parent task's value of team_serial flag - taskdata->td_flags.team_serial = ( team->t.t_serialized ) ? 1 : 0; + // GEH - TODO: fix this to copy parent task's value of team_serial flag + taskdata->td_flags.team_serial = (team->t.t_serialized) ? 1 : 0; - // GEH - Note we serialize the task if the team is serialized to make sure implicit parallel region - // tasks are not left until program termination to execute. Also, it helps locality to execute - // immediately. - taskdata->td_flags.task_serial = ( parent_task->td_flags.final - || taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ); + // GEH - Note we serialize the task if the team is serialized to make sure + // implicit parallel region tasks are not left until program termination to + // execute. Also, it helps locality to execute immediately. - taskdata->td_flags.started = 0; - taskdata->td_flags.executing = 0; - taskdata->td_flags.complete = 0; - taskdata->td_flags.freed = 0; + taskdata->td_flags.task_serial = + (parent_task->td_flags.final || taskdata->td_flags.team_serial || + taskdata->td_flags.tasking_ser); - taskdata->td_flags.native = flags->native; + taskdata->td_flags.started = 0; + taskdata->td_flags.executing = 0; + taskdata->td_flags.complete = 0; + taskdata->td_flags.freed = 0; - taskdata->td_incomplete_child_tasks = 0; - taskdata->td_allocated_child_tasks = 1; // start at one because counts current task and children + taskdata->td_flags.native = flags->native; + + taskdata->td_incomplete_child_tasks = 0; + taskdata->td_allocated_child_tasks = 1; // start at one because counts current +// task and children #if OMP_40_ENABLED - taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task - taskdata->td_dephash = NULL; - taskdata->td_depnode = NULL; + taskdata->td_taskgroup = + parent_task->td_taskgroup; // task inherits taskgroup from the parent task + taskdata->td_dephash = NULL; + taskdata->td_depnode = NULL; #endif - // Only need to keep track of child task counts if team parallel and tasking not serialized or if it is a proxy task +// Only need to keep track of child task counts if team parallel and tasking not +// serialized or if it is a proxy task #if OMP_45_ENABLED - if ( flags->proxy == TASK_PROXY || !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) + if (flags->proxy == TASK_PROXY || + !(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) #else - if ( !( taskdata -> td_flags.team_serial || taskdata -> td_flags.tasking_ser ) ) + if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) #endif - { - KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) ); + { + KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks)); #if OMP_40_ENABLED - if ( parent_task->td_taskgroup ) - 
KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) ); + if (parent_task->td_taskgroup) + KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count)); #endif - // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated - if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) { - KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) ); - } + // Only need to keep track of allocated child tasks for explicit tasks since + // implicit not deallocated + if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) { + KMP_TEST_THEN_INC32( + (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks)); } + } - KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", - gtid, taskdata, taskdata->td_parent) ); - ANNOTATE_HAPPENS_BEFORE(task); + KA_TRACE(20, ("__kmp_task_alloc(exit): T#%d created task %p parent=%p\n", + gtid, taskdata, taskdata->td_parent)); + ANNOTATE_HAPPENS_BEFORE(task); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, gtid, (void*) task_entry); + __kmp_task_init_ompt(taskdata, gtid, (void *)task_entry); #endif - return task; + return task; } +kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, + kmp_int32 flags, size_t sizeof_kmp_task_t, + size_t sizeof_shareds, + kmp_routine_entry_t task_entry) { + kmp_task_t *retval; + kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *)&flags; -kmp_task_t * -__kmpc_omp_task_alloc( ident_t *loc_ref, kmp_int32 gtid, kmp_int32 flags, - size_t sizeof_kmp_task_t, size_t sizeof_shareds, - kmp_routine_entry_t task_entry ) -{ - kmp_task_t *retval; - kmp_tasking_flags_t *input_flags = (kmp_tasking_flags_t *) & flags; - - input_flags->native = FALSE; - // __kmp_task_alloc() sets up all other runtime flags + input_flags->native = FALSE; +// __kmp_task_alloc() sets up all other runtime flags #if OMP_45_ENABLED - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", - input_flags->proxy ? "proxy" : "", - sizeof_kmp_task_t, sizeof_shareds, task_entry) ); + KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s %s) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", + input_flags->proxy ? "proxy" : "", sizeof_kmp_task_t, + sizeof_shareds, task_entry)); #else - KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) " - "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", - gtid, loc_ref, input_flags->tiedness ? "tied " : "untied", - sizeof_kmp_task_t, sizeof_shareds, task_entry) ); + KA_TRACE(10, ("__kmpc_omp_task_alloc(enter): T#%d loc=%p, flags=(%s) " + "sizeof_task=%ld sizeof_shared=%ld entry=%p\n", + gtid, loc_ref, input_flags->tiedness ? 
"tied " : "untied", + sizeof_kmp_task_t, sizeof_shareds, task_entry)); #endif - retval = __kmp_task_alloc( loc_ref, gtid, input_flags, sizeof_kmp_task_t, - sizeof_shareds, task_entry ); + retval = __kmp_task_alloc(loc_ref, gtid, input_flags, sizeof_kmp_task_t, + sizeof_shareds, task_entry); - KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval) ); + KA_TRACE(20, ("__kmpc_omp_task_alloc(exit): T#%d retval %p\n", gtid, retval)); - return retval; + return retval; } -//----------------------------------------------------------- // __kmp_invoke_task: invoke the specified task // // gtid: global thread ID of caller // task: the task to invoke // current_task: the task to resume after task invokation - -static void -__kmp_invoke_task( kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t * current_task ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_uint64 cur_time; +static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, + kmp_taskdata_t *current_task) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_uint64 cur_time; #if OMP_40_ENABLED - int discard = 0 /* false */; + int discard = 0 /* false */; #endif - KA_TRACE(30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", - gtid, taskdata, current_task) ); - KMP_DEBUG_ASSERT(task); + KA_TRACE( + 30, ("__kmp_invoke_task(enter): T#%d invoking task %p, current_task=%p\n", + gtid, taskdata, current_task)); + KMP_DEBUG_ASSERT(task); #if OMP_45_ENABLED - if ( taskdata->td_flags.proxy == TASK_PROXY && - taskdata->td_flags.complete == 1) - { - // This is a proxy task that was already completed but it needs to run - // its bottom-half finish - KA_TRACE(30, ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", - gtid, taskdata) ); + if (taskdata->td_flags.proxy == TASK_PROXY && + taskdata->td_flags.complete == 1) { + // This is a proxy task that was already completed but it needs to run + // its bottom-half finish + KA_TRACE( + 30, + ("__kmp_invoke_task: T#%d running bottom finish for proxy task %p\n", + gtid, taskdata)); - __kmp_bottom_half_finish_proxy(gtid,task); + __kmp_bottom_half_finish_proxy(gtid, task); - KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for proxy task %p, resuming task %p\n", gtid, taskdata, current_task) ); + KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed bottom finish for " + "proxy task %p, resuming task %p\n", + gtid, taskdata, current_task)); - return; - } + return; + } #endif #if USE_ITT_BUILD && USE_ITT_NOTIFY - if(__kmp_forkjoin_frames_mode == 3) { - // Get the current time stamp to measure task execution time to correct barrier imbalance time - cur_time = __itt_get_timestamp(); - } + if (__kmp_forkjoin_frames_mode == 3) { + // Get the current time stamp to measure task execution time to correct + // barrier imbalance time + cur_time = __itt_get_timestamp(); + } #endif #if OMP_45_ENABLED - // Proxy tasks are not handled by the runtime - if ( taskdata->td_flags.proxy != TASK_PROXY ) { + // Proxy tasks are not handled by the runtime + if (taskdata->td_flags.proxy != TASK_PROXY) { #endif - ANNOTATE_HAPPENS_AFTER(task); - __kmp_task_start( gtid, task, current_task ); + ANNOTATE_HAPPENS_AFTER(task); + __kmp_task_start(gtid, task, current_task); #if OMP_45_ENABLED - } + } #endif #if OMPT_SUPPORT - ompt_thread_info_t oldInfo; - kmp_info_t * thread; - if (ompt_enabled) { - // Store the threads states and restore them after the task - thread = __kmp_threads[ gtid ]; - oldInfo = thread->th.ompt_thread_info; - 
thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = ompt_state_work_parallel; - taskdata->ompt_task_info.frame.exit_runtime_frame = __builtin_frame_address(0); - } + ompt_thread_info_t oldInfo; + kmp_info_t *thread; + if (ompt_enabled) { + // Store the threads states and restore them after the task + thread = __kmp_threads[gtid]; + oldInfo = thread->th.ompt_thread_info; + thread->th.ompt_thread_info.wait_id = 0; + thread->th.ompt_thread_info.state = ompt_state_work_parallel; + taskdata->ompt_task_info.frame.exit_runtime_frame = + __builtin_frame_address(0); + } #endif #if OMP_40_ENABLED - // TODO: cancel tasks if the parallel region has also been cancelled - // TODO: check if this sequence can be hoisted above __kmp_task_start - // if cancellation has been enabled for this run ... - if (__kmp_omp_cancellation) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - kmp_team_t * this_team = this_thr->th.th_team; - kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; - if ((taskgroup && taskgroup->cancel_request) || (this_team->t.t_cancel_request == cancel_parallel)) { - KMP_COUNT_BLOCK(TASK_cancelled); - // this task belongs to a task group and we need to cancel it - discard = 1 /* true */; - } - } - - // - // Invoke the task routine and pass in relevant data. - // Thunks generated by gcc take a different argument list. - // - if (!discard) { + // TODO: cancel tasks if the parallel region has also been cancelled + // TODO: check if this sequence can be hoisted above __kmp_task_start + // if cancellation has been enabled for this run ... + if (__kmp_omp_cancellation) { + kmp_info_t *this_thr = __kmp_threads[gtid]; + kmp_team_t *this_team = this_thr->th.th_team; + kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; + if ((taskgroup && taskgroup->cancel_request) || + (this_team->t.t_cancel_request == cancel_parallel)) { + KMP_COUNT_BLOCK(TASK_cancelled); + // this task belongs to a task group and we need to cancel it + discard = 1 /* true */; + } + } + + // Invoke the task routine and pass in relevant data. + // Thunks generated by gcc take a different argument list. 
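
// A minimal sketch of the two task-entry shapes this dispatch distinguishes.
// The entry names and the shareds layout are hypothetical; only the call
// signatures follow the invocations made below: (*(task->routine))(gtid, task)
// for native entries, and a void(void *) thunk that receives task->shareds
// when td_flags.native is set for gcc-built code.

// native KMP entry (kmp_routine_entry_t shape), assuming kmp.h declarations
static kmp_int32 example_native_entry(kmp_int32 gtid, kmp_task_t *task) {
  (void)gtid; // unused in this toy body
  int *counter = *(int **)task->shareds; // assumed shareds layout
  ++*counter;
  return 0;
}

// GOMP-compat thunk: the runtime casts task->routine to void (*)(void *)
// and passes task->shareds directly
static void example_gomp_thunk(void *shareds) { ++**(int **)shareds; }
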
+ if (!discard) { #if KMP_STATS_ENABLED - KMP_COUNT_BLOCK(TASK_executed); - switch(KMP_GET_THREAD_STATE()) { - case FORK_JOIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); break; - case PLAIN_BARRIER: KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); break; - case TASKYIELD: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); break; - case TASKWAIT: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); break; - case TASKGROUP: KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); break; - default: KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); break; - } + KMP_COUNT_BLOCK(TASK_executed); + switch (KMP_GET_THREAD_STATE()) { + case FORK_JOIN_BARRIER: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_join_bar); + break; + case PLAIN_BARRIER: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_plain_bar); + break; + case TASKYIELD: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskyield); + break; + case TASKWAIT: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskwait); + break; + case TASKGROUP: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_taskgroup); + break; + default: + KMP_PUSH_PARTITIONED_TIMER(OMP_task_immediate); + break; + } #endif // KMP_STATS_ENABLED #endif // OMP_40_ENABLED #if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're about to run this task */ - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_switch)) - { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - current_task->ompt_task_info.task_id, - taskdata->ompt_task_info.task_id); - } + /* let OMPT know that we're about to run this task */ + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { + ompt_callbacks.ompt_callback(ompt_event_task_switch)( + current_task->ompt_task_info.task_id, + taskdata->ompt_task_info.task_id); + } #endif #ifdef KMP_GOMP_COMPAT - if (taskdata->td_flags.native) { - ((void (*)(void *))(*(task->routine)))(task->shareds); - } - else + if (taskdata->td_flags.native) { + ((void (*)(void *))(*(task->routine)))(task->shareds); + } else #endif /* KMP_GOMP_COMPAT */ - { - (*(task->routine))(gtid, task); - } - KMP_POP_PARTITIONED_TIMER(); + { + (*(task->routine))(gtid, task); + } + KMP_POP_PARTITIONED_TIMER(); #if OMPT_SUPPORT && OMPT_TRACE - /* let OMPT know that we're returning to the callee task */ - if (ompt_enabled && - ompt_callbacks.ompt_callback(ompt_event_task_switch)) - { - ompt_callbacks.ompt_callback(ompt_event_task_switch)( - taskdata->ompt_task_info.task_id, - current_task->ompt_task_info.task_id); - } + /* let OMPT know that we're returning to the callee task */ + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_task_switch)) { + ompt_callbacks.ompt_callback(ompt_event_task_switch)( + taskdata->ompt_task_info.task_id, + current_task->ompt_task_info.task_id); + } #endif #if OMP_40_ENABLED - } + } #endif // OMP_40_ENABLED - #if OMPT_SUPPORT - if (ompt_enabled) { - thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; - } + if (ompt_enabled) { + thread->th.ompt_thread_info = oldInfo; + taskdata->ompt_task_info.frame.exit_runtime_frame = NULL; + } #endif #if OMP_45_ENABLED - // Proxy tasks are not handled by the runtime - if ( taskdata->td_flags.proxy != TASK_PROXY ) { + // Proxy tasks are not handled by the runtime + if (taskdata->td_flags.proxy != TASK_PROXY) { #endif - ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); - __kmp_task_finish( gtid, task, current_task ); + ANNOTATE_HAPPENS_BEFORE(taskdata->td_parent); + __kmp_task_finish(gtid, task, current_task); #if OMP_45_ENABLED - } + } #endif #if USE_ITT_BUILD && USE_ITT_NOTIFY - // Barrier 
imbalance - correct arrive time after the task finished - if(__kmp_forkjoin_frames_mode == 3) { - kmp_info_t *this_thr = __kmp_threads [ gtid ]; - if(this_thr->th.th_bar_arrive_time) { - this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time); - } + // Barrier imbalance - correct arrive time after the task finished + if (__kmp_forkjoin_frames_mode == 3) { + kmp_info_t *this_thr = __kmp_threads[gtid]; + if (this_thr->th.th_bar_arrive_time) { + this_thr->th.th_bar_arrive_time += (__itt_get_timestamp() - cur_time); } + } #endif - KA_TRACE(30, ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", - gtid, taskdata, current_task) ); - return; + KA_TRACE( + 30, + ("__kmp_invoke_task(exit): T#%d completed task %p, resuming task %p\n", + gtid, taskdata, current_task)); + return; } -//----------------------------------------------------------------------- // __kmpc_omp_task_parts: Schedule a thread-switchable task for execution // // loc_ref: location of original task pragma (ignored) // gtid: Global Thread ID of encountering thread // new_task: task thunk allocated by __kmp_omp_task_alloc() for the ''new task'' // Returns: -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. - -kmp_int32 -__kmpc_omp_task_parts( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) -{ - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); - - KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - - /* Should we execute the new task or queue it? For now, let's just always try to - queue it. If the queue fills up, then we'll execute it. */ - - if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer - { // Execute this task immediately - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - new_taskdata->td_flags.task_serial = 1; - __kmp_invoke_task( gtid, new_task, current_task ); - } - - KA_TRACE(10, ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " - "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", gtid, loc_ref, - new_taskdata ) ); - - ANNOTATE_HAPPENS_BEFORE(new_task); - return TASK_CURRENT_NOT_QUEUED; +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to +// be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be +// resumed later. +kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task) { + kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + + KA_TRACE(10, ("__kmpc_omp_task_parts(enter): T#%d loc=%p task=%p\n", gtid, + loc_ref, new_taskdata)); + + /* Should we execute the new task or queue it? For now, let's just always try + to queue it. If the queue fills up, then we'll execute it. 
*/ + + if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer + { // Execute this task immediately + kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; + new_taskdata->td_flags.task_serial = 1; + __kmp_invoke_task(gtid, new_task, current_task); + } + + KA_TRACE( + 10, + ("__kmpc_omp_task_parts(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: " + "loc=%p task=%p, return: TASK_CURRENT_NOT_QUEUED\n", + gtid, loc_ref, new_taskdata)); + + ANNOTATE_HAPPENS_BEFORE(new_task); + return TASK_CURRENT_NOT_QUEUED; } -//--------------------------------------------------------------------- // __kmp_omp_task: Schedule a non-thread-switchable task for execution -// gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() -// serialize_immediate: if TRUE then if the task is executed immediately its execution will be serialized -// returns: // -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. -kmp_int32 -__kmp_omp_task( kmp_int32 gtid, kmp_task_t * new_task, bool serialize_immediate ) -{ - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); +// gtid: Global Thread ID of encountering thread +// new_task:non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() +// serialize_immediate: if TRUE then if the task is executed immediately its +// execution will be serialized +// Returns: +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to +// be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be +// resumed later. +kmp_int32 __kmp_omp_task(kmp_int32 gtid, kmp_task_t *new_task, + bool serialize_immediate) { + kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); #if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = - __builtin_frame_address(1); - } + if (ompt_enabled) { + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = + __builtin_frame_address(1); + } #endif - /* Should we execute the new task or queue it? For now, let's just always try to - queue it. If the queue fills up, then we'll execute it. */ +/* Should we execute the new task or queue it? For now, let's just always try to + queue it. If the queue fills up, then we'll execute it. 
*/ #if OMP_45_ENABLED - if ( new_taskdata->td_flags.proxy == TASK_PROXY || __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer + if (new_taskdata->td_flags.proxy == TASK_PROXY || + __kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer #else - if ( __kmp_push_task( gtid, new_task ) == TASK_NOT_PUSHED ) // if cannot defer + if (__kmp_push_task(gtid, new_task) == TASK_NOT_PUSHED) // if cannot defer #endif - { // Execute this task immediately - kmp_taskdata_t * current_task = __kmp_threads[ gtid ] -> th.th_current_task; - if ( serialize_immediate ) - new_taskdata -> td_flags.task_serial = 1; - __kmp_invoke_task( gtid, new_task, current_task ); - } + { // Execute this task immediately + kmp_taskdata_t *current_task = __kmp_threads[gtid]->th.th_current_task; + if (serialize_immediate) + new_taskdata->td_flags.task_serial = 1; + __kmp_invoke_task(gtid, new_task, current_task); + } #if OMPT_SUPPORT - if (ompt_enabled) { - new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; - } + if (ompt_enabled) { + new_taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; + } #endif - ANNOTATE_HAPPENS_BEFORE(new_task); - return TASK_CURRENT_NOT_QUEUED; + ANNOTATE_HAPPENS_BEFORE(new_task); + return TASK_CURRENT_NOT_QUEUED; } -//--------------------------------------------------------------------- -// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a non-thread-switchable task from -// the parent thread only! +// __kmpc_omp_task: Wrapper around __kmp_omp_task to schedule a +// non-thread-switchable task from the parent thread only! +// // loc_ref: location of original task pragma (ignored) // gtid: Global Thread ID of encountering thread -// new_task: non-thread-switchable task thunk allocated by __kmp_omp_task_alloc() -// returns: -// -// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to be resumed later. -// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be resumed later. - -kmp_int32 -__kmpc_omp_task( ident_t *loc_ref, kmp_int32 gtid, kmp_task_t * new_task) -{ - kmp_int32 res; - KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); +// new_task: non-thread-switchable task thunk allocated by +// __kmp_omp_task_alloc() +// Returns: +// TASK_CURRENT_NOT_QUEUED (0) if did not suspend and queue current task to +// be resumed later. +// TASK_CURRENT_QUEUED (1) if suspended and queued the current task to be +// resumed later. 
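
// A minimal sketch of how a compiler might drive __kmpc_omp_task_alloc and
// __kmpc_omp_task (both defined in this file) to lower
//   #pragma omp task shared(x)
//   x++;
// The entry name, the shareds layout, the sizes and the flags value (assumed
// here to encode a tied task as 1) are illustrative assumptions rather than
// definitions taken from the runtime.
static kmp_int32 example_task_entry(kmp_int32 gtid, kmp_task_t *task) {
  (void)gtid; // unused in this toy body
  int *x = *(int **)task->shareds; // assumed layout: a single shared pointer
  ++*x;
  return 0;
}

static void example_spawn_task(ident_t *loc, int *x) {
  kmp_int32 gtid = __kmpc_global_thread_num(loc);
  kmp_task_t *task =
      __kmpc_omp_task_alloc(loc, gtid, /* flags (tied, assumed) = */ 1,
                            sizeof(kmp_task_t), sizeof(int *),
                            (kmp_routine_entry_t)&example_task_entry);
  *(int **)task->shareds = x; // publish the shared variable
  __kmpc_omp_task(loc, gtid, task); // deferred, or run at once if not pushed
}
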
+kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task) { + kmp_int32 res; + KMP_SET_THREAD_STATE_BLOCK(EXPLICIT_TASK); #if KMP_DEBUG - kmp_taskdata_t * new_taskdata = KMP_TASK_TO_TASKDATA(new_task); + kmp_taskdata_t *new_taskdata = KMP_TASK_TO_TASKDATA(new_task); #endif - KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); + KA_TRACE(10, ("__kmpc_omp_task(enter): T#%d loc=%p task=%p\n", gtid, loc_ref, + new_taskdata)); - res = __kmp_omp_task(gtid,new_task,true); + res = __kmp_omp_task(gtid, new_task, true); - KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", - gtid, loc_ref, new_taskdata ) ); - return res; + KA_TRACE(10, ("__kmpc_omp_task(exit): T#%d returning " + "TASK_CURRENT_NOT_QUEUED: loc=%p task=%p\n", + gtid, loc_ref, new_taskdata)); + return res; } -//------------------------------------------------------------------------------------- -// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are complete - -kmp_int32 -__kmpc_omp_taskwait( ident_t *loc_ref, kmp_int32 gtid ) -{ - kmp_taskdata_t * taskdata; - kmp_info_t * thread; - int thread_finished = FALSE; - KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); +// __kmpc_omp_taskwait: Wait until all tasks generated by the current task are +// complete +kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { + kmp_taskdata_t *taskdata; + kmp_info_t *thread; + int thread_finished = FALSE; + KMP_SET_THREAD_STATE_BLOCK(TASKWAIT); - KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref) ); + KA_TRACE(10, ("__kmpc_omp_taskwait(enter): T#%d loc=%p\n", gtid, loc_ref)); - if ( __kmp_tasking_mode != tskm_immediate_exec ) { - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? + if (__kmp_tasking_mode != tskm_immediate_exec) { + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark + // begin wait? - thread = __kmp_threads[ gtid ]; - taskdata = thread -> th.th_current_task; + thread = __kmp_threads[gtid]; + taskdata = thread->th.th_current_task; #if OMPT_SUPPORT && OMPT_TRACE - ompt_task_id_t my_task_id; - ompt_parallel_id_t my_parallel_id; - - if (ompt_enabled) { - kmp_team_t *team = thread->th.th_team; - my_task_id = taskdata->ompt_task_info.task_id; - my_parallel_id = team->t.ompt_team_info.parallel_id; - - taskdata->ompt_task_info.frame.reenter_runtime_frame = __builtin_frame_address(1); - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)( - my_parallel_id, my_task_id); - } - } + ompt_task_id_t my_task_id; + ompt_parallel_id_t my_parallel_id; + + if (ompt_enabled) { + kmp_team_t *team = thread->th.th_team; + my_task_id = taskdata->ompt_task_info.task_id; + my_parallel_id = team->t.ompt_team_info.parallel_id; + + taskdata->ompt_task_info.frame.reenter_runtime_frame = + __builtin_frame_address(1); + if (ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)) { + ompt_callbacks.ompt_callback(ompt_event_taskwait_begin)(my_parallel_id, + my_task_id); + } + } #endif - // Debugger: The taskwait is active. Store location and thread encountered the taskwait. +// Debugger: The taskwait is active. Store location and thread encountered the +// taskwait. #if USE_ITT_BUILD - // Note: These values are used by ITT events as well. +// Note: These values are used by ITT events as well. 
#endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; + taskdata->td_taskwait_counter += 1; + taskdata->td_taskwait_ident = loc_ref; + taskdata->td_taskwait_thread = gtid + 1; #if USE_ITT_BUILD - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); + void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - bool must_wait = ! taskdata->td_flags.team_serial && ! taskdata->td_flags.final; + bool must_wait = + !taskdata->td_flags.team_serial && !taskdata->td_flags.final; #if OMP_45_ENABLED - must_wait = must_wait || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks); + must_wait = must_wait || (thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks); #endif - if (must_wait) - { - kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U); - while ( TCR_4(taskdata -> td_incomplete_child_tasks) != 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } + if (must_wait) { + kmp_flag_32 flag(&(taskdata->td_incomplete_child_tasks), 0U); + while (TCR_4(taskdata->td_incomplete_child_tasks) != 0) { + flag.execute_tasks(thread, gtid, FALSE, + &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), + __kmp_task_stealing_constraint); + } + } #if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? - // Debugger: The taskwait is completed. Location remains, but thread is negated. - taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark + // end of wait? + // Debugger: The taskwait is completed. Location remains, but thread is + // negated. 
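
// The td_taskwait_thread convention used here, restated as a small
// hypothetical decoder: the field holds gtid + 1 while the taskwait is
// active and is negated once the wait completes (0 is assumed to mean no
// taskwait has been encountered yet).
static int example_decode_taskwait_thread(int tw_thread, int *gtid_out) {
  if (tw_thread > 0) { // taskwait currently active
    *gtid_out = tw_thread - 1;
    return 1;
  }
  if (tw_thread < 0) { // taskwait already completed
    *gtid_out = -tw_thread - 1;
    return -1;
  }
  return 0; // assumed: no taskwait encountered
}
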
+ taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; #if OMPT_SUPPORT && OMPT_TRACE - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { - ompt_callbacks.ompt_callback(ompt_event_taskwait_end)( - my_parallel_id, my_task_id); - } - taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; - } -#endif - ANNOTATE_HAPPENS_AFTER(taskdata); + if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_taskwait_end)) { + ompt_callbacks.ompt_callback(ompt_event_taskwait_end)(my_parallel_id, + my_task_id); + } + taskdata->ompt_task_info.frame.reenter_runtime_frame = NULL; } +#endif + ANNOTATE_HAPPENS_AFTER(taskdata); + } - KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " - "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); + KA_TRACE(10, ("__kmpc_omp_taskwait(exit): T#%d task %p finished waiting, " + "returning TASK_CURRENT_NOT_QUEUED\n", + gtid, taskdata)); - return TASK_CURRENT_NOT_QUEUED; + return TASK_CURRENT_NOT_QUEUED; } - -//------------------------------------------------- // __kmpc_omp_taskyield: switch to a different task - -kmp_int32 -__kmpc_omp_taskyield( ident_t *loc_ref, kmp_int32 gtid, int end_part ) -{ - kmp_taskdata_t * taskdata; - kmp_info_t * thread; - int thread_finished = FALSE; - - KMP_COUNT_BLOCK(OMP_TASKYIELD); - KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); - - KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", - gtid, loc_ref, end_part) ); - - if ( __kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel ) { - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark begin wait? - - thread = __kmp_threads[ gtid ]; - taskdata = thread -> th.th_current_task; - // Should we model this as a task wait or not? - // Debugger: The taskwait is active. Store location and thread encountered the taskwait. +kmp_int32 __kmpc_omp_taskyield(ident_t *loc_ref, kmp_int32 gtid, int end_part) { + kmp_taskdata_t *taskdata; + kmp_info_t *thread; + int thread_finished = FALSE; + + KMP_COUNT_BLOCK(OMP_TASKYIELD); + KMP_SET_THREAD_STATE_BLOCK(TASKYIELD); + + KA_TRACE(10, ("__kmpc_omp_taskyield(enter): T#%d loc=%p end_part = %d\n", + gtid, loc_ref, end_part)); + + if (__kmp_tasking_mode != tskm_immediate_exec && __kmp_init_parallel) { + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark + // begin wait? + + thread = __kmp_threads[gtid]; + taskdata = thread->th.th_current_task; +// Should we model this as a task wait or not? +// Debugger: The taskwait is active. Store location and thread encountered the +// taskwait. #if USE_ITT_BUILD - // Note: These values are used by ITT events as well. +// Note: These values are used by ITT events as well. #endif /* USE_ITT_BUILD */ - taskdata->td_taskwait_counter += 1; - taskdata->td_taskwait_ident = loc_ref; - taskdata->td_taskwait_thread = gtid + 1; + taskdata->td_taskwait_counter += 1; + taskdata->td_taskwait_ident = loc_ref; + taskdata->td_taskwait_thread = gtid + 1; #if USE_ITT_BUILD - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); + void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - if ( ! 
taskdata->td_flags.team_serial ) { - kmp_task_team_t * task_team = thread->th.th_task_team; - if (task_team != NULL) { - if (KMP_TASKING_ENABLED(task_team)) { - __kmp_execute_tasks_32( thread, gtid, NULL, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } + if (!taskdata->td_flags.team_serial) { + kmp_task_team_t *task_team = thread->th.th_task_team; + if (task_team != NULL) { + if (KMP_TASKING_ENABLED(task_team)) { + __kmp_execute_tasks_32( + thread, gtid, NULL, FALSE, + &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), + __kmp_task_stealing_constraint); } + } + } #if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark end of wait? - // Debugger: The taskwait is completed. Location remains, but thread is negated. - taskdata->td_taskwait_thread = - taskdata->td_taskwait_thread; - } + // GEH TODO: shouldn't we have some sort of OMPRAP API calls here to mark + // end of wait? + // Debugger: The taskwait is completed. Location remains, but thread is + // negated. + taskdata->td_taskwait_thread = -taskdata->td_taskwait_thread; + } - KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " - "returning TASK_CURRENT_NOT_QUEUED\n", gtid, taskdata) ); + KA_TRACE(10, ("__kmpc_omp_taskyield(exit): T#%d task %p resuming, " + "returning TASK_CURRENT_NOT_QUEUED\n", + gtid, taskdata)); - return TASK_CURRENT_NOT_QUEUED; + return TASK_CURRENT_NOT_QUEUED; } // TODO: change to OMP_50_ENABLED, need to change build tools for this to work #if OMP_45_ENABLED -// // Task Reduction implementation -// typedef struct kmp_task_red_flags { - unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects) - unsigned reserved31 : 31; + unsigned lazy_priv : 1; // hint: (1) use lazy allocation (big objects) + unsigned reserved31 : 31; } kmp_task_red_flags_t; // internal structure for reduction data item related info typedef struct kmp_task_red_data { - void *reduce_shar; // shared reduction item - size_t reduce_size; // size of data item - void *reduce_priv; // thread specific data - void *reduce_pend; // end of private data for comparison op - void *reduce_init; // data initialization routine - void *reduce_fini; // data finalization routine - void *reduce_comb; // data combiner routine - kmp_task_red_flags_t flags; // flags for additional info from compiler + void *reduce_shar; // shared reduction item + size_t reduce_size; // size of data item + void *reduce_priv; // thread specific data + void *reduce_pend; // end of private data for comparison op + void *reduce_init; // data initialization routine + void *reduce_fini; // data finalization routine + void *reduce_comb; // data combiner routine + kmp_task_red_flags_t flags; // flags for additional info from compiler } kmp_task_red_data_t; // structure sent us by compiler - one per reduction item typedef struct kmp_task_red_input { - void *reduce_shar; // shared reduction item - size_t reduce_size; // size of data item - void *reduce_init; // data initialization routine - void *reduce_fini; // data finalization routine - void *reduce_comb; // data combiner routine - kmp_task_red_flags_t flags; // flags for additional info from compiler + void *reduce_shar; // shared reduction item + size_t reduce_size; // size of data item + void *reduce_init; // data initialization routine + void 
*reduce_fini; // data finalization routine + void *reduce_comb; // data combiner routine + kmp_task_red_flags_t flags; // flags for additional info from compiler } kmp_task_red_input_t; /*! @@ -1638,58 +1655,57 @@ typedef struct kmp_task_red_input { Initialize task reduction for the taskgroup. */ -void* -__kmpc_task_reduction_init(int gtid, int num, void *data) -{ - kmp_info_t * thread = __kmp_threads[gtid]; - kmp_taskgroup_t * tg = thread->th.th_current_task->td_taskgroup; - kmp_int32 nth = thread->th.th_team_nproc; - kmp_task_red_input_t *input = (kmp_task_red_input_t*)data; - kmp_task_red_data_t *arr; - - // check input data just in case - KMP_ASSERT(tg != NULL); - KMP_ASSERT(data != NULL); - KMP_ASSERT(num > 0); - if (nth == 1) { - KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n", - gtid, tg)); - return (void*)tg; - } - KA_TRACE(10,("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n", - gtid, tg, num)); - arr = (kmp_task_red_data_t*)__kmp_thread_malloc(thread, num * sizeof(kmp_task_red_data_t)); - for (int i = 0; i < num; ++i) { - void(*f_init)(void*) = (void(*)(void*))(input[i].reduce_init); - size_t size = input[i].reduce_size - 1; - // round the size up to cache line per thread-specific item - size += CACHE_LINE - size % CACHE_LINE; - KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory - arr[i].reduce_shar = input[i].reduce_shar; - arr[i].reduce_size = size; - arr[i].reduce_init = input[i].reduce_init; - arr[i].reduce_fini = input[i].reduce_fini; - arr[i].reduce_comb = input[i].reduce_comb; - arr[i].flags = input[i].flags; - if (!input[i].flags.lazy_priv) { - // allocate cache-line aligned block and fill it with zeros - arr[i].reduce_priv = __kmp_allocate(nth * size); - arr[i].reduce_pend = (char*)(arr[i].reduce_priv) + nth * size; - if (f_init != NULL) { - // initialize thread-specific items - for (int j = 0; j < nth; ++j) { - f_init((char*)(arr[i].reduce_priv) + j * size); - } - } - } else { - // only allocate space for pointers now, - // objects will be lazily allocated/initialized once requested - arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void*)); +void *__kmpc_task_reduction_init(int gtid, int num, void *data) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskgroup_t *tg = thread->th.th_current_task->td_taskgroup; + kmp_int32 nth = thread->th.th_team_nproc; + kmp_task_red_input_t *input = (kmp_task_red_input_t *)data; + kmp_task_red_data_t *arr; + + // check input data just in case + KMP_ASSERT(tg != NULL); + KMP_ASSERT(data != NULL); + KMP_ASSERT(num > 0); + if (nth == 1) { + KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, tg %p, exiting nth=1\n", + gtid, tg)); + return (void *)tg; + } + KA_TRACE(10, ("__kmpc_task_reduction_init: T#%d, taskgroup %p, #items %d\n", + gtid, tg, num)); + arr = (kmp_task_red_data_t *)__kmp_thread_malloc( + thread, num * sizeof(kmp_task_red_data_t)); + for (int i = 0; i < num; ++i) { + void (*f_init)(void *) = (void (*)(void *))(input[i].reduce_init); + size_t size = input[i].reduce_size - 1; + // round the size up to cache line per thread-specific item + size += CACHE_LINE - size % CACHE_LINE; + KMP_ASSERT(input[i].reduce_comb != NULL); // combiner is mandatory + arr[i].reduce_shar = input[i].reduce_shar; + arr[i].reduce_size = size; + arr[i].reduce_init = input[i].reduce_init; + arr[i].reduce_fini = input[i].reduce_fini; + arr[i].reduce_comb = input[i].reduce_comb; + arr[i].flags = input[i].flags; + if (!input[i].flags.lazy_priv) { + // allocate cache-line aligned block and fill it 
with zeros + arr[i].reduce_priv = __kmp_allocate(nth * size); + arr[i].reduce_pend = (char *)(arr[i].reduce_priv) + nth * size; + if (f_init != NULL) { + // initialize thread-specific items + for (int j = 0; j < nth; ++j) { + f_init((char *)(arr[i].reduce_priv) + j * size); } - } - tg->reduce_data = (void*)arr; - tg->reduce_num_data = num; - return (void*)tg; + } + } else { + // only allocate space for pointers now, + // objects will be lazily allocated/initialized once requested + arr[i].reduce_priv = __kmp_allocate(nth * sizeof(void *)); + } + } + tg->reduce_data = (void *)arr; + tg->reduce_num_data = num; + return (void *)tg; } /*! @@ -1701,370 +1717,386 @@ __kmpc_task_reduction_init(int gtid, int num, void *data) Get thread-specific location of data item */ -void* -__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) -{ - kmp_info_t * thread = __kmp_threads[gtid]; - kmp_int32 nth = thread->th.th_team_nproc; - if (nth == 1) - return data; // nothing to do - - kmp_taskgroup_t *tg = (kmp_taskgroup_t*)tskgrp; - if (tg == NULL) - tg = thread->th.th_current_task->td_taskgroup; - KMP_ASSERT(tg != NULL); - kmp_task_red_data_t *arr = (kmp_task_red_data_t*)(tg->reduce_data); - kmp_int32 num = tg->reduce_num_data; - kmp_int32 tid = thread->th.th_info.ds.ds_tid; - - KMP_ASSERT(data != NULL); - while (tg != NULL) { - for (int i = 0; i < num; ++i) { - if (!arr[i].flags.lazy_priv) { - if (data == arr[i].reduce_shar || - (data >= arr[i].reduce_priv && data < arr[i].reduce_pend)) - return (char*)(arr[i].reduce_priv) + tid * arr[i].reduce_size; - } else { - // check shared location first - void **p_priv = (void**)(arr[i].reduce_priv); - if (data == arr[i].reduce_shar) +void *__kmpc_task_reduction_get_th_data(int gtid, void *tskgrp, void *data) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_int32 nth = thread->th.th_team_nproc; + if (nth == 1) + return data; // nothing to do + + kmp_taskgroup_t *tg = (kmp_taskgroup_t *)tskgrp; + if (tg == NULL) + tg = thread->th.th_current_task->td_taskgroup; + KMP_ASSERT(tg != NULL); + kmp_task_red_data_t *arr = (kmp_task_red_data_t *)(tg->reduce_data); + kmp_int32 num = tg->reduce_num_data; + kmp_int32 tid = thread->th.th_info.ds.ds_tid; + + KMP_ASSERT(data != NULL); + while (tg != NULL) { + for (int i = 0; i < num; ++i) { + if (!arr[i].flags.lazy_priv) { + if (data == arr[i].reduce_shar || + (data >= arr[i].reduce_priv && data < arr[i].reduce_pend)) + return (char *)(arr[i].reduce_priv) + tid * arr[i].reduce_size; + } else { + // check shared location first + void **p_priv = (void **)(arr[i].reduce_priv); + if (data == arr[i].reduce_shar) + goto found; + // check if we get some thread specific location as parameter + for (int j = 0; j < nth; ++j) + if (data == p_priv[j]) goto found; - // check if we get some thread specific location as parameter - for (int j = 0; j < nth; ++j) - if (data == p_priv[j]) - goto found; - continue; // not found, continue search - found: - if (p_priv[tid] == NULL) { - // allocate thread specific object lazily - void(*f_init)(void*) = (void(*)(void*))(arr[i].reduce_init); - p_priv[tid] = __kmp_allocate(arr[i].reduce_size); - if (f_init != NULL) { - f_init(p_priv[tid]); - } + continue; // not found, continue search + found: + if (p_priv[tid] == NULL) { + // allocate thread specific object lazily + void (*f_init)(void *) = (void (*)(void *))(arr[i].reduce_init); + p_priv[tid] = __kmp_allocate(arr[i].reduce_size); + if (f_init != NULL) { + f_init(p_priv[tid]); } - return p_priv[tid]; } + return p_priv[tid]; } - tg = 
tg->parent; - arr = (kmp_task_red_data_t*)(tg->reduce_data); - num = tg->reduce_num_data; } - KMP_ASSERT2(0, "Unknown task reduction item"); - return NULL; // ERROR, this line never executed + tg = tg->parent; + arr = (kmp_task_red_data_t *)(tg->reduce_data); + num = tg->reduce_num_data; + } + KMP_ASSERT2(0, "Unknown task reduction item"); + return NULL; // ERROR, this line never executed } // Finalize task reduction. // Called from __kmpc_end_taskgroup() -static void -__kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) -{ - kmp_int32 nth = th->th.th_team_nproc; - KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1 - kmp_task_red_data_t *arr = (kmp_task_red_data_t*)tg->reduce_data; - kmp_int32 num = tg->reduce_num_data; - for (int i = 0; i < num; ++i) { - void *sh_data = arr[i].reduce_shar; - void(*f_fini)(void*) = (void(*)(void*))(arr[i].reduce_fini); - void(*f_comb)(void*,void*) = (void(*)(void*,void*))(arr[i].reduce_comb); - if (!arr[i].flags.lazy_priv) { - void *pr_data = arr[i].reduce_priv; - size_t size = arr[i].reduce_size; - for (int j = 0; j < nth; ++j) { - void * priv_data = (char*)pr_data + j * size; - f_comb(sh_data, priv_data); // combine results - if (f_fini) - f_fini(priv_data); // finalize if needed - } - } else { - void **pr_data = (void**)(arr[i].reduce_priv); - for (int j = 0; j < nth; ++j) { - if (pr_data[j] != NULL) { - f_comb(sh_data, pr_data[j]); // combine results - if (f_fini) - f_fini(pr_data[j]); // finalize if needed - __kmp_free(pr_data[j]); - } - } +static void __kmp_task_reduction_fini(kmp_info_t *th, kmp_taskgroup_t *tg) { + kmp_int32 nth = th->th.th_team_nproc; + KMP_DEBUG_ASSERT(nth > 1); // should not be called if nth == 1 + kmp_task_red_data_t *arr = (kmp_task_red_data_t *)tg->reduce_data; + kmp_int32 num = tg->reduce_num_data; + for (int i = 0; i < num; ++i) { + void *sh_data = arr[i].reduce_shar; + void (*f_fini)(void *) = (void (*)(void *))(arr[i].reduce_fini); + void (*f_comb)(void *, void *) = + (void (*)(void *, void *))(arr[i].reduce_comb); + if (!arr[i].flags.lazy_priv) { + void *pr_data = arr[i].reduce_priv; + size_t size = arr[i].reduce_size; + for (int j = 0; j < nth; ++j) { + void *priv_data = (char *)pr_data + j * size; + f_comb(sh_data, priv_data); // combine results + if (f_fini) + f_fini(priv_data); // finalize if needed + } + } else { + void **pr_data = (void **)(arr[i].reduce_priv); + for (int j = 0; j < nth; ++j) { + if (pr_data[j] != NULL) { + f_comb(sh_data, pr_data[j]); // combine results + if (f_fini) + f_fini(pr_data[j]); // finalize if needed + __kmp_free(pr_data[j]); } - __kmp_free(arr[i].reduce_priv); + } } - __kmp_thread_free(th, arr); - tg->reduce_data = NULL; - tg->reduce_num_data = 0; + __kmp_free(arr[i].reduce_priv); + } + __kmp_thread_free(th, arr); + tg->reduce_data = NULL; + tg->reduce_num_data = 0; } #endif #if OMP_40_ENABLED -//------------------------------------------------------------------------------------- // __kmpc_taskgroup: Start a new taskgroup - -void -__kmpc_taskgroup( ident_t* loc, int gtid ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = thread->th.th_current_task; - kmp_taskgroup_t * tg_new = - (kmp_taskgroup_t *)__kmp_thread_malloc( thread, sizeof( kmp_taskgroup_t ) ); - KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new) ); - tg_new->count = 0; - tg_new->cancel_request = cancel_noreq; - tg_new->parent = taskdata->td_taskgroup; +void __kmpc_taskgroup(ident_t *loc, int gtid) { + kmp_info_t *thread = __kmp_threads[gtid]; + 
kmp_taskdata_t *taskdata = thread->th.th_current_task; + kmp_taskgroup_t *tg_new = + (kmp_taskgroup_t *)__kmp_thread_malloc(thread, sizeof(kmp_taskgroup_t)); + KA_TRACE(10, ("__kmpc_taskgroup: T#%d loc=%p group=%p\n", gtid, loc, tg_new)); + tg_new->count = 0; + tg_new->cancel_request = cancel_noreq; + tg_new->parent = taskdata->td_taskgroup; // TODO: change to OMP_50_ENABLED, need to change build tools for this to work #if OMP_45_ENABLED - tg_new->reduce_data = NULL; - tg_new->reduce_num_data = 0; + tg_new->reduce_data = NULL; + tg_new->reduce_num_data = 0; #endif - taskdata->td_taskgroup = tg_new; + taskdata->td_taskgroup = tg_new; } - -//------------------------------------------------------------------------------------- // __kmpc_end_taskgroup: Wait until all tasks generated by the current task // and its descendants are complete +void __kmpc_end_taskgroup(ident_t *loc, int gtid) { + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *taskdata = thread->th.th_current_task; + kmp_taskgroup_t *taskgroup = taskdata->td_taskgroup; + int thread_finished = FALSE; -void -__kmpc_end_taskgroup( ident_t* loc, int gtid ) -{ - kmp_info_t * thread = __kmp_threads[ gtid ]; - kmp_taskdata_t * taskdata = thread->th.th_current_task; - kmp_taskgroup_t * taskgroup = taskdata->td_taskgroup; - int thread_finished = FALSE; + KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc)); + KMP_DEBUG_ASSERT(taskgroup != NULL); + KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); - KA_TRACE(10, ("__kmpc_end_taskgroup(enter): T#%d loc=%p\n", gtid, loc) ); - KMP_DEBUG_ASSERT( taskgroup != NULL ); - KMP_SET_THREAD_STATE_BLOCK(TASKGROUP); - - if ( __kmp_tasking_mode != tskm_immediate_exec ) { + if (__kmp_tasking_mode != tskm_immediate_exec) { #if USE_ITT_BUILD - // For ITT the taskgroup wait is similar to taskwait until we need to distinguish them - void * itt_sync_obj = __kmp_itt_taskwait_object( gtid ); - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_starting( gtid, itt_sync_obj ); + // For ITT the taskgroup wait is similar to taskwait until we need to + // distinguish them + void *itt_sync_obj = __kmp_itt_taskwait_object(gtid); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_starting(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ #if OMP_45_ENABLED - if ( ! taskdata->td_flags.team_serial || (thread->th.th_task_team != NULL && thread->th.th_task_team->tt.tt_found_proxy_tasks) ) + if (!taskdata->td_flags.team_serial || + (thread->th.th_task_team != NULL && + thread->th.th_task_team->tt.tt_found_proxy_tasks)) #else - if ( ! taskdata->td_flags.team_serial ) + if (!taskdata->td_flags.team_serial) #endif - { - kmp_flag_32 flag(&(taskgroup->count), 0U); - while ( TCR_4(taskgroup->count) != 0 ) { - flag.execute_tasks(thread, gtid, FALSE, &thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), __kmp_task_stealing_constraint ); - } - } + { + kmp_flag_32 flag(&(taskgroup->count), 0U); + while (TCR_4(taskgroup->count) != 0) { + flag.execute_tasks(thread, gtid, FALSE, + &thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), + __kmp_task_stealing_constraint); + } + } #if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) - __kmp_itt_taskwait_finished( gtid, itt_sync_obj ); + if (itt_sync_obj != NULL) + __kmp_itt_taskwait_finished(gtid, itt_sync_obj); #endif /* USE_ITT_BUILD */ - } - KMP_DEBUG_ASSERT( taskgroup->count == 0 ); + } + KMP_DEBUG_ASSERT(taskgroup->count == 0); // TODO: change to OMP_50_ENABLED, need to change build tools for this to work #if OMP_45_ENABLED - if( taskgroup->reduce_data != NULL ) // need to reduce? 
- __kmp_task_reduction_fini(thread, taskgroup); + if (taskgroup->reduce_data != NULL) // need to reduce? + __kmp_task_reduction_fini(thread, taskgroup); #endif - // Restore parent taskgroup for the current task - taskdata->td_taskgroup = taskgroup->parent; - __kmp_thread_free( thread, taskgroup ); + // Restore parent taskgroup for the current task + taskdata->td_taskgroup = taskgroup->parent; + __kmp_thread_free(thread, taskgroup); - KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", gtid, taskdata) ); - ANNOTATE_HAPPENS_AFTER(taskdata); + KA_TRACE(10, ("__kmpc_end_taskgroup(exit): T#%d task %p finished waiting\n", + gtid, taskdata)); + ANNOTATE_HAPPENS_AFTER(taskdata); } #endif - -//------------------------------------------------------ // __kmp_remove_my_task: remove a task from my own deque - -static kmp_task_t * -__kmp_remove_my_task( kmp_info_t * thread, kmp_int32 gtid, kmp_task_team_t *task_team, - kmp_int32 is_constrained ) -{ - kmp_task_t * task; - kmp_taskdata_t * taskdata; - kmp_thread_data_t *thread_data; - kmp_uint32 tail; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( task_team -> tt.tt_threads_data != NULL ); // Caller should check this condition - - thread_data = & task_team -> tt.tt_threads_data[ __kmp_tid_from_gtid( gtid ) ]; - - KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - - if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { - KA_TRACE(10, ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - - __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock ); - - if (TCR_4(thread_data -> td.td_deque_ntasks) == 0) { - __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - - tail = ( thread_data -> td.td_deque_tail - 1 ) & TASK_DEQUE_MASK(thread_data->td); // Wrap index. - taskdata = thread_data -> td.td_deque[ tail ]; - - if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) { - // we need to check if the candidate obeys task scheduling constraint: - // only child of current task can be scheduled - kmp_taskdata_t * current = thread->th.th_current_task; - kmp_int32 level = current->td_level; - kmp_taskdata_t * parent = taskdata->td_parent; - while ( parent != current && parent->td_level > level ) { - parent = parent->td_parent; // check generation up to the level of the current task - KMP_DEBUG_ASSERT(parent != NULL); - } - if ( parent != current ) { - // If the tail task is not a child, then no other child can appear in the deque. 
- __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: ntasks=%d head=%u tail=%u\n", - gtid, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - return NULL; - } - } - - thread_data -> td.td_deque_tail = tail; - TCW_4(thread_data -> td.td_deque_ntasks, thread_data -> td.td_deque_ntasks - 1); - - __kmp_release_bootstrap_lock( & thread_data->td.td_deque_lock ); - - KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: ntasks=%d head=%u tail=%u\n", - gtid, taskdata, thread_data->td.td_deque_ntasks, thread_data->td.td_deque_head, - thread_data->td.td_deque_tail) ); - - task = KMP_TASKDATA_TO_TASK( taskdata ); - return task; +static kmp_task_t *__kmp_remove_my_task(kmp_info_t *thread, kmp_int32 gtid, + kmp_task_team_t *task_team, + kmp_int32 is_constrained) { + kmp_task_t *task; + kmp_taskdata_t *taskdata; + kmp_thread_data_t *thread_data; + kmp_uint32 tail; + + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + KMP_DEBUG_ASSERT(task_team->tt.tt_threads_data != + NULL); // Caller should check this condition + + thread_data = &task_team->tt.tt_threads_data[__kmp_tid_from_gtid(gtid)]; + + KA_TRACE(10, ("__kmp_remove_my_task(enter): T#%d ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + + if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { + KA_TRACE(10, + ("__kmp_remove_my_task(exit #1): T#%d No tasks to remove: " + "ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + return NULL; + } + + __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); + + if (TCR_4(thread_data->td.td_deque_ntasks) == 0) { + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); + KA_TRACE(10, + ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " + "ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + return NULL; + } + + tail = (thread_data->td.td_deque_tail - 1) & + TASK_DEQUE_MASK(thread_data->td); // Wrap index. + taskdata = thread_data->td.td_deque[tail]; + + if (is_constrained && (taskdata->td_flags.tiedness == TASK_TIED)) { + // we need to check if the candidate obeys task scheduling constraint: + // only child of current task can be scheduled + kmp_taskdata_t *current = thread->th.th_current_task; + kmp_int32 level = current->td_level; + kmp_taskdata_t *parent = taskdata->td_parent; + while (parent != current && parent->td_level > level) { + parent = parent->td_parent; // check generation up to the level of the + // current task + KMP_DEBUG_ASSERT(parent != NULL); + } + if (parent != current) { + // If the tail task is not a child, then no other child can appear in the + // deque. 
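
// The task scheduling constraint check above, restated as a stand-alone
// predicate (a sketch only; the runtime inlines this walk rather than calling
// a helper): a candidate may be scheduled iff walking td_parent links from
// it, while staying at levels deeper than the current task, reaches the
// current task itself.
static int example_obeys_tsc(kmp_taskdata_t *candidate,
                             kmp_taskdata_t *current) {
  kmp_int32 level = current->td_level;
  kmp_taskdata_t *parent = candidate->td_parent;
  while (parent != current && parent->td_level > level)
    parent = parent->td_parent; // climb toward the current task's level
  return parent == current; // candidate descends from the current task
}
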
+ __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); + KA_TRACE(10, + ("__kmp_remove_my_task(exit #2): T#%d No tasks to remove: " + "ntasks=%d head=%u tail=%u\n", + gtid, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + return NULL; + } + } + + thread_data->td.td_deque_tail = tail; + TCW_4(thread_data->td.td_deque_ntasks, thread_data->td.td_deque_ntasks - 1); + + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); + + KA_TRACE(10, ("__kmp_remove_my_task(exit #2): T#%d task %p removed: " + "ntasks=%d head=%u tail=%u\n", + gtid, taskdata, thread_data->td.td_deque_ntasks, + thread_data->td.td_deque_head, thread_data->td.td_deque_tail)); + + task = KMP_TASKDATA_TO_TASK(taskdata); + return task; } - -//----------------------------------------------------------- // __kmp_steal_task: remove a task from another thread's deque // Assume that calling thread has already checked existence of // task_team thread_data before calling this routine. - static kmp_task_t * -__kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team, - volatile kmp_uint32 *unfinished_threads, int *thread_finished, - kmp_int32 is_constrained ) +__kmp_steal_task(kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team, + volatile kmp_uint32 *unfinished_threads, int *thread_finished, + kmp_int32 is_constrained) { - kmp_task_t * task; - kmp_taskdata_t * taskdata; - kmp_thread_data_t *victim_td, *threads_data; - kmp_int32 victim_tid; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - threads_data = task_team -> tt.tt_threads_data; - KMP_DEBUG_ASSERT( threads_data != NULL ); // Caller should check this condition - - victim_tid = victim->th.th_info.ds.ds_tid; - victim_td = & threads_data[ victim_tid ]; - - KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: task_team=%p ntasks=%d " - "head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - - if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || // Caller should not check this condition - (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? - { - KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - - __kmp_acquire_bootstrap_lock( & victim_td -> td.td_deque_lock ); - - // Check again after we acquire the lock - if ( (TCR_4(victim_td -> td.td_deque_ntasks) == 0) || - (TCR_PTR(victim->th.th_task_team) != task_team)) // GEH: why would this happen? 
- { - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( victim ), task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - - KMP_DEBUG_ASSERT( victim_td -> td.td_deque != NULL ); - - taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head]; - if ( is_constrained ) { - // we need to check if the candidate obeys task scheduling constraint: - // only descendant of current task can be scheduled - kmp_taskdata_t * current = __kmp_threads[ gtid ]->th.th_current_task; - kmp_int32 level = current->td_level; - kmp_taskdata_t * parent = taskdata->td_parent; - while ( parent != current && parent->td_level > level ) { - parent = parent->td_parent; // check generation up to the level of the current task - KMP_DEBUG_ASSERT(parent != NULL); - } - if ( parent != current ) { - // If the head task is not a descendant of the current task then do not - // steal it. No other task in victim's deque can be a descendant of the - // current task. - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: task_team=%p " - "ntasks=%d head=%u tail=%u\n", - gtid, __kmp_gtid_from_thread( threads_data[victim_tid].td.td_thr ), - task_team, victim_td->td.td_deque_ntasks, - victim_td->td.td_deque_head, victim_td->td.td_deque_tail) ); - return NULL; - } - } - // Bump head pointer and Wrap. - victim_td->td.td_deque_head = (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td); - if (*thread_finished) { - // We need to un-mark this victim as a finished victim. This must be done before - // releasing the lock, or else other threads (starting with the master victim) - // might be prematurely released from the barrier!!! - kmp_uint32 count; - - count = KMP_TEST_THEN_INC32( (kmp_int32 *)unfinished_threads ); - - KA_TRACE(20, ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n", - gtid, count + 1, task_team) ); - - *thread_finished = FALSE; - } - TCW_4(victim_td -> td.td_deque_ntasks, TCR_4(victim_td -> td.td_deque_ntasks) - 1); - - __kmp_release_bootstrap_lock( & victim_td -> td.td_deque_lock ); - - KMP_COUNT_BLOCK(TASK_stolen); - KA_TRACE(10, ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " + kmp_task_t *task; + kmp_taskdata_t *taskdata; + kmp_thread_data_t *victim_td, *threads_data; + kmp_int32 victim_tid; + + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + + threads_data = task_team->tt.tt_threads_data; + KMP_DEBUG_ASSERT(threads_data != NULL); // Caller should check this condition + + victim_tid = victim->th.th_info.ds.ds_tid; + victim_td = &threads_data[victim_tid]; + + KA_TRACE(10, ("__kmp_steal_task(enter): T#%d try to steal from T#%d: " + "task_team=%p ntasks=%d " + "head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread(victim), task_team, + victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, + victim_td->td.td_deque_tail)); + + if ((TCR_4(victim_td->td.td_deque_ntasks) == + 0) || // Caller should not check this condition + (TCR_PTR(victim->th.th_task_team) != + task_team)) // GEH: why would this happen? 
+ { + KA_TRACE(10, ("__kmp_steal_task(exit #1): T#%d could not steal from T#%d: " + "task_team=%p " "ntasks=%d head=%u tail=%u\n", - gtid, taskdata, __kmp_gtid_from_thread( victim ), task_team, + gtid, __kmp_gtid_from_thread(victim), task_team, victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, - victim_td->td.td_deque_tail) ); - - task = KMP_TASKDATA_TO_TASK( taskdata ); - return task; + victim_td->td.td_deque_tail)); + return NULL; + } + + __kmp_acquire_bootstrap_lock(&victim_td->td.td_deque_lock); + + // Check again after we acquire the lock + if ((TCR_4(victim_td->td.td_deque_ntasks) == 0) || + (TCR_PTR(victim->th.th_task_team) != + task_team)) // GEH: why would this happen? + { + __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock); + KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from T#%d: " + "task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, __kmp_gtid_from_thread(victim), task_team, + victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, + victim_td->td.td_deque_tail)); + return NULL; + } + + KMP_DEBUG_ASSERT(victim_td->td.td_deque != NULL); + + taskdata = victim_td->td.td_deque[victim_td->td.td_deque_head]; + if (is_constrained) { + // we need to check if the candidate obeys task scheduling constraint: + // only descendant of current task can be scheduled + kmp_taskdata_t *current = __kmp_threads[gtid]->th.th_current_task; + kmp_int32 level = current->td_level; + kmp_taskdata_t *parent = taskdata->td_parent; + while (parent != current && parent->td_level > level) { + parent = parent->td_parent; // check generation up to the level of the + // current task + KMP_DEBUG_ASSERT(parent != NULL); + } + if (parent != current) { + // If the head task is not a descendant of the current task then do not + // steal it. No other task in victim's deque can be a descendant of the + // current task. + __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock); + KA_TRACE(10, ("__kmp_steal_task(exit #2): T#%d could not steal from " + "T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, + __kmp_gtid_from_thread(threads_data[victim_tid].td.td_thr), + task_team, victim_td->td.td_deque_ntasks, + victim_td->td.td_deque_head, victim_td->td.td_deque_tail)); + return NULL; + } + } + // Bump head pointer and Wrap. + victim_td->td.td_deque_head = + (victim_td->td.td_deque_head + 1) & TASK_DEQUE_MASK(victim_td->td); + if (*thread_finished) { + // We need to un-mark this victim as a finished victim. This must be done + // before releasing the lock, or else other threads (starting with the + // master victim) might be prematurely released from the barrier!!! 
+ kmp_uint32 count; + + count = KMP_TEST_THEN_INC32((kmp_int32 *)unfinished_threads); + + KA_TRACE( + 20, + ("__kmp_steal_task: T#%d inc unfinished_threads to %d: task_team=%p\n", + gtid, count + 1, task_team)); + + *thread_finished = FALSE; + } + TCW_4(victim_td->td.td_deque_ntasks, + TCR_4(victim_td->td.td_deque_ntasks) - 1); + + + __kmp_release_bootstrap_lock(&victim_td->td.td_deque_lock); + + KMP_COUNT_BLOCK(TASK_stolen); + KA_TRACE( + 10, + ("__kmp_steal_task(exit #3): T#%d stole task %p from T#%d: task_team=%p " + "ntasks=%d head=%u tail=%u\n", + gtid, taskdata, __kmp_gtid_from_thread(victim), task_team, + victim_td->td.td_deque_ntasks, victim_td->td.td_deque_head, + victim_td->td.td_deque_tail)); + + task = KMP_TASKDATA_TO_TASK(taskdata); + return task; } -//----------------------------------------------------------------------------- -// __kmp_execute_tasks_template: Choose and execute tasks until either the condition -// is statisfied (return true) or there are none left (return false). +// __kmp_execute_tasks_template: Choose and execute tasks until either the +// condition is statisfied (return true) or there are none left (return false). +// // final_spin is TRUE if this is the spin at the release barrier. // thread_finished indicates whether the thread is finished executing all // the tasks it has on its deque, and is at the release barrier. @@ -2072,289 +2104,318 @@ __kmp_steal_task( kmp_info_t *victim, kmp_int32 gtid, kmp_task_team_t *task_team // spinner == NULL means only execute a single task and return. // checker is the value to check to terminate the spin. template -static inline int __kmp_execute_tasks_template(kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - kmp_task_team_t * task_team = thread->th.th_task_team; - kmp_thread_data_t * threads_data; - kmp_task_t * task; - kmp_info_t * other_thread; - kmp_taskdata_t * current_task = thread -> th.th_current_task; - volatile kmp_uint32 * unfinished_threads; - kmp_int32 nthreads, victim=-2, use_own_tasks=1, new_victim=0, tid=thread->th.th_info.ds.ds_tid; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( thread == __kmp_threads[ gtid ] ); - - if (task_team == NULL) return FALSE; - - KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d *thread_finished=%d\n", - gtid, final_spin, *thread_finished) ); - - thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); - KMP_DEBUG_ASSERT( threads_data != NULL ); - - nthreads = task_team -> tt.tt_nproc; - unfinished_threads = &(task_team -> tt.tt_unfinished_threads); +static inline int __kmp_execute_tasks_template( + kmp_info_t *thread, kmp_int32 gtid, C *flag, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + kmp_task_team_t *task_team = thread->th.th_task_team; + kmp_thread_data_t *threads_data; + kmp_task_t *task; + kmp_info_t *other_thread; + kmp_taskdata_t *current_task = thread->th.th_current_task; + volatile kmp_uint32 *unfinished_threads; + kmp_int32 nthreads, victim = -2, use_own_tasks = 1, new_victim = 0, + tid = thread->th.th_info.ds.ds_tid; + + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + KMP_DEBUG_ASSERT(thread == __kmp_threads[gtid]); + + if (task_team == NULL) + return FALSE; + + KA_TRACE(15, ("__kmp_execute_tasks_template(enter): T#%d final_spin=%d " + 
"*thread_finished=%d\n", + gtid, final_spin, *thread_finished)); + + thread->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; + threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data); + KMP_DEBUG_ASSERT(threads_data != NULL); + + nthreads = task_team->tt.tt_nproc; + unfinished_threads = &(task_team->tt.tt_unfinished_threads); #if OMP_45_ENABLED - KMP_DEBUG_ASSERT( nthreads > 1 || task_team->tt.tt_found_proxy_tasks); + KMP_DEBUG_ASSERT(nthreads > 1 || task_team->tt.tt_found_proxy_tasks); #else - KMP_DEBUG_ASSERT( nthreads > 1 ); + KMP_DEBUG_ASSERT(nthreads > 1); #endif - KMP_DEBUG_ASSERT( (int)(TCR_4(*unfinished_threads)) >= 0 ); - - while (1) { // Outer loop keeps trying to find tasks in case of single thread getting tasks from target constructs - while (1) { // Inner loop to find a task and execute it - task = NULL; - if (use_own_tasks) { // check on own queue first - task = __kmp_remove_my_task( thread, gtid, task_team, is_constrained ); + KMP_DEBUG_ASSERT((int)(TCR_4(*unfinished_threads)) >= 0); + + while (1) { // Outer loop keeps trying to find tasks in case of single thread + // getting tasks from target constructs + while (1) { // Inner loop to find a task and execute it + task = NULL; + if (use_own_tasks) { // check on own queue first + task = __kmp_remove_my_task(thread, gtid, task_team, is_constrained); + } + if ((task == NULL) && (nthreads > 1)) { // Steal a task + int asleep = 1; + use_own_tasks = 0; + // Try to steal from the last place I stole from successfully. + if (victim == -2) { // haven't stolen anything yet + victim = threads_data[tid].td.td_deque_last_stolen; + if (victim != + -1) // if we have a last stolen from victim, get the thread + other_thread = threads_data[victim].td.td_thr; + } + if (victim != -1) { // found last victim + asleep = 0; + } else if (!new_victim) { // no recent steals and we haven't already + // used a new victim; select a random thread + do { // Find a different thread to steal work from. + // Pick a random thread. Initial plan was to cycle through all the + // threads, and only return if we tried to steal from every thread, + // and failed. Arch says that's not such a great idea. + victim = __kmp_get_random(thread) % (nthreads - 1); + if (victim >= tid) { + ++victim; // Adjusts random distribution to exclude self } - if ((task == NULL) && (nthreads > 1)) { // Steal a task - int asleep = 1; - use_own_tasks = 0; - // Try to steal from the last place I stole from successfully. - if (victim == -2) { // haven't stolen anything yet - victim = threads_data[tid].td.td_deque_last_stolen; - if (victim != -1) // if we have a last stolen from victim, get the thread - other_thread = threads_data[victim].td.td_thr; - } - if (victim != -1) { // found last victim - asleep = 0; - } - else if (!new_victim) { // no recent steals and we haven't already used a new victim; select a random thread - do { // Find a different thread to steal work from. - // Pick a random thread. Initial plan was to cycle through all the threads, and only return if - // we tried to steal from every thread, and failed. Arch says that's not such a great idea. - victim = __kmp_get_random(thread) % (nthreads - 1); - if (victim >= tid) { - ++victim; // Adjusts random distribution to exclude self - } - // Found a potential victim - other_thread = threads_data[victim].td.td_thr; - // There is a slight chance that __kmp_enable_tasking() did not wake up all threads - // waiting at the barrier. If victim is sleeping, then wake it up. 
Since we were going to - // pay the cache miss penalty for referencing another thread's kmp_info_t struct anyway, - // the check shouldn't cost too much performance at this point. In extra barrier mode, tasks - // do not sleep at the separate tasking barrier, so this isn't a problem. - asleep = 0; - if ( ( __kmp_tasking_mode == tskm_task_teams ) && - (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) && - (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) { - asleep = 1; - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), other_thread->th.th_sleep_loc); - // A sleeping thread should not have any tasks on it's queue. There is a slight - // possibility that it resumes, steals a task from another thread, which spawns more - // tasks, all in the time that it takes this thread to check => don't write an assertion - // that the victim's queue is empty. Try stealing from a different thread. - } - } while (asleep); - } - - if (!asleep) { - // We have a victim to try to steal from - task = __kmp_steal_task(other_thread, gtid, task_team, unfinished_threads, thread_finished, is_constrained); - } - if (task != NULL) { // set last stolen to victim - if (threads_data[tid].td.td_deque_last_stolen != victim) { - threads_data[tid].td.td_deque_last_stolen = victim; - // The pre-refactored code did not try more than 1 successful new vicitm, - // unless the last one generated more local tasks; new_victim keeps track of this - new_victim = 1; - } - } - else { // No tasks found; unset last_stolen - KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1); - victim = -2; // no successful victim found - } + // Found a potential victim + other_thread = threads_data[victim].td.td_thr; + // There is a slight chance that __kmp_enable_tasking() did not wake + // up all threads waiting at the barrier. If victim is sleeping, + // then wake it up. Since we were going to pay the cache miss + // penalty for referencing another thread's kmp_info_t struct + // anyway, + // the check shouldn't cost too much performance at this point. In + // extra barrier mode, tasks do not sleep at the separate tasking + // barrier, so this isn't a problem. + asleep = 0; + if ((__kmp_tasking_mode == tskm_task_teams) && + (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) && + (TCR_PTR(other_thread->th.th_sleep_loc) != NULL)) { + asleep = 1; + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(other_thread), + other_thread->th.th_sleep_loc); + // A sleeping thread should not have any tasks on it's queue. + // There is a slight possibility that it resumes, steals a task + // from another thread, which spawns more tasks, all in the time + // that it takes this thread to check => don't write an assertion + // that the victim's queue is empty. Try stealing from a + // different thread. 
} + } while (asleep); + } - if (task == NULL) // break out of tasking loop - break; + if (!asleep) { + // We have a victim to try to steal from + task = __kmp_steal_task(other_thread, gtid, task_team, + unfinished_threads, thread_finished, + is_constrained); + } + if (task != NULL) { // set last stolen to victim + if (threads_data[tid].td.td_deque_last_stolen != victim) { + threads_data[tid].td.td_deque_last_stolen = victim; + // The pre-refactored code did not try more than 1 successful new + // vicitm, unless the last one generated more local tasks; + // new_victim keeps track of this + new_victim = 1; + } + } else { // No tasks found; unset last_stolen + KMP_CHECK_UPDATE(threads_data[tid].td.td_deque_last_stolen, -1); + victim = -2; // no successful victim found + } + } - // Found a task; execute it + if (task == NULL) // break out of tasking loop + break; + +// Found a task; execute it #if USE_ITT_BUILD && USE_ITT_NOTIFY - if ( __itt_sync_create_ptr || KMP_ITT_DEBUG ) { - if ( itt_sync_obj == NULL ) { // we are at fork barrier where we could not get the object reliably - itt_sync_obj = __kmp_itt_barrier_object( gtid, bs_forkjoin_barrier ); - } - __kmp_itt_task_starting( itt_sync_obj ); - } + if (__itt_sync_create_ptr || KMP_ITT_DEBUG) { + if (itt_sync_obj == NULL) { // we are at fork barrier where we could not + // get the object reliably + itt_sync_obj = __kmp_itt_barrier_object(gtid, bs_forkjoin_barrier); + } + __kmp_itt_task_starting(itt_sync_obj); + } #endif /* USE_ITT_BUILD && USE_ITT_NOTIFY */ - __kmp_invoke_task( gtid, task, current_task ); + __kmp_invoke_task(gtid, task, current_task); #if USE_ITT_BUILD - if ( itt_sync_obj != NULL ) __kmp_itt_task_finished( itt_sync_obj ); + if (itt_sync_obj != NULL) + __kmp_itt_task_finished(itt_sync_obj); #endif /* USE_ITT_BUILD */ - // If this thread is only partway through the barrier and the condition is met, then return now, - // so that the barrier gather/release pattern can proceed. If this thread is in the last spin loop - // in the barrier, waiting to be released, we know that the termination condition will not be - // satisified, so don't waste any cycles checking it. - if (flag == NULL || (!final_spin && flag->done_check())) { - KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) ); - return TRUE; - } - if (thread->th.th_task_team == NULL) { - break; - } - KMP_YIELD( __kmp_library == library_throughput ); // Yield before executing next task - // If execution of a stolen task results in more tasks being placed on our run queue, reset use_own_tasks - if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) { - KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned other tasks, restart\n", gtid)); - use_own_tasks = 1; - new_victim = 0; - } - } + // If this thread is only partway through the barrier and the condition is + // met, then return now, so that the barrier gather/release pattern can + // proceed. If this thread is in the last spin loop in the barrier, + // waiting to be released, we know that the termination condition will not + // be satisified, so don't waste any cycles checking it. 
+ if (flag == NULL || (!final_spin && flag->done_check())) { + KA_TRACE( + 15, + ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", + gtid)); + return TRUE; + } + if (thread->th.th_task_team == NULL) { + break; + } + // Yield before executing next task + KMP_YIELD(__kmp_library == library_throughput); + // If execution of a stolen task results in more tasks being placed on our + // run queue, reset use_own_tasks + if (!use_own_tasks && TCR_4(threads_data[tid].td.td_deque_ntasks) != 0) { + KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d stolen task spawned " + "other tasks, restart\n", + gtid)); + use_own_tasks = 1; + new_victim = 0; + } + } - // The task source has been exhausted. If in final spin loop of barrier, check if termination condition is satisfied. +// The task source has been exhausted. If in final spin loop of barrier, check +// if termination condition is satisfied. #if OMP_45_ENABLED - // The work queue may be empty but there might be proxy tasks still executing - if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0) + // The work queue may be empty but there might be proxy tasks still + // executing + if (final_spin && TCR_4(current_task->td_incomplete_child_tasks) == 0) #else - if (final_spin) + if (final_spin) #endif - { - // First, decrement the #unfinished threads, if that has not already been done. This decrement - // might be to the spin location, and result in the termination condition being satisfied. - if (! *thread_finished) { - kmp_uint32 count; - - count = KMP_TEST_THEN_DEC32( (kmp_int32 *)unfinished_threads ) - 1; - KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec unfinished_threads to %d task_team=%p\n", - gtid, count, task_team) ); - *thread_finished = TRUE; - } + { + // First, decrement the #unfinished threads, if that has not already been + // done. This decrement might be to the spin location, and result in the + // termination condition being satisfied. + if (!*thread_finished) { + kmp_uint32 count; - // It is now unsafe to reference thread->th.th_team !!! - // Decrementing task_team->tt.tt_unfinished_threads can allow the master thread to pass through - // the barrier, where it might reset each thread's th.th_team field for the next parallel region. - // If we can steal more work, we know that this has not happened yet. - if (flag != NULL && flag->done_check()) { - KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", gtid) ); - return TRUE; - } - } + count = KMP_TEST_THEN_DEC32((kmp_int32 *)unfinished_threads) - 1; + KA_TRACE(20, ("__kmp_execute_tasks_template: T#%d dec " + "unfinished_threads to %d task_team=%p\n", + gtid, count, task_team)); + *thread_finished = TRUE; + } - // If this thread's task team is NULL, master has recognized that there are no more tasks; bail out - if (thread->th.th_task_team == NULL) { - KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid) ); - return FALSE; - } + // It is now unsafe to reference thread->th.th_team !!! + // Decrementing task_team->tt.tt_unfinished_threads can allow the master + // thread to pass through the barrier, where it might reset each thread's + // th.th_team field for the next parallel region. If we can steal more + // work, we know that this has not happened yet. 
+ if (flag != NULL && flag->done_check()) { + KA_TRACE( + 15, + ("__kmp_execute_tasks_template: T#%d spin condition satisfied\n", + gtid)); + return TRUE; + } + } + + // If this thread's task team is NULL, master has recognized that there are + // no more tasks; bail out + if (thread->th.th_task_team == NULL) { + KA_TRACE(15, + ("__kmp_execute_tasks_template: T#%d no more tasks\n", gtid)); + return FALSE; + } #if OMP_45_ENABLED - // We could be getting tasks from target constructs; if this is the only thread, keep trying to execute - // tasks from own queue - if (nthreads == 1) - use_own_tasks = 1; - else + // We could be getting tasks from target constructs; if this is the only + // thread, keep trying to execute tasks from own queue + if (nthreads == 1) + use_own_tasks = 1; + else #endif - { - KA_TRACE(15, ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid) ); - return FALSE; - } + { + KA_TRACE(15, + ("__kmp_execute_tasks_template: T#%d can't find work\n", gtid)); + return FALSE; } + } } -int __kmp_execute_tasks_32(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +int __kmp_execute_tasks_32( + kmp_info_t *thread, kmp_int32 gtid, kmp_flag_32 *flag, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_template( + thread, gtid, flag, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } -int __kmp_execute_tasks_64(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +int __kmp_execute_tasks_64( + kmp_info_t *thread, kmp_int32 gtid, kmp_flag_64 *flag, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_template( + thread, gtid, flag, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } -int __kmp_execute_tasks_oncore(kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, - int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) -{ - return __kmp_execute_tasks_template(thread, gtid, flag, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); +int __kmp_execute_tasks_oncore( + kmp_info_t *thread, kmp_int32 gtid, kmp_flag_oncore *flag, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_template( + thread, gtid, flag, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); } - - -//----------------------------------------------------------------------------- // __kmp_enable_tasking: Allocate task team and resume threads sleeping at the // next barrier so they can assist in executing enqueued tasks. // First thread in allocates the task team atomically. 
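
Before the __kmp_enable_tasking hunk that follows, one detail of the stealing loop above is easy to lose in the reformatting noise: the victim is drawn from nthreads - 1 values and any value at or above the caller's own tid is shifted up by one, so a thread never picks itself. A minimal standalone sketch of that index mapping, with std::mt19937 standing in for the runtime's __kmp_get_random and the helper name pick_victim invented purely for illustration:

    #include <cassert>
    #include <cstdio>
    #include <random>

    // Pick a steal victim uniformly from [0, nthreads) excluding self_tid.
    // Mirrors the mapping in __kmp_execute_tasks_template: draw from
    // nthreads - 1 values, then shift any value >= self_tid up by one.
    static int pick_victim(int self_tid, int nthreads, std::mt19937 &rng) {
      assert(nthreads > 1);
      std::uniform_int_distribution<int> dist(0, nthreads - 2);
      int victim = dist(rng);
      if (victim >= self_tid)
        ++victim; // adjusts the distribution to exclude self
      return victim;
    }

    int main() {
      std::mt19937 rng(42); // stand-in for __kmp_get_random's per-thread state
      for (int i = 0; i < 8; ++i) {
        int v = pick_victim(/*self_tid=*/2, /*nthreads=*/4, rng);
        assert(v >= 0 && v < 4 && v != 2);
        std::printf("victim=%d\n", v);
      }
      return 0;
    }

Each remaining candidate keeps equal probability, which is why the code shifts indices rather than redrawing on a self-hit.
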
- -static void -__kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ) -{ - kmp_thread_data_t *threads_data; - int nthreads, i, is_init_thread; - - KA_TRACE( 10, ( "__kmp_enable_tasking(enter): T#%d\n", - __kmp_gtid_from_thread( this_thr ) ) ); - - KMP_DEBUG_ASSERT(task_team != NULL); - KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); - - nthreads = task_team->tt.tt_nproc; - KMP_DEBUG_ASSERT(nthreads > 0); - KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); - - // Allocate or increase the size of threads_data if necessary - is_init_thread = __kmp_realloc_task_threads_data( this_thr, task_team ); - - if (!is_init_thread) { - // Some other thread already set up the array. - KA_TRACE( 20, ( "__kmp_enable_tasking(exit): T#%d: threads array already set up.\n", - __kmp_gtid_from_thread( this_thr ) ) ); - return; - } - threads_data = (kmp_thread_data_t *)TCR_PTR(task_team -> tt.tt_threads_data); - KMP_DEBUG_ASSERT( threads_data != NULL ); - - if ( ( __kmp_tasking_mode == tskm_task_teams ) && - ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) ) - { - // Release any threads sleeping at the barrier, so that they can steal - // tasks and execute them. In extra barrier mode, tasks do not sleep - // at the separate tasking barrier, so this isn't a problem. - for (i = 0; i < nthreads; i++) { - volatile void *sleep_loc; - kmp_info_t *thread = threads_data[i].td.td_thr; - - if (i == this_thr->th.th_info.ds.ds_tid) { - continue; - } - // Since we haven't locked the thread's suspend mutex lock at this - // point, there is a small window where a thread might be putting - // itself to sleep, but hasn't set the th_sleep_loc field yet. - // To work around this, __kmp_execute_tasks_template() periodically checks - // see if other threads are sleeping (using the same random - // mechanism that is used for task stealing) and awakens them if - // they are. - if ( ( sleep_loc = TCR_PTR( thread -> th.th_sleep_loc) ) != NULL ) - { - KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d waking up thread T#%d\n", - __kmp_gtid_from_thread( this_thr ), - __kmp_gtid_from_thread( thread ) ) ); - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); - } - else { - KF_TRACE( 50, ( "__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", - __kmp_gtid_from_thread( this_thr ), - __kmp_gtid_from_thread( thread ) ) ); - } - } +static void __kmp_enable_tasking(kmp_task_team_t *task_team, + kmp_info_t *this_thr) { + kmp_thread_data_t *threads_data; + int nthreads, i, is_init_thread; + + KA_TRACE(10, ("__kmp_enable_tasking(enter): T#%d\n", + __kmp_gtid_from_thread(this_thr))); + + KMP_DEBUG_ASSERT(task_team != NULL); + KMP_DEBUG_ASSERT(this_thr->th.th_team != NULL); + + nthreads = task_team->tt.tt_nproc; + KMP_DEBUG_ASSERT(nthreads > 0); + KMP_DEBUG_ASSERT(nthreads == this_thr->th.th_team->t.t_nproc); + + // Allocate or increase the size of threads_data if necessary + is_init_thread = __kmp_realloc_task_threads_data(this_thr, task_team); + + if (!is_init_thread) { + // Some other thread already set up the array. + KA_TRACE( + 20, + ("__kmp_enable_tasking(exit): T#%d: threads array already set up.\n", + __kmp_gtid_from_thread(this_thr))); + return; + } + threads_data = (kmp_thread_data_t *)TCR_PTR(task_team->tt.tt_threads_data); + KMP_DEBUG_ASSERT(threads_data != NULL); + + if ((__kmp_tasking_mode == tskm_task_teams) && + (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME)) { + // Release any threads sleeping at the barrier, so that they can steal + // tasks and execute them. 
In extra barrier mode, tasks do not sleep + // at the separate tasking barrier, so this isn't a problem. + for (i = 0; i < nthreads; i++) { + volatile void *sleep_loc; + kmp_info_t *thread = threads_data[i].td.td_thr; + + if (i == this_thr->th.th_info.ds.ds_tid) { + continue; + } + // Since we haven't locked the thread's suspend mutex lock at this + // point, there is a small window where a thread might be putting + // itself to sleep, but hasn't set the th_sleep_loc field yet. + // To work around this, __kmp_execute_tasks_template() periodically checks + // see if other threads are sleeping (using the same random mechanism that + // is used for task stealing) and awakens them if they are. + if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) { + KF_TRACE(50, ("__kmp_enable_tasking: T#%d waking up thread T#%d\n", + __kmp_gtid_from_thread(this_thr), + __kmp_gtid_from_thread(thread))); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); + } else { + KF_TRACE(50, ("__kmp_enable_tasking: T#%d don't wake up thread T#%d\n", + __kmp_gtid_from_thread(this_thr), + __kmp_gtid_from_thread(thread))); + } } + } - KA_TRACE( 10, ( "__kmp_enable_tasking(exit): T#%d\n", - __kmp_gtid_from_thread( this_thr ) ) ); + KA_TRACE(10, ("__kmp_enable_tasking(exit): T#%d\n", + __kmp_gtid_from_thread(this_thr))); } - -/* ------------------------------------------------------------------------ */ /* // TODO: Check the comment consistency * Utility routines for "task teams". A task team (kmp_task_t) is kind of * like a shadow of the kmp_team_t data struct, with a different lifetime. @@ -2389,685 +2450,683 @@ __kmp_enable_tasking( kmp_task_team_t *task_team, kmp_info_t *this_thr ) * barriers, when no explicit tasks were spawned (pushed, actually). */ - -static kmp_task_team_t *__kmp_free_task_teams = NULL; // Free list for task_team data structures +static kmp_task_team_t *__kmp_free_task_teams = + NULL; // Free list for task_team data structures // Lock for task team data structures -static kmp_bootstrap_lock_t __kmp_task_team_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( __kmp_task_team_lock ); - +static kmp_bootstrap_lock_t __kmp_task_team_lock = + KMP_BOOTSTRAP_LOCK_INITIALIZER(__kmp_task_team_lock); -//------------------------------------------------------------------------------ // __kmp_alloc_task_deque: // Allocates a task deque for a particular thread, and initialize the necessary // data structures relating to the deque. This only happens once per thread -// per task team since task teams are recycled. -// No lock is needed during allocation since each thread allocates its own -// deque. - -static void -__kmp_alloc_task_deque( kmp_info_t *thread, kmp_thread_data_t *thread_data ) -{ - __kmp_init_bootstrap_lock( & thread_data -> td.td_deque_lock ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque == NULL ); - - // Initialize last stolen task field to "none" - thread_data -> td.td_deque_last_stolen = -1; - - KMP_DEBUG_ASSERT( TCR_4(thread_data -> td.td_deque_ntasks) == 0 ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque_head == 0 ); - KMP_DEBUG_ASSERT( thread_data -> td.td_deque_tail == 0 ); - - KE_TRACE( 10, ( "__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n", - __kmp_gtid_from_thread( thread ), INITIAL_TASK_DEQUE_SIZE, thread_data ) ); - // Allocate space for task deque, and zero the deque - // Cannot use __kmp_thread_calloc() because threads not around for - // kmp_reap_task_team( ). 
- thread_data -> td.td_deque = (kmp_taskdata_t **) - __kmp_allocate( INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *)); - thread_data -> td.td_deque_size = INITIAL_TASK_DEQUE_SIZE; +// per task team since task teams are recycled. No lock is needed during +// allocation since each thread allocates its own deque. +static void __kmp_alloc_task_deque(kmp_info_t *thread, + kmp_thread_data_t *thread_data) { + __kmp_init_bootstrap_lock(&thread_data->td.td_deque_lock); + KMP_DEBUG_ASSERT(thread_data->td.td_deque == NULL); + + // Initialize last stolen task field to "none" + thread_data->td.td_deque_last_stolen = -1; + + KMP_DEBUG_ASSERT(TCR_4(thread_data->td.td_deque_ntasks) == 0); + KMP_DEBUG_ASSERT(thread_data->td.td_deque_head == 0); + KMP_DEBUG_ASSERT(thread_data->td.td_deque_tail == 0); + + KE_TRACE( + 10, + ("__kmp_alloc_task_deque: T#%d allocating deque[%d] for thread_data %p\n", + __kmp_gtid_from_thread(thread), INITIAL_TASK_DEQUE_SIZE, thread_data)); + // Allocate space for task deque, and zero the deque + // Cannot use __kmp_thread_calloc() because threads not around for + // kmp_reap_task_team( ). + thread_data->td.td_deque = (kmp_taskdata_t **)__kmp_allocate( + INITIAL_TASK_DEQUE_SIZE * sizeof(kmp_taskdata_t *)); + thread_data->td.td_deque_size = INITIAL_TASK_DEQUE_SIZE; } -//------------------------------------------------------------------------------ // __kmp_realloc_task_deque: -// Re-allocates a task deque for a particular thread, copies the content from the old deque -// and adjusts the necessary data structures relating to the deque. -// This operation must be done with a the deque_lock being held - -static void __kmp_realloc_task_deque ( kmp_info_t *thread, kmp_thread_data_t *thread_data ) -{ - kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td); - kmp_int32 new_size = 2 * size; - - KE_TRACE( 10, ( "__kmp_realloc_task_deque: T#%d reallocating deque[from %d to %d] for thread_data %p\n", - __kmp_gtid_from_thread( thread ), size, new_size, thread_data ) ); - - kmp_taskdata_t ** new_deque = (kmp_taskdata_t **) __kmp_allocate( new_size * sizeof(kmp_taskdata_t *)); - - int i,j; - for ( i = thread_data->td.td_deque_head, j = 0; j < size; i = (i+1) & TASK_DEQUE_MASK(thread_data->td), j++ ) - new_deque[j] = thread_data->td.td_deque[i]; - - __kmp_free(thread_data->td.td_deque); - - thread_data -> td.td_deque_head = 0; - thread_data -> td.td_deque_tail = size; - thread_data -> td.td_deque = new_deque; - thread_data -> td.td_deque_size = new_size; +// Re-allocates a task deque for a particular thread, copies the content from +// the old deque and adjusts the necessary data structures relating to the +// deque. 
This operation must be done with the deque_lock being held
+static void __kmp_realloc_task_deque(kmp_info_t *thread,
+                                     kmp_thread_data_t *thread_data) {
+  kmp_int32 size = TASK_DEQUE_SIZE(thread_data->td);
+  kmp_int32 new_size = 2 * size;
+
+  KE_TRACE(10, ("__kmp_realloc_task_deque: T#%d reallocating deque[from %d to "
+                "%d] for thread_data %p\n",
+                __kmp_gtid_from_thread(thread), size, new_size, thread_data));
+
+  kmp_taskdata_t **new_deque =
+      (kmp_taskdata_t **)__kmp_allocate(new_size * sizeof(kmp_taskdata_t *));
+
+  int i, j;
+  for (i = thread_data->td.td_deque_head, j = 0; j < size;
+       i = (i + 1) & TASK_DEQUE_MASK(thread_data->td), j++)
+    new_deque[j] = thread_data->td.td_deque[i];
+
+  __kmp_free(thread_data->td.td_deque);
+
+  thread_data->td.td_deque_head = 0;
+  thread_data->td.td_deque_tail = size;
+  thread_data->td.td_deque = new_deque;
+  thread_data->td.td_deque_size = new_size;
 }
-//------------------------------------------------------------------------------
 // __kmp_free_task_deque:
-// Deallocates a task deque for a particular thread.
-// Happens at library deallocation so don't need to reset all thread data fields.
+// Deallocates a task deque for a particular thread. Happens at library
+// deallocation so don't need to reset all thread data fields.
+static void __kmp_free_task_deque(kmp_thread_data_t *thread_data) {
+  __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock);
-static void
-__kmp_free_task_deque( kmp_thread_data_t *thread_data )
-{
-    __kmp_acquire_bootstrap_lock( & thread_data -> td.td_deque_lock );
-
-    if ( thread_data -> td.td_deque != NULL ) {
-        TCW_4(thread_data -> td.td_deque_ntasks, 0);
-        __kmp_free( thread_data -> td.td_deque );
-        thread_data -> td.td_deque = NULL;
-    }
-    __kmp_release_bootstrap_lock( & thread_data -> td.td_deque_lock );
+  if (thread_data->td.td_deque != NULL) {
+    TCW_4(thread_data->td.td_deque_ntasks, 0);
+    __kmp_free(thread_data->td.td_deque);
+    thread_data->td.td_deque = NULL;
+  }
+  __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock);
 #ifdef BUILD_TIED_TASK_STACK
-    // GEH: Figure out what to do here for td_susp_tied_tasks
-    if ( thread_data -> td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY ) {
-        __kmp_free_task_stack( __kmp_thread_from_gtid( gtid ), thread_data );
-    }
+  // GEH: Figure out what to do here for td_susp_tied_tasks
+  if (thread_data->td.td_susp_tied_tasks.ts_entries != TASK_STACK_EMPTY) {
+    __kmp_free_task_stack(__kmp_thread_from_gtid(gtid), thread_data);
+  }
 #endif // BUILD_TIED_TASK_STACK
 }
-
-//------------------------------------------------------------------------------
 // __kmp_realloc_task_threads_data:
-// Allocates a threads_data array for a task team, either by allocating an initial
-// array or enlarging an existing array. Only the first thread to get the lock
-// allocs or enlarges the array and re-initializes the array eleemnts.
+// Allocates a threads_data array for a task team, either by allocating an
+// initial array or enlarging an existing array. Only the first thread to get
+// the lock allocs or enlarges the array and re-initializes the array elements.
 // That thread returns "TRUE", the rest return "FALSE".
 // Assumes that the new array size is given by task_team -> tt.tt_nproc.
 // The current size is given by task_team -> tt.tt_max_threads.
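
Before the threads_data growth below, the reallocation path above is worth a closer look: it doubles the deque and re-linearizes the ring so the surviving entries start at index 0, with the tail landing at the old capacity (the deque is only grown when full). A small self-contained model of that copy, using an invented toy_deque with int payloads instead of kmp_taskdata_t pointers and no bootstrap lock taken:

    #include <cassert>
    #include <cstdlib>

    // Invented stand-in for the deque fields touched by __kmp_realloc_task_deque.
    struct toy_deque {
      int *buf;
      int size; // always a power of two, so (size - 1) serves as the index mask
      int head; // steal end
      int tail; // push/pop end
    };

    // Double the storage and re-linearize: entry j of the new buffer is the j-th
    // entry counting from the old head, wrapping with the old mask. Growth only
    // happens when the deque is full, so exactly 'size' entries are copied and
    // the new tail is the old capacity.
    static void toy_realloc_deque(toy_deque *d) {
      int new_size = 2 * d->size;
      int *new_buf = (int *)std::malloc(new_size * sizeof(int));
      for (int i = d->head, j = 0; j < d->size; i = (i + 1) & (d->size - 1), j++)
        new_buf[j] = d->buf[i];
      std::free(d->buf);
      d->head = 0;
      d->tail = d->size;
      d->buf = new_buf;
      d->size = new_size;
    }

    int main() {
      toy_deque d;
      d.size = 4;
      d.buf = (int *)std::malloc(d.size * sizeof(int));
      d.head = 2; // full deque whose logical order wraps around the end
      d.tail = 2;
      d.buf[2] = 10; d.buf[3] = 11; d.buf[0] = 12; d.buf[1] = 13;
      toy_realloc_deque(&d);
      assert(d.size == 8 && d.head == 0 && d.tail == 4);
      for (int j = 0; j < 4; j++)
        assert(d.buf[j] == 10 + j); // entries are dense and in logical order
      std::free(d.buf);
      return 0;
    }

Keeping the capacity a power of two is what lets both the runtime and this sketch wrap indices with a mask instead of a modulo.
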
- -static int -__kmp_realloc_task_threads_data( kmp_info_t *thread, kmp_task_team_t *task_team ) -{ - kmp_thread_data_t ** threads_data_p; - kmp_int32 nthreads, maxthreads; - int is_init_thread = FALSE; - - if ( TCR_4(task_team -> tt.tt_found_tasks) ) { - // Already reallocated and initialized. - return FALSE; - } - - threads_data_p = & task_team -> tt.tt_threads_data; - nthreads = task_team -> tt.tt_nproc; - maxthreads = task_team -> tt.tt_max_threads; - - // All threads must lock when they encounter the first task of the implicit task - // region to make sure threads_data fields are (re)initialized before used. - __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - - if ( ! TCR_4(task_team -> tt.tt_found_tasks) ) { - // first thread to enable tasking - kmp_team_t *team = thread -> th.th_team; - int i; - - is_init_thread = TRUE; - if ( maxthreads < nthreads ) { - - if ( *threads_data_p != NULL ) { - kmp_thread_data_t *old_data = *threads_data_p; - kmp_thread_data_t *new_data = NULL; - - KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d reallocating " - "threads data for task_team %p, new_size = %d, old_size = %d\n", - __kmp_gtid_from_thread( thread ), task_team, - nthreads, maxthreads ) ); - // Reallocate threads_data to have more elements than current array - // Cannot use __kmp_thread_realloc() because threads not around for - // kmp_reap_task_team( ). Note all new array entries are initialized - // to zero by __kmp_allocate(). - new_data = (kmp_thread_data_t *) - __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); - // copy old data to new data - KMP_MEMCPY_S( (void *) new_data, nthreads * sizeof(kmp_thread_data_t), - (void *) old_data, - maxthreads * sizeof(kmp_taskdata_t *) ); +static int __kmp_realloc_task_threads_data(kmp_info_t *thread, + kmp_task_team_t *task_team) { + kmp_thread_data_t **threads_data_p; + kmp_int32 nthreads, maxthreads; + int is_init_thread = FALSE; + + if (TCR_4(task_team->tt.tt_found_tasks)) { + // Already reallocated and initialized. + return FALSE; + } + + threads_data_p = &task_team->tt.tt_threads_data; + nthreads = task_team->tt.tt_nproc; + maxthreads = task_team->tt.tt_max_threads; + + // All threads must lock when they encounter the first task of the implicit + // task region to make sure threads_data fields are (re)initialized before + // used. + __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); + + if (!TCR_4(task_team->tt.tt_found_tasks)) { + // first thread to enable tasking + kmp_team_t *team = thread->th.th_team; + int i; + + is_init_thread = TRUE; + if (maxthreads < nthreads) { + + if (*threads_data_p != NULL) { + kmp_thread_data_t *old_data = *threads_data_p; + kmp_thread_data_t *new_data = NULL; + + KE_TRACE( + 10, + ("__kmp_realloc_task_threads_data: T#%d reallocating " + "threads data for task_team %p, new_size = %d, old_size = %d\n", + __kmp_gtid_from_thread(thread), task_team, nthreads, maxthreads)); + // Reallocate threads_data to have more elements than current array + // Cannot use __kmp_thread_realloc() because threads not around for + // kmp_reap_task_team( ). Note all new array entries are initialized + // to zero by __kmp_allocate(). 
+ new_data = (kmp_thread_data_t *)__kmp_allocate( + nthreads * sizeof(kmp_thread_data_t)); + // copy old data to new data + KMP_MEMCPY_S((void *)new_data, nthreads * sizeof(kmp_thread_data_t), + (void *)old_data, maxthreads * sizeof(kmp_taskdata_t *)); #ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = maxthreads; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); - } + // GEH: Figure out if this is the right thing to do + for (i = maxthreads; i < nthreads; i++) { + kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; + __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); + } #endif // BUILD_TIED_TASK_STACK - // Install the new data and free the old data - (*threads_data_p) = new_data; - __kmp_free( old_data ); - } - else { - KE_TRACE( 10, ( "__kmp_realloc_task_threads_data: T#%d allocating " - "threads data for task_team %p, size = %d\n", - __kmp_gtid_from_thread( thread ), task_team, nthreads ) ); - // Make the initial allocate for threads_data array, and zero entries - // Cannot use __kmp_thread_calloc() because threads not around for - // kmp_reap_task_team( ). - ANNOTATE_IGNORE_WRITES_BEGIN(); - *threads_data_p = (kmp_thread_data_t *) - __kmp_allocate( nthreads * sizeof(kmp_thread_data_t) ); - ANNOTATE_IGNORE_WRITES_END(); + // Install the new data and free the old data + (*threads_data_p) = new_data; + __kmp_free(old_data); + } else { + KE_TRACE(10, ("__kmp_realloc_task_threads_data: T#%d allocating " + "threads data for task_team %p, size = %d\n", + __kmp_gtid_from_thread(thread), task_team, nthreads)); + // Make the initial allocate for threads_data array, and zero entries + // Cannot use __kmp_thread_calloc() because threads not around for + // kmp_reap_task_team( ). + ANNOTATE_IGNORE_WRITES_BEGIN(); + *threads_data_p = (kmp_thread_data_t *)__kmp_allocate( + nthreads * sizeof(kmp_thread_data_t)); + ANNOTATE_IGNORE_WRITES_END(); #ifdef BUILD_TIED_TASK_STACK - // GEH: Figure out if this is the right thing to do - for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - __kmp_init_task_stack( __kmp_gtid_from_thread( thread ), thread_data ); - } -#endif // BUILD_TIED_TASK_STACK - } - task_team -> tt.tt_max_threads = nthreads; - } - else { - // If array has (more than) enough elements, go ahead and use it - KMP_DEBUG_ASSERT( *threads_data_p != NULL ); - } - - // initialize threads_data pointers back to thread_info structures + // GEH: Figure out if this is the right thing to do for (i = 0; i < nthreads; i++) { - kmp_thread_data_t *thread_data = & (*threads_data_p)[i]; - thread_data -> td.td_thr = team -> t.t_threads[i]; - - if ( thread_data -> td.td_deque_last_stolen >= nthreads) { - // The last stolen field survives across teams / barrier, and the number - // of threads may have changed. It's possible (likely?) that a new - // parallel region will exhibit the same behavior as the previous region. 
- thread_data -> td.td_deque_last_stolen = -1; - } + kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; + __kmp_init_task_stack(__kmp_gtid_from_thread(thread), thread_data); } +#endif // BUILD_TIED_TASK_STACK + } + task_team->tt.tt_max_threads = nthreads; + } else { + // If array has (more than) enough elements, go ahead and use it + KMP_DEBUG_ASSERT(*threads_data_p != NULL); + } + + // initialize threads_data pointers back to thread_info structures + for (i = 0; i < nthreads; i++) { + kmp_thread_data_t *thread_data = &(*threads_data_p)[i]; + thread_data->td.td_thr = team->t.t_threads[i]; - KMP_MB(); - TCW_SYNC_4(task_team -> tt.tt_found_tasks, TRUE); + if (thread_data->td.td_deque_last_stolen >= nthreads) { + // The last stolen field survives across teams / barrier, and the number + // of threads may have changed. It's possible (likely?) that a new + // parallel region will exhibit the same behavior as previous region. + thread_data->td.td_deque_last_stolen = -1; + } } - __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - return is_init_thread; -} + KMP_MB(); + TCW_SYNC_4(task_team->tt.tt_found_tasks, TRUE); + } + __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); + return is_init_thread; +} -//------------------------------------------------------------------------------ // __kmp_free_task_threads_data: // Deallocates a threads_data array for a task team, including any attached // tasking deques. Only occurs at library shutdown. - -static void -__kmp_free_task_threads_data( kmp_task_team_t *task_team ) -{ - __kmp_acquire_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - if ( task_team -> tt.tt_threads_data != NULL ) { - int i; - for (i = 0; i < task_team->tt.tt_max_threads; i++ ) { - __kmp_free_task_deque( & task_team -> tt.tt_threads_data[i] ); - } - __kmp_free( task_team -> tt.tt_threads_data ); - task_team -> tt.tt_threads_data = NULL; - } - __kmp_release_bootstrap_lock( & task_team -> tt.tt_threads_lock ); +static void __kmp_free_task_threads_data(kmp_task_team_t *task_team) { + __kmp_acquire_bootstrap_lock(&task_team->tt.tt_threads_lock); + if (task_team->tt.tt_threads_data != NULL) { + int i; + for (i = 0; i < task_team->tt.tt_max_threads; i++) { + __kmp_free_task_deque(&task_team->tt.tt_threads_data[i]); + } + __kmp_free(task_team->tt.tt_threads_data); + task_team->tt.tt_threads_data = NULL; + } + __kmp_release_bootstrap_lock(&task_team->tt.tt_threads_lock); } - -//------------------------------------------------------------------------------ // __kmp_allocate_task_team: // Allocates a task team associated with a specific team, taking it from -// the global task team free list if possible. Also initializes data structures. - -static kmp_task_team_t * -__kmp_allocate_task_team( kmp_info_t *thread, kmp_team_t *team ) -{ - kmp_task_team_t *task_team = NULL; - int nthreads; - - KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d entering; team = %p\n", - (thread ? 
__kmp_gtid_from_thread( thread ) : -1), team ) ); - - if (TCR_PTR(__kmp_free_task_teams) != NULL) { - // Take a task team from the task team pool - __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); - if (__kmp_free_task_teams != NULL) { - task_team = __kmp_free_task_teams; - TCW_PTR(__kmp_free_task_teams, task_team -> tt.tt_next); - task_team -> tt.tt_next = NULL; - } - __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); - } - - if (task_team == NULL) { - KE_TRACE( 10, ( "__kmp_allocate_task_team: T#%d allocating " - "task team for team %p\n", - __kmp_gtid_from_thread( thread ), team ) ); - // Allocate a new task team if one is not available. - // Cannot use __kmp_thread_malloc() because threads not around for - // kmp_reap_task_team( ). - task_team = (kmp_task_team_t *) __kmp_allocate( sizeof(kmp_task_team_t) ); - __kmp_init_bootstrap_lock( & task_team -> tt.tt_threads_lock ); - //task_team -> tt.tt_threads_data = NULL; // AC: __kmp_allocate zeroes returned memory - //task_team -> tt.tt_max_threads = 0; - //task_team -> tt.tt_next = NULL; - } - - TCW_4(task_team -> tt.tt_found_tasks, FALSE); +// the global task team free list if possible. Also initializes data +// structures. +static kmp_task_team_t *__kmp_allocate_task_team(kmp_info_t *thread, + kmp_team_t *team) { + kmp_task_team_t *task_team = NULL; + int nthreads; + + KA_TRACE(20, ("__kmp_allocate_task_team: T#%d entering; team = %p\n", + (thread ? __kmp_gtid_from_thread(thread) : -1), team)); + + if (TCR_PTR(__kmp_free_task_teams) != NULL) { + // Take a task team from the task team pool + __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); + if (__kmp_free_task_teams != NULL) { + task_team = __kmp_free_task_teams; + TCW_PTR(__kmp_free_task_teams, task_team->tt.tt_next); + task_team->tt.tt_next = NULL; + } + __kmp_release_bootstrap_lock(&__kmp_task_team_lock); + } + + if (task_team == NULL) { + KE_TRACE(10, ("__kmp_allocate_task_team: T#%d allocating " + "task team for team %p\n", + __kmp_gtid_from_thread(thread), team)); + // Allocate a new task team if one is not available. + // Cannot use __kmp_thread_malloc() because threads not around for + // kmp_reap_task_team( ). + task_team = (kmp_task_team_t *)__kmp_allocate(sizeof(kmp_task_team_t)); + __kmp_init_bootstrap_lock(&task_team->tt.tt_threads_lock); + // AC: __kmp_allocate zeroes returned memory + // task_team -> tt.tt_threads_data = NULL; + // task_team -> tt.tt_max_threads = 0; + // task_team -> tt.tt_next = NULL; + } + + TCW_4(task_team->tt.tt_found_tasks, FALSE); #if OMP_45_ENABLED - TCW_4(task_team -> tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); #endif - task_team -> tt.tt_nproc = nthreads = team->t.t_nproc; + task_team->tt.tt_nproc = nthreads = team->t.t_nproc; - TCW_4( task_team -> tt.tt_unfinished_threads, nthreads ); - TCW_4( task_team -> tt.tt_active, TRUE ); + TCW_4(task_team->tt.tt_unfinished_threads, nthreads); + TCW_4(task_team->tt.tt_active, TRUE); - KA_TRACE( 20, ( "__kmp_allocate_task_team: T#%d exiting; task_team = %p unfinished_threads init'd to %d\n", - (thread ? __kmp_gtid_from_thread( thread ) : -1), task_team, task_team -> tt.tt_unfinished_threads) ); - return task_team; + KA_TRACE(20, ("__kmp_allocate_task_team: T#%d exiting; task_team = %p " + "unfinished_threads init'd to %d\n", + (thread ? 
__kmp_gtid_from_thread(thread) : -1), task_team, + task_team->tt.tt_unfinished_threads)); + return task_team; } - -//------------------------------------------------------------------------------ // __kmp_free_task_team: // Frees the task team associated with a specific thread, and adds it // to the global task team free list. +void __kmp_free_task_team(kmp_info_t *thread, kmp_task_team_t *task_team) { + KA_TRACE(20, ("__kmp_free_task_team: T#%d task_team = %p\n", + thread ? __kmp_gtid_from_thread(thread) : -1, task_team)); -void -__kmp_free_task_team( kmp_info_t *thread, kmp_task_team_t *task_team ) -{ - KA_TRACE( 20, ( "__kmp_free_task_team: T#%d task_team = %p\n", - thread ? __kmp_gtid_from_thread( thread ) : -1, task_team ) ); + // Put task team back on free list + __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); - // Put task team back on free list - __kmp_acquire_bootstrap_lock( & __kmp_task_team_lock ); + KMP_DEBUG_ASSERT(task_team->tt.tt_next == NULL); + task_team->tt.tt_next = __kmp_free_task_teams; + TCW_PTR(__kmp_free_task_teams, task_team); - KMP_DEBUG_ASSERT( task_team -> tt.tt_next == NULL ); - task_team -> tt.tt_next = __kmp_free_task_teams; - TCW_PTR(__kmp_free_task_teams, task_team); - - __kmp_release_bootstrap_lock( & __kmp_task_team_lock ); + __kmp_release_bootstrap_lock(&__kmp_task_team_lock); } - -//------------------------------------------------------------------------------ // __kmp_reap_task_teams: // Free all the task teams on the task team free list. // Should only be done during library shutdown. -// Cannot do anything that needs a thread structure or gtid since they are already gone. - -void -__kmp_reap_task_teams( void ) -{ - kmp_task_team_t *task_team; - - if ( TCR_PTR(__kmp_free_task_teams) != NULL ) { - // Free all task_teams on the free list - __kmp_acquire_bootstrap_lock( &__kmp_task_team_lock ); - while ( ( task_team = __kmp_free_task_teams ) != NULL ) { - __kmp_free_task_teams = task_team -> tt.tt_next; - task_team -> tt.tt_next = NULL; - - // Free threads_data if necessary - if ( task_team -> tt.tt_threads_data != NULL ) { - __kmp_free_task_threads_data( task_team ); - } - __kmp_free( task_team ); - } - __kmp_release_bootstrap_lock( &__kmp_task_team_lock ); +// Cannot do anything that needs a thread structure or gtid since they are +// already gone. +void __kmp_reap_task_teams(void) { + kmp_task_team_t *task_team; + + if (TCR_PTR(__kmp_free_task_teams) != NULL) { + // Free all task_teams on the free list + __kmp_acquire_bootstrap_lock(&__kmp_task_team_lock); + while ((task_team = __kmp_free_task_teams) != NULL) { + __kmp_free_task_teams = task_team->tt.tt_next; + task_team->tt.tt_next = NULL; + + // Free threads_data if necessary + if (task_team->tt.tt_threads_data != NULL) { + __kmp_free_task_threads_data(task_team); + } + __kmp_free(task_team); } + __kmp_release_bootstrap_lock(&__kmp_task_team_lock); + } } -//------------------------------------------------------------------------------ // __kmp_wait_to_unref_task_teams: // Some threads could still be in the fork barrier release code, possibly // trying to steal tasks. Wait for each thread to unreference its task team. -// -void -__kmp_wait_to_unref_task_teams(void) -{ - kmp_info_t *thread; - kmp_uint32 spins; - int done; - - KMP_INIT_YIELD( spins ); - - for (;;) { - done = TRUE; - - // TODO: GEH - this may be is wrong because some sync would be necessary - // in case threads are added to the pool during the traversal. 
- // Need to verify that lock for thread pool is held when calling - // this routine. - for (thread = (kmp_info_t *)__kmp_thread_pool; - thread != NULL; - thread = thread->th.th_next_pool) - { +void __kmp_wait_to_unref_task_teams(void) { + kmp_info_t *thread; + kmp_uint32 spins; + int done; + + KMP_INIT_YIELD(spins); + + for (;;) { + done = TRUE; + + // TODO: GEH - this may be is wrong because some sync would be necessary + // in case threads are added to the pool during the traversal. Need to + // verify that lock for thread pool is held when calling this routine. + for (thread = (kmp_info_t *)__kmp_thread_pool; thread != NULL; + thread = thread->th.th_next_pool) { #if KMP_OS_WINDOWS - DWORD exit_val; + DWORD exit_val; #endif - if ( TCR_PTR(thread->th.th_task_team) == NULL ) { - KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", - __kmp_gtid_from_thread( thread ) ) ); - continue; - } + if (TCR_PTR(thread->th.th_task_team) == NULL) { + KA_TRACE(10, ("__kmp_wait_to_unref_task_team: T#%d task_team == NULL\n", + __kmp_gtid_from_thread(thread))); + continue; + } #if KMP_OS_WINDOWS - // TODO: GEH - add this check for Linux* OS / OS X* as well? - if (!__kmp_is_thread_alive(thread, &exit_val)) { - thread->th.th_task_team = NULL; - continue; - } + // TODO: GEH - add this check for Linux* OS / OS X* as well? + if (!__kmp_is_thread_alive(thread, &exit_val)) { + thread->th.th_task_team = NULL; + continue; + } #endif - done = FALSE; // Because th_task_team pointer is not NULL for this thread - - KA_TRACE( 10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to unreference task_team\n", - __kmp_gtid_from_thread( thread ) ) ); - - if ( __kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ) { - volatile void *sleep_loc; - // If the thread is sleeping, awaken it. - if ( ( sleep_loc = TCR_PTR( thread->th.th_sleep_loc) ) != NULL ) { - KA_TRACE( 10, ( "__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", - __kmp_gtid_from_thread( thread ), __kmp_gtid_from_thread( thread ) ) ); - __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); - } - } - } - if (done) { - break; + done = FALSE; // Because th_task_team pointer is not NULL for this thread + + KA_TRACE(10, ("__kmp_wait_to_unref_task_team: Waiting for T#%d to " + "unreference task_team\n", + __kmp_gtid_from_thread(thread))); + + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + volatile void *sleep_loc; + // If the thread is sleeping, awaken it. + if ((sleep_loc = TCR_PTR(thread->th.th_sleep_loc)) != NULL) { + KA_TRACE( + 10, + ("__kmp_wait_to_unref_task_team: T#%d waking up thread T#%d\n", + __kmp_gtid_from_thread(thread), __kmp_gtid_from_thread(thread))); + __kmp_null_resume_wrapper(__kmp_gtid_from_thread(thread), sleep_loc); } - - // If we are oversubscribed, - // or have waited a bit (and library mode is throughput), yield. - // Pause is in the following code. - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); // Yields only if KMP_LIBRARY=throughput + } + } + if (done) { + break; } -} + // If we are oversubscribed, or have waited a bit (and library mode is + // throughput), yield. Pause is in the following code. + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_YIELD_SPIN(spins); // Yields only if KMP_LIBRARY=throughput + } +} -//------------------------------------------------------------------------------ // __kmp_task_team_setup: Create a task_team for the current team, but use // an already created, unused one if it already exists. 
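
Before the __kmp_task_team_setup hunk below, the allocate/free/reap trio above recycles kmp_task_team_t structures through an intrusive, lock-protected free list rather than releasing them to the heap at every barrier. A rough sketch of that pattern under invented toy_* names, with std::mutex standing in for the runtime's bootstrap lock (the real code also does an unlocked TCR_PTR peek before taking the lock):

    #include <cassert>
    #include <mutex>

    // Invented model of the recycling pattern: an intrusive singly-linked
    // free list guarded by a lock.
    struct toy_task_team {
      toy_task_team *next = nullptr;
    };

    static toy_task_team *g_free_list = nullptr;
    static std::mutex g_free_list_lock;

    // Take a recycled team if one is available, otherwise allocate a fresh one.
    static toy_task_team *toy_allocate_team() {
      toy_task_team *tt = nullptr;
      {
        std::lock_guard<std::mutex> guard(g_free_list_lock);
        if (g_free_list != nullptr) {
          tt = g_free_list;
          g_free_list = tt->next;
          tt->next = nullptr;
        }
      }
      if (tt == nullptr)
        tt = new toy_task_team();
      return tt;
    }

    // Push a team back onto the free list instead of deleting it.
    static void toy_free_team(toy_task_team *tt) {
      std::lock_guard<std::mutex> guard(g_free_list_lock);
      assert(tt->next == nullptr);
      tt->next = g_free_list;
      g_free_list = tt;
    }

    // Drain the free list at shutdown, as __kmp_reap_task_teams does.
    static void toy_reap_teams() {
      std::lock_guard<std::mutex> guard(g_free_list_lock);
      while (g_free_list != nullptr) {
        toy_task_team *tt = g_free_list;
        g_free_list = tt->next;
        delete tt;
      }
    }

    int main() {
      toy_task_team *a = toy_allocate_team();
      toy_free_team(a);
      toy_task_team *b = toy_allocate_team();
      assert(a == b); // the team was recycled, not reallocated
      toy_free_team(b);
      toy_reap_teams();
      return 0;
    }

The reap step exists because, at shutdown, threads and gtids are already gone, so the list must be drained without touching any per-thread state.
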
-void -__kmp_task_team_setup( kmp_info_t *this_thr, kmp_team_t *team, int always ) -{ - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - // If this task_team hasn't been created yet, allocate it. It will be used in the region after the next. - // If it exists, it is the current task team and shouldn't be touched yet as it may still be in use. - if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && (always || team->t.t_nproc > 1) ) { - team->t.t_task_team[this_thr->th.th_task_state] = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread(this_thr), team->t.t_task_team[this_thr->th.th_task_state], - ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); - } - - // After threads exit the release, they will call sync, and then point to this other task_team; make sure it is - // allocated and properly initialized. As threads spin in the barrier release phase, they will continue to use the - // previous task_team struct(above), until they receive the signal to stop checking for tasks (they can't safely - // reference the kmp_team_t struct, which could be reallocated by the master thread). No task teams are formed for - // serialized teams. - if (team->t.t_nproc > 1) { - int other_team = 1 - this_thr->th.th_task_state; - if (team->t.t_task_team[other_team] == NULL) { // setup other team as well - team->t.t_task_team[other_team] = __kmp_allocate_task_team( this_thr, team ); - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team )); - } - else { // Leave the old task team struct in place for the upcoming region; adjust as needed - kmp_task_team_t *task_team = team->t.t_task_team[other_team]; - if (!task_team->tt.tt_active || team->t.t_nproc != task_team->tt.tt_nproc) { - TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); - TCW_4(task_team->tt.tt_found_tasks, FALSE); +void __kmp_task_team_setup(kmp_info_t *this_thr, kmp_team_t *team, int always) { + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + + // If this task_team hasn't been created yet, allocate it. It will be used in + // the region after the next. + // If it exists, it is the current task team and shouldn't be touched yet as + // it may still be in use. + if (team->t.t_task_team[this_thr->th.th_task_state] == NULL && + (always || team->t.t_nproc > 1)) { + team->t.t_task_team[this_thr->th.th_task_state] = + __kmp_allocate_task_team(this_thr, team); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created new task_team %p " + "for team %d at parity=%d\n", + __kmp_gtid_from_thread(this_thr), + team->t.t_task_team[this_thr->th.th_task_state], + ((team != NULL) ? team->t.t_id : -1), + this_thr->th.th_task_state)); + } + + // After threads exit the release, they will call sync, and then point to this + // other task_team; make sure it is allocated and properly initialized. As + // threads spin in the barrier release phase, they will continue to use the + // previous task_team struct(above), until they receive the signal to stop + // checking for tasks (they can't safely reference the kmp_team_t struct, + // which could be reallocated by the master thread). No task teams are formed + // for serialized teams. 
+ if (team->t.t_nproc > 1) { + int other_team = 1 - this_thr->th.th_task_state; + if (team->t.t_task_team[other_team] == NULL) { // setup other team as well + team->t.t_task_team[other_team] = + __kmp_allocate_task_team(this_thr, team); + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d created second new " + "task_team %p for team %d at parity=%d\n", + __kmp_gtid_from_thread(this_thr), + team->t.t_task_team[other_team], + ((team != NULL) ? team->t.t_id : -1), other_team)); + } else { // Leave the old task team struct in place for the upcoming region; + // adjust as needed + kmp_task_team_t *task_team = team->t.t_task_team[other_team]; + if (!task_team->tt.tt_active || + team->t.t_nproc != task_team->tt.tt_nproc) { + TCW_4(task_team->tt.tt_nproc, team->t.t_nproc); + TCW_4(task_team->tt.tt_found_tasks, FALSE); #if OMP_45_ENABLED - TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); + TCW_4(task_team->tt.tt_found_proxy_tasks, FALSE); #endif - TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc ); - TCW_4(task_team->tt.tt_active, TRUE ); - } - // if team size has changed, the first thread to enable tasking will realloc threads_data if necessary - KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team %p for team %d at parity=%d\n", - __kmp_gtid_from_thread( this_thr ), team->t.t_task_team[other_team], - ((team != NULL) ? team->t.t_id : -1), other_team )); - } - } + TCW_4(task_team->tt.tt_unfinished_threads, team->t.t_nproc); + TCW_4(task_team->tt.tt_active, TRUE); + } + // if team size has changed, the first thread to enable tasking will + // realloc threads_data if necessary + KA_TRACE(20, ("__kmp_task_team_setup: Master T#%d reset next task_team " + "%p for team %d at parity=%d\n", + __kmp_gtid_from_thread(this_thr), + team->t.t_task_team[other_team], + ((team != NULL) ? team->t.t_id : -1), other_team)); + } + } } - -//------------------------------------------------------------------------------ // __kmp_task_team_sync: Propagation of task team data from team to threads // which happens just after the release phase of a team barrier. This may be // called by any thread, but only for teams with # threads > 1. - -void -__kmp_task_team_sync( kmp_info_t *this_thr, kmp_team_t *team ) -{ - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - - // Toggle the th_task_state field, to switch which task_team this thread refers to - this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; - // It is now safe to propagate the task team pointer from the team struct to the current thread. - TCW_PTR(this_thr->th.th_task_team, team->t.t_task_team[this_thr->th.th_task_state]); - KA_TRACE(20, ("__kmp_task_team_sync: Thread T#%d task team switched to task_team %p from Team #%d (parity=%d)\n", - __kmp_gtid_from_thread( this_thr ), this_thr->th.th_task_team, - ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); +void __kmp_task_team_sync(kmp_info_t *this_thr, kmp_team_t *team) { + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + + // Toggle the th_task_state field, to switch which task_team this thread + // refers to + this_thr->th.th_task_state = 1 - this_thr->th.th_task_state; + // It is now safe to propagate the task team pointer from the team struct to + // the current thread. 
+ TCW_PTR(this_thr->th.th_task_team, + team->t.t_task_team[this_thr->th.th_task_state]); + KA_TRACE(20, + ("__kmp_task_team_sync: Thread T#%d task team switched to task_team " + "%p from Team #%d (parity=%d)\n", + __kmp_gtid_from_thread(this_thr), this_thr->th.th_task_team, + ((team != NULL) ? team->t.t_id : -1), this_thr->th.th_task_state)); } - -//-------------------------------------------------------------------------------------------- -// __kmp_task_team_wait: Master thread waits for outstanding tasks after the barrier gather -// phase. Only called by master thread if #threads in team > 1 or if proxy tasks were created. -// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off by passing in 0 -// optionally as the last argument. When wait is zero, master thread does not wait for -// unfinished_threads to reach 0. -void -__kmp_task_team_wait( kmp_info_t *this_thr, kmp_team_t *team - USE_ITT_BUILD_ARG(void * itt_sync_obj) - , int wait) -{ - kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; - - KMP_DEBUG_ASSERT( __kmp_tasking_mode != tskm_immediate_exec ); - KMP_DEBUG_ASSERT( task_team == this_thr->th.th_task_team ); - - if ( ( task_team != NULL ) && KMP_TASKING_ENABLED(task_team) ) { - if (wait) { - KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks (for unfinished_threads to reach 0) on task_team = %p\n", - __kmp_gtid_from_thread(this_thr), task_team)); - // Worker threads may have dropped through to release phase, but could still be executing tasks. Wait - // here for tasks to complete. To avoid memory contention, only master thread checks termination condition. - kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); - flag.wait(this_thr, TRUE - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - // Deactivate the old task team, so that the worker threads will stop referencing it while spinning. - KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " - "setting active to false, setting local and team's pointer to NULL\n", - __kmp_gtid_from_thread(this_thr), task_team)); +// __kmp_task_team_wait: Master thread waits for outstanding tasks after the +// barrier gather phase. Only called by master thread if #threads in team > 1 or +// if proxy tasks were created. +// +// wait is a flag that defaults to 1 (see kmp.h), but waiting can be turned off +// by passing in 0 optionally as the last argument. When wait is zero, master +// thread does not wait for unfinished_threads to reach 0. +void __kmp_task_team_wait( + kmp_info_t *this_thr, + kmp_team_t *team USE_ITT_BUILD_ARG(void *itt_sync_obj), int wait) { + kmp_task_team_t *task_team = team->t.t_task_team[this_thr->th.th_task_state]; + + KMP_DEBUG_ASSERT(__kmp_tasking_mode != tskm_immediate_exec); + KMP_DEBUG_ASSERT(task_team == this_thr->th.th_task_team); + + if ((task_team != NULL) && KMP_TASKING_ENABLED(task_team)) { + if (wait) { + KA_TRACE(20, ("__kmp_task_team_wait: Master T#%d waiting for all tasks " + "(for unfinished_threads to reach 0) on task_team = %p\n", + __kmp_gtid_from_thread(this_thr), task_team)); + // Worker threads may have dropped through to release phase, but could + // still be executing tasks. Wait here for tasks to complete. To avoid + // memory contention, only master thread checks termination condition. 
+ kmp_flag_32 flag(&task_team->tt.tt_unfinished_threads, 0U); + flag.wait(this_thr, TRUE USE_ITT_BUILD_ARG(itt_sync_obj)); + } + // Deactivate the old task team, so that the worker threads will stop + // referencing it while spinning. + KA_TRACE( + 20, + ("__kmp_task_team_wait: Master T#%d deactivating task_team %p: " + "setting active to false, setting local and team's pointer to NULL\n", + __kmp_gtid_from_thread(this_thr), task_team)); #if OMP_45_ENABLED - KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 || task_team->tt.tt_found_proxy_tasks == TRUE ); - TCW_SYNC_4( task_team->tt.tt_found_proxy_tasks, FALSE ); + KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1 || + task_team->tt.tt_found_proxy_tasks == TRUE); + TCW_SYNC_4(task_team->tt.tt_found_proxy_tasks, FALSE); #else - KMP_DEBUG_ASSERT( task_team->tt.tt_nproc > 1 ); + KMP_DEBUG_ASSERT(task_team->tt.tt_nproc > 1); #endif - TCW_SYNC_4( task_team->tt.tt_active, FALSE ); - KMP_MB(); + TCW_SYNC_4(task_team->tt.tt_active, FALSE); + KMP_MB(); - TCW_PTR(this_thr->th.th_task_team, NULL); - } + TCW_PTR(this_thr->th.th_task_team, NULL); + } } - -//------------------------------------------------------------------------------ // __kmp_tasking_barrier: // This routine may only called when __kmp_tasking_mode == tskm_extra_barrier. -// Internal function to execute all tasks prior to a regular barrier or a -// join barrier. It is a full barrier itself, which unfortunately turns -// regular barriers into double barriers and join barriers into 1 1/2 -// barriers. -void -__kmp_tasking_barrier( kmp_team_t *team, kmp_info_t *thread, int gtid ) -{ - volatile kmp_uint32 *spin = &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; - int flag = FALSE; - KMP_DEBUG_ASSERT( __kmp_tasking_mode == tskm_extra_barrier ); +// Internal function to execute all tasks prior to a regular barrier or a join +// barrier. It is a full barrier itself, which unfortunately turns regular +// barriers into double barriers and join barriers into 1 1/2 barriers. +void __kmp_tasking_barrier(kmp_team_t *team, kmp_info_t *thread, int gtid) { + volatile kmp_uint32 *spin = + &team->t.t_task_team[thread->th.th_task_state]->tt.tt_unfinished_threads; + int flag = FALSE; + KMP_DEBUG_ASSERT(__kmp_tasking_mode == tskm_extra_barrier); #if USE_ITT_BUILD - KMP_FSYNC_SPIN_INIT( spin, (kmp_uint32*) NULL ); + KMP_FSYNC_SPIN_INIT(spin, (kmp_uint32 *)NULL); #endif /* USE_ITT_BUILD */ - kmp_flag_32 spin_flag(spin, 0U); - while (! spin_flag.execute_tasks(thread, gtid, TRUE, &flag - USE_ITT_BUILD_ARG(NULL), 0 ) ) { + kmp_flag_32 spin_flag(spin, 0U); + while (!spin_flag.execute_tasks(thread, gtid, TRUE, + &flag USE_ITT_BUILD_ARG(NULL), 0)) { #if USE_ITT_BUILD - // TODO: What about itt_sync_obj?? - KMP_FSYNC_SPIN_PREPARE( spin ); + // TODO: What about itt_sync_obj?? 
+ KMP_FSYNC_SPIN_PREPARE(spin); #endif /* USE_ITT_BUILD */ - if( TCR_4(__kmp_global.g.g_done) ) { - if( __kmp_global.g.g_abort ) - __kmp_abort_thread( ); - break; - } - KMP_YIELD( TRUE ); // GH: We always yield here + if (TCR_4(__kmp_global.g.g_done)) { + if (__kmp_global.g.g_abort) + __kmp_abort_thread(); + break; } + KMP_YIELD(TRUE); // GH: We always yield here + } #if USE_ITT_BUILD - KMP_FSYNC_SPIN_ACQUIRED( (void*) spin ); + KMP_FSYNC_SPIN_ACQUIRED((void *)spin); #endif /* USE_ITT_BUILD */ } - #if OMP_45_ENABLED -/* __kmp_give_task puts a task into a given thread queue if: - - the queue for that thread was created - - there's space in that queue - - Because of this, __kmp_push_task needs to check if there's space after getting the lock - */ -static bool __kmp_give_task ( kmp_info_t *thread, kmp_int32 tid, kmp_task_t * task, kmp_int32 pass ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - kmp_task_team_t * task_team = taskdata->td_task_team; - - KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", taskdata, tid ) ); - - // If task_team is NULL something went really bad... - KMP_DEBUG_ASSERT( task_team != NULL ); - - bool result = false; - kmp_thread_data_t * thread_data = & task_team -> tt.tt_threads_data[ tid ]; - - if (thread_data -> td.td_deque == NULL ) { - // There's no queue in this thread, go find another one - // We're guaranteed that at least one thread has a queue - KA_TRACE(30, ("__kmp_give_task: thread %d has no queue while giving task %p.\n", tid, taskdata ) ); - return result; - } - - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) ) - { - KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); - - // if this deque is bigger than the pass ratio give a chance to another thread - if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) return result; - - __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock ); - __kmp_realloc_task_deque(thread,thread_data); - - } else { - - __kmp_acquire_bootstrap_lock( & thread_data-> td.td_deque_lock ); - - if ( TCR_4(thread_data -> td.td_deque_ntasks) >= TASK_DEQUE_SIZE(thread_data->td) ) - { - KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", taskdata, tid ) ); - - // if this deque is bigger than the pass ratio give a chance to another thread - if ( TASK_DEQUE_SIZE(thread_data->td)/INITIAL_TASK_DEQUE_SIZE >= pass ) - goto release_and_exit; - - __kmp_realloc_task_deque(thread,thread_data); - } - } - - // lock is held here, and there is space in the deque - - thread_data -> td.td_deque[ thread_data -> td.td_deque_tail ] = taskdata; - // Wrap index. 
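// [Editor's illustrative sketch -- not part of this patch.] The per-thread
// deque manipulated here is a power-of-two ring buffer, so the tail index
// wraps with a bit mask (TASK_DEQUE_MASK == size - 1) instead of a modulo,
// and the element count is tracked separately. A minimal version with
// invented toy_* names:
struct toy_deque {
  enum { kSize = 256 };          // must remain a power of two
  void *slots[kSize];
  unsigned tail = 0, ntasks = 0; // mask is kSize - 1
};
static bool toy_deque_push(toy_deque &d, void *task) {
  if (d.ntasks >= toy_deque::kSize)
    return false;                // full: the caller grows the deque or retries
  d.slots[d.tail] = task;
  d.tail = (d.tail + 1) & (toy_deque::kSize - 1); // wrap the index with the mask
  ++d.ntasks;
  return true;
}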
- thread_data -> td.td_deque_tail = ( thread_data -> td.td_deque_tail + 1 ) & TASK_DEQUE_MASK(thread_data->td); - TCW_4(thread_data -> td.td_deque_ntasks, TCR_4(thread_data -> td.td_deque_ntasks) + 1); - - result = true; - KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", taskdata, tid ) ); +// __kmp_give_task puts a task into a given thread queue if: +// - the queue for that thread was created +// - there's space in that queue +// Because of this, __kmp_push_task needs to check if there's space after +// getting the lock +static bool __kmp_give_task(kmp_info_t *thread, kmp_int32 tid, kmp_task_t *task, + kmp_int32 pass) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + kmp_task_team_t *task_team = taskdata->td_task_team; + + KA_TRACE(20, ("__kmp_give_task: trying to give task %p to thread %d.\n", + taskdata, tid)); + + // If task_team is NULL something went really bad... + KMP_DEBUG_ASSERT(task_team != NULL); + + bool result = false; + kmp_thread_data_t *thread_data = &task_team->tt.tt_threads_data[tid]; + + if (thread_data->td.td_deque == NULL) { + // There's no queue in this thread, go find another one + // We're guaranteed that at least one thread has a queue + KA_TRACE(30, + ("__kmp_give_task: thread %d has no queue while giving task %p.\n", + tid, taskdata)); + return result; + } + + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + KA_TRACE( + 30, + ("__kmp_give_task: queue is full while giving task %p to thread %d.\n", + taskdata, tid)); + + // if this deque is bigger than the pass ratio give a chance to another + // thread + if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass) + return result; + + __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); + __kmp_realloc_task_deque(thread, thread_data); + + } else { + + __kmp_acquire_bootstrap_lock(&thread_data->td.td_deque_lock); + + if (TCR_4(thread_data->td.td_deque_ntasks) >= + TASK_DEQUE_SIZE(thread_data->td)) { + KA_TRACE(30, ("__kmp_give_task: queue is full while giving task %p to " + "thread %d.\n", + taskdata, tid)); + + // if this deque is bigger than the pass ratio give a chance to another + // thread + if (TASK_DEQUE_SIZE(thread_data->td) / INITIAL_TASK_DEQUE_SIZE >= pass) + goto release_and_exit; + + __kmp_realloc_task_deque(thread, thread_data); + } + } + + // lock is held here, and there is space in the deque + + thread_data->td.td_deque[thread_data->td.td_deque_tail] = taskdata; + // Wrap index. + thread_data->td.td_deque_tail = + (thread_data->td.td_deque_tail + 1) & TASK_DEQUE_MASK(thread_data->td); + TCW_4(thread_data->td.td_deque_ntasks, + TCR_4(thread_data->td.td_deque_ntasks) + 1); + + result = true; + KA_TRACE(30, ("__kmp_give_task: successfully gave task %p to thread %d.\n", + taskdata, tid)); release_and_exit: - __kmp_release_bootstrap_lock( & thread_data-> td.td_deque_lock ); + __kmp_release_bootstrap_lock(&thread_data->td.td_deque_lock); - return result; + return result; } - -/* The finish of the a proxy tasks is divided in two pieces: +/* The finish of the proxy tasks is divided in two pieces: - the top half is the one that can be done from a thread outside the team - the bottom half must be run from a them within the team - In order to run the bottom half the task gets queued back into one of the threads of the team. - Once the td_incomplete_child_task counter of the parent is decremented the threads can leave the barriers. - So, the bottom half needs to be queued before the counter is decremented. 
The top half is therefore divided in two parts: + In order to run the bottom half the task gets queued back into one of the + threads of the team. Once the td_incomplete_child_task counter of the parent + is decremented the threads can leave the barriers. So, the bottom half needs + to be queued before the counter is decremented. The top half is therefore + divided in two parts: - things that can be run before queuing the bottom half - things that must be run after queuing the bottom half - This creates a second race as the bottom half can free the task before the second top half is executed. To avoid this - we use the td_incomplete_child_task of the proxy task to synchronize the top and bottom half. -*/ + This creates a second race as the bottom half can free the task before the + second top half is executed. To avoid this we use the + td_incomplete_child_task of the proxy task to synchronize the top and bottom + half. */ +static void __kmp_first_top_half_finish_proxy(kmp_taskdata_t *taskdata) { + KMP_DEBUG_ASSERT(taskdata->td_flags.tasktype == TASK_EXPLICIT); + KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY); + KMP_DEBUG_ASSERT(taskdata->td_flags.complete == 0); + KMP_DEBUG_ASSERT(taskdata->td_flags.freed == 0); -static void __kmp_first_top_half_finish_proxy( kmp_taskdata_t * taskdata ) -{ - KMP_DEBUG_ASSERT( taskdata -> td_flags.tasktype == TASK_EXPLICIT ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 0 ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.freed == 0 ); - - taskdata -> td_flags.complete = 1; // mark the task as completed + taskdata->td_flags.complete = 1; // mark the task as completed - if ( taskdata->td_taskgroup ) - KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata->td_taskgroup->count) ); + if (taskdata->td_taskgroup) + KMP_TEST_THEN_DEC32((kmp_int32 *)(&taskdata->td_taskgroup->count)); - // Create an imaginary children for this task so the bottom half cannot release the task before we have completed the second top half - TCI_4(taskdata->td_incomplete_child_tasks); + // Create an imaginary children for this task so the bottom half cannot + // release the task before we have completed the second top half + TCI_4(taskdata->td_incomplete_child_tasks); } -static void __kmp_second_top_half_finish_proxy( kmp_taskdata_t * taskdata ) -{ - kmp_int32 children = 0; +static void __kmp_second_top_half_finish_proxy(kmp_taskdata_t *taskdata) { + kmp_int32 children = 0; - // Predecrement simulated by "- 1" calculation - children = KMP_TEST_THEN_DEC32( (kmp_int32 *)(& taskdata -> td_parent -> td_incomplete_child_tasks) ) - 1; - KMP_DEBUG_ASSERT( children >= 0 ); + // Predecrement simulated by "- 1" calculation + children = + KMP_TEST_THEN_DEC32( + (kmp_int32 *)(&taskdata->td_parent->td_incomplete_child_tasks)) - + 1; + KMP_DEBUG_ASSERT(children >= 0); - // Remove the imaginary children - TCD_4(taskdata->td_incomplete_child_tasks); + // Remove the imaginary children + TCD_4(taskdata->td_incomplete_child_tasks); } -static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); - kmp_info_t * thread = __kmp_threads[ gtid ]; +static void __kmp_bottom_half_finish_proxy(kmp_int32 gtid, kmp_task_t *ptask) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask); + kmp_info_t *thread = __kmp_threads[gtid]; - KMP_DEBUG_ASSERT( taskdata -> td_flags.proxy == TASK_PROXY ); - KMP_DEBUG_ASSERT( taskdata -> td_flags.complete == 1 ); // top half must 
run before bottom half + KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY); + KMP_DEBUG_ASSERT(taskdata->td_flags.complete == + 1); // top half must run before bottom half - // We need to wait to make sure the top half is finished - // Spinning here should be ok as this should happen quickly - while ( TCR_4(taskdata->td_incomplete_child_tasks) > 0 ) ; + // We need to wait to make sure the top half is finished + // Spinning here should be ok as this should happen quickly + while (TCR_4(taskdata->td_incomplete_child_tasks) > 0) + ; - __kmp_release_deps(gtid,taskdata); - __kmp_free_task_and_ancestors(gtid, taskdata, thread); + __kmp_release_deps(gtid, taskdata); + __kmp_free_task_and_ancestors(gtid, taskdata, thread); } /*! @@ -3075,132 +3134,153 @@ static void __kmp_bottom_half_finish_proxy( kmp_int32 gtid, kmp_task_t * ptask ) @param gtid Global Thread ID of encountering thread @param ptask Task which execution is completed -Execute the completation of a proxy task from a thread of that is part of the team. Run first and bottom halves directly. +Execute the completation of a proxy task from a thread of that is part of the +team. Run first and bottom halves directly. */ -void __kmpc_proxy_task_completed( kmp_int32 gtid, kmp_task_t *ptask ) -{ - KMP_DEBUG_ASSERT( ptask != NULL ); - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); - KA_TRACE(10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", gtid, taskdata ) ); - - KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); - - __kmp_first_top_half_finish_proxy(taskdata); - __kmp_second_top_half_finish_proxy(taskdata); - __kmp_bottom_half_finish_proxy(gtid,ptask); - - KA_TRACE(10, ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", gtid, taskdata ) ); +void __kmpc_proxy_task_completed(kmp_int32 gtid, kmp_task_t *ptask) { + KMP_DEBUG_ASSERT(ptask != NULL); + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask); + KA_TRACE( + 10, ("__kmp_proxy_task_completed(enter): T#%d proxy task %p completing\n", + gtid, taskdata)); + + KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY); + + __kmp_first_top_half_finish_proxy(taskdata); + __kmp_second_top_half_finish_proxy(taskdata); + __kmp_bottom_half_finish_proxy(gtid, ptask); + + KA_TRACE(10, + ("__kmp_proxy_task_completed(exit): T#%d proxy task %p completing\n", + gtid, taskdata)); } /*! @ingroup TASKING @param ptask Task which execution is completed -Execute the completation of a proxy task from a thread that could not belong to the team. +Execute the completation of a proxy task from a thread that could not belong to +the team. 
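// [Editor's illustrative sketch -- not part of this patch.] Two ideas carry
// the out-of-team completion implemented below: (1) a temporary "imaginary
// child" count keeps the proxy task alive until the second top half has run,
// and (2) the destination thread is chosen by a linear round-robin scan whose
// tolerance for full deques (`pass`) doubles after every complete sweep. The
// keep-alive part reduces to the following pattern (invented toy_* names,
// std::atomic standing in for the TCI_4/TCD_4/KMP_TEST_THEN_* primitives):
#include <atomic>
struct toy_task { std::atomic<int> keep_alive{0}; };

static void toy_first_top_half(toy_task &t) {
  t.keep_alive.fetch_add(1);       // pin the task before it is handed off
}
static void toy_second_top_half(toy_task &t) {
  t.keep_alive.fetch_sub(1);       // unpin; the bottom half may now free it
}
static void toy_bottom_half(toy_task &t) {
  while (t.keep_alive.load() > 0)  // wait for the second top half to finish
    ;                              // (expected to be a very short spin)
  // ... release dependences and free the task here ...
}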
*/ -void __kmpc_proxy_task_completed_ooo ( kmp_task_t *ptask ) -{ - KMP_DEBUG_ASSERT( ptask != NULL ); - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(ptask); +void __kmpc_proxy_task_completed_ooo(kmp_task_t *ptask) { + KMP_DEBUG_ASSERT(ptask != NULL); + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(ptask); - KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", taskdata ) ); + KA_TRACE( + 10, + ("__kmp_proxy_task_completed_ooo(enter): proxy task completing ooo %p\n", + taskdata)); - KMP_DEBUG_ASSERT( taskdata->td_flags.proxy == TASK_PROXY ); + KMP_DEBUG_ASSERT(taskdata->td_flags.proxy == TASK_PROXY); - __kmp_first_top_half_finish_proxy(taskdata); + __kmp_first_top_half_finish_proxy(taskdata); - // Enqueue task to complete bottom half completion from a thread within the corresponding team - kmp_team_t * team = taskdata->td_team; - kmp_int32 nthreads = team->t.t_nproc; - kmp_info_t *thread; + // Enqueue task to complete bottom half completion from a thread within the + // corresponding team + kmp_team_t *team = taskdata->td_team; + kmp_int32 nthreads = team->t.t_nproc; + kmp_info_t *thread; - //This should be similar to start_k = __kmp_get_random( thread ) % nthreads but we cannot use __kmp_get_random here - kmp_int32 start_k = 0; - kmp_int32 pass = 1; - kmp_int32 k = start_k; + // This should be similar to start_k = __kmp_get_random( thread ) % nthreads + // but we cannot use __kmp_get_random here + kmp_int32 start_k = 0; + kmp_int32 pass = 1; + kmp_int32 k = start_k; - do { - //For now we're just linearly trying to find a thread - thread = team->t.t_threads[k]; - k = (k+1) % nthreads; + do { + // For now we're just linearly trying to find a thread + thread = team->t.t_threads[k]; + k = (k + 1) % nthreads; - // we did a full pass through all the threads - if ( k == start_k ) pass = pass << 1; + // we did a full pass through all the threads + if (k == start_k) + pass = pass << 1; - } while ( !__kmp_give_task( thread, k, ptask, pass ) ); + } while (!__kmp_give_task(thread, k, ptask, pass)); - __kmp_second_top_half_finish_proxy(taskdata); + __kmp_second_top_half_finish_proxy(taskdata); - KA_TRACE(10, ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", taskdata ) ); + KA_TRACE( + 10, + ("__kmp_proxy_task_completed_ooo(exit): proxy task completing ooo %p\n", + taskdata)); } -//--------------------------------------------------------------------------------- -// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task for taskloop +// __kmp_task_dup_alloc: Allocate the taskdata and make a copy of source task +// for taskloop // // thread: allocating thread // task_src: pointer to source task to be duplicated // returns: a pointer to the allocated kmp_task_t structure (task). -kmp_task_t * -__kmp_task_dup_alloc( kmp_info_t *thread, kmp_task_t *task_src ) -{ - kmp_task_t *task; - kmp_taskdata_t *taskdata; - kmp_taskdata_t *taskdata_src; - kmp_taskdata_t *parent_task = thread->th.th_current_task; - size_t shareds_offset; - size_t task_size; - - KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, task_src) ); - taskdata_src = KMP_TASK_TO_TASKDATA( task_src ); - KMP_DEBUG_ASSERT( taskdata_src->td_flags.proxy == TASK_FULL ); // it should not be proxy task - KMP_DEBUG_ASSERT( taskdata_src->td_flags.tasktype == TASK_EXPLICIT ); - task_size = taskdata_src->td_size_alloc; - - // Allocate a kmp_taskdata_t block and a kmp_task_t block. 
- KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, task_size) ); - #if USE_FAST_MEMORY - taskdata = (kmp_taskdata_t *)__kmp_fast_allocate( thread, task_size ); - #else - taskdata = (kmp_taskdata_t *)__kmp_thread_malloc( thread, task_size ); - #endif /* USE_FAST_MEMORY */ - KMP_MEMCPY(taskdata, taskdata_src, task_size); - - task = KMP_TASKDATA_TO_TASK(taskdata); - - // Initialize new task (only specific fields not affected by memcpy) - taskdata->td_task_id = KMP_GEN_TASK_ID(); - if( task->shareds != NULL ) { // need setup shareds pointer - shareds_offset = (char*)task_src->shareds - (char*)taskdata_src; - task->shareds = &((char*)taskdata)[shareds_offset]; - KMP_DEBUG_ASSERT( (((kmp_uintptr_t)task->shareds) & (sizeof(void*)-1)) == 0 ); - } - taskdata->td_alloc_thread = thread; - taskdata->td_taskgroup = parent_task->td_taskgroup; // task inherits the taskgroup from the parent task - - // Only need to keep track of child task counts if team parallel and tasking not serialized - if ( !( taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser ) ) { - KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_incomplete_child_tasks) ); - if ( parent_task->td_taskgroup ) - KMP_TEST_THEN_INC32( (kmp_int32 *)(& parent_task->td_taskgroup->count) ); - // Only need to keep track of allocated child tasks for explicit tasks since implicit not deallocated - if ( taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT ) - KMP_TEST_THEN_INC32( (kmp_int32 *)(& taskdata->td_parent->td_allocated_child_tasks) ); - } - - KA_TRACE(20, ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", - thread, taskdata, taskdata->td_parent) ); +kmp_task_t *__kmp_task_dup_alloc(kmp_info_t *thread, kmp_task_t *task_src) { + kmp_task_t *task; + kmp_taskdata_t *taskdata; + kmp_taskdata_t *taskdata_src; + kmp_taskdata_t *parent_task = thread->th.th_current_task; + size_t shareds_offset; + size_t task_size; + + KA_TRACE(10, ("__kmp_task_dup_alloc(enter): Th %p, source task %p\n", thread, + task_src)); + taskdata_src = KMP_TASK_TO_TASKDATA(task_src); + KMP_DEBUG_ASSERT(taskdata_src->td_flags.proxy == + TASK_FULL); // it should not be proxy task + KMP_DEBUG_ASSERT(taskdata_src->td_flags.tasktype == TASK_EXPLICIT); + task_size = taskdata_src->td_size_alloc; + + // Allocate a kmp_taskdata_t block and a kmp_task_t block. 
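// [Editor's illustrative sketch -- not part of this patch.] The duplication
// below copies the whole taskdata+task block with a single memcpy and then
// re-bases the interior `shareds` pointer by its byte offset from the source
// block, since a pointer copied verbatim would still point into the original
// task. Simplified with invented toy_* names (it assumes, as the kmp layout
// guarantees, that shareds points inside the same allocation):
#include <cstddef>
#include <cstdlib>
#include <cstring>
struct toy_task { void *shareds; char payload[64]; };

static toy_task *toy_dup(const toy_task *src) {
  toy_task *dst = (toy_task *)std::malloc(sizeof(toy_task));
  if (dst == NULL)
    return NULL;
  std::memcpy(dst, src, sizeof(toy_task));
  if (src->shareds != NULL) {
    std::size_t offset = (char *)src->shareds - (char *)src; // distance in the source
    dst->shareds = (char *)dst + offset;                     // same distance in the copy
  }
  return dst;
}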
+ KA_TRACE(30, ("__kmp_task_dup_alloc: Th %p, malloc size %ld\n", thread, + task_size)); +#if USE_FAST_MEMORY + taskdata = (kmp_taskdata_t *)__kmp_fast_allocate(thread, task_size); +#else + taskdata = (kmp_taskdata_t *)__kmp_thread_malloc(thread, task_size); +#endif /* USE_FAST_MEMORY */ + KMP_MEMCPY(taskdata, taskdata_src, task_size); + + task = KMP_TASKDATA_TO_TASK(taskdata); + + // Initialize new task (only specific fields not affected by memcpy) + taskdata->td_task_id = KMP_GEN_TASK_ID(); + if (task->shareds != NULL) { // need setup shareds pointer + shareds_offset = (char *)task_src->shareds - (char *)taskdata_src; + task->shareds = &((char *)taskdata)[shareds_offset]; + KMP_DEBUG_ASSERT((((kmp_uintptr_t)task->shareds) & (sizeof(void *) - 1)) == + 0); + } + taskdata->td_alloc_thread = thread; + taskdata->td_taskgroup = + parent_task + ->td_taskgroup; // task inherits the taskgroup from the parent task + + // Only need to keep track of child task counts if team parallel and tasking + // not serialized + if (!(taskdata->td_flags.team_serial || taskdata->td_flags.tasking_ser)) { + KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_incomplete_child_tasks)); + if (parent_task->td_taskgroup) + KMP_TEST_THEN_INC32((kmp_int32 *)(&parent_task->td_taskgroup->count)); + // Only need to keep track of allocated child tasks for explicit tasks since + // implicit not deallocated + if (taskdata->td_parent->td_flags.tasktype == TASK_EXPLICIT) + KMP_TEST_THEN_INC32( + (kmp_int32 *)(&taskdata->td_parent->td_allocated_child_tasks)); + } + + KA_TRACE(20, + ("__kmp_task_dup_alloc(exit): Th %p, created task %p, parent=%p\n", + thread, taskdata, taskdata->td_parent)); #if OMPT_SUPPORT - __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, (void*)task->routine); + __kmp_task_init_ompt(taskdata, thread->th.th_info.ds.ds_gtid, + (void *)task->routine); #endif - return task; + return task; } // Routine optionally generated by th ecompiler for setting the lastprivate flag // and calling needed constructors for private/firstprivate objects // (used to form taskloop tasks from pattern task) -typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); +typedef void (*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); -//--------------------------------------------------------------------------------- // __kmp_taskloop_linear: Start tasks of the taskloop linearly // // loc Source location information @@ -3212,114 +3292,120 @@ typedef void(*p_task_dup_t)(kmp_task_t *, kmp_task_t *, kmp_int32); // sched Schedule specified 0/1/2 for none/grainsize/num_tasks // grainsize Schedule value if specified // task_dup Tasks duplication routine -void -__kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, - kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, - int sched, kmp_uint64 grainsize, void *task_dup ) -{ - KMP_COUNT_BLOCK(OMP_TASKLOOP); - KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling); - p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; - kmp_uint64 tc; - kmp_uint64 lower = *lb; // compiler provides global bounds here - kmp_uint64 upper = *ub; - kmp_uint64 i, num_tasks = 0, extras = 0; - kmp_info_t *thread = __kmp_threads[gtid]; - kmp_taskdata_t *current_task = thread->th.th_current_task; - kmp_task_t *next_task; - kmp_int32 lastpriv = 0; - size_t lower_offset = (char*)lb - (char*)task; // remember offset of lb in the task structure - size_t upper_offset = (char*)ub - (char*)task; // remember offset of ub in the task structure - - // compute trip count - if ( st == 1 ) { // most common case - tc 
= upper - lower + 1; - } else if ( st < 0 ) { - tc = (lower - upper) / (-st) + 1; - } else { // st > 0 - tc = (upper - lower) / st + 1; - } - if(tc == 0) { - KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid)); - // free the pattern task and exit - __kmp_task_start( gtid, task, current_task ); - // do not execute anything for zero-trip loop - __kmp_task_finish( gtid, task, current_task ); - return; - } - - // compute num_tasks/grainsize based on the input provided - switch( sched ) { - case 0: // no schedule clause specified, we can choose the default - // let's try to schedule (team_size*10) tasks - grainsize = thread->th.th_team_nproc * 10; - case 2: // num_tasks provided - if( grainsize > tc ) { - num_tasks = tc; // too big num_tasks requested, adjust values - grainsize = 1; - extras = 0; - } else { - num_tasks = grainsize; - grainsize = tc / num_tasks; - extras = tc % num_tasks; - } - break; - case 1: // grainsize provided - if( grainsize > tc ) { - num_tasks = 1; // too big grainsize requested, adjust values - grainsize = tc; - extras = 0; - } else { - num_tasks = tc / grainsize; - grainsize = tc / num_tasks; // adjust grainsize for balanced distribution of iterations - extras = tc % num_tasks; - } - break; - default: - KMP_ASSERT2(0, "unknown scheduling of taskloop"); +void __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, + kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, + int sched, kmp_uint64 grainsize, void *task_dup) { + KMP_COUNT_BLOCK(OMP_TASKLOOP); + KMP_TIME_PARTITIONED_BLOCK(OMP_taskloop_scheduling); + p_task_dup_t ptask_dup = (p_task_dup_t)task_dup; + kmp_uint64 tc; + kmp_uint64 lower = *lb; // compiler provides global bounds here + kmp_uint64 upper = *ub; + kmp_uint64 i, num_tasks = 0, extras = 0; + kmp_info_t *thread = __kmp_threads[gtid]; + kmp_taskdata_t *current_task = thread->th.th_current_task; + kmp_task_t *next_task; + kmp_int32 lastpriv = 0; + size_t lower_offset = + (char *)lb - (char *)task; // remember offset of lb in the task structure + size_t upper_offset = + (char *)ub - (char *)task; // remember offset of ub in the task structure + + // compute trip count + if (st == 1) { // most common case + tc = upper - lower + 1; + } else if (st < 0) { + tc = (lower - upper) / (-st) + 1; + } else { // st > 0 + tc = (upper - lower) / st + 1; + } + if (tc == 0) { + KA_TRACE(20, ("__kmpc_taskloop(exit): T#%d zero-trip loop\n", gtid)); + // free the pattern task and exit + __kmp_task_start(gtid, task, current_task); + // do not execute anything for zero-trip loop + __kmp_task_finish(gtid, task, current_task); + return; + } + + // compute num_tasks/grainsize based on the input provided + switch (sched) { + case 0: // no schedule clause specified, we can choose the default + // let's try to schedule (team_size*10) tasks + grainsize = thread->th.th_team_nproc * 10; + case 2: // num_tasks provided + if (grainsize > tc) { + num_tasks = tc; // too big num_tasks requested, adjust values + grainsize = 1; + extras = 0; + } else { + num_tasks = grainsize; + grainsize = tc / num_tasks; + extras = tc % num_tasks; + } + break; + case 1: // grainsize provided + if (grainsize > tc) { + num_tasks = 1; // too big grainsize requested, adjust values + grainsize = tc; + extras = 0; + } else { + num_tasks = tc / grainsize; + grainsize = + tc / + num_tasks; // adjust grainsize for balanced distribution of iterations + extras = tc % num_tasks; + } + break; + default: + KMP_ASSERT2(0, "unknown scheduling of taskloop"); + } + KMP_DEBUG_ASSERT(tc == num_tasks * 
grainsize + extras); + KMP_DEBUG_ASSERT(num_tasks > extras); + KMP_DEBUG_ASSERT(num_tasks > 0); + KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize " + "%lld, extras %lld\n", + gtid, num_tasks, grainsize, extras)); + + // Main loop, launch num_tasks tasks, assign grainsize iterations each task + for (i = 0; i < num_tasks; ++i) { + kmp_uint64 chunk_minus_1; + if (extras == 0) { + chunk_minus_1 = grainsize - 1; + } else { + chunk_minus_1 = grainsize; + --extras; // first extras iterations get bigger chunk (grainsize+1) } - KMP_DEBUG_ASSERT(tc == num_tasks * grainsize + extras); - KMP_DEBUG_ASSERT(num_tasks > extras); - KMP_DEBUG_ASSERT(num_tasks > 0); - KA_TRACE(20, ("__kmpc_taskloop: T#%d will launch: num_tasks %lld, grainsize %lld, extras %lld\n", - gtid, num_tasks, grainsize, extras)); - - // Main loop, launch num_tasks tasks, assign grainsize iterations each task - for( i = 0; i < num_tasks; ++i ) { - kmp_uint64 chunk_minus_1; - if( extras == 0 ) { - chunk_minus_1 = grainsize - 1; - } else { - chunk_minus_1 = grainsize; - --extras; // first extras iterations get bigger chunk (grainsize+1) - } - upper = lower + st * chunk_minus_1; - if( i == num_tasks - 1 ) { - // schedule the last task, set lastprivate flag - lastpriv = 1; + upper = lower + st * chunk_minus_1; + if (i == num_tasks - 1) { + // schedule the last task, set lastprivate flag + lastpriv = 1; #if KMP_DEBUG - if( st == 1 ) - KMP_DEBUG_ASSERT(upper == *ub); - else if( st > 0 ) - KMP_DEBUG_ASSERT(upper+st > *ub); - else - KMP_DEBUG_ASSERT(upper+st < *ub); + if (st == 1) + KMP_DEBUG_ASSERT(upper == *ub); + else if (st > 0) + KMP_DEBUG_ASSERT(upper + st > *ub); + else + KMP_DEBUG_ASSERT(upper + st < *ub); #endif - } - next_task = __kmp_task_dup_alloc(thread, task); // allocate new task - *(kmp_uint64*)((char*)next_task + lower_offset) = lower; // adjust task-specific bounds - *(kmp_uint64*)((char*)next_task + upper_offset) = upper; - if( ptask_dup != NULL ) - ptask_dup(next_task, task, lastpriv); // set lastprivate flag, construct fistprivates, etc. - KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper %lld (offsets %p %p)\n", - gtid, next_task, lower, upper, lower_offset, upper_offset)); - __kmp_omp_task(gtid, next_task, true); // schedule new task - lower = upper + st; // adjust lower bound for the next iteration } - // free the pattern task and exit - __kmp_task_start( gtid, task, current_task ); - // do not execute the pattern task, just do bookkeeping - __kmp_task_finish( gtid, task, current_task ); + next_task = __kmp_task_dup_alloc(thread, task); // allocate new task + *(kmp_uint64 *)((char *)next_task + lower_offset) = + lower; // adjust task-specific bounds + *(kmp_uint64 *)((char *)next_task + upper_offset) = upper; + if (ptask_dup != NULL) + ptask_dup(next_task, task, + lastpriv); // set lastprivate flag, construct fistprivates, etc. + KA_TRACE(20, ("__kmpc_taskloop: T#%d schedule task %p: lower %lld, upper " + "%lld (offsets %p %p)\n", + gtid, next_task, lower, upper, lower_offset, upper_offset)); + __kmp_omp_task(gtid, next_task, true); // schedule new task + lower = upper + st; // adjust lower bound for the next iteration + } + // free the pattern task and exit + __kmp_task_start(gtid, task, current_task); + // do not execute the pattern task, just do bookkeeping + __kmp_task_finish(gtid, task, current_task); } /*! @@ -3338,34 +3424,34 @@ __kmp_taskloop_linear(ident_t *loc, int gtid, kmp_task_t *task, Execute the taskloop construct. 
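// [Editor's illustrative sketch -- not part of this patch.] The linear
// scheduler above splits the trip count tc so that
//   tc == num_tasks * grainsize + extras, with 0 <= extras < num_tasks,
// and the first `extras` tasks receive one extra iteration. For example,
// tc = 10 with num_tasks = 4 gives grainsize = 2, extras = 2 and chunk sizes
// 3, 3, 2, 2. A minimal rendering of that chunk computation:
static void toy_taskloop_chunks(unsigned long long tc,
                                unsigned long long num_tasks) {
  unsigned long long grainsize = tc / num_tasks;
  unsigned long long extras = tc % num_tasks;
  for (unsigned long long i = 0; i < num_tasks; ++i) {
    unsigned long long chunk = grainsize + (i < extras ? 1 : 0);
    (void)chunk; // each chunk becomes one task covering [lower, lower + st*(chunk-1)]
  }
}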
*/ -void -__kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, - kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, - int nogroup, int sched, kmp_uint64 grainsize, void *task_dup ) -{ - kmp_taskdata_t * taskdata = KMP_TASK_TO_TASKDATA(task); - KMP_DEBUG_ASSERT( task != NULL ); - - KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub %lld st %lld, grain %llu(%d)\n", - gtid, taskdata, *lb, *ub, st, grainsize, sched)); - - // check if clause value first - if( if_val == 0 ) { // if(0) specified, mark task as serial - taskdata->td_flags.task_serial = 1; - taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied - } - if( nogroup == 0 ) { - __kmpc_taskgroup( loc, gtid ); - } - - if( 1 /* AC: use some heuristic here to choose task scheduling method */ ) { - __kmp_taskloop_linear( loc, gtid, task, lb, ub, st, sched, grainsize, task_dup ); - } - - if( nogroup == 0 ) { - __kmpc_end_taskgroup( loc, gtid ); - } - KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid)); +void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int if_val, + kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, + int sched, kmp_uint64 grainsize, void *task_dup) { + kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); + KMP_DEBUG_ASSERT(task != NULL); + + KA_TRACE(10, ("__kmpc_taskloop(enter): T#%d, pattern task %p, lb %lld ub " + "%lld st %lld, grain %llu(%d)\n", + gtid, taskdata, *lb, *ub, st, grainsize, sched)); + + // check if clause value first + if (if_val == 0) { // if(0) specified, mark task as serial + taskdata->td_flags.task_serial = 1; + taskdata->td_flags.tiedness = TASK_TIED; // AC: serial task cannot be untied + } + if (nogroup == 0) { + __kmpc_taskgroup(loc, gtid); + } + + if (1 /* AC: use some heuristic here to choose task scheduling method */) { + __kmp_taskloop_linear(loc, gtid, task, lb, ub, st, sched, grainsize, + task_dup); + } + + if (nogroup == 0) { + __kmpc_end_taskgroup(loc, gtid); + } + KA_TRACE(10, ("__kmpc_taskloop(exit): T#%d\n", gtid)); } #endif diff --git a/openmp/runtime/src/kmp_taskq.cpp b/openmp/runtime/src/kmp_taskq.cpp index b07bda8..4b4571a 100644 --- a/openmp/runtime/src/kmp_taskq.cpp +++ b/openmp/runtime/src/kmp_taskq.cpp @@ -14,762 +14,748 @@ #include "kmp.h" +#include "kmp_error.h" #include "kmp_i18n.h" #include "kmp_io.h" -#include "kmp_error.h" #define MAX_MESSAGE 512 -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -/* - * Taskq routines and global variables - */ +/* Taskq routines and global variables */ -#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x); +#define KMP_DEBUG_REF_CTS(x) KF_TRACE(1, x); #define THREAD_ALLOC_FOR_TASKQ -static int -in_parallel_context( kmp_team_t *team ) -{ - return ! 
team -> t.t_serialized; +static int in_parallel_context(kmp_team_t *team) { + return !team->t.t_serialized; } -static void -__kmp_taskq_eo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; - kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; +static void __kmp_taskq_eo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + int gtid = *gtid_ref; + int tid = __kmp_tid_from_gtid(gtid); + kmp_uint32 my_token; + kmpc_task_queue_t *taskq; + kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; - if ( __kmp_env_consistency_check ) + if (__kmp_env_consistency_check) #if KMP_USE_DYNAMIC_LOCK - __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL, 0 ); + __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL, 0); #else - __kmp_push_sync( gtid, ct_ordered_in_taskq, loc_ref, NULL ); + __kmp_push_sync(gtid, ct_ordered_in_taskq, loc_ref, NULL); #endif - if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. */ + if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { + KMP_MB(); /* Flush all pending memory write invalidates. */ - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[*tid_ref] != NULL) */ + /* GEH - need check here under stats to make sure */ + /* inside task (curr_thunk[*tid_ref] != NULL) */ - my_token =tq->tq_curr_thunk[ tid ]-> th_tasknum; + my_token = tq->tq_curr_thunk[tid]->th_tasknum; - taskq = tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue; + taskq = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL); - KMP_MB(); - } + KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_EQ, NULL); + KMP_MB(); + } } -static void -__kmp_taskq_xo( int *gtid_ref, int *cid_ref, ident_t *loc_ref ) -{ - int gtid = *gtid_ref; - int tid = __kmp_tid_from_gtid( gtid ); - kmp_uint32 my_token; - kmp_taskq_t *tq = & __kmp_threads[gtid] -> th.th_team -> t.t_taskq; +static void __kmp_taskq_xo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) { + int gtid = *gtid_ref; + int tid = __kmp_tid_from_gtid(gtid); + kmp_uint32 my_token; + kmp_taskq_t *tq = &__kmp_threads[gtid]->th.th_team->t.t_taskq; - if ( __kmp_env_consistency_check ) - __kmp_pop_sync( gtid, ct_ordered_in_taskq, loc_ref ); + if (__kmp_env_consistency_check) + __kmp_pop_sync(gtid, ct_ordered_in_taskq, loc_ref); - if ( ! __kmp_threads[ gtid ]-> th.th_team -> t.t_serialized ) { - KMP_MB(); /* Flush all pending memory write invalidates. */ + if (!__kmp_threads[gtid]->th.th_team->t.t_serialized) { + KMP_MB(); /* Flush all pending memory write invalidates. */ - /* GEH - need check here under stats to make sure */ - /* inside task (curr_thunk[tid] != NULL) */ + /* GEH - need check here under stats to make sure */ + /* inside task (curr_thunk[tid] != NULL) */ - my_token = tq->tq_curr_thunk[ tid ]->th_tasknum; + my_token = tq->tq_curr_thunk[tid]->th_tasknum; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - tq->tq_curr_thunk[ tid ]-> th.th_shareds -> sv_queue -> tq_tasknum_serving = my_token + 1; + tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue->tq_tasknum_serving = + my_token + 1; - KMP_MB(); /* Flush all pending memory write invalidates. */ - } + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + } } -static void -__kmp_taskq_check_ordered( kmp_int32 gtid, kmpc_thunk_t *thunk ) -{ - kmp_uint32 my_token; - kmpc_task_queue_t *taskq; +static void __kmp_taskq_check_ordered(kmp_int32 gtid, kmpc_thunk_t *thunk) { + kmp_uint32 my_token; + kmpc_task_queue_t *taskq; - /* assume we are always called from an active parallel context */ + /* assume we are always called from an active parallel context */ - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - my_token = thunk -> th_tasknum; + my_token = thunk->th_tasknum; - taskq = thunk -> th.th_shareds -> sv_queue; + taskq = thunk->th.th_shareds->sv_queue; - if(taskq->tq_tasknum_serving <= my_token) { - KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL); - KMP_MB(); - taskq->tq_tasknum_serving = my_token +1; - KMP_MB(); - } + if (taskq->tq_tasknum_serving <= my_token) { + KMP_WAIT_YIELD(&taskq->tq_tasknum_serving, my_token, KMP_GE, NULL); + KMP_MB(); + taskq->tq_tasknum_serving = my_token + 1; + KMP_MB(); + } } #ifdef KMP_DEBUG -static void -__kmp_dump_TQF(kmp_int32 flags) -{ - if (flags & TQF_IS_ORDERED) - __kmp_printf("ORDERED "); - if (flags & TQF_IS_LASTPRIVATE) - __kmp_printf("LAST_PRIV "); - if (flags & TQF_IS_NOWAIT) - __kmp_printf("NOWAIT "); - if (flags & TQF_HEURISTICS) - __kmp_printf("HEURIST "); - if (flags & TQF_INTERFACE_RESERVED1) - __kmp_printf("RESERV1 "); - if (flags & TQF_INTERFACE_RESERVED2) - __kmp_printf("RESERV2 "); - if (flags & TQF_INTERFACE_RESERVED3) - __kmp_printf("RESERV3 "); - if (flags & TQF_INTERFACE_RESERVED4) - __kmp_printf("RESERV4 "); - if (flags & TQF_IS_LAST_TASK) - __kmp_printf("LAST_TASK "); - if (flags & TQF_TASKQ_TASK) - __kmp_printf("TASKQ_TASK "); - if (flags & TQF_RELEASE_WORKERS) - __kmp_printf("RELEASE "); - if (flags & TQF_ALL_TASKS_QUEUED) - __kmp_printf("ALL_QUEUED "); - if (flags & TQF_PARALLEL_CONTEXT) - __kmp_printf("PARALLEL "); - if (flags & TQF_DEALLOCATED) - __kmp_printf("DEALLOC "); - if (!(flags & (TQF_INTERNAL_FLAGS|TQF_INTERFACE_FLAGS))) - __kmp_printf("(NONE)"); +static void __kmp_dump_TQF(kmp_int32 flags) { + if (flags & TQF_IS_ORDERED) + __kmp_printf("ORDERED "); + if (flags & TQF_IS_LASTPRIVATE) + __kmp_printf("LAST_PRIV "); + if (flags & TQF_IS_NOWAIT) + __kmp_printf("NOWAIT "); + if (flags & TQF_HEURISTICS) + __kmp_printf("HEURIST "); + if (flags & TQF_INTERFACE_RESERVED1) + __kmp_printf("RESERV1 "); + if (flags & TQF_INTERFACE_RESERVED2) + __kmp_printf("RESERV2 "); + if (flags & TQF_INTERFACE_RESERVED3) + __kmp_printf("RESERV3 "); + if (flags & TQF_INTERFACE_RESERVED4) + __kmp_printf("RESERV4 "); + if (flags & TQF_IS_LAST_TASK) + __kmp_printf("LAST_TASK "); + if (flags & TQF_TASKQ_TASK) + __kmp_printf("TASKQ_TASK "); + if (flags & TQF_RELEASE_WORKERS) + __kmp_printf("RELEASE "); + if (flags & TQF_ALL_TASKS_QUEUED) + __kmp_printf("ALL_QUEUED "); + if (flags & TQF_PARALLEL_CONTEXT) + __kmp_printf("PARALLEL "); + if (flags & TQF_DEALLOCATED) + __kmp_printf("DEALLOC "); + if (!(flags & (TQF_INTERNAL_FLAGS | TQF_INTERFACE_FLAGS))) + __kmp_printf("(NONE)"); } -static void -__kmp_dump_thunk( kmp_taskq_t *tq, kmpc_thunk_t *thunk, kmp_int32 global_tid ) -{ - int i; - int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; +static void __kmp_dump_thunk(kmp_taskq_t *tq, kmpc_thunk_t *thunk, + kmp_int32 global_tid) { + int i; + int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; - __kmp_printf("\tThunk at %p on (%d): ", thunk, global_tid); + __kmp_printf("\tThunk at %p on (%d): 
", thunk, global_tid); - if (thunk != NULL) { - for (i = 0; i < nproc; i++) { - if( tq->tq_curr_thunk[i] == thunk ) { - __kmp_printf("[%i] ", i); - } - } - __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); - __kmp_printf("th_task=%p, ", thunk->th_task); - __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); - __kmp_printf("th_status=%d, ", thunk->th_status); - __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); - __kmp_printf("th_flags="); __kmp_dump_TQF(thunk->th_flags); + if (thunk != NULL) { + for (i = 0; i < nproc; i++) { + if (tq->tq_curr_thunk[i] == thunk) { + __kmp_printf("[%i] ", i); + } } - - __kmp_printf("\n"); + __kmp_printf("th_shareds=%p, ", thunk->th.th_shareds); + __kmp_printf("th_task=%p, ", thunk->th_task); + __kmp_printf("th_encl_thunk=%p, ", thunk->th_encl_thunk); + __kmp_printf("th_status=%d, ", thunk->th_status); + __kmp_printf("th_tasknum=%u, ", thunk->th_tasknum); + __kmp_printf("th_flags="); + __kmp_dump_TQF(thunk->th_flags); + } + + __kmp_printf("\n"); } -static void -__kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) -{ - kmpc_thunk_t *th; +static void __kmp_dump_thunk_stack(kmpc_thunk_t *thunk, kmp_int32 thread_num) { + kmpc_thunk_t *th; - __kmp_printf(" Thunk stack for T#%d: ", thread_num); + __kmp_printf(" Thunk stack for T#%d: ", thread_num); - for (th = thunk; th != NULL; th = th->th_encl_thunk ) - __kmp_printf("%p ", th); + for (th = thunk; th != NULL; th = th->th_encl_thunk) + __kmp_printf("%p ", th); - __kmp_printf("\n"); + __kmp_printf("\n"); } -static void -__kmp_dump_task_queue( kmp_taskq_t *tq, kmpc_task_queue_t *queue, kmp_int32 global_tid ) -{ - int qs, count, i; - kmpc_thunk_t *thunk; - kmpc_task_queue_t *taskq; +static void __kmp_dump_task_queue(kmp_taskq_t *tq, kmpc_task_queue_t *queue, + kmp_int32 global_tid) { + int qs, count, i; + kmpc_thunk_t *thunk; + kmpc_task_queue_t *taskq; - __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); + __kmp_printf("Task Queue at %p on (%d):\n", queue, global_tid); - if (queue != NULL) { - int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT; + if (queue != NULL) { + int in_parallel = queue->tq_flags & TQF_PARALLEL_CONTEXT; - if ( __kmp_env_consistency_check ) { - __kmp_printf(" tq_loc : "); + if (__kmp_env_consistency_check) { + __kmp_printf(" tq_loc : "); } - if (in_parallel) { + if (in_parallel) { - //if (queue->tq.tq_parent != 0) - //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + // if (queue->tq.tq_parent != 0) + //__kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); + //__kmp_acquire_lock(& queue->tq_link_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); - __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); - __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); - __kmp_printf(" tq_prev_child : %p\n", queue->tq_prev_child); - __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); + __kmp_printf(" tq_parent : %p\n", queue->tq.tq_parent); + __kmp_printf(" tq_first_child : %p\n", queue->tq_first_child); + __kmp_printf(" tq_next_child : %p\n", queue->tq_next_child); + __kmp_printf(" tq_prev_child : 
%p\n", queue->tq_prev_child); + __kmp_printf(" tq_ref_count : %d\n", queue->tq_ref_count); - //__kmp_release_lock(& queue->tq_link_lck, global_tid); + //__kmp_release_lock(& queue->tq_link_lck, global_tid); - //if (queue->tq.tq_parent != 0) - //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + // if (queue->tq.tq_parent != 0) + //__kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); + //__kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); + //__kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + } - __kmp_printf(" tq_shareds : "); - for (i=0; i<((queue == tq->tq_root) ? queue->tq_nproc : 1); i++) - __kmp_printf("%p ", queue->tq_shareds[i].ai_data); - __kmp_printf("\n"); + __kmp_printf(" tq_shareds : "); + for (i = 0; i < ((queue == tq->tq_root) ? queue->tq_nproc : 1); i++) + __kmp_printf("%p ", queue->tq_shareds[i].ai_data); + __kmp_printf("\n"); - if (in_parallel) { - __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); - __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); - } + if (in_parallel) { + __kmp_printf(" tq_tasknum_queuing : %u\n", queue->tq_tasknum_queuing); + __kmp_printf(" tq_tasknum_serving : %u\n", queue->tq_tasknum_serving); + } - __kmp_printf(" tq_queue : %p\n", queue->tq_queue); - __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); - __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); + __kmp_printf(" tq_queue : %p\n", queue->tq_queue); + __kmp_printf(" tq_thunk_space : %p\n", queue->tq_thunk_space); + __kmp_printf(" tq_taskq_slot : %p\n", queue->tq_taskq_slot); - __kmp_printf(" tq_free_thunks : "); - for (thunk = queue->tq_free_thunks; thunk != NULL; thunk = thunk->th.th_next_free ) - __kmp_printf("%p ", thunk); - __kmp_printf("\n"); + __kmp_printf(" tq_free_thunks : "); + for (thunk = queue->tq_free_thunks; thunk != NULL; + thunk = thunk->th.th_next_free) + __kmp_printf("%p ", thunk); + __kmp_printf("\n"); - __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); - __kmp_printf(" tq_head : %d\n", queue->tq_head); - __kmp_printf(" tq_tail : %d\n", queue->tq_tail); - __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); - __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); - __kmp_printf(" tq_flags : "); __kmp_dump_TQF(queue->tq_flags); - __kmp_printf("\n"); + __kmp_printf(" tq_nslots : %d\n", queue->tq_nslots); + __kmp_printf(" tq_head : %d\n", queue->tq_head); + __kmp_printf(" tq_tail : %d\n", queue->tq_tail); + __kmp_printf(" tq_nfull : %d\n", queue->tq_nfull); + __kmp_printf(" tq_hiwat : %d\n", queue->tq_hiwat); + __kmp_printf(" tq_flags : "); + __kmp_dump_TQF(queue->tq_flags); + __kmp_printf("\n"); - if (in_parallel) { - __kmp_printf(" tq_th_thunks : "); - for (i = 0; i < queue->tq_nproc; i++) { - __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); - } - __kmp_printf("\n"); - } + if (in_parallel) { + __kmp_printf(" tq_th_thunks : "); + for (i = 0; i < queue->tq_nproc; i++) { + __kmp_printf("%d ", queue->tq_th_thunks[i].ai_data); + } + __kmp_printf("\n"); + } - __kmp_printf("\n"); - __kmp_printf(" Queue slots:\n"); + 
__kmp_printf("\n"); + __kmp_printf(" Queue slots:\n"); + qs = queue->tq_tail; + for (count = 0; count < queue->tq_nfull; ++count) { + __kmp_printf("(%d)", qs); + __kmp_dump_thunk(tq, queue->tq_queue[qs].qs_thunk, global_tid); + qs = (qs + 1) % queue->tq_nslots; + } - qs = queue->tq_tail; - for ( count = 0; count < queue->tq_nfull; ++count ) { - __kmp_printf("(%d)", qs); - __kmp_dump_thunk( tq, queue->tq_queue[qs].qs_thunk, global_tid ); - qs = (qs+1) % queue->tq_nslots; - } + __kmp_printf("\n"); + if (in_parallel) { + if (queue->tq_taskq_slot != NULL) { + __kmp_printf(" TaskQ slot:\n"); + __kmp_dump_thunk(tq, (kmpc_thunk_t *)queue->tq_taskq_slot, global_tid); __kmp_printf("\n"); - - if (in_parallel) { - if (queue->tq_taskq_slot != NULL) { - __kmp_printf(" TaskQ slot:\n"); - __kmp_dump_thunk( tq, (kmpc_thunk_t *) queue->tq_taskq_slot, global_tid ); - __kmp_printf("\n"); - } - //__kmp_release_lock(& queue->tq_queue_lck, global_tid); - //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); - } + } + //__kmp_release_lock(& queue->tq_queue_lck, global_tid); + //__kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); } + } - __kmp_printf(" Taskq freelist: "); + __kmp_printf(" Taskq freelist: "); - //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); + //__kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - for( taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free ) - __kmp_printf("%p ", taskq); + for (taskq = tq->tq_freelist; taskq != NULL; taskq = taskq->tq.tq_next_free) + __kmp_printf("%p ", taskq); - //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + //__kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - __kmp_printf("\n\n"); + __kmp_printf("\n\n"); } -static void -__kmp_aux_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *curr_queue, kmp_int32 level, kmp_int32 global_tid ) -{ - int i, count, qs; - int nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; - kmpc_task_queue_t *queue = curr_queue; +static void __kmp_aux_dump_task_queue_tree(kmp_taskq_t *tq, + kmpc_task_queue_t *curr_queue, + kmp_int32 level, + kmp_int32 global_tid) { + int i, count, qs; + int nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; + kmpc_task_queue_t *queue = curr_queue; - if (curr_queue == NULL) - return; + if (curr_queue == NULL) + return; - __kmp_printf(" "); + __kmp_printf(" "); - for (i=0; itq_curr_thunk[i] && tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue ) { - __kmp_printf(" [%i]", i); - } + for (i = 0; i < nproc; i++) { + if (tq->tq_curr_thunk[i] && + tq->tq_curr_thunk[i]->th.th_shareds->sv_queue == curr_queue) { + __kmp_printf(" [%i]", i); } + } - __kmp_printf(":"); + __kmp_printf(":"); - //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); + //__kmp_acquire_lock(& curr_queue->tq_queue_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - qs = curr_queue->tq_tail; + qs 
= curr_queue->tq_tail; - for ( count = 0; count < curr_queue->tq_nfull; ++count ) { - __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); - qs = (qs+1) % curr_queue->tq_nslots; - } + for (count = 0; count < curr_queue->tq_nfull; ++count) { + __kmp_printf("%p ", curr_queue->tq_queue[qs].qs_thunk); + qs = (qs + 1) % curr_queue->tq_nslots; + } - //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); + //__kmp_release_lock(& curr_queue->tq_queue_lck, global_tid); - __kmp_printf("\n"); + __kmp_printf("\n"); - if (curr_queue->tq_first_child) { - //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + if (curr_queue->tq_first_child) { + //__kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - if (curr_queue->tq_first_child) { - for(queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; - queue != NULL; - queue = queue->tq_next_child) { - __kmp_aux_dump_task_queue_tree( tq, queue, level+1, global_tid ); - } - } + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + if (curr_queue->tq_first_child) { + for (queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; + queue != NULL; queue = queue->tq_next_child) { + __kmp_aux_dump_task_queue_tree(tq, queue, level + 1, global_tid); + } } + + //__kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + } } -static void -__kmp_dump_task_queue_tree( kmp_taskq_t *tq, kmpc_task_queue_t *tqroot, kmp_int32 global_tid) -{ - __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); +static void __kmp_dump_task_queue_tree(kmp_taskq_t *tq, + kmpc_task_queue_t *tqroot, + kmp_int32 global_tid) { + __kmp_printf("TaskQ Tree at root %p on (%d):\n", tqroot, global_tid); - __kmp_aux_dump_task_queue_tree( tq, tqroot, 0, global_tid ); + __kmp_aux_dump_task_queue_tree(tq, tqroot, 0, global_tid); - __kmp_printf("\n"); + __kmp_printf("\n"); } #endif -/* --------------------------------------------------------------------------- */ - -/* - New taskq storage routines that try to minimize overhead of mallocs but - still provide cache line alignment. -*/ +/* New taskq storage routines that try to minimize overhead of mallocs but + still provide cache line alignment. 
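// [Editor's illustrative sketch -- not part of this patch.] The allocator that
// follows over-allocates by one pointer plus a cache line, rounds up to a
// cache-line boundary, and stashes the original malloc pointer in the word
// just below the address it hands out so the matching free can recover it.
// A common standalone variant of the same idea (assuming 64-byte lines and
// plain malloc/free in place of the kmp thread allocator):
#include <cstdint>
#include <cstdlib>
static const std::size_t kCacheLine = 64;

static void *toy_aligned_alloc(std::size_t size) {
  void *orig = std::malloc(sizeof(void *) + kCacheLine + size);
  if (orig == NULL)
    return NULL;
  std::uintptr_t p = (std::uintptr_t)orig + sizeof(void *);
  p = (p + kCacheLine - 1) & ~(std::uintptr_t)(kCacheLine - 1); // round up
  ((void **)p)[-1] = orig;        // remember where the block really starts
  return (void *)p;
}
static void toy_aligned_free(void *p) {
  if (p != NULL)
    std::free(((void **)p)[-1]);  // free the original, not the aligned address
}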
*/ +static void *__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) { + void *addr, *orig_addr; + size_t bytes; + KB_TRACE(5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int)size, + global_tid)); -static void * -__kmp_taskq_allocate(size_t size, kmp_int32 global_tid) -{ - void *addr, *orig_addr; - size_t bytes; - - KB_TRACE( 5, ("__kmp_taskq_allocate: called size=%d, gtid=%d\n", (int) size, global_tid ) ); - - bytes = sizeof(void *) + CACHE_LINE + size; + bytes = sizeof(void *) + CACHE_LINE + size; #ifdef THREAD_ALLOC_FOR_TASKQ - orig_addr = (void *) __kmp_thread_malloc( __kmp_thread_from_gtid(global_tid), bytes ); + orig_addr = + (void *)__kmp_thread_malloc(__kmp_thread_from_gtid(global_tid), bytes); #else - KE_TRACE( 10, ("%%%%%% MALLOC( %d )\n", bytes ) ); - orig_addr = (void *) KMP_INTERNAL_MALLOC( bytes ); + KE_TRACE(10, ("%%%%%% MALLOC( %d )\n", bytes)); + orig_addr = (void *)KMP_INTERNAL_MALLOC(bytes); #endif /* THREAD_ALLOC_FOR_TASKQ */ - if (orig_addr == 0) - KMP_FATAL( OutOfHeapMemory ); + if (orig_addr == 0) + KMP_FATAL(OutOfHeapMemory); - addr = orig_addr; + addr = orig_addr; - if (((kmp_uintptr_t) addr & ( CACHE_LINE - 1 )) != 0) { - KB_TRACE( 50, ("__kmp_taskq_allocate: adjust for cache alignment\n" ) ); - addr = (void *) (((kmp_uintptr_t) addr + CACHE_LINE) & ~( CACHE_LINE - 1 )); - } + if (((kmp_uintptr_t)addr & (CACHE_LINE - 1)) != 0) { + KB_TRACE(50, ("__kmp_taskq_allocate: adjust for cache alignment\n")); + addr = (void *)(((kmp_uintptr_t)addr + CACHE_LINE) & ~(CACHE_LINE - 1)); + } - (* (void **) addr) = orig_addr; + (*(void **)addr) = orig_addr; - KB_TRACE( 10, ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, gtid: %d\n", - orig_addr, ((void **) addr) + 1, ((char *)(((void **) addr) + 1)) + size-1, - (int) size, global_tid )); + KB_TRACE(10, + ("__kmp_taskq_allocate: allocate: %p, use: %p - %p, size: %d, " + "gtid: %d\n", + orig_addr, ((void **)addr) + 1, + ((char *)(((void **)addr) + 1)) + size - 1, (int)size, global_tid)); - return ( ((void **) addr) + 1 ); + return (((void **)addr) + 1); } -static void -__kmpc_taskq_free(void *p, kmp_int32 global_tid) -{ - KB_TRACE( 5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid ) ); +static void __kmpc_taskq_free(void *p, kmp_int32 global_tid) { + KB_TRACE(5, ("__kmpc_taskq_free: called addr=%p, gtid=%d\n", p, global_tid)); - KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", (*( ((void **) p)-1)), global_tid )); + KB_TRACE(10, ("__kmpc_taskq_free: freeing: %p, gtid: %d\n", + (*(((void **)p) - 1)), global_tid)); #ifdef THREAD_ALLOC_FOR_TASKQ - __kmp_thread_free( __kmp_thread_from_gtid(global_tid), *( ((void **) p)-1) ); + __kmp_thread_free(__kmp_thread_from_gtid(global_tid), *(((void **)p) - 1)); #else - KMP_INTERNAL_FREE( *( ((void **) p)-1) ); + KMP_INTERNAL_FREE(*(((void **)p) - 1)); #endif /* THREAD_ALLOC_FOR_TASKQ */ } -/* --------------------------------------------------------------------------- */ - -/* - * Keep freed kmpc_task_queue_t on an internal freelist and recycle since - * they're of constant size. - */ +/* Keep freed kmpc_task_queue_t on an internal freelist and recycle since + they're of constant size. 
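// [Editor's illustrative sketch -- not part of this patch.] Recycling
// fixed-size records through an intrusive singly linked freelist, as the
// allocator below does for kmpc_task_queue_t, reduces to a pop on allocation
// and a push on release (locking is omitted here; the runtime guards the
// list with tq_freelist_lck). Invented toy_* names:
#include <cstdlib>
struct toy_queue {
  toy_queue *next_free; // link reused only while the record is on the freelist
  // ... constant-size payload fields ...
};

static toy_queue *toy_freelist = NULL;

static toy_queue *toy_queue_alloc() {
  if (toy_freelist != NULL) {     // reuse a previously freed record
    toy_queue *q = toy_freelist;
    toy_freelist = q->next_free;
    return q;
  }
  return (toy_queue *)std::malloc(sizeof(toy_queue));
}
static void toy_queue_release(toy_queue *q) {
  q->next_free = toy_freelist;    // push back for later reuse
  toy_freelist = q;
}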
*/ static kmpc_task_queue_t * -__kmp_alloc_taskq ( kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, kmp_int32 nthunks, - kmp_int32 nshareds, kmp_int32 nproc, size_t sizeof_thunk, - size_t sizeof_shareds, kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid ) -{ - kmp_int32 i; - size_t bytes; - kmpc_task_queue_t *new_queue; - kmpc_aligned_shared_vars_t *shared_var_array; - char *shared_var_storage; - char *pt; /* for doing byte-adjusted address computations */ +__kmp_alloc_taskq(kmp_taskq_t *tq, int in_parallel, kmp_int32 nslots, + kmp_int32 nthunks, kmp_int32 nshareds, kmp_int32 nproc, + size_t sizeof_thunk, size_t sizeof_shareds, + kmpc_thunk_t **new_taskq_thunk, kmp_int32 global_tid) { + kmp_int32 i; + size_t bytes; + kmpc_task_queue_t *new_queue; + kmpc_aligned_shared_vars_t *shared_var_array; + char *shared_var_storage; + char *pt; /* for doing byte-adjusted address computations */ - __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); + __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - if( tq->tq_freelist ) { - new_queue = tq -> tq_freelist; - tq -> tq_freelist = tq -> tq_freelist -> tq.tq_next_free; + if (tq->tq_freelist) { + new_queue = tq->tq_freelist; + tq->tq_freelist = tq->tq_freelist->tq.tq_next_free; - KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED); + KMP_DEBUG_ASSERT(new_queue->tq_flags & TQF_DEALLOCATED); - new_queue->tq_flags = 0; + new_queue->tq_flags = 0; - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); - } - else { - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + __kmp_release_lock(&tq->tq_freelist_lck, global_tid); + } else { + __kmp_release_lock(&tq->tq_freelist_lck, global_tid); - new_queue = (kmpc_task_queue_t *) __kmp_taskq_allocate (sizeof (kmpc_task_queue_t), global_tid); - new_queue->tq_flags = 0; - } + new_queue = (kmpc_task_queue_t *)__kmp_taskq_allocate( + sizeof(kmpc_task_queue_t), global_tid); + new_queue->tq_flags = 0; + } - /* space in the task queue for queue slots (allocate as one big chunk */ - /* of storage including new_taskq_task space) */ + /* space in the task queue for queue slots (allocate as one big chunk */ + /* of storage including new_taskq_task space) */ - sizeof_thunk += (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */ - pt = (char *) __kmp_taskq_allocate (nthunks * sizeof_thunk, global_tid); - new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; - *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); + sizeof_thunk += + (CACHE_LINE - (sizeof_thunk % CACHE_LINE)); /* pad to cache line size */ + pt = (char *)__kmp_taskq_allocate(nthunks * sizeof_thunk, global_tid); + new_queue->tq_thunk_space = (kmpc_thunk_t *)pt; + *new_taskq_thunk = (kmpc_thunk_t *)(pt + (nthunks - 1) * sizeof_thunk); - /* chain the allocated thunks into a freelist for this queue */ + /* chain the allocated thunks into a freelist for this queue */ - new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; + new_queue->tq_free_thunks = (kmpc_thunk_t *)pt; - for (i = 0; i < (nthunks - 2); i++) { - ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th.th_next_free = (kmpc_thunk_t *)(pt + (i+1)*sizeof_thunk); + for (i = 0; i < (nthunks - 2); i++) { + ((kmpc_thunk_t *)(pt + i * 
sizeof_thunk))->th.th_next_free = + (kmpc_thunk_t *)(pt + (i + 1) * sizeof_thunk); #ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt+i*sizeof_thunk))->th_flags = TQF_DEALLOCATED; + ((kmpc_thunk_t *)(pt + i * sizeof_thunk))->th_flags = TQF_DEALLOCATED; #endif - } + } - ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th.th_next_free = NULL; + ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th.th_next_free = NULL; #ifdef KMP_DEBUG - ((kmpc_thunk_t *)(pt+(nthunks-2)*sizeof_thunk))->th_flags = TQF_DEALLOCATED; + ((kmpc_thunk_t *)(pt + (nthunks - 2) * sizeof_thunk))->th_flags = + TQF_DEALLOCATED; #endif - /* initialize the locks */ - - if (in_parallel) { - __kmp_init_lock( & new_queue->tq_link_lck ); - __kmp_init_lock( & new_queue->tq_free_thunks_lck ); - __kmp_init_lock( & new_queue->tq_queue_lck ); - } + /* initialize the locks */ - /* now allocate the slots */ + if (in_parallel) { + __kmp_init_lock(&new_queue->tq_link_lck); + __kmp_init_lock(&new_queue->tq_free_thunks_lck); + __kmp_init_lock(&new_queue->tq_queue_lck); + } - bytes = nslots * sizeof (kmpc_aligned_queue_slot_t); - new_queue->tq_queue = (kmpc_aligned_queue_slot_t *) __kmp_taskq_allocate( bytes, global_tid ); + /* now allocate the slots */ - /* space for array of pointers to shared variable structures */ - sizeof_shareds += sizeof(kmpc_task_queue_t *); - sizeof_shareds += (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */ + bytes = nslots * sizeof(kmpc_aligned_queue_slot_t); + new_queue->tq_queue = + (kmpc_aligned_queue_slot_t *)__kmp_taskq_allocate(bytes, global_tid); - bytes = nshareds * sizeof (kmpc_aligned_shared_vars_t); - shared_var_array = (kmpc_aligned_shared_vars_t *) __kmp_taskq_allocate ( bytes, global_tid); + /* space for array of pointers to shared variable structures */ + sizeof_shareds += sizeof(kmpc_task_queue_t *); + sizeof_shareds += + (CACHE_LINE - (sizeof_shareds % CACHE_LINE)); /* pad to cache line size */ - bytes = nshareds * sizeof_shareds; - shared_var_storage = (char *) __kmp_taskq_allocate ( bytes, global_tid); + bytes = nshareds * sizeof(kmpc_aligned_shared_vars_t); + shared_var_array = + (kmpc_aligned_shared_vars_t *)__kmp_taskq_allocate(bytes, global_tid); - for (i=0; isv_queue = new_queue; - } - new_queue->tq_shareds = shared_var_array; + bytes = nshareds * sizeof_shareds; + shared_var_storage = (char *)__kmp_taskq_allocate(bytes, global_tid); + for (i = 0; i < nshareds; i++) { + shared_var_array[i].ai_data = + (kmpc_shared_vars_t *)(shared_var_storage + i * sizeof_shareds); + shared_var_array[i].ai_data->sv_queue = new_queue; + } + new_queue->tq_shareds = shared_var_array; - /* array for number of outstanding thunks per thread */ + /* array for number of outstanding thunks per thread */ - if (in_parallel) { - bytes = nproc * sizeof(kmpc_aligned_int32_t); - new_queue->tq_th_thunks = (kmpc_aligned_int32_t *) __kmp_taskq_allocate ( bytes, global_tid); - new_queue->tq_nproc = nproc; + if (in_parallel) { + bytes = nproc * sizeof(kmpc_aligned_int32_t); + new_queue->tq_th_thunks = + (kmpc_aligned_int32_t *)__kmp_taskq_allocate(bytes, global_tid); + new_queue->tq_nproc = nproc; - for (i=0; itq_th_thunks[i].ai_data = 0; - } + for (i = 0; i < nproc; i++) + new_queue->tq_th_thunks[i].ai_data = 0; + } - return new_queue; + return new_queue; } -static void -__kmp_free_taskq (kmp_taskq_t *tq, kmpc_task_queue_t *p, int in_parallel, kmp_int32 global_tid) -{ - __kmpc_taskq_free(p->tq_thunk_space, global_tid); - __kmpc_taskq_free(p->tq_queue, global_tid); +static void __kmp_free_taskq(kmp_taskq_t 
*tq, kmpc_task_queue_t *p, + int in_parallel, kmp_int32 global_tid) { + __kmpc_taskq_free(p->tq_thunk_space, global_tid); + __kmpc_taskq_free(p->tq_queue, global_tid); - /* free shared var structure storage */ - __kmpc_taskq_free((void *) p->tq_shareds[0].ai_data, global_tid); + /* free shared var structure storage */ + __kmpc_taskq_free((void *)p->tq_shareds[0].ai_data, global_tid); - /* free array of pointers to shared vars storage */ - __kmpc_taskq_free(p->tq_shareds, global_tid); + /* free array of pointers to shared vars storage */ + __kmpc_taskq_free(p->tq_shareds, global_tid); #ifdef KMP_DEBUG - p->tq_first_child = NULL; - p->tq_next_child = NULL; - p->tq_prev_child = NULL; - p->tq_ref_count = -10; - p->tq_shareds = NULL; - p->tq_tasknum_queuing = 0; - p->tq_tasknum_serving = 0; - p->tq_queue = NULL; - p->tq_thunk_space = NULL; - p->tq_taskq_slot = NULL; - p->tq_free_thunks = NULL; - p->tq_nslots = 0; - p->tq_head = 0; - p->tq_tail = 0; - p->tq_nfull = 0; - p->tq_hiwat = 0; - - if (in_parallel) { - int i; + p->tq_first_child = NULL; + p->tq_next_child = NULL; + p->tq_prev_child = NULL; + p->tq_ref_count = -10; + p->tq_shareds = NULL; + p->tq_tasknum_queuing = 0; + p->tq_tasknum_serving = 0; + p->tq_queue = NULL; + p->tq_thunk_space = NULL; + p->tq_taskq_slot = NULL; + p->tq_free_thunks = NULL; + p->tq_nslots = 0; + p->tq_head = 0; + p->tq_tail = 0; + p->tq_nfull = 0; + p->tq_hiwat = 0; + + if (in_parallel) { + int i; - for (i=0; itq_nproc; i++) - p->tq_th_thunks[i].ai_data = 0; - } - if ( __kmp_env_consistency_check ) - p->tq_loc = NULL; - KMP_DEBUG_ASSERT( p->tq_flags & TQF_DEALLOCATED ); - p->tq_flags = TQF_DEALLOCATED; + for (i = 0; i < p->tq_nproc; i++) + p->tq_th_thunks[i].ai_data = 0; + } + if (__kmp_env_consistency_check) + p->tq_loc = NULL; + KMP_DEBUG_ASSERT(p->tq_flags & TQF_DEALLOCATED); + p->tq_flags = TQF_DEALLOCATED; #endif /* KMP_DEBUG */ - if (in_parallel) { - __kmpc_taskq_free(p->tq_th_thunks, global_tid); - __kmp_destroy_lock(& p->tq_link_lck); - __kmp_destroy_lock(& p->tq_queue_lck); - __kmp_destroy_lock(& p->tq_free_thunks_lck); - } + if (in_parallel) { + __kmpc_taskq_free(p->tq_th_thunks, global_tid); + __kmp_destroy_lock(&p->tq_link_lck); + __kmp_destroy_lock(&p->tq_queue_lck); + __kmp_destroy_lock(&p->tq_free_thunks_lck); + } #ifdef KMP_DEBUG - p->tq_th_thunks = NULL; + p->tq_th_thunks = NULL; #endif /* KMP_DEBUG */ - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - __kmp_acquire_lock( & tq->tq_freelist_lck, global_tid ); - p->tq.tq_next_free = tq->tq_freelist; + __kmp_acquire_lock(&tq->tq_freelist_lck, global_tid); + p->tq.tq_next_free = tq->tq_freelist; - tq->tq_freelist = p; - __kmp_release_lock( & tq->tq_freelist_lck, global_tid ); + tq->tq_freelist = p; + __kmp_release_lock(&tq->tq_freelist_lck, global_tid); } -/* - * Once a group of thunks has been allocated for use in a particular queue, - * these are managed via a per-queue freelist. - * We force a check that there's always a thunk free if we need one. - */ - -static kmpc_thunk_t * -__kmp_alloc_thunk (kmpc_task_queue_t *queue, int in_parallel, kmp_int32 global_tid) -{ - kmpc_thunk_t *fl; +/* Once a group of thunks has been allocated for use in a particular queue, + these are managed via a per-queue freelist. 
+ We force a check that there's always a thunk free if we need one. */ - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); +static kmpc_thunk_t *__kmp_alloc_thunk(kmpc_task_queue_t *queue, + int in_parallel, kmp_int32 global_tid) { + kmpc_thunk_t *fl; - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } + if (in_parallel) { + __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + } - fl = queue->tq_free_thunks; + fl = queue->tq_free_thunks; - KMP_DEBUG_ASSERT (fl != NULL); + KMP_DEBUG_ASSERT(fl != NULL); - queue->tq_free_thunks = fl->th.th_next_free; - fl->th_flags = 0; + queue->tq_free_thunks = fl->th.th_next_free; + fl->th_flags = 0; - if (in_parallel) - __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); + if (in_parallel) + __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); - return fl; + return fl; } -static void -__kmp_free_thunk (kmpc_task_queue_t *queue, kmpc_thunk_t *p, int in_parallel, kmp_int32 global_tid) -{ +static void __kmp_free_thunk(kmpc_task_queue_t *queue, kmpc_thunk_t *p, + int in_parallel, kmp_int32 global_tid) { #ifdef KMP_DEBUG - p->th_task = 0; - p->th_encl_thunk = 0; - p->th_status = 0; - p->th_tasknum = 0; - /* Also could zero pointers to private vars */ + p->th_task = 0; + p->th_encl_thunk = 0; + p->th_status = 0; + p->th_tasknum = 0; +/* Also could zero pointers to private vars */ #endif - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_free_thunks_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } + if (in_parallel) { + __kmp_acquire_lock(&queue->tq_free_thunks_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + } - p->th.th_next_free = queue->tq_free_thunks; - queue->tq_free_thunks = p; + p->th.th_next_free = queue->tq_free_thunks; + queue->tq_free_thunks = p; #ifdef KMP_DEBUG - p->th_flags = TQF_DEALLOCATED; + p->th_flags = TQF_DEALLOCATED; #endif - if (in_parallel) - __kmp_release_lock(& queue->tq_free_thunks_lck, global_tid); + if (in_parallel) + __kmp_release_lock(&queue->tq_free_thunks_lck, global_tid); } -/* --------------------------------------------------------------------------- */ - /* returns nonzero if the queue just became full after the enqueue */ +static kmp_int32 __kmp_enqueue_task(kmp_taskq_t *tq, kmp_int32 global_tid, + kmpc_task_queue_t *queue, + kmpc_thunk_t *thunk, int in_parallel) { + kmp_int32 ret; + + /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the + * master is executing then) */ + if (in_parallel) { + __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + } -static kmp_int32 -__kmp_enqueue_task ( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, kmpc_thunk_t *thunk, int in_parallel ) -{ - kmp_int32 ret; - - /* dkp: can we get around the lock in the TQF_RELEASE_WORKERS case (only the master is executing 
then) */ - if (in_parallel) { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } - - KMP_DEBUG_ASSERT (queue->tq_nfull < queue->tq_nslots); /* check queue not full */ - - queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; - - if (queue->tq_head >= queue->tq_nslots) - queue->tq_head = 0; + KMP_DEBUG_ASSERT(queue->tq_nfull < queue->tq_nslots); // check queue not full - (queue->tq_nfull)++; + queue->tq_queue[(queue->tq_head)++].qs_thunk = thunk; - KMP_MB(); /* to assure that nfull is seen to increase before TQF_ALL_TASKS_QUEUED is set */ + if (queue->tq_head >= queue->tq_nslots) + queue->tq_head = 0; - ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE; + (queue->tq_nfull)++; - if (in_parallel) { - /* don't need to wait until workers are released before unlocking */ - __kmp_release_lock(& queue->tq_queue_lck, global_tid); + KMP_MB(); /* to assure that nfull is seen to increase before + TQF_ALL_TASKS_QUEUED is set */ - if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { - /* If just creating the root queue, the worker threads are waiting at */ - /* a join barrier until now, when there's something in the queue for */ - /* them to do; release them now to do work. */ - /* This should only be done when this is the first task enqueued, */ - /* so reset the flag here also. */ + ret = (in_parallel) ? (queue->tq_nfull == queue->tq_nslots) : FALSE; - tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers are still in spin mode */ + if (in_parallel) { + /* don't need to wait until workers are released before unlocking */ + __kmp_release_lock(&queue->tq_queue_lck, global_tid); - KMP_MB(); /* avoid releasing barrier twice if taskq_task switches threads */ + if (tq->tq_global_flags & TQF_RELEASE_WORKERS) { + // If just creating the root queue, the worker threads are waiting at a + // join barrier until now, when there's something in the queue for them to + // do; release them now to do work. This should only be done when this is + // the first task enqueued, so reset the flag here also. 
+ tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; /* no lock needed, workers + are still in spin mode */ + // avoid releasing barrier twice if taskq_task switches threads + KMP_MB(); - __kmpc_end_barrier_master( NULL, global_tid); - } + __kmpc_end_barrier_master(NULL, global_tid); } + } - return ret; + return ret; } -static kmpc_thunk_t * -__kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel) -{ - kmpc_thunk_t *pt; - int tid = __kmp_tid_from_gtid( global_tid ); +static kmpc_thunk_t *__kmp_dequeue_task(kmp_int32 global_tid, + kmpc_task_queue_t *queue, + int in_parallel) { + kmpc_thunk_t *pt; + int tid = __kmp_tid_from_gtid(global_tid); - KMP_DEBUG_ASSERT (queue->tq_nfull > 0); /* check queue not empty */ + KMP_DEBUG_ASSERT(queue->tq_nfull > 0); /* check queue not empty */ - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - ct = ++(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - } + if (queue->tq.tq_parent != NULL && in_parallel) { + int ct; + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + ct = ++(queue->tq_ref_count); + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); + } - pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; + pt = queue->tq_queue[(queue->tq_tail)++].qs_thunk; - if (queue->tq_tail >= queue->tq_nslots) - queue->tq_tail = 0; + if (queue->tq_tail >= queue->tq_nslots) + queue->tq_tail = 0; - if (in_parallel) { - queue->tq_th_thunks[tid].ai_data++; + if (in_parallel) { + queue->tq_th_thunks[tid].ai_data++; - KMP_MB(); /* necessary so ai_data increment is propagated to other threads immediately (digital) */ + KMP_MB(); /* necessary so ai_data increment is propagated to other threads + immediately (digital) */ - KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding thunks from queue %p\n", - global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue)); - } + KF_TRACE(200, ("__kmp_dequeue_task: T#%d(:%d) now has %d outstanding " + "thunks from queue %p\n", + global_tid, tid, queue->tq_th_thunks[tid].ai_data, queue)); + } - (queue->tq_nfull)--; + (queue->tq_nfull)--; #ifdef KMP_DEBUG - KMP_MB(); + KMP_MB(); - /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is decremented */ + /* necessary so (queue->tq_nfull > 0) above succeeds after tq_nfull is + * decremented */ - KMP_DEBUG_ASSERT(queue->tq_nfull >= 0); + KMP_DEBUG_ASSERT(queue->tq_nfull >= 0); - if (in_parallel) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= __KMP_TASKQ_THUNKS_PER_TH); - } + if (in_parallel) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data <= + __KMP_TASKQ_THUNKS_PER_TH); + } #endif - return pt; + return pt; } -/* - * Find the next (non-null) task to dequeue and return it. +/* Find the next (non-null) task to dequeue and return it. * This is never called unless in_parallel=TRUE * * Here are the rules for deciding which queue to take the task from: @@ -792,1241 +778,1252 @@ __kmp_dequeue_task (kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_paral * TQF_IS_LASTPRIVATE). 
*/ -static kmpc_thunk_t * -__kmp_find_task_in_queue (kmp_int32 global_tid, kmpc_task_queue_t *queue) -{ - kmpc_thunk_t *pt = NULL; - int tid = __kmp_tid_from_gtid( global_tid ); - - /* To prevent deadlock from tq_queue_lck if queue already deallocated */ - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - /* Check again to avoid race in __kmpc_end_taskq() */ - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - if ((queue->tq_taskq_slot != NULL) && (queue->tq_nfull <= queue->tq_hiwat)) { - /* if there's enough room in the queue and the dispatcher */ - /* (taskq task) is available, schedule more tasks */ - pt = (kmpc_thunk_t *) queue->tq_taskq_slot; - queue->tq_taskq_slot = NULL; - } - else if (queue->tq_nfull == 0 || - queue->tq_th_thunks[tid].ai_data >= __KMP_TASKQ_THUNKS_PER_TH) { - /* do nothing if no thunks available or this thread can't */ - /* run any because it already is executing too many */ - - pt = NULL; - } - else if (queue->tq_nfull > 1) { - /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - } - else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) { - /* one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - } - else if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. */ - - pt = __kmp_dequeue_task (global_tid, queue, TRUE); - pt->th_flags |= TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */ - } - } - - /* GEH - What happens here if is lastprivate, but not last task? 
*/ - __kmp_release_lock(& queue->tq_queue_lck, global_tid); +static kmpc_thunk_t *__kmp_find_task_in_queue(kmp_int32 global_tid, + kmpc_task_queue_t *queue) { + kmpc_thunk_t *pt = NULL; + int tid = __kmp_tid_from_gtid(global_tid); + + /* To prevent deadlock from tq_queue_lck if queue already deallocated */ + if (!(queue->tq_flags & TQF_DEALLOCATED)) { + + __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); + + /* Check again to avoid race in __kmpc_end_taskq() */ + if (!(queue->tq_flags & TQF_DEALLOCATED)) { + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + + if ((queue->tq_taskq_slot != NULL) && + (queue->tq_nfull <= queue->tq_hiwat)) { + /* if there's enough room in the queue and the dispatcher */ + /* (taskq task) is available, schedule more tasks */ + pt = (kmpc_thunk_t *)queue->tq_taskq_slot; + queue->tq_taskq_slot = NULL; + } else if (queue->tq_nfull == 0 || + queue->tq_th_thunks[tid].ai_data >= + __KMP_TASKQ_THUNKS_PER_TH) { + /* do nothing if no thunks available or this thread can't */ + /* run any because it already is executing too many */ + pt = NULL; + } else if (queue->tq_nfull > 1) { + /* always safe to schedule a task even if TQF_IS_LASTPRIVATE */ + + pt = __kmp_dequeue_task(global_tid, queue, TRUE); + } else if (!(queue->tq_flags & TQF_IS_LASTPRIVATE)) { + // one thing in queue, always safe to schedule if !TQF_IS_LASTPRIVATE + pt = __kmp_dequeue_task(global_tid, queue, TRUE); + } else if (queue->tq_flags & TQF_IS_LAST_TASK) { + /* TQF_IS_LASTPRIVATE, one thing in queue, kmpc_end_taskq_task() */ + /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ + /* instrumentation does copy-out. */ + pt = __kmp_dequeue_task(global_tid, queue, TRUE); + pt->th_flags |= + TQF_IS_LAST_TASK; /* don't need test_then_or since already locked */ + } } - return pt; + /* GEH - What happens here if is lastprivate, but not last task? */ + __kmp_release_lock(&queue->tq_queue_lck, global_tid); + } + + return pt; } -/* - * Walk a tree of queues starting at queue's first child - * and return a non-NULL thunk if one can be scheduled. - * Must only be called when in_parallel=TRUE - */ +/* Walk a tree of queues starting at queue's first child and return a non-NULL + thunk if one can be scheduled. 
Must only be called when in_parallel=TRUE */ static kmpc_thunk_t * -__kmp_find_task_in_descendant_queue (kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) -{ - kmpc_thunk_t *pt = NULL; - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; - if (queue == NULL) { - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - return NULL; - } - - while (queue != NULL) { - int ct; - kmpc_task_queue_t *next; - - ct= ++(queue->tq_ref_count); - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - - pt = __kmp_find_task_in_queue (global_tid, queue); +__kmp_find_task_in_descendant_queue(kmp_int32 global_tid, + kmpc_task_queue_t *curr_queue) { + kmpc_thunk_t *pt = NULL; + kmpc_task_queue_t *queue = curr_queue; + + if (curr_queue->tq_first_child != NULL) { + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - if (pt != NULL) { - int ct; + queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; + if (queue == NULL) { + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); + return NULL; + } - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + while (queue != NULL) { + int ct; + kmpc_task_queue_t *next; - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + ct = ++(queue->tq_ref_count); + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); + pt = __kmp_find_task_in_queue(global_tid, queue); - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + if (pt != NULL) { + int ct; - return pt; - } + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - /* although reference count stays active during descendant walk, shouldn't matter */ - /* since if children still exist, reference counts aren't being monitored anyway */ + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, + global_tid, queue, ct)); + KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0); - pt = __kmp_find_task_in_descendant_queue (global_tid, queue); + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - if (pt != NULL) { - int ct; + return pt; + } - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + /* although reference count stays active during descendant walk, shouldn't + matter since if children still exist, reference counts aren't being + monitored anyway */ - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for 
IBM/RS6000 */ + pt = __kmp_find_task_in_descendant_queue(global_tid, queue); - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); + if (pt != NULL) { + int ct; - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - return pt; - } + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, + global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + return pt; + } - next = queue->tq_next_child; + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); + next = queue->tq_next_child; - queue = next; - } + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + queue = next; } - return pt; -} + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); + } -/* - * Walk up the taskq tree looking for a task to execute. - * If we get to the root, search the tree for a descendent queue task. - * Must only be called when in_parallel=TRUE - */ + return pt; +} +/* Walk up the taskq tree looking for a task to execute. If we get to the root, + search the tree for a descendent queue task. 
Must only be called when + in_parallel=TRUE */ static kmpc_thunk_t * -__kmp_find_task_in_ancestor_queue (kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) -{ - kmpc_task_queue_t *queue; - kmpc_thunk_t *pt; - - pt = NULL; - - if (curr_queue->tq.tq_parent != NULL) { - queue = curr_queue->tq.tq_parent; - - while (queue != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - ct = ++(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); - } +__kmp_find_task_in_ancestor_queue(kmp_taskq_t *tq, kmp_int32 global_tid, + kmpc_task_queue_t *curr_queue) { + kmpc_task_queue_t *queue; + kmpc_thunk_t *pt; - pt = __kmp_find_task_in_queue (global_tid, queue); - if (pt != NULL) { - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + pt = NULL; - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ + if (curr_queue->tq.tq_parent != NULL) { + queue = curr_queue->tq.tq_parent; - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - } - - return pt; - } + while (queue != NULL) { + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - if (queue->tq.tq_parent != NULL) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + ct = ++(queue->tq_ref_count); + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", __LINE__, + global_tid, queue, ct)); + } + + pt = __kmp_find_task_in_queue(global_tid, queue); + if (pt != NULL) { + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, + global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); + + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + } - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + return pt; + } - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - } - queue = queue->tq.tq_parent; + if (queue->tq.tq_parent != NULL) { + int ct; + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - if 
(queue != NULL) - __kmp_release_lock(& queue->tq_link_lck, global_tid); - } + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", __LINE__, + global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); + } + queue = queue->tq.tq_parent; + if (queue != NULL) + __kmp_release_lock(&queue->tq_link_lck, global_tid); } + } - pt = __kmp_find_task_in_descendant_queue( global_tid, tq->tq_root ); + pt = __kmp_find_task_in_descendant_queue(global_tid, tq->tq_root); - return pt; + return pt; } -static int -__kmp_taskq_tasks_finished (kmpc_task_queue_t *queue) -{ - int i; +static int __kmp_taskq_tasks_finished(kmpc_task_queue_t *queue) { + int i; - /* KMP_MB(); *//* is this really necessary? */ + /* KMP_MB(); */ /* is this really necessary? */ - for (i=0; itq_nproc; i++) { - if (queue->tq_th_thunks[i].ai_data != 0) - return FALSE; - } + for (i = 0; i < queue->tq_nproc; i++) { + if (queue->tq_th_thunks[i].ai_data != 0) + return FALSE; + } - return TRUE; + return TRUE; } -static int -__kmp_taskq_has_any_children (kmpc_task_queue_t *queue) -{ - return (queue->tq_first_child != NULL); +static int __kmp_taskq_has_any_children(kmpc_task_queue_t *queue) { + return (queue->tq_first_child != NULL); } -static void -__kmp_remove_queue_from_tree( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue, int in_parallel ) -{ +static void __kmp_remove_queue_from_tree(kmp_taskq_t *tq, kmp_int32 global_tid, + kmpc_task_queue_t *queue, + int in_parallel) { #ifdef KMP_DEBUG - kmp_int32 i; - kmpc_thunk_t *thunk; + kmp_int32 i; + kmpc_thunk_t *thunk; #endif - KF_TRACE(50, ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue( tq, queue, global_tid )); - - /* sub-queue in a recursion, not the root task queue */ - KMP_DEBUG_ASSERT (queue->tq.tq_parent != NULL); - - if (in_parallel) { - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } + KF_TRACE(50, + ("Before Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); + KF_DUMP(50, __kmp_dump_task_queue(tq, queue, global_tid)); - KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); + /* sub-queue in a recursion, not the root task queue */ + KMP_DEBUG_ASSERT(queue->tq.tq_parent != NULL); - /* unlink queue from its siblings if any at this level */ - if (queue->tq_prev_child != NULL) - queue->tq_prev_child->tq_next_child = queue->tq_next_child; - if (queue->tq_next_child != NULL) - queue->tq_next_child->tq_prev_child = queue->tq_prev_child; - if (queue->tq.tq_parent->tq_first_child == queue) - queue->tq.tq_parent->tq_first_child = queue->tq_next_child; + if (in_parallel) { + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); + } - queue->tq_prev_child = NULL; - queue->tq_next_child = NULL; + KMP_DEBUG_ASSERT(queue->tq_first_child == NULL); - if (in_parallel) { - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n", - __LINE__, global_tid, queue, queue->tq_ref_count)); + /* unlink queue from its siblings if any at this level */ + if (queue->tq_prev_child != NULL) + queue->tq_prev_child->tq_next_child = queue->tq_next_child; + if (queue->tq_next_child != NULL) + queue->tq_next_child->tq_prev_child 
= queue->tq_prev_child; + if (queue->tq.tq_parent->tq_first_child == queue) + queue->tq.tq_parent->tq_first_child = queue->tq_next_child; - /* wait until all other threads have stopped accessing this queue */ - while (queue->tq_ref_count > 1) { - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + queue->tq_prev_child = NULL; + queue->tq_next_child = NULL; - KMP_WAIT_YIELD((volatile kmp_uint32*)&queue->tq_ref_count, 1, KMP_LE, NULL); + if (in_parallel) { + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p waiting for ref_count of %d to reach 1\n", + __LINE__, global_tid, queue, queue->tq_ref_count)); - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + /* wait until all other threads have stopped accessing this queue */ + while (queue->tq_ref_count > 1) { + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - } + KMP_WAIT_YIELD((volatile kmp_uint32 *)&queue->tq_ref_count, 1, KMP_LE, + NULL); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); } - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p freeing queue\n", - __LINE__, global_tid, queue)); + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + } + + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p freeing queue\n", __LINE__, global_tid, queue)); #ifdef KMP_DEBUG - KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED); - KMP_DEBUG_ASSERT(queue->tq_nfull == 0); + KMP_DEBUG_ASSERT(queue->tq_flags & TQF_ALL_TASKS_QUEUED); + KMP_DEBUG_ASSERT(queue->tq_nfull == 0); - for (i=0; itq_nproc; i++) { - KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); - } + for (i = 0; i < queue->tq_nproc; i++) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); + } - i = 0; - for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) - ++i; + i = 0; + for (thunk = queue->tq_free_thunks; thunk != NULL; + thunk = thunk->th.th_next_free) + ++i; - KMP_ASSERT (i == queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)); + KMP_ASSERT(i == + queue->tq_nslots + (queue->tq_nproc * __KMP_TASKQ_THUNKS_PER_TH)); #endif - /* release storage for queue entry */ - __kmp_free_taskq ( tq, queue, TRUE, global_tid ); + /* release storage for queue entry */ + __kmp_free_taskq(tq, queue, TRUE, global_tid); - KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); + KF_TRACE(50, ("After Deletion of TaskQ at %p on (%d):\n", queue, global_tid)); + KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); } -/* - * Starting from indicated queue, proceed downward through tree and - * remove all taskqs which are finished, but only go down to taskqs - * which have the "nowait" clause present. Assume this is only called - * when in_parallel=TRUE. 
- */ - -static void -__kmp_find_and_remove_finished_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue ) -{ - kmpc_task_queue_t *queue = curr_queue; - - if (curr_queue->tq_first_child != NULL) { - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); +/* Starting from indicated queue, proceed downward through tree and remove all + taskqs which are finished, but only go down to taskqs which have the "nowait" + clause present. Assume this is only called when in_parallel=TRUE. */ - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ +static void __kmp_find_and_remove_finished_child_taskq( + kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *curr_queue) { + kmpc_task_queue_t *queue = curr_queue; - queue = (kmpc_task_queue_t *) curr_queue->tq_first_child; - if (queue != NULL) { - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - return; - } + if (curr_queue->tq_first_child != NULL) { + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this call for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - while (queue != NULL) { - kmpc_task_queue_t *next; - int ct = ++(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p inc %d\n", - __LINE__, global_tid, queue, ct)); + queue = (kmpc_task_queue_t *)curr_queue->tq_first_child; + if (queue != NULL) { + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); + return; + } + while (queue != NULL) { + kmpc_task_queue_t *next; + int ct = ++(queue->tq_ref_count); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p inc %d\n", __LINE__, global_tid, queue, ct)); - /* although reference count stays active during descendant walk, */ - /* shouldn't matter since if children still exist, reference */ - /* counts aren't being monitored anyway */ + /* although reference count stays active during descendant walk, */ + /* shouldn't matter since if children still exist, reference */ + /* counts aren't being monitored anyway */ - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_find_and_remove_finished_child_taskq ( tq, global_tid, queue ); + if (queue->tq_flags & TQF_IS_NOWAIT) { + __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); - if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && (queue->tq_nfull == 0) && - __kmp_taskq_tasks_finished(queue) && ! __kmp_taskq_has_any_children(queue)) { + if ((queue->tq_flags & TQF_ALL_TASKS_QUEUED) && + (queue->tq_nfull == 0) && __kmp_taskq_tasks_finished(queue) && + !__kmp_taskq_has_any_children(queue)) { - /* - Only remove this if we have not already marked it for deallocation. - This should prevent multiple threads from trying to free this. - */ + /* Only remove this if we have not already marked it for deallocation. + This should prevent multiple threads from trying to free this. 
*/ - if ( __kmp_test_lock(& queue->tq_queue_lck, global_tid) ) { - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_release_lock(& queue->tq_queue_lck, global_tid); + if (__kmp_test_lock(&queue->tq_queue_lck, global_tid)) { + if (!(queue->tq_flags & TQF_DEALLOCATED)) { + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_release_lock(&queue->tq_queue_lck, global_tid); - __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); + __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE); - /* Can't do any more here since can't be sure where sibling queue is so just exit this level */ - return; - } - else { - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } - } - /* otherwise, just fall through and decrement reference count */ - } + /* Can't do any more here since can't be sure where sibling queue + * is so just exit this level */ + return; + } else { + __kmp_release_lock(&queue->tq_queue_lck, global_tid); } + } + /* otherwise, just fall through and decrement reference count */ + } + } - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ - - next = queue->tq_next_child; + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - ct = --(queue->tq_ref_count); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); + next = queue->tq_next_child; - queue = next; - } + ct = --(queue->tq_ref_count); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); + queue = next; } -} -/* - * Starting from indicated queue, proceed downward through tree and - * remove all taskq's assuming all are finished and - * assuming NO other threads are executing at this point. - */ + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); + } +} -static void -__kmp_remove_all_child_taskq( kmp_taskq_t *tq, kmp_int32 global_tid, kmpc_task_queue_t *queue ) -{ - kmpc_task_queue_t *next_child; +/* Starting from indicated queue, proceed downward through tree and remove all + taskq's assuming all are finished and assuming NO other threads are executing + at this point. 
*/ +static void __kmp_remove_all_child_taskq(kmp_taskq_t *tq, kmp_int32 global_tid, + kmpc_task_queue_t *queue) { + kmpc_task_queue_t *next_child; - queue = (kmpc_task_queue_t *) queue->tq_first_child; + queue = (kmpc_task_queue_t *)queue->tq_first_child; - while (queue != NULL) { - __kmp_remove_all_child_taskq ( tq, global_tid, queue ); + while (queue != NULL) { + __kmp_remove_all_child_taskq(tq, global_tid, queue); - next_child = queue->tq_next_child; - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_remove_queue_from_tree ( tq, global_tid, queue, FALSE ); - queue = next_child; - } + next_child = queue->tq_next_child; + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_remove_queue_from_tree(tq, global_tid, queue, FALSE); + queue = next_child; + } } -static void -__kmp_execute_task_from_queue( kmp_taskq_t *tq, ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, int in_parallel ) -{ - kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue; - kmp_int32 tid = __kmp_tid_from_gtid( global_tid ); - - KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); - KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); - - /* - * For the taskq task, the curr_thunk pushes and pop pairs are set up as follows: - * - * happens exactly once: - * 1) __kmpc_taskq : push (if returning thunk only) - * 4) __kmpc_end_taskq_task : pop - * - * optionally happens *each* time taskq task is dequeued/enqueued: - * 2) __kmpc_taskq_task : pop - * 3) __kmp_execute_task_from_queue : push - * - * execution ordering: 1,(2,3)*,4 - */ - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - kmp_int32 index = (queue == tq->tq_root) ? tid : 0; - thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[index].ai_data; - - if ( __kmp_env_consistency_check ) { - __kmp_push_workshare( global_tid, - (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task, - queue->tq_loc ); - } - } - else { - if ( __kmp_env_consistency_check ) - __kmp_push_workshare( global_tid, ct_taskq, queue->tq_loc ); +static void __kmp_execute_task_from_queue(kmp_taskq_t *tq, ident_t *loc, + kmp_int32 global_tid, + kmpc_thunk_t *thunk, + int in_parallel) { + kmpc_task_queue_t *queue = thunk->th.th_shareds->sv_queue; + kmp_int32 tid = __kmp_tid_from_gtid(global_tid); + + KF_TRACE(100, ("After dequeueing this Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); + KF_TRACE(100, ("Task Queue: %p looks like this (%d):\n", queue, global_tid)); + KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); + + /* For the taskq task, the curr_thunk pushes and pop pairs are set up as + * follows: + * + * happens exactly once: + * 1) __kmpc_taskq : push (if returning thunk only) + * 4) __kmpc_end_taskq_task : pop + * + * optionally happens *each* time taskq task is dequeued/enqueued: + * 2) __kmpc_taskq_task : pop + * 3) __kmp_execute_task_from_queue : push + * + * execution ordering: 1,(2,3)*,4 + */ + + if (!(thunk->th_flags & TQF_TASKQ_TASK)) { + kmp_int32 index = (queue == tq->tq_root) ? tid : 0; + thunk->th.th_shareds = + (kmpc_shared_vars_t *)queue->tq_shareds[index].ai_data; + + if (__kmp_env_consistency_check) { + __kmp_push_workshare(global_tid, + (queue->tq_flags & TQF_IS_ORDERED) ? 
ct_task_ordered + : ct_task, + queue->tq_loc); } + } else { + if (__kmp_env_consistency_check) + __kmp_push_workshare(global_tid, ct_taskq, queue->tq_loc); + } + + if (in_parallel) { + thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; + tq->tq_curr_thunk[tid] = thunk; + + KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); + } + + KF_TRACE(50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, + queue, global_tid)); + thunk->th_task(global_tid, thunk); + KF_TRACE(50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, + global_tid)); + + if (!(thunk->th_flags & TQF_TASKQ_TASK)) { + if (__kmp_env_consistency_check) + __kmp_pop_workshare(global_tid, + (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered + : ct_task, + queue->tq_loc); if (in_parallel) { - thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = thunk; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; + KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); } - KF_TRACE( 50, ("Begin Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); - thunk->th_task (global_tid, thunk); - KF_TRACE( 50, ("End Executing Thunk %p from queue %p on (%d)\n", thunk, queue, global_tid)); - - if (!(thunk->th_flags & TQF_TASKQ_TASK)) { - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, (queue->tq_flags & TQF_IS_ORDERED) ? ct_task_ordered : ct_task, - queue->tq_loc ); - - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } - - if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) { - __kmp_taskq_check_ordered(global_tid, thunk); - } + if ((thunk->th_flags & TQF_IS_ORDERED) && in_parallel) { + __kmp_taskq_check_ordered(global_tid, thunk); + } - __kmp_free_thunk (queue, thunk, in_parallel, global_tid); + __kmp_free_thunk(queue, thunk, in_parallel, global_tid); - KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", global_tid, thunk)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + KF_TRACE(100, ("T#%d After freeing thunk: %p, TaskQ looks like this:\n", + global_tid, thunk)); + KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - if (in_parallel) { - KMP_MB(); /* needed so thunk put on free list before outstanding thunk count is decremented */ + if (in_parallel) { + KMP_MB(); /* needed so thunk put on free list before outstanding thunk + count is decremented */ - KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1); + KMP_DEBUG_ASSERT(queue->tq_th_thunks[tid].ai_data >= 1); - KF_TRACE( 200, ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n", - global_tid, queue->tq_th_thunks[tid].ai_data-1, queue)); + KF_TRACE( + 200, + ("__kmp_execute_task_from_queue: T#%d has %d thunks in queue %p\n", + global_tid, queue->tq_th_thunks[tid].ai_data - 1, queue)); - queue->tq_th_thunks[tid].ai_data--; + queue->tq_th_thunks[tid].ai_data--; - /* KMP_MB(); */ /* is MB really necessary ? */ - } + /* KMP_MB(); */ /* is MB really necessary ? 
*/ + } - if (queue->tq.tq_parent != NULL && in_parallel) { - int ct; - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - ct = --(queue->tq_ref_count); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p dec %d\n", - __LINE__, global_tid, queue, ct)); - KMP_DEBUG_ASSERT( ct >= 0 ); - } + if (queue->tq.tq_parent != NULL && in_parallel) { + int ct; + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + ct = --(queue->tq_ref_count); + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS( + ("line %d gtid %d: Q %p dec %d\n", __LINE__, global_tid, queue, ct)); + KMP_DEBUG_ASSERT(ct >= 0); } + } } -/* --------------------------------------------------------------------------- */ - /* starts a taskq; creates and returns a thunk for the taskq_task */ /* also, returns pointer to shared vars for this thread in "shareds" arg */ +kmpc_thunk_t *__kmpc_taskq(ident_t *loc, kmp_int32 global_tid, + kmpc_task_t taskq_task, size_t sizeof_thunk, + size_t sizeof_shareds, kmp_int32 flags, + kmpc_shared_vars_t **shareds) { + int in_parallel; + kmp_int32 nslots, nthunks, nshareds, nproc; + kmpc_task_queue_t *new_queue, *curr_queue; + kmpc_thunk_t *new_taskq_thunk; + kmp_info_t *th; + kmp_team_t *team; + kmp_taskq_t *tq; + kmp_int32 tid; + + KE_TRACE(10, ("__kmpc_taskq called (%d)\n", global_tid)); + + th = __kmp_threads[global_tid]; + team = th->th.th_team; + tq = &team->t.t_taskq; + nproc = team->t.t_nproc; + tid = __kmp_tid_from_gtid(global_tid); + + /* find out whether this is a parallel taskq or serialized one. */ + in_parallel = in_parallel_context(team); + + if (!tq->tq_root) { + if (in_parallel) { + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; -kmpc_thunk_t * -__kmpc_taskq( ident_t *loc, kmp_int32 global_tid, kmpc_task_t taskq_task, - size_t sizeof_thunk, size_t sizeof_shareds, - kmp_int32 flags, kmpc_shared_vars_t **shareds ) -{ - int in_parallel; - kmp_int32 nslots, nthunks, nshareds, nproc; - kmpc_task_queue_t *new_queue, *curr_queue; - kmpc_thunk_t *new_taskq_thunk; - kmp_info_t *th; - kmp_team_t *team; - kmp_taskq_t *tq; - kmp_int32 tid; - - KE_TRACE( 10, ("__kmpc_taskq called (%d)\n", global_tid)); - - th = __kmp_threads[ global_tid ]; - team = th -> th.th_team; - tq = & team -> t.t_taskq; - nproc = team -> t.t_nproc; - tid = __kmp_tid_from_gtid( global_tid ); - - /* find out whether this is a parallel taskq or serialized one. */ - in_parallel = in_parallel_context( team ); - - if( ! tq->tq_root ) { - if (in_parallel) { - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; - } - - if (in_parallel) { - /* This shouldn't be a barrier region boundary, it will confuse the user. */ - /* Need the boundary to be at the end taskq instead. */ - if ( __kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { - /* Creating the active root queue, and we are not the master thread. */ - /* The master thread below created the queue and tasks have been */ - /* enqueued, and the master thread released this barrier. This */ - /* worker thread can now proceed and execute tasks. See also the */ - /* TQF_RELEASE_WORKERS which is used to handle this case. 
*/ - - *shareds = (kmpc_shared_vars_t *) tq->tq_root->tq_shareds[tid].ai_data; - - KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); - - return NULL; - } - } - - /* master thread only executes this code */ - - if( tq->tq_curr_thunk_capacity < nproc ) { - if(tq->tq_curr_thunk) - __kmp_free(tq->tq_curr_thunk); - else { - /* only need to do this once at outer level, i.e. when tq_curr_thunk is still NULL */ - __kmp_init_lock( & tq->tq_freelist_lck ); - } - - tq->tq_curr_thunk = (kmpc_thunk_t **) __kmp_allocate( nproc * sizeof(kmpc_thunk_t *) ); - tq -> tq_curr_thunk_capacity = nproc; - } - - if (in_parallel) - tq->tq_global_flags = TQF_RELEASE_WORKERS; + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; } - /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ - /* on some heuristics (e.g., depth of queue nesting?). */ - - nslots = (in_parallel) ? (2 * nproc) : 1; - - /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ - /* jobs being executed by other threads, and one extra for taskq slot */ - - nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) : nslots + 2; - - /* Only the root taskq gets a per-thread array of shareds. */ - /* The rest of the taskq's only get one copy of the shared vars. */ - - nshareds = ( !tq->tq_root && in_parallel) ? nproc : 1; - - /* create overall queue data structure and its components that require allocation */ - - new_queue = __kmp_alloc_taskq ( tq, in_parallel, nslots, nthunks, nshareds, nproc, - sizeof_thunk, sizeof_shareds, &new_taskq_thunk, global_tid ); - - /* rest of new_queue initializations */ - - new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS; - if (in_parallel) { - new_queue->tq_tasknum_queuing = 0; - new_queue->tq_tasknum_serving = 0; - new_queue->tq_flags |= TQF_PARALLEL_CONTEXT; + // This shouldn't be a barrier region boundary, it will confuse the user. + /* Need the boundary to be at the end taskq instead. */ + if (__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) { + /* Creating the active root queue, and we are not the master thread. */ + /* The master thread below created the queue and tasks have been */ + /* enqueued, and the master thread released this barrier. This */ + /* worker thread can now proceed and execute tasks. See also the */ + /* TQF_RELEASE_WORKERS which is used to handle this case. */ + *shareds = (kmpc_shared_vars_t *)tq->tq_root->tq_shareds[tid].ai_data; + + KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid)); + + return NULL; + } } - new_queue->tq_taskq_slot = NULL; - new_queue->tq_nslots = nslots; - new_queue->tq_hiwat = HIGH_WATER_MARK (nslots); - new_queue->tq_nfull = 0; - new_queue->tq_head = 0; - new_queue->tq_tail = 0; - new_queue->tq_loc = loc; - - if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) { - /* prepare to serve the first-queued task's ORDERED directive */ - new_queue->tq_tasknum_serving = 1; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; - - /* Vector ORDERED SECTION to taskq version */ - th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; + /* master thread only executes this code */ + if (tq->tq_curr_thunk_capacity < nproc) { + if (tq->tq_curr_thunk) + __kmp_free(tq->tq_curr_thunk); + else { + /* only need to do this once at outer level, i.e. 
when tq_curr_thunk is + * still NULL */ + __kmp_init_lock(&tq->tq_freelist_lck); + } + + tq->tq_curr_thunk = + (kmpc_thunk_t **)__kmp_allocate(nproc * sizeof(kmpc_thunk_t *)); + tq->tq_curr_thunk_capacity = nproc; } - /* create a new thunk for the taskq_task in the new_queue */ - *shareds = (kmpc_shared_vars_t *) new_queue->tq_shareds[0].ai_data; - - new_taskq_thunk->th.th_shareds = *shareds; - new_taskq_thunk->th_task = taskq_task; - new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK; - new_taskq_thunk->th_status = 0; - - KMP_DEBUG_ASSERT (new_taskq_thunk->th_flags & TQF_TASKQ_TASK); - - /* KMP_MB(); */ /* make sure these inits complete before threads start using this queue (necessary?) */ - - /* insert the new task queue into the tree, but only after all fields initialized */ - - if (in_parallel) { - if( ! tq->tq_root ) { - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; - tq->tq_root = new_queue; - } - else { - curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; - new_queue->tq.tq_parent = curr_queue; - new_queue->tq_first_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; /* for this the thread that built the queue */ - - KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", - __LINE__, global_tid, new_queue, new_queue->tq_ref_count)); + if (in_parallel) + tq->tq_global_flags = TQF_RELEASE_WORKERS; + } + + /* dkp: in future, if flags & TQF_HEURISTICS, will choose nslots based */ + /* on some heuristics (e.g., depth of queue nesting?). */ + nslots = (in_parallel) ? (2 * nproc) : 1; + + /* There must be nproc * __KMP_TASKQ_THUNKS_PER_TH extra slots for pending */ + /* jobs being executed by other threads, and one extra for taskq slot */ + nthunks = (in_parallel) ? (nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH) + 1) + : nslots + 2; + + /* Only the root taskq gets a per-thread array of shareds. */ + /* The rest of the taskq's only get one copy of the shared vars. */ + nshareds = (!tq->tq_root && in_parallel) ? 
nproc : 1; + + /* create overall queue data structure and its components that require + * allocation */ + new_queue = __kmp_alloc_taskq(tq, in_parallel, nslots, nthunks, nshareds, + nproc, sizeof_thunk, sizeof_shareds, + &new_taskq_thunk, global_tid); + + /* rest of new_queue initializations */ + new_queue->tq_flags = flags & TQF_INTERFACE_FLAGS; + + if (in_parallel) { + new_queue->tq_tasknum_queuing = 0; + new_queue->tq_tasknum_serving = 0; + new_queue->tq_flags |= TQF_PARALLEL_CONTEXT; + } + + new_queue->tq_taskq_slot = NULL; + new_queue->tq_nslots = nslots; + new_queue->tq_hiwat = HIGH_WATER_MARK(nslots); + new_queue->tq_nfull = 0; + new_queue->tq_head = 0; + new_queue->tq_tail = 0; + new_queue->tq_loc = loc; + + if ((new_queue->tq_flags & TQF_IS_ORDERED) && in_parallel) { + /* prepare to serve the first-queued task's ORDERED directive */ + new_queue->tq_tasknum_serving = 1; + + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_deo_fcn = __kmp_taskq_eo; + + /* Vector ORDERED SECTION to taskq version */ + th->th.th_dispatch->th_dxo_fcn = __kmp_taskq_xo; + } + + /* create a new thunk for the taskq_task in the new_queue */ + *shareds = (kmpc_shared_vars_t *)new_queue->tq_shareds[0].ai_data; + + new_taskq_thunk->th.th_shareds = *shareds; + new_taskq_thunk->th_task = taskq_task; + new_taskq_thunk->th_flags = new_queue->tq_flags | TQF_TASKQ_TASK; + new_taskq_thunk->th_status = 0; + + KMP_DEBUG_ASSERT(new_taskq_thunk->th_flags & TQF_TASKQ_TASK); + + // Make sure these inits complete before threads start using this queue + /* KMP_MB(); */ // (necessary?) + + /* insert the new task queue into the tree, but only after all fields + * initialized */ + + if (in_parallel) { + if (!tq->tq_root) { + new_queue->tq.tq_parent = NULL; + new_queue->tq_first_child = NULL; + new_queue->tq_next_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = 1; + tq->tq_root = new_queue; + } else { + curr_queue = tq->tq_curr_thunk[tid]->th.th_shareds->sv_queue; + new_queue->tq.tq_parent = curr_queue; + new_queue->tq_first_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = + 1; /* for this the thread that built the queue */ - __kmp_acquire_lock(& curr_queue->tq_link_lck, global_tid); + KMP_DEBUG_REF_CTS(("line %d gtid %d: Q %p alloc %d\n", __LINE__, + global_tid, new_queue, new_queue->tq_ref_count)); - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + __kmp_acquire_lock(&curr_queue->tq_link_lck, global_tid); - new_queue->tq_next_child = (struct kmpc_task_queue_t *) curr_queue->tq_first_child; + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - if (curr_queue->tq_first_child != NULL) - curr_queue->tq_first_child->tq_prev_child = new_queue; + new_queue->tq_next_child = + (struct kmpc_task_queue_t *)curr_queue->tq_first_child; - curr_queue->tq_first_child = new_queue; + if (curr_queue->tq_first_child != NULL) + curr_queue->tq_first_child->tq_prev_child = new_queue; - __kmp_release_lock(& curr_queue->tq_link_lck, global_tid); - } + curr_queue->tq_first_child = new_queue; - /* set up thunk stack only after code that determines curr_queue above */ - new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; - tq->tq_curr_thunk[tid] = new_taskq_thunk; - - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); 
- } - else { - new_taskq_thunk->th_encl_thunk = 0; - new_queue->tq.tq_parent = NULL; - new_queue->tq_first_child = NULL; - new_queue->tq_next_child = NULL; - new_queue->tq_prev_child = NULL; - new_queue->tq_ref_count = 1; + __kmp_release_lock(&curr_queue->tq_link_lck, global_tid); } -#ifdef KMP_DEBUG - KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid)); - KF_DUMP(150, __kmp_dump_thunk( tq, new_taskq_thunk, global_tid )); - - if (in_parallel) { - KF_TRACE(25, ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); - } else { - KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); - } + /* set up thunk stack only after code that determines curr_queue above */ + new_taskq_thunk->th_encl_thunk = tq->tq_curr_thunk[tid]; + tq->tq_curr_thunk[tid] = new_taskq_thunk; - KF_DUMP(25, __kmp_dump_task_queue( tq, new_queue, global_tid )); + KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); + } else { + new_taskq_thunk->th_encl_thunk = 0; + new_queue->tq.tq_parent = NULL; + new_queue->tq_first_child = NULL; + new_queue->tq_next_child = NULL; + new_queue->tq_prev_child = NULL; + new_queue->tq_ref_count = 1; + } - if (in_parallel) { - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); - } +#ifdef KMP_DEBUG + KF_TRACE(150, ("Creating TaskQ Task on (%d):\n", global_tid)); + KF_DUMP(150, __kmp_dump_thunk(tq, new_taskq_thunk, global_tid)); + + if (in_parallel) { + KF_TRACE(25, + ("After TaskQ at %p Creation on (%d):\n", new_queue, global_tid)); + } else { + KF_TRACE(25, ("After Serial TaskQ at %p Creation on (%d):\n", new_queue, + global_tid)); + } + + KF_DUMP(25, __kmp_dump_task_queue(tq, new_queue, global_tid)); + + if (in_parallel) { + KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); + } #endif /* KMP_DEBUG */ - if ( __kmp_env_consistency_check ) - __kmp_push_workshare( global_tid, ct_taskq, new_queue->tq_loc ); + if (__kmp_env_consistency_check) + __kmp_push_workshare(global_tid, ct_taskq, new_queue->tq_loc); - KE_TRACE( 10, ("__kmpc_taskq return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_taskq return (%d)\n", global_tid)); - return new_taskq_thunk; + return new_taskq_thunk; } - /* ends a taskq; last thread out destroys the queue */ -void -__kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk) -{ +void __kmpc_end_taskq(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *taskq_thunk) { #ifdef KMP_DEBUG - kmp_int32 i; + kmp_int32 i; #endif - kmp_taskq_t *tq; - int in_parallel; - kmp_info_t *th; - kmp_int32 is_outermost; - kmpc_task_queue_t *queue; - kmpc_thunk_t *thunk; - int nproc; + kmp_taskq_t *tq; + int in_parallel; + kmp_info_t *th; + kmp_int32 is_outermost; + kmpc_task_queue_t *queue; + kmpc_thunk_t *thunk; + int nproc; - KE_TRACE( 10, ("__kmpc_end_taskq called (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_end_taskq called (%d)\n", global_tid)); - tq = & __kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - nproc = __kmp_threads[global_tid] -> th.th_team -> t.t_nproc; + tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; + nproc = __kmp_threads[global_tid]->th.th_team->t.t_nproc; - /* For the outermost taskq only, all but one thread will have taskq_thunk == NULL */ - queue = (taskq_thunk == NULL) ? tq->tq_root : taskq_thunk->th.th_shareds->sv_queue; + /* For the outermost taskq only, all but one thread will have taskq_thunk == + * NULL */ + queue = (taskq_thunk == NULL) ? 
tq->tq_root + : taskq_thunk->th.th_shareds->sv_queue; - KE_TRACE( 50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid)); - is_outermost = (queue == tq->tq_root); - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + KE_TRACE(50, ("__kmpc_end_taskq queue=%p (%d) \n", queue, global_tid)); + is_outermost = (queue == tq->tq_root); + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - if (in_parallel) { - kmp_uint32 spins; + if (in_parallel) { + kmp_uint32 spins; - /* this is just a safeguard to release the waiting threads if */ - /* the outermost taskq never queues a task */ + /* this is just a safeguard to release the waiting threads if */ + /* the outermost taskq never queues a task */ - if (is_outermost && (KMP_MASTER_GTID( global_tid ))) { - if( tq->tq_global_flags & TQF_RELEASE_WORKERS ) { - /* no lock needed, workers are still in spin mode */ - tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; + if (is_outermost && (KMP_MASTER_GTID(global_tid))) { + if (tq->tq_global_flags & TQF_RELEASE_WORKERS) { + /* no lock needed, workers are still in spin mode */ + tq->tq_global_flags &= ~TQF_RELEASE_WORKERS; - __kmp_end_split_barrier( bs_plain_barrier, global_tid ); - } - } + __kmp_end_split_barrier(bs_plain_barrier, global_tid); + } + } - /* keep dequeueing work until all tasks are queued and dequeued */ + /* keep dequeueing work until all tasks are queued and dequeued */ - do { - /* wait until something is available to dequeue */ - KMP_INIT_YIELD(spins); + do { + /* wait until something is available to dequeue */ + KMP_INIT_YIELD(spins); - while ( (queue->tq_nfull == 0) - && (queue->tq_taskq_slot == NULL) - && (! __kmp_taskq_has_any_children(queue) ) - && (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED) ) - ) { - KMP_YIELD_WHEN( TRUE, spins ); - } + while ((queue->tq_nfull == 0) && (queue->tq_taskq_slot == NULL) && + (!__kmp_taskq_has_any_children(queue)) && + (!(queue->tq_flags & TQF_ALL_TASKS_QUEUED))) { + KMP_YIELD_WHEN(TRUE, spins); + } - /* check to see if we can execute tasks in the queue */ - while ( ( (queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL) ) - && (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL - ) { - KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + /* check to see if we can execute tasks in the queue */ + while (((queue->tq_nfull != 0) || (queue->tq_taskq_slot != NULL)) && + (thunk = __kmp_find_task_in_queue(global_tid, queue)) != NULL) { + KF_TRACE(50, ("Found thunk: %p in primary queue %p (%d)\n", thunk, + queue, global_tid)); + __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); + } - /* see if work found can be found in a descendant queue */ - if ( (__kmp_taskq_has_any_children(queue)) - && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL - ) { + /* see if work found can be found in a descendant queue */ + if ((__kmp_taskq_has_any_children(queue)) && + (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != + NULL) { - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid )); + KF_TRACE(50, + ("Stole thunk: %p in descendant queue: %p while waiting in " + "queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, 
in_parallel); + } - } while ( (! (queue->tq_flags & TQF_ALL_TASKS_QUEUED)) - || (queue->tq_nfull != 0) - ); + } while ((!(queue->tq_flags & TQF_ALL_TASKS_QUEUED)) || + (queue->tq_nfull != 0)); - KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, global_tid)); + KF_TRACE(50, ("All tasks queued and dequeued in queue: %p (%d)\n", queue, + global_tid)); - /* wait while all tasks are not finished and more work found - in descendant queues */ + /* wait while all tasks are not finished and more work found + in descendant queues */ - while ( (!__kmp_taskq_tasks_finished(queue)) - && (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != NULL - ) { + while ((!__kmp_taskq_tasks_finished(queue)) && + (thunk = __kmp_find_task_in_descendant_queue(global_tid, queue)) != + NULL) { - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in " + "queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); + } - KF_TRACE(50, ("No work found in descendent queues or all work finished in queue: %p (%d)\n", queue, global_tid)); + KF_TRACE(50, ("No work found in descendent queues or all work finished in " + "queue: %p (%d)\n", + queue, global_tid)); - if (!is_outermost) { - /* need to return if NOWAIT present and not outermost taskq */ + if (!is_outermost) { + /* need to return if NOWAIT present and not outermost taskq */ - if (queue->tq_flags & TQF_IS_NOWAIT) { - __kmp_acquire_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); - queue->tq_ref_count--; - KMP_DEBUG_ASSERT( queue->tq_ref_count >= 0 ); - __kmp_release_lock(& queue->tq.tq_parent->tq_link_lck, global_tid); + if (queue->tq_flags & TQF_IS_NOWAIT) { + __kmp_acquire_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); + queue->tq_ref_count--; + KMP_DEBUG_ASSERT(queue->tq_ref_count >= 0); + __kmp_release_lock(&queue->tq.tq_parent->tq_link_lck, global_tid); - KE_TRACE( 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid)); + KE_TRACE( + 10, ("__kmpc_end_taskq return for nowait case (%d)\n", global_tid)); - return; - } + return; + } - __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); + __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); - /* WAIT until all tasks are finished and no child queues exist before proceeding */ - KMP_INIT_YIELD(spins); + /* WAIT until all tasks are finished and no child queues exist before + * proceeding */ + KMP_INIT_YIELD(spins); - while (!__kmp_taskq_tasks_finished(queue) || __kmp_taskq_has_any_children(queue)) { - thunk = __kmp_find_task_in_ancestor_queue( tq, global_tid, queue ); + while (!__kmp_taskq_tasks_finished(queue) || + __kmp_taskq_has_any_children(queue)) { + thunk = __kmp_find_task_in_ancestor_queue(tq, global_tid, queue); - if (thunk != NULL) { - KF_TRACE(50, ("Stole thunk: %p in ancestor queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + if (thunk != NULL) { + KF_TRACE(50, + ("Stole thunk: %p in ancestor queue: %p while waiting in " + "queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + __kmp_execute_task_from_queue(tq, loc, 
global_tid, thunk, + in_parallel); + } - KMP_YIELD_WHEN( thunk == NULL, spins ); + KMP_YIELD_WHEN(thunk == NULL, spins); - __kmp_find_and_remove_finished_child_taskq( tq, global_tid, queue ); - } + __kmp_find_and_remove_finished_child_taskq(tq, global_tid, queue); + } - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - if ( !(queue->tq_flags & TQF_DEALLOCATED) ) { - queue->tq_flags |= TQF_DEALLOCATED; - } - __kmp_release_lock(& queue->tq_queue_lck, global_tid); + __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); + if (!(queue->tq_flags & TQF_DEALLOCATED)) { + queue->tq_flags |= TQF_DEALLOCATED; + } + __kmp_release_lock(&queue->tq_queue_lck, global_tid); - /* only the allocating thread can deallocate the queue */ - if (taskq_thunk != NULL) { - __kmp_remove_queue_from_tree( tq, global_tid, queue, TRUE ); - } + /* only the allocating thread can deallocate the queue */ + if (taskq_thunk != NULL) { + __kmp_remove_queue_from_tree(tq, global_tid, queue, TRUE); + } - KE_TRACE( 10, ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", global_tid)); + KE_TRACE( + 10, + ("__kmpc_end_taskq return for non_outermost queue, wait case (%d)\n", + global_tid)); - return; - } + return; + } - /* Outermost Queue: steal work from descendants until all tasks are finished */ + // Outermost Queue: steal work from descendants until all tasks are finished - KMP_INIT_YIELD(spins); + KMP_INIT_YIELD(spins); - while (!__kmp_taskq_tasks_finished(queue)) { - thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); + while (!__kmp_taskq_tasks_finished(queue)) { + thunk = __kmp_find_task_in_descendant_queue(global_tid, queue); - if (thunk != NULL) { - KF_TRACE(50, ("Stole thunk: %p in descendant queue: %p while waiting in queue: %p (%d)\n", - thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); + if (thunk != NULL) { + KF_TRACE(50, + ("Stole thunk: %p in descendant queue: %p while waiting in " + "queue: %p (%d)\n", + thunk, thunk->th.th_shareds->sv_queue, queue, global_tid)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); + } - KMP_YIELD_WHEN( thunk == NULL, spins ); - } + KMP_YIELD_WHEN(thunk == NULL, spins); + } - /* Need this barrier to prevent destruction of queue before threads have all executed above code */ - /* This may need to be done earlier when NOWAIT is implemented for the outermost level */ + /* Need this barrier to prevent destruction of queue before threads have all + * executed above code */ + /* This may need to be done earlier when NOWAIT is implemented for the + * outermost level */ - if ( !__kmp_barrier( bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL )) { - /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ - /* for right now, everybody waits, and the master thread destroys the */ - /* remaining queues. */ + if (!__kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL)) { + /* the queue->tq_flags & TQF_IS_NOWAIT case is not yet handled here; */ + /* for right now, everybody waits, and the master thread destroys the */ + /* remaining queues. 
*/ - __kmp_remove_all_child_taskq( tq, global_tid, queue ); + __kmp_remove_all_child_taskq(tq, global_tid, queue); - /* Now destroy the root queue */ - KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", global_tid, queue )); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + /* Now destroy the root queue */ + KF_TRACE(100, ("T#%d Before Deletion of top-level TaskQ at %p:\n", + global_tid, queue)); + KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); #ifdef KMP_DEBUG - /* the root queue entry */ - KMP_DEBUG_ASSERT ((queue->tq.tq_parent == NULL) && (queue->tq_next_child == NULL)); + /* the root queue entry */ + KMP_DEBUG_ASSERT((queue->tq.tq_parent == NULL) && + (queue->tq_next_child == NULL)); - /* children must all be gone by now because of barrier above */ - KMP_DEBUG_ASSERT (queue->tq_first_child == NULL); + /* children must all be gone by now because of barrier above */ + KMP_DEBUG_ASSERT(queue->tq_first_child == NULL); - for (i=0; itq_th_thunks[i].ai_data == 0); - } + for (i = 0; i < nproc; i++) { + KMP_DEBUG_ASSERT(queue->tq_th_thunks[i].ai_data == 0); + } - for (i=0, thunk=queue->tq_free_thunks; thunk != NULL; i++, thunk=thunk->th.th_next_free); + for (i = 0, thunk = queue->tq_free_thunks; thunk != NULL; + i++, thunk = thunk->th.th_next_free) + ; - KMP_DEBUG_ASSERT (i == queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH)); + KMP_DEBUG_ASSERT(i == + queue->tq_nslots + (nproc * __KMP_TASKQ_THUNKS_PER_TH)); - for (i = 0; i < nproc; i++) { - KMP_DEBUG_ASSERT( ! tq->tq_curr_thunk[i] ); - } + for (i = 0; i < nproc; i++) { + KMP_DEBUG_ASSERT(!tq->tq_curr_thunk[i]); + } #endif - /* unlink the root queue entry */ - tq -> tq_root = NULL; + /* unlink the root queue entry */ + tq->tq_root = NULL; - /* release storage for root queue entry */ - KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, global_tid)); + /* release storage for root queue entry */ + KF_TRACE(50, ("After Deletion of top-level TaskQ at %p on (%d):\n", queue, + global_tid)); - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_free_taskq(tq, queue, in_parallel, global_tid); - KF_DUMP(50, __kmp_dump_task_queue_tree( tq, tq->tq_root, global_tid )); + KF_DUMP(50, __kmp_dump_task_queue_tree(tq, tq->tq_root, global_tid)); - /* release the workers now that the data structures are up to date */ - __kmp_end_split_barrier( bs_plain_barrier, global_tid ); - } + /* release the workers now that the data structures are up to date */ + __kmp_end_split_barrier(bs_plain_barrier, global_tid); + } - th = __kmp_threads[ global_tid ]; + th = __kmp_threads[global_tid]; - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_deo_fcn = 0; + /* Reset ORDERED SECTION to parallel version */ + th->th.th_dispatch->th_deo_fcn = 0; - /* Reset ORDERED SECTION to parallel version */ - th->th.th_dispatch->th_dxo_fcn = 0; - } - else { - /* in serial execution context, dequeue the last task */ - /* and execute it, if there were any tasks encountered */ + /* Reset ORDERED SECTION to parallel version */ + th->th.th_dispatch->th_dxo_fcn = 0; + } else { + /* in serial execution context, dequeue the last task */ + /* and execute it, if there were any tasks encountered */ - if (queue->tq_nfull > 0) { - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); + if (queue->tq_nfull > 0) { + KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); + thunk = 
__kmp_dequeue_task(global_tid, queue, in_parallel); - if (queue->tq_flags & TQF_IS_LAST_TASK) { - /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ - /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ - /* instrumentation does copy-out. */ + if (queue->tq_flags & TQF_IS_LAST_TASK) { + /* TQF_IS_LASTPRIVATE, one thing in queue, __kmpc_end_taskq_task() */ + /* has been run so this is last task, run with TQF_IS_LAST_TASK so */ + /* instrumentation does copy-out. */ - /* no need for test_then_or call since already locked */ - thunk->th_flags |= TQF_IS_LAST_TASK; - } + /* no need for test_then_or call since already locked */ + thunk->th_flags |= TQF_IS_LAST_TASK; + } - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, thunk, queue)); + KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, + thunk, queue)); - __kmp_execute_task_from_queue( tq, loc, global_tid, thunk, in_parallel ); - } + __kmp_execute_task_from_queue(tq, loc, global_tid, thunk, in_parallel); + } - /* destroy the unattached serial queue now that there is no more work to do */ - KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", queue, global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + // destroy the unattached serial queue now that there is no more work to do + KF_TRACE(100, ("Before Deletion of Serialized TaskQ at %p on (%d):\n", + queue, global_tid)); + KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); #ifdef KMP_DEBUG - i = 0; - for (thunk=queue->tq_free_thunks; thunk != NULL; thunk=thunk->th.th_next_free) - ++i; - KMP_DEBUG_ASSERT (i == queue->tq_nslots + 1); + i = 0; + for (thunk = queue->tq_free_thunks; thunk != NULL; + thunk = thunk->th.th_next_free) + ++i; + KMP_DEBUG_ASSERT(i == queue->tq_nslots + 1); #endif - /* release storage for unattached serial queue */ - KF_TRACE(50, ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid)); + /* release storage for unattached serial queue */ + KF_TRACE(50, + ("Serialized TaskQ at %p deleted on (%d).\n", queue, global_tid)); - queue->tq_flags |= TQF_DEALLOCATED; - __kmp_free_taskq ( tq, queue, in_parallel, global_tid ); - } + queue->tq_flags |= TQF_DEALLOCATED; + __kmp_free_taskq(tq, queue, in_parallel, global_tid); + } - KE_TRACE( 10, ("__kmpc_end_taskq return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_end_taskq return (%d)\n", global_tid)); } /* Enqueues a task for thunk previously created by __kmpc_task_buffer. 
*/ /* Returns nonzero if just filled up queue */ -kmp_int32 -__kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) -{ - kmp_int32 ret; - kmpc_task_queue_t *queue; - int in_parallel; - kmp_taskq_t *tq; +kmp_int32 __kmpc_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) { + kmp_int32 ret; + kmpc_task_queue_t *queue; + int in_parallel; + kmp_taskq_t *tq; - KE_TRACE( 10, ("__kmpc_task called (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_task called (%d)\n", global_tid)); - KMP_DEBUG_ASSERT (!(thunk->th_flags & TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */ + KMP_DEBUG_ASSERT(!(thunk->th_flags & + TQF_TASKQ_TASK)); /* thunk->th_task is a regular task */ - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; + queue = thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED)) - thunk->th_tasknum = ++queue->tq_tasknum_queuing; + if (in_parallel && (thunk->th_flags & TQF_IS_ORDERED)) + thunk->th_tasknum = ++queue->tq_tasknum_queuing; - /* For serial execution dequeue the preceding task and execute it, if one exists */ - /* This cannot be the last task. That one is handled in __kmpc_end_taskq */ + /* For serial execution dequeue the preceding task and execute it, if one + * exists */ + /* This cannot be the last task. That one is handled in __kmpc_end_taskq */ - if (!in_parallel && queue->tq_nfull > 0) { - kmpc_thunk_t *prev_thunk; + if (!in_parallel && queue->tq_nfull > 0) { + kmpc_thunk_t *prev_thunk; - KMP_DEBUG_ASSERT(queue->tq_nfull == 1); + KMP_DEBUG_ASSERT(queue->tq_nfull == 1); - prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); + prev_thunk = __kmp_dequeue_task(global_tid, queue, in_parallel); - KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, prev_thunk, queue)); + KF_TRACE(50, ("T#%d found thunk: %p in serial queue: %p\n", global_tid, + prev_thunk, queue)); - __kmp_execute_task_from_queue( tq, loc, global_tid, prev_thunk, in_parallel ); - } + __kmp_execute_task_from_queue(tq, loc, global_tid, prev_thunk, in_parallel); + } - /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private */ - /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ - /* task queue is not full and allocates a thunk (which is then passed to */ - /* __kmpc_task()). So, the enqueue below should never fail due to a full queue. */ + /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private + variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the + task queue is not full and allocates a thunk (which is then passed to + __kmpc_task()). So, the enqueue below should never fail due to a full + queue. 
*/ - KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); + KF_TRACE(100, ("After enqueueing this Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); - ret = __kmp_enqueue_task ( tq, global_tid, queue, thunk, in_parallel ); + ret = __kmp_enqueue_task(tq, global_tid, queue, thunk, in_parallel); - KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_task_queue( tq, queue, global_tid )); + KF_TRACE(100, ("Task Queue looks like this on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_task_queue(tq, queue, global_tid)); - KE_TRACE( 10, ("__kmpc_task return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_task return (%d)\n", global_tid)); - return ret; + return ret; } /* enqueues a taskq_task for thunk previously created by __kmpc_taskq */ /* this should never be called unless in a parallel context */ -void -__kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, kmp_int32 status) -{ - kmpc_task_queue_t *queue; - kmp_taskq_t *tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - int tid = __kmp_tid_from_gtid( global_tid ); +void __kmpc_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk, + kmp_int32 status) { + kmpc_task_queue_t *queue; + kmp_taskq_t *tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; + int tid = __kmp_tid_from_gtid(global_tid); - KE_TRACE( 10, ("__kmpc_taskq_task called (%d)\n", global_tid)); - KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, thunk, global_tid )); + KE_TRACE(10, ("__kmpc_taskq_task called (%d)\n", global_tid)); + KF_TRACE(100, ("TaskQ Task argument thunk on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk(tq, thunk, global_tid)); - queue = thunk->th.th_shareds->sv_queue; + queue = thunk->th.th_shareds->sv_queue; - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_taskq, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_workshare(global_tid, ct_taskq, loc); - /* thunk->th_task is the taskq_task */ - KMP_DEBUG_ASSERT (thunk->th_flags & TQF_TASKQ_TASK); + /* thunk->th_task is the taskq_task */ + KMP_DEBUG_ASSERT(thunk->th_flags & TQF_TASKQ_TASK); - /* not supposed to call __kmpc_taskq_task if it's already enqueued */ - KMP_DEBUG_ASSERT (queue->tq_taskq_slot == NULL); + /* not supposed to call __kmpc_taskq_task if it's already enqueued */ + KMP_DEBUG_ASSERT(queue->tq_taskq_slot == NULL); - /* dequeue taskq thunk from curr_thunk stack */ - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; + /* dequeue taskq thunk from curr_thunk stack */ + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); + KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); - thunk->th_status = status; + thunk->th_status = status; - KMP_MB(); /* flush thunk->th_status before taskq_task enqueued to avoid race condition */ + // Flush thunk->th_status before taskq_task enqueued to avoid race condition + KMP_MB(); - /* enqueue taskq_task in thunk into special slot in queue */ - /* GEH - probably don't need to lock taskq slot since only one */ - /* thread enqueues & already a lock set at dequeue point */ + /* enqueue taskq_task in thunk into special slot in queue */ + /* GEH - probably don't need to lock taskq slot since only one */ + /* thread enqueues & 
already a lock set at dequeue point */ - queue->tq_taskq_slot = thunk; + queue->tq_taskq_slot = thunk; - KE_TRACE( 10, ("__kmpc_taskq_task return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_taskq_task return (%d)\n", global_tid)); } -/* ends a taskq_task; done generating tasks */ +/* ends a taskq_task; done generating tasks */ -void -__kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *thunk) -{ - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - int in_parallel; - int tid; +void __kmpc_end_taskq_task(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *thunk) { + kmp_taskq_t *tq; + kmpc_task_queue_t *queue; + int in_parallel; + int tid; - KE_TRACE( 10, ("__kmpc_end_taskq_task called (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_end_taskq_task called (%d)\n", global_tid)); - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - tid = __kmp_tid_from_gtid( global_tid ); + tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; + queue = thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + tid = __kmp_tid_from_gtid(global_tid); - if ( __kmp_env_consistency_check ) - __kmp_pop_workshare( global_tid, ct_taskq, loc ); + if (__kmp_env_consistency_check) + __kmp_pop_workshare(global_tid, ct_taskq, loc); - if (in_parallel) { -#if KMP_ARCH_X86 || \ - KMP_ARCH_X86_64 + if (in_parallel) { +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_ALL_TASKS_QUEUED ); + KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_ALL_TASKS_QUEUED); #else - { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work fine without this call for digital/alpha, needed for IBM/RS6000 */ + { + __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - queue->tq_flags |= TQF_ALL_TASKS_QUEUED; + // Make sure data structures are in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } -#endif + queue->tq_flags |= TQF_ALL_TASKS_QUEUED; + __kmp_release_lock(&queue->tq_queue_lck, global_tid); } +#endif + } + + if (thunk->th_flags & TQF_IS_LASTPRIVATE) { + /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in + the queue if TQF_IS_LASTPRIVATE so we can positively identify that last + task and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. + When __kmpc_end_taskq_task() is called we are done generating all the + tasks, so we know the last one in the queue is the lastprivate task. + Mark the queue as having gotten to this state via tq_flags & + TQF_IS_LAST_TASK; when that task actually executes mark it via th_flags & + TQF_IS_LAST_TASK (this th_flags bit signals the instrumented code to do + copy-outs after execution). */ + if (!in_parallel) { + /* No synchronization needed for serial context */ + queue->tq_flags |= TQF_IS_LAST_TASK; + } else { +#if KMP_ARCH_X86 || KMP_ARCH_X86_64 - if (thunk->th_flags & TQF_IS_LASTPRIVATE) { - /* Normally, __kmp_find_task_in_queue() refuses to schedule the last task in the */ - /* queue if TQF_IS_LASTPRIVATE so we can positively identify that last task */ - /* and run it with its TQF_IS_LAST_TASK bit turned on in th_flags. 
When */ - /* __kmpc_end_taskq_task() is called we are done generating all the tasks, so */ - /* we know the last one in the queue is the lastprivate task. Mark the queue */ - /* as having gotten to this state via tq_flags & TQF_IS_LAST_TASK; when that */ - /* task actually executes mark it via th_flags & TQF_IS_LAST_TASK (this th_flags */ - /* bit signals the instrumented code to do copy-outs after execution). */ - - if (! in_parallel) { - /* No synchronization needed for serial context */ - queue->tq_flags |= TQF_IS_LAST_TASK; - } - else { -#if KMP_ARCH_X86 || \ - KMP_ARCH_X86_64 - - KMP_TEST_THEN_OR32( &queue->tq_flags, (kmp_int32) TQF_IS_LAST_TASK ); + KMP_TEST_THEN_OR32(&queue->tq_flags, (kmp_int32)TQF_IS_LAST_TASK); #else - { - __kmp_acquire_lock(& queue->tq_queue_lck, global_tid); - - KMP_MB(); /* make sure data structures are in consistent state before querying them */ - /* Seems to work without this call for digital/alpha, needed for IBM/RS6000 */ + { + __kmp_acquire_lock(&queue->tq_queue_lck, global_tid); - queue->tq_flags |= TQF_IS_LAST_TASK; + // Make sure data structures in consistent state before querying them + // Seems to work without this for digital/alpha, needed for IBM/RS6000 + KMP_MB(); - __kmp_release_lock(& queue->tq_queue_lck, global_tid); - } + queue->tq_flags |= TQF_IS_LAST_TASK; + __kmp_release_lock(&queue->tq_queue_lck, global_tid); + } #endif - /* to prevent race condition where last task is dequeued but */ - /* flag isn't visible yet (not sure about this) */ - KMP_MB(); - } + /* to prevent race condition where last task is dequeued but */ + /* flag isn't visible yet (not sure about this) */ + KMP_MB(); } + } - /* dequeue taskq thunk from curr_thunk stack */ - if (in_parallel) { - tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; - thunk->th_encl_thunk = NULL; + /* dequeue taskq thunk from curr_thunk stack */ + if (in_parallel) { + tq->tq_curr_thunk[tid] = thunk->th_encl_thunk; + thunk->th_encl_thunk = NULL; - KF_DUMP( 200, __kmp_dump_thunk_stack( tq->tq_curr_thunk[tid], global_tid )); - } + KF_DUMP(200, __kmp_dump_thunk_stack(tq->tq_curr_thunk[tid], global_tid)); + } - KE_TRACE( 10, ("__kmpc_end_taskq_task return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_end_taskq_task return (%d)\n", global_tid)); } /* returns thunk for a regular task based on taskq_thunk */ /* (__kmpc_taskq_task does the analogous thing for a TQF_TASKQ_TASK) */ -kmpc_thunk_t * -__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, kmpc_thunk_t *taskq_thunk, kmpc_task_t task) -{ - kmp_taskq_t *tq; - kmpc_task_queue_t *queue; - kmpc_thunk_t *new_thunk; - int in_parallel; +kmpc_thunk_t *__kmpc_task_buffer(ident_t *loc, kmp_int32 global_tid, + kmpc_thunk_t *taskq_thunk, kmpc_task_t task) { + kmp_taskq_t *tq; + kmpc_task_queue_t *queue; + kmpc_thunk_t *new_thunk; + int in_parallel; - KE_TRACE( 10, ("__kmpc_task_buffer called (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_task_buffer called (%d)\n", global_tid)); - KMP_DEBUG_ASSERT (taskq_thunk->th_flags & TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */ + KMP_DEBUG_ASSERT( + taskq_thunk->th_flags & + TQF_TASKQ_TASK); /* taskq_thunk->th_task is the taskq_task */ - tq = &__kmp_threads[global_tid] -> th.th_team -> t.t_taskq; - queue = taskq_thunk->th.th_shareds->sv_queue; - in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); + tq = &__kmp_threads[global_tid]->th.th_team->t.t_taskq; + queue = taskq_thunk->th.th_shareds->sv_queue; + in_parallel = (queue->tq_flags & TQF_PARALLEL_CONTEXT); - /* The instrumentation sequence is: 
__kmpc_task_buffer(), initialize private */ - /* variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the */ - /* task queue is not full and allocates a thunk (which is then passed to */ - /* __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be */ - /* the next to be enqueued in __kmpc_task(). */ + /* The instrumentation sequence is: __kmpc_task_buffer(), initialize private + variables, __kmpc_task(). The __kmpc_task_buffer routine checks that the + task queue is not full and allocates a thunk (which is then passed to + __kmpc_task()). So, we can pre-allocate a thunk here assuming it will be + the next to be enqueued in __kmpc_task(). */ - new_thunk = __kmp_alloc_thunk (queue, in_parallel, global_tid); - new_thunk->th.th_shareds = (kmpc_shared_vars_t *) queue->tq_shareds[0].ai_data; - new_thunk->th_encl_thunk = NULL; - new_thunk->th_task = task; + new_thunk = __kmp_alloc_thunk(queue, in_parallel, global_tid); + new_thunk->th.th_shareds = (kmpc_shared_vars_t *)queue->tq_shareds[0].ai_data; + new_thunk->th_encl_thunk = NULL; + new_thunk->th_task = task; - /* GEH - shouldn't need to lock the read of tq_flags here */ - new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS; + /* GEH - shouldn't need to lock the read of tq_flags here */ + new_thunk->th_flags = queue->tq_flags & TQF_INTERFACE_FLAGS; - new_thunk->th_status = 0; + new_thunk->th_status = 0; - KMP_DEBUG_ASSERT (!(new_thunk->th_flags & TQF_TASKQ_TASK)); + KMP_DEBUG_ASSERT(!(new_thunk->th_flags & TQF_TASKQ_TASK)); - KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid)); - KF_DUMP(100, __kmp_dump_thunk( tq, new_thunk, global_tid )); + KF_TRACE(100, ("Creating Regular Task on (%d):\n", global_tid)); + KF_DUMP(100, __kmp_dump_thunk(tq, new_thunk, global_tid)); - KE_TRACE( 10, ("__kmpc_task_buffer return (%d)\n", global_tid)); + KE_TRACE(10, ("__kmpc_task_buffer return (%d)\n", global_tid)); - return new_thunk; + return new_thunk; } - -/* --------------------------------------------------------------------------- */ diff --git a/openmp/runtime/src/kmp_threadprivate.cpp b/openmp/runtime/src/kmp_threadprivate.cpp index 31d3e07..b1faf1c 100644 --- a/openmp/runtime/src/kmp_threadprivate.cpp +++ b/openmp/runtime/src/kmp_threadprivate.cpp @@ -14,502 +14,476 @@ #include "kmp.h" -#include "kmp_itt.h" #include "kmp_i18n.h" - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +#include "kmp_itt.h" #define USE_CHECKS_COMMON -#define KMP_INLINE_SUBR 1 - - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +#define KMP_INLINE_SUBR 1 -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ); +void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, + void *data_addr, size_t pc_size); +struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, + void *data_addr, + size_t pc_size); -struct shared_table __kmp_threadprivate_d_table; - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +struct shared_table __kmp_threadprivate_d_table; static #ifdef KMP_INLINE_SUBR -__forceinline + __forceinline #endif 
-struct private_common * -__kmp_threadprivate_find_task_common( struct common_table *tbl, int gtid, void *pc_addr ) + struct private_common * + __kmp_threadprivate_find_task_common(struct common_table *tbl, int gtid, + void *pc_addr) { - struct private_common *tn; + struct private_common *tn; #ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, called with address %p\n", - gtid, pc_addr ) ); - dump_list(); + KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, called with " + "address %p\n", + gtid, pc_addr)); + dump_list(); #endif - for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { + for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) { + if (tn->gbl_addr == pc_addr) { #ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_find_task_common: thread#%d, found node %p on list\n", - gtid, pc_addr ) ); + KC_TRACE(10, ("__kmp_threadprivate_find_task_common: thread#%d, found " + "node %p on list\n", + gtid, pc_addr)); #endif - return tn; - } + return tn; } - return 0; + } + return 0; } static #ifdef KMP_INLINE_SUBR -__forceinline + __forceinline #endif -struct shared_common * -__kmp_find_shared_task_common( struct shared_table *tbl, int gtid, void *pc_addr ) -{ - struct shared_common *tn; + struct shared_common * + __kmp_find_shared_task_common(struct shared_table *tbl, int gtid, + void *pc_addr) { + struct shared_common *tn; - for (tn = tbl->data[ KMP_HASH(pc_addr) ]; tn; tn = tn->next) { - if (tn->gbl_addr == pc_addr) { + for (tn = tbl->data[KMP_HASH(pc_addr)]; tn; tn = tn->next) { + if (tn->gbl_addr == pc_addr) { #ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_find_shared_task_common: thread#%d, found node %p on list\n", - gtid, pc_addr ) ); + KC_TRACE( + 10, + ("__kmp_find_shared_task_common: thread#%d, found node %p on list\n", + gtid, pc_addr)); #endif - return tn; - } + return tn; } - return 0; + } + return 0; } - -/* - * Create a template for the data initialized storage. - * Either the template is NULL indicating zero fill, - * or the template is a copy of the original data. - */ - -static struct private_data * -__kmp_init_common_data( void *pc_addr, size_t pc_size ) -{ - struct private_data *d; - size_t i; - char *p; - - d = (struct private_data *) __kmp_allocate( sizeof( struct private_data ) ); -/* - d->data = 0; // AC: commented out because __kmp_allocate zeroes the memory - d->next = 0; -*/ - d->size = pc_size; - d->more = 1; - - p = (char*)pc_addr; - - for (i = pc_size; i > 0; --i) { - if (*p++ != '\0') { - d->data = __kmp_allocate( pc_size ); - KMP_MEMCPY( d->data, pc_addr, pc_size ); - break; - } +// Create a template for the data initialized storage. Either the template is +// NULL indicating zero fill, or the template is a copy of the original data. +static struct private_data *__kmp_init_common_data(void *pc_addr, + size_t pc_size) { + struct private_data *d; + size_t i; + char *p; + + d = (struct private_data *)__kmp_allocate(sizeof(struct private_data)); + /* + d->data = 0; // AC: commented out because __kmp_allocate zeroes the + memory + d->next = 0; + */ + d->size = pc_size; + d->more = 1; + + p = (char *)pc_addr; + + for (i = pc_size; i > 0; --i) { + if (*p++ != '\0') { + d->data = __kmp_allocate(pc_size); + KMP_MEMCPY(d->data, pc_addr, pc_size); + break; } + } - return d; + return d; } -/* - * Initialize the data area from the template. 
- */ - -static void -__kmp_copy_common_data( void *pc_addr, struct private_data *d ) -{ - char *addr = (char *) pc_addr; - int i, offset; - - for (offset = 0; d != 0; d = d->next) { - for (i = d->more; i > 0; --i) { - if (d->data == 0) - memset( & addr[ offset ], '\0', d->size ); - else - KMP_MEMCPY( & addr[ offset ], d->data, d->size ); - offset += d->size; - } +// Initialize the data area from the template. +static void __kmp_copy_common_data(void *pc_addr, struct private_data *d) { + char *addr = (char *)pc_addr; + int i, offset; + + for (offset = 0; d != 0; d = d->next) { + for (i = d->more; i > 0; --i) { + if (d->data == 0) + memset(&addr[offset], '\0', d->size); + else + KMP_MEMCPY(&addr[offset], d->data, d->size); + offset += d->size; } + } } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - /* we are called from __kmp_serial_initialize() with __kmp_initz_lock held. */ -void -__kmp_common_initialize( void ) -{ - if( ! TCR_4(__kmp_init_common) ) { - int q; +void __kmp_common_initialize(void) { + if (!TCR_4(__kmp_init_common)) { + int q; #ifdef KMP_DEBUG - int gtid; + int gtid; #endif - __kmp_threadpriv_cache_list = NULL; + __kmp_threadpriv_cache_list = NULL; #ifdef KMP_DEBUG - /* verify the uber masters were initialized */ - for(gtid = 0 ; gtid < __kmp_threads_capacity; gtid++ ) - if( __kmp_root[gtid] ) { - KMP_DEBUG_ASSERT( __kmp_root[gtid]->r.r_uber_thread ); - for ( q = 0; q< KMP_HASH_TABLE_SIZE; ++q) - KMP_DEBUG_ASSERT( !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q] ); -/* __kmp_root[ gitd ]-> r.r_uber_thread -> th.th_pri_common -> data[ q ] = 0;*/ - } + /* verify the uber masters were initialized */ + for (gtid = 0; gtid < __kmp_threads_capacity; gtid++) + if (__kmp_root[gtid]) { + KMP_DEBUG_ASSERT(__kmp_root[gtid]->r.r_uber_thread); + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) + KMP_DEBUG_ASSERT( + !__kmp_root[gtid]->r.r_uber_thread->th.th_pri_common->data[q]); + /* __kmp_root[ gitd ]-> r.r_uber_thread -> + * th.th_pri_common -> data[ q ] = 0;*/ + } #endif /* KMP_DEBUG */ - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) - __kmp_threadprivate_d_table.data[ q ] = 0; + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) + __kmp_threadprivate_d_table.data[q] = 0; - TCW_4(__kmp_init_common, TRUE); - } + TCW_4(__kmp_init_common, TRUE); + } } /* Call all destructors for threadprivate data belonging to all threads. Currently unused! */ -void -__kmp_common_destroy( void ) -{ - if( TCR_4(__kmp_init_common) ) { - int q; - - TCW_4(__kmp_init_common, FALSE); - - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - int gtid; - struct private_common *tn; - struct shared_common *d_tn; - - /* C++ destructors need to be called once per thread before exiting */ - /* don't call destructors for master thread though unless we used copy constructor */ - - for (d_tn = __kmp_threadprivate_d_table.data[ q ]; d_tn; d_tn = d_tn->next) { - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if( __kmp_threads[gtid] ) { - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! 
KMP_UBER_GTID (gtid)) ) { - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, - gtid, d_tn->gbl_addr ); - if (tn) { - (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); - } - } - } else { - if (d_tn->dt.dtor != 0) { - for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { - if( __kmp_threads[gtid] ) { - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! KMP_UBER_GTID (gtid)) ) { - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ gtid ]->th.th_pri_common, - gtid, d_tn->gbl_addr ); - if (tn) { - (*d_tn->dt.dtor) (tn->par_addr); - } - } - } - } - if (d_tn->obj_init != 0) { - (*d_tn->dt.dtor) (d_tn->obj_init); - } - } +void __kmp_common_destroy(void) { + if (TCR_4(__kmp_init_common)) { + int q; + + TCW_4(__kmp_init_common, FALSE); + + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { + int gtid; + struct private_common *tn; + struct shared_common *d_tn; + + /* C++ destructors need to be called once per thread before exiting. + Don't call destructors for master thread though unless we used copy + constructor */ + + for (d_tn = __kmp_threadprivate_d_table.data[q]; d_tn; + d_tn = d_tn->next) { + if (d_tn->is_vec) { + if (d_tn->dt.dtorv != 0) { + for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { + if (__kmp_threads[gtid]) { + if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) + : (!KMP_UBER_GTID(gtid))) { + tn = __kmp_threadprivate_find_task_common( + __kmp_threads[gtid]->th.th_pri_common, gtid, + d_tn->gbl_addr); + if (tn) { + (*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len); + } + } + } + } + if (d_tn->obj_init != 0) { + (*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len); + } + } + } else { + if (d_tn->dt.dtor != 0) { + for (gtid = 0; gtid < __kmp_all_nth; ++gtid) { + if (__kmp_threads[gtid]) { + if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) + : (!KMP_UBER_GTID(gtid))) { + tn = __kmp_threadprivate_find_task_common( + __kmp_threads[gtid]->th.th_pri_common, gtid, + d_tn->gbl_addr); + if (tn) { + (*d_tn->dt.dtor)(tn->par_addr); + } } + } } - __kmp_threadprivate_d_table.data[ q ] = 0; + if (d_tn->obj_init != 0) { + (*d_tn->dt.dtor)(d_tn->obj_init); + } + } } + } + __kmp_threadprivate_d_table.data[q] = 0; } + } } /* Call all destructors for threadprivate data belonging to this thread */ -void -__kmp_common_destroy_gtid( int gtid ) -{ - struct private_common *tn; - struct shared_common *d_tn; - - KC_TRACE( 10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid ) ); - if( (__kmp_foreign_tp) ? (! KMP_INITIAL_GTID (gtid)) : - (! 
KMP_UBER_GTID (gtid)) ) { - - if( TCR_4(__kmp_init_common) ) { - - /* Cannot do this here since not all threads have destroyed their data */ - /* TCW_4(__kmp_init_common, FALSE); */ - - for (tn = __kmp_threads[ gtid ]->th.th_pri_head; tn; tn = tn->link) { - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, tn->gbl_addr ); - - KMP_DEBUG_ASSERT( d_tn ); - - if (d_tn->is_vec) { - if (d_tn->dt.dtorv != 0) { - (void) (*d_tn->dt.dtorv) (tn->par_addr, d_tn->vec_len); - } - if (d_tn->obj_init != 0) { - (void) (*d_tn->dt.dtorv) (d_tn->obj_init, d_tn->vec_len); - } - } else { - if (d_tn->dt.dtor != 0) { - (void) (*d_tn->dt.dtor) (tn->par_addr); - } - if (d_tn->obj_init != 0) { - (void) (*d_tn->dt.dtor) (d_tn->obj_init); - } - } - } - KC_TRACE( 30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors complete\n", - gtid ) ); +void __kmp_common_destroy_gtid(int gtid) { + struct private_common *tn; + struct shared_common *d_tn; + + KC_TRACE(10, ("__kmp_common_destroy_gtid: T#%d called\n", gtid)); + if ((__kmp_foreign_tp) ? (!KMP_INITIAL_GTID(gtid)) : (!KMP_UBER_GTID(gtid))) { + + if (TCR_4(__kmp_init_common)) { + + /* Cannot do this here since not all threads have destroyed their data */ + /* TCW_4(__kmp_init_common, FALSE); */ + + for (tn = __kmp_threads[gtid]->th.th_pri_head; tn; tn = tn->link) { + + d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid, + tn->gbl_addr); + + KMP_DEBUG_ASSERT(d_tn); + + if (d_tn->is_vec) { + if (d_tn->dt.dtorv != 0) { + (void)(*d_tn->dt.dtorv)(tn->par_addr, d_tn->vec_len); + } + if (d_tn->obj_init != 0) { + (void)(*d_tn->dt.dtorv)(d_tn->obj_init, d_tn->vec_len); + } + } else { + if (d_tn->dt.dtor != 0) { + (void)(*d_tn->dt.dtor)(tn->par_addr); + } + if (d_tn->obj_init != 0) { + (void)(*d_tn->dt.dtor)(d_tn->obj_init); + } } + } + KC_TRACE(30, ("__kmp_common_destroy_gtid: T#%d threadprivate destructors " + "complete\n", + gtid)); } + } } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #ifdef KMP_TASK_COMMON_DEBUG -static void -dump_list( void ) -{ - int p, q; - - for (p = 0; p < __kmp_all_nth; ++p) { - if( !__kmp_threads[p] ) continue; - for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { - if (__kmp_threads[ p ]->th.th_pri_common->data[ q ]) { - struct private_common *tn; - - KC_TRACE( 10, ( "\tdump_list: gtid:%d addresses\n", p ) ); - - for (tn = __kmp_threads[ p ]->th.th_pri_common->data[ q ]; tn; tn = tn->next) { - KC_TRACE( 10, ( "\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n", - tn->gbl_addr, tn->par_addr ) ); - } - } +static void dump_list(void) { + int p, q; + + for (p = 0; p < __kmp_all_nth; ++p) { + if (!__kmp_threads[p]) + continue; + for (q = 0; q < KMP_HASH_TABLE_SIZE; ++q) { + if (__kmp_threads[p]->th.th_pri_common->data[q]) { + struct private_common *tn; + + KC_TRACE(10, ("\tdump_list: gtid:%d addresses\n", p)); + + for (tn = __kmp_threads[p]->th.th_pri_common->data[q]; tn; + tn = tn->next) { + KC_TRACE(10, + ("\tdump_list: THREADPRIVATE: Serial %p -> Parallel %p\n", + tn->gbl_addr, tn->par_addr)); } + } } + } } #endif /* KMP_TASK_COMMON_DEBUG */ +// NOTE: this routine is to be called only from the serial part of the program. 
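The threadprivate routines in this file all revolve around one idea: the address of the serial (global) copy of a threadprivate variable is used as a hash key, each thread keeps its own table (th_pri_common, indexed by KMP_HASH of that address) mapping it to the thread's private copy, and a shared table (__kmp_threadprivate_d_table) records per-variable metadata such as size, init template, constructors and destructors. As a rough sketch of that address-keyed mapping only, using standard C++ containers instead of the runtime's real kmp_* hash tables (so every name below is illustrative and not part of the library):

#include <cstddef>
#include <cstring>
#include <unordered_map>

// Illustrative stand-in for a thread's private-copy table: maps the address
// of the serial/global copy of a threadprivate variable (the value the
// runtime feeds to KMP_HASH) to that thread's private copy.
struct PrivateCopyTable {
  std::unordered_map<void *, void *> priv; // gbl_addr -> par_addr

  // Return the caller's private copy of 'gbl_addr', allocating and
  // initializing it from the serial copy on first use (loosely what the
  // insert path does for a non-master thread).
  void *get_or_create(void *gbl_addr, std::size_t size) {
    auto it = priv.find(gbl_addr);
    if (it != priv.end())
      return it->second;                   // already seen: reuse the copy
    void *par_addr = ::operator new(size); // fresh per-thread storage
    std::memcpy(par_addr, gbl_addr, size); // plain byte-copy ("pod_init") case
    priv.emplace(gbl_addr, par_addr);
    return par_addr;
  }
};

The real runtime adds the cases this sketch skips: the master (uber) thread's entry simply aliases the original storage (tn->par_addr = pc_addr), registered constructors or copy constructors take precedence over the raw byte copy, and __kmp_common_destroy_gtid later walks the per-thread list to run the matching destructors.
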
+void kmp_threadprivate_insert_private_data(int gtid, void *pc_addr, + void *data_addr, size_t pc_size) { + struct shared_common **lnk_tn, *d_tn; + KMP_DEBUG_ASSERT(__kmp_threads[gtid] && + __kmp_threads[gtid]->th.th_root->r.r_active == 0); -/* - * NOTE: this routine is to be called only from the serial part of the program. - */ - -void -kmp_threadprivate_insert_private_data( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) -{ - struct shared_common **lnk_tn, *d_tn; - KMP_DEBUG_ASSERT( __kmp_threads[ gtid ] && - __kmp_threads[ gtid ] -> th.th_root -> r.r_active == 0 ); - - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, pc_addr ); + d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, gtid, + pc_addr); - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); + if (d_tn == 0) { + d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); - d_tn->gbl_addr = pc_addr; - d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0;; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; -*/ - d_tn->cmn_size = pc_size; + d_tn->gbl_addr = pc_addr; + d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size); + /* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate + zeroes the memory + d_tn->ct.ctor = 0; + d_tn->cct.cctor = 0;; + d_tn->dt.dtor = 0; + d_tn->is_vec = FALSE; + d_tn->vec_len = 0L; + */ + d_tn->cmn_size = pc_size; - __kmp_acquire_lock( &__kmp_global_lock, gtid ); + __kmp_acquire_lock(&__kmp_global_lock, gtid); - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); + lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]); - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; - __kmp_release_lock( &__kmp_global_lock, gtid ); - } + __kmp_release_lock(&__kmp_global_lock, gtid); + } } -struct private_common * -kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_size ) -{ - struct private_common *tn, **tt; - struct shared_common *d_tn; - - /* +++++++++ START OF CRITICAL SECTION +++++++++ */ +struct private_common *kmp_threadprivate_insert(int gtid, void *pc_addr, + void *data_addr, + size_t pc_size) { + struct private_common *tn, **tt; + struct shared_common *d_tn; - __kmp_acquire_lock( & __kmp_global_lock, gtid ); + /* +++++++++ START OF CRITICAL SECTION +++++++++ */ + __kmp_acquire_lock(&__kmp_global_lock, gtid); - tn = (struct private_common *) __kmp_allocate( sizeof (struct private_common) ); + tn = (struct private_common *)__kmp_allocate(sizeof(struct private_common)); - tn->gbl_addr = pc_addr; + tn->gbl_addr = pc_addr; - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - gtid, pc_addr ); /* Only the MASTER data table exists. */ + d_tn = __kmp_find_shared_task_common( + &__kmp_threadprivate_d_table, gtid, + pc_addr); /* Only the MASTER data table exists. */ - if (d_tn != 0) { - /* This threadprivate variable has already been seen. */ + if (d_tn != 0) { + /* This threadprivate variable has already been seen. 
*/ - if ( d_tn->pod_init == 0 && d_tn->obj_init == 0 ) { - d_tn->cmn_size = pc_size; + if (d_tn->pod_init == 0 && d_tn->obj_init == 0) { + d_tn->cmn_size = pc_size; - if (d_tn->is_vec) { - if (d_tn->ct.ctorv != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } - else if (d_tn->cct.cctorv != 0) { - /* Now data initialize the prototype since it was previously registered */ - d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); - (void) (*d_tn->cct.cctorv) (d_tn->obj_init, pc_addr, d_tn->vec_len); - } - else { - d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); - } - } else { - if (d_tn->ct.ctor != 0) { - /* Construct from scratch so no prototype exists */ - d_tn->obj_init = 0; - } - else if (d_tn->cct.cctor != 0) { - /* Now data initialize the prototype since it was previously registered */ - d_tn->obj_init = (void *) __kmp_allocate( d_tn->cmn_size ); - (void) (*d_tn->cct.cctor) (d_tn->obj_init, pc_addr); - } - else { - d_tn->pod_init = __kmp_init_common_data( data_addr, d_tn->cmn_size ); - } - } + if (d_tn->is_vec) { + if (d_tn->ct.ctorv != 0) { + /* Construct from scratch so no prototype exists */ + d_tn->obj_init = 0; + } else if (d_tn->cct.cctorv != 0) { + /* Now data initialize the prototype since it was previously + * registered */ + d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size); + (void)(*d_tn->cct.cctorv)(d_tn->obj_init, pc_addr, d_tn->vec_len); + } else { + d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size); } + } else { + if (d_tn->ct.ctor != 0) { + /* Construct from scratch so no prototype exists */ + d_tn->obj_init = 0; + } else if (d_tn->cct.cctor != 0) { + /* Now data initialize the prototype since it was previously + registered */ + d_tn->obj_init = (void *)__kmp_allocate(d_tn->cmn_size); + (void)(*d_tn->cct.cctor)(d_tn->obj_init, pc_addr); + } else { + d_tn->pod_init = __kmp_init_common_data(data_addr, d_tn->cmn_size); + } + } } - else { - struct shared_common **lnk_tn; - - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = pc_addr; - d_tn->cmn_size = pc_size; - d_tn->pod_init = __kmp_init_common_data( data_addr, pc_size ); -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->ct.ctor = 0; - d_tn->cct.cctor = 0; - d_tn->dt.dtor = 0; - d_tn->is_vec = FALSE; - d_tn->vec_len = 0L; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(pc_addr) ]); - - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } - - tn->cmn_size = d_tn->cmn_size; - - if ( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) { - tn->par_addr = (void *) pc_addr; - } - else { - tn->par_addr = (void *) __kmp_allocate( tn->cmn_size ); - } - - __kmp_release_lock( & __kmp_global_lock, gtid ); + } else { + struct shared_common **lnk_tn; - /* +++++++++ END OF CRITICAL SECTION +++++++++ */ + d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); + d_tn->gbl_addr = pc_addr; + d_tn->cmn_size = pc_size; + d_tn->pod_init = __kmp_init_common_data(data_addr, pc_size); + /* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate + zeroes the memory + d_tn->ct.ctor = 0; + d_tn->cct.cctor = 0; + d_tn->dt.dtor = 0; + d_tn->is_vec = FALSE; + d_tn->vec_len = 0L; + */ + lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(pc_addr)]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } + + tn->cmn_size = d_tn->cmn_size; + + if ((__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) { + tn->par_addr = (void *)pc_addr; + } else { + tn->par_addr = (void *)__kmp_allocate(tn->cmn_size); + } + + __kmp_release_lock(&__kmp_global_lock, gtid); +/* +++++++++ END OF CRITICAL SECTION +++++++++ */ #ifdef USE_CHECKS_COMMON - if (pc_size > d_tn->cmn_size) { - KC_TRACE( 10, ( "__kmp_threadprivate_insert: THREADPRIVATE: %p (%" - KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", - pc_addr, pc_size, d_tn->cmn_size ) ); - KMP_FATAL( TPCommonBlocksInconsist ); - } + if (pc_size > d_tn->cmn_size) { + KC_TRACE( + 10, ("__kmp_threadprivate_insert: THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC + " ,%" KMP_UINTPTR_SPEC ")\n", + pc_addr, pc_size, d_tn->cmn_size)); + KMP_FATAL(TPCommonBlocksInconsist); + } #endif /* USE_CHECKS_COMMON */ - tt = &(__kmp_threads[ gtid ]->th.th_pri_common->data[ KMP_HASH(pc_addr) ]); + tt = &(__kmp_threads[gtid]->th.th_pri_common->data[KMP_HASH(pc_addr)]); #ifdef KMP_TASK_COMMON_DEBUG - if (*tt != 0) { - KC_TRACE( 10, ( "__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n", - gtid, pc_addr ) ); - } + if (*tt != 0) { + KC_TRACE( + 10, + ("__kmp_threadprivate_insert: WARNING! thread#%d: collision on %p\n", + gtid, pc_addr)); + } #endif - tn->next = *tt; - *tt = tn; + tn->next = *tt; + *tt = tn; #ifdef KMP_TASK_COMMON_DEBUG - KC_TRACE( 10, ( "__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n", - gtid, pc_addr ) ); - dump_list( ); + KC_TRACE(10, + ("__kmp_threadprivate_insert: thread#%d, inserted node %p on list\n", + gtid, pc_addr)); + dump_list(); #endif - /* Link the node into a simple list */ + /* Link the node into a simple list */ - tn->link = __kmp_threads[ gtid ]->th.th_pri_head; - __kmp_threads[ gtid ]->th.th_pri_head = tn; + tn->link = __kmp_threads[gtid]->th.th_pri_head; + __kmp_threads[gtid]->th.th_pri_head = tn; #ifdef BUILD_TV - __kmp_tv_threadprivate_store( __kmp_threads[ gtid ], tn->gbl_addr, tn->par_addr ); + __kmp_tv_threadprivate_store(__kmp_threads[gtid], tn->gbl_addr, tn->par_addr); #endif - if( (__kmp_foreign_tp) ? (KMP_INITIAL_GTID (gtid)) : (KMP_UBER_GTID (gtid)) ) - return tn; - - /* - * if C++ object with copy constructor, use it; - * else if C++ object with constructor, use it for the non-master copies only; - * else use pod_init and memcpy - * - * C++ constructors need to be called once for each non-master thread on allocate - * C++ copy constructors need to be called once for each thread on allocate - */ + if ((__kmp_foreign_tp) ? 
(KMP_INITIAL_GTID(gtid)) : (KMP_UBER_GTID(gtid))) + return tn; - /* - * C++ object with constructors/destructors; - * don't call constructors for master thread though - */ - if (d_tn->is_vec) { - if ( d_tn->ct.ctorv != 0) { - (void) (*d_tn->ct.ctorv) (tn->par_addr, d_tn->vec_len); - } else if (d_tn->cct.cctorv != 0) { - (void) (*d_tn->cct.cctorv) (tn->par_addr, d_tn->obj_init, d_tn->vec_len); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); - } - } else { - if ( d_tn->ct.ctor != 0 ) { - (void) (*d_tn->ct.ctor) (tn->par_addr); - } else if (d_tn->cct.cctor != 0) { - (void) (*d_tn->cct.cctor) (tn->par_addr, d_tn->obj_init); - } else if (tn->par_addr != tn->gbl_addr) { - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); - } + /* if C++ object with copy constructor, use it; + * else if C++ object with constructor, use it for the non-master copies only; + * else use pod_init and memcpy + * + * C++ constructors need to be called once for each non-master thread on + * allocate + * C++ copy constructors need to be called once for each thread on allocate */ + + /* C++ object with constructors/destructors; don't call constructors for + master thread though */ + if (d_tn->is_vec) { + if (d_tn->ct.ctorv != 0) { + (void)(*d_tn->ct.ctorv)(tn->par_addr, d_tn->vec_len); + } else if (d_tn->cct.cctorv != 0) { + (void)(*d_tn->cct.cctorv)(tn->par_addr, d_tn->obj_init, d_tn->vec_len); + } else if (tn->par_addr != tn->gbl_addr) { + __kmp_copy_common_data(tn->par_addr, d_tn->pod_init); } -/* !BUILD_OPENMP_C - if (tn->par_addr != tn->gbl_addr) - __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */ + } else { + if (d_tn->ct.ctor != 0) { + (void)(*d_tn->ct.ctor)(tn->par_addr); + } else if (d_tn->cct.cctor != 0) { + (void)(*d_tn->cct.cctor)(tn->par_addr, d_tn->obj_init); + } else if (tn->par_addr != tn->gbl_addr) { + __kmp_copy_common_data(tn->par_addr, d_tn->pod_init); + } + } + /* !BUILD_OPENMP_C + if (tn->par_addr != tn->gbl_addr) + __kmp_copy_common_data( tn->par_addr, d_tn->pod_init ); */ - return tn; + return tn; } /* ------------------------------------------------------------------------ */ @@ -528,91 +502,95 @@ kmp_threadprivate_insert( int gtid, void *pc_addr, void *data_addr, size_t pc_si Register constructors and destructors for thread private data. This function is called when executing in parallel, when we know the thread id. */ -void -__kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, kmpc_cctor cctor, kmpc_dtor dtor) -{ - struct shared_common *d_tn, **lnk_tn; +void __kmpc_threadprivate_register(ident_t *loc, void *data, kmpc_ctor ctor, + kmpc_cctor cctor, kmpc_dtor dtor) { + struct shared_common *d_tn, **lnk_tn; - KC_TRACE( 10, ("__kmpc_threadprivate_register: called\n" ) ); + KC_TRACE(10, ("__kmpc_threadprivate_register: called\n")); #ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT( cctor == 0); + /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ + KMP_ASSERT(cctor == 0); #endif /* USE_CHECKS_COMMON */ - /* Only the global data table exists. */ - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, -1, data ); - - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = data; + /* Only the global data table exists. 
*/ + d_tn = __kmp_find_shared_task_common(&__kmp_threadprivate_d_table, -1, data); - d_tn->ct.ctor = ctor; - d_tn->cct.cctor = cctor; - d_tn->dt.dtor = dtor; -/* - d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->vec_len = 0L; - d_tn->obj_init = 0; - d_tn->pod_init = 0; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); + if (d_tn == 0) { + d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); + d_tn->gbl_addr = data; - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } + d_tn->ct.ctor = ctor; + d_tn->cct.cctor = cctor; + d_tn->dt.dtor = dtor; + /* + d_tn->is_vec = FALSE; // AC: commented out because __kmp_allocate + zeroes the memory + d_tn->vec_len = 0L; + d_tn->obj_init = 0; + d_tn->pod_init = 0; + */ + lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } } -void * -__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size) -{ - void *ret; - struct private_common *tn; +void *__kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, + size_t size) { + void *ret; + struct private_common *tn; - KC_TRACE( 10, ("__kmpc_threadprivate: T#%d called\n", global_tid ) ); + KC_TRACE(10, ("__kmpc_threadprivate: T#%d called\n", global_tid)); #ifdef USE_CHECKS_COMMON - if (! __kmp_init_serial) - KMP_FATAL( RTLNotInitialized ); + if (!__kmp_init_serial) + KMP_FATAL(RTLNotInitialized); #endif /* USE_CHECKS_COMMON */ - if ( ! __kmp_threads[global_tid] -> th.th_root -> r.r_active && ! __kmp_foreign_tp ) { - /* The parallel address will NEVER overlap with the data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the data_address; use data_address = data */ - - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting private data\n", global_tid ) ); - kmp_threadprivate_insert_private_data( global_tid, data, data, size ); - - ret = data; - } - else { - KC_TRACE( 50, ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", - global_tid, data ) ); - tn = __kmp_threadprivate_find_task_common( __kmp_threads[ global_tid ]->th.th_pri_common, global_tid, data ); - - if ( tn ) { - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d found data\n", global_tid ) ); + if (!__kmp_threads[global_tid]->th.th_root->r.r_active && !__kmp_foreign_tp) { + /* The parallel address will NEVER overlap with the data_address */ + /* dkp: 3rd arg to kmp_threadprivate_insert_private_data() is the + * data_address; use data_address = data */ + + KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting private data\n", + global_tid)); + kmp_threadprivate_insert_private_data(global_tid, data, data, size); + + ret = data; + } else { + KC_TRACE( + 50, + ("__kmpc_threadprivate: T#%d try to find private data at address %p\n", + global_tid, data)); + tn = __kmp_threadprivate_find_task_common( + __kmp_threads[global_tid]->th.th_pri_common, global_tid, data); + + if (tn) { + KC_TRACE(20, ("__kmpc_threadprivate: T#%d found data\n", global_tid)); #ifdef USE_CHECKS_COMMON - if ((size_t) size > tn->cmn_size) { - KC_TRACE( 10, ( "THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC " ,%" KMP_UINTPTR_SPEC ")\n", - data, size, tn->cmn_size ) ); - KMP_FATAL( TPCommonBlocksInconsist ); - } + if ((size_t)size > tn->cmn_size) { + KC_TRACE(10, ("THREADPRIVATE: %p (%" KMP_UINTPTR_SPEC + " ,%" KMP_UINTPTR_SPEC ")\n", + data, size, tn->cmn_size)); + KMP_FATAL(TPCommonBlocksInconsist); + } #endif /* USE_CHECKS_COMMON */ - } - else { - /* The parallel address will NEVER overlap with the 
data_address */ - /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use data_address = data */ - KC_TRACE( 20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid ) ); - tn = kmp_threadprivate_insert( global_tid, data, data, size ); - } - - ret = tn->par_addr; + } else { + /* The parallel address will NEVER overlap with the data_address */ + /* dkp: 3rd arg to kmp_threadprivate_insert() is the data_address; use + * data_address = data */ + KC_TRACE(20, ("__kmpc_threadprivate: T#%d inserting data\n", global_tid)); + tn = kmp_threadprivate_insert(global_tid, data, data, size); } - KC_TRACE( 10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n", - global_tid, ret ) ); - return ret; + ret = tn->par_addr; + } + KC_TRACE(10, ("__kmpc_threadprivate: T#%d exiting; return value = %p\n", + global_tid, ret)); + + return ret; } /*! @@ -627,62 +605,63 @@ __kmpc_threadprivate(ident_t *loc, kmp_int32 global_tid, void *data, size_t size Allocate private storage for threadprivate data. */ void * -__kmpc_threadprivate_cached( - ident_t * loc, - kmp_int32 global_tid, // gtid. - void * data, // Pointer to original global variable. - size_t size, // Size of original global variable. - void *** cache -) { - KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, address: %p, size: %" - KMP_SIZE_T_SPEC "\n", - global_tid, *cache, data, size ) ); - - if ( TCR_PTR(*cache) == 0) { - __kmp_acquire_lock( & __kmp_global_lock, global_tid ); - - if ( TCR_PTR(*cache) == 0) { - __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); - __kmp_tp_cached = 1; - __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); - void ** my_cache; - KMP_ITT_IGNORE( - my_cache = (void**) - __kmp_allocate(sizeof( void * ) * __kmp_tp_capacity + sizeof ( kmp_cached_addr_t )); - ); - // No need to zero the allocated memory; __kmp_allocate does that. - KC_TRACE( 50, ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n", - global_tid, my_cache ) ); - - /* TODO: free all this memory in __kmp_common_destroy using __kmp_threadpriv_cache_list */ - /* Add address of mycache to linked list for cleanup later */ - kmp_cached_addr_t *tp_cache_addr; - - tp_cache_addr = (kmp_cached_addr_t *) & my_cache[__kmp_tp_capacity]; - tp_cache_addr -> addr = my_cache; - tp_cache_addr -> next = __kmp_threadpriv_cache_list; - __kmp_threadpriv_cache_list = tp_cache_addr; - - KMP_MB(); - - TCW_PTR( *cache, my_cache); - - KMP_MB(); - } - - __kmp_release_lock( & __kmp_global_lock, global_tid ); +__kmpc_threadprivate_cached(ident_t *loc, + kmp_int32 global_tid, // gtid. + void *data, // Pointer to original global variable. + size_t size, // Size of original global variable. + void ***cache) { + KC_TRACE(10, ("__kmpc_threadprivate_cached: T#%d called with cache: %p, " + "address: %p, size: %" KMP_SIZE_T_SPEC "\n", + global_tid, *cache, data, size)); + + if (TCR_PTR(*cache) == 0) { + __kmp_acquire_lock(&__kmp_global_lock, global_tid); + + if (TCR_PTR(*cache) == 0) { + __kmp_acquire_bootstrap_lock(&__kmp_tp_cached_lock); + __kmp_tp_cached = 1; + __kmp_release_bootstrap_lock(&__kmp_tp_cached_lock); + void **my_cache; + KMP_ITT_IGNORE( + my_cache = (void **)__kmp_allocate( + sizeof(void *) * __kmp_tp_capacity + sizeof(kmp_cached_addr_t));); + // No need to zero the allocated memory; __kmp_allocate does that. 
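// [Illustrative sketch, standalone; names invented for the example.] The cache
//  allocation that __kmpc_threadprivate_cached performs here is a double-checked
//  publication: test *cache without the lock, take __kmp_global_lock, re-test,
//  allocate, then publish via TCW_PTR between KMP_MB() barriers. The same shape
//  with standard C++11 primitives:

#include <atomic>
#include <cstddef>
#include <mutex>

static std::mutex cache_mutex; // plays the role of __kmp_global_lock

void **get_or_create_cache(std::atomic<void **> &cache, std::size_t capacity) {
  void **tbl = cache.load(std::memory_order_acquire);
  if (tbl == nullptr) {             // first, unlocked check
    std::lock_guard<std::mutex> guard(cache_mutex);
    tbl = cache.load(std::memory_order_relaxed);
    if (tbl == nullptr) {           // second check under the lock
      tbl = new void *[capacity](); // zero-initialized slots
      cache.store(tbl, std::memory_order_release); // publish fully-built table
    }
  }
  return tbl;
}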
+ KC_TRACE( + 50, + ("__kmpc_threadprivate_cached: T#%d allocated cache at address %p\n", + global_tid, my_cache)); + + /* TODO: free all this memory in __kmp_common_destroy using + * __kmp_threadpriv_cache_list */ + /* Add address of mycache to linked list for cleanup later */ + kmp_cached_addr_t *tp_cache_addr; + + tp_cache_addr = (kmp_cached_addr_t *)&my_cache[__kmp_tp_capacity]; + tp_cache_addr->addr = my_cache; + tp_cache_addr->next = __kmp_threadpriv_cache_list; + __kmp_threadpriv_cache_list = tp_cache_addr; + + KMP_MB(); + + TCW_PTR(*cache, my_cache); + + KMP_MB(); } - void *ret; - if ((ret = TCR_PTR((*cache)[ global_tid ])) == 0) { - ret = __kmpc_threadprivate( loc, global_tid, data, (size_t) size); + __kmp_release_lock(&__kmp_global_lock, global_tid); + } - TCW_PTR( (*cache)[ global_tid ], ret); - } - KC_TRACE( 10, ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", - global_tid, ret ) ); + void *ret; + if ((ret = TCR_PTR((*cache)[global_tid])) == 0) { + ret = __kmpc_threadprivate(loc, global_tid, data, (size_t)size); + + TCW_PTR((*cache)[global_tid], ret); + } + KC_TRACE(10, + ("__kmpc_threadprivate_cached: T#%d exiting; return value = %p\n", + global_tid, ret)); - return ret; + return ret; } /*! @@ -695,39 +674,40 @@ __kmpc_threadprivate_cached( @param vector_length length of the vector (bytes or elements?) Register vector constructors and destructors for thread private data. */ -void -__kmpc_threadprivate_register_vec( ident_t *loc, void *data, kmpc_ctor_vec ctor, - kmpc_cctor_vec cctor, kmpc_dtor_vec dtor, - size_t vector_length ) -{ - struct shared_common *d_tn, **lnk_tn; +void __kmpc_threadprivate_register_vec(ident_t *loc, void *data, + kmpc_ctor_vec ctor, kmpc_cctor_vec cctor, + kmpc_dtor_vec dtor, + size_t vector_length) { + struct shared_common *d_tn, **lnk_tn; - KC_TRACE( 10, ("__kmpc_threadprivate_register_vec: called\n" ) ); + KC_TRACE(10, ("__kmpc_threadprivate_register_vec: called\n")); #ifdef USE_CHECKS_COMMON - /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ - KMP_ASSERT( cctor == 0); + /* copy constructor must be zero for current code gen (Nov 2002 - jph) */ + KMP_ASSERT(cctor == 0); #endif /* USE_CHECKS_COMMON */ - d_tn = __kmp_find_shared_task_common( &__kmp_threadprivate_d_table, - -1, data ); /* Only the global data table exists. */ - - if (d_tn == 0) { - d_tn = (struct shared_common *) __kmp_allocate( sizeof( struct shared_common ) ); - d_tn->gbl_addr = data; + d_tn = __kmp_find_shared_task_common( + &__kmp_threadprivate_d_table, -1, + data); /* Only the global data table exists. 
*/ - d_tn->ct.ctorv = ctor; - d_tn->cct.cctorv = cctor; - d_tn->dt.dtorv = dtor; - d_tn->is_vec = TRUE; - d_tn->vec_len = (size_t) vector_length; -/* - d_tn->obj_init = 0; // AC: commented out because __kmp_allocate zeroes the memory - d_tn->pod_init = 0; -*/ - lnk_tn = &(__kmp_threadprivate_d_table.data[ KMP_HASH(data) ]); + if (d_tn == 0) { + d_tn = (struct shared_common *)__kmp_allocate(sizeof(struct shared_common)); + d_tn->gbl_addr = data; - d_tn->next = *lnk_tn; - *lnk_tn = d_tn; - } + d_tn->ct.ctorv = ctor; + d_tn->cct.cctorv = cctor; + d_tn->dt.dtorv = dtor; + d_tn->is_vec = TRUE; + d_tn->vec_len = (size_t)vector_length; + /* + d_tn->obj_init = 0; // AC: commented out because __kmp_allocate + zeroes the memory + d_tn->pod_init = 0; + */ + lnk_tn = &(__kmp_threadprivate_d_table.data[KMP_HASH(data)]); + + d_tn->next = *lnk_tn; + *lnk_tn = d_tn; + } } diff --git a/openmp/runtime/src/kmp_utility.cpp b/openmp/runtime/src/kmp_utility.cpp index a5244b0..af25157 100644 --- a/openmp/runtime/src/kmp_utility.cpp +++ b/openmp/runtime/src/kmp_utility.cpp @@ -14,416 +14,396 @@ #include "kmp.h" -#include "kmp_wrapper_getpid.h" +#include "kmp_i18n.h" #include "kmp_str.h" +#include "kmp_wrapper_getpid.h" #include -#include "kmp_i18n.h" - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ static const char *unknown = "unknown"; #if KMP_ARCH_X86 || KMP_ARCH_X86_64 -/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then */ -/* the debugging package has not been initialized yet, and only "0" will print */ -/* debugging output since the environment variables have not been read. */ +/* NOTE: If called before serial_initialize (i.e. from runtime_initialize), then + the debugging package has not been initialized yet, and only "0" will print + debugging output since the environment variables have not been read. 
*/ #ifdef KMP_DEBUG static int trace_level = 5; #endif -/* - * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) +/* LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID * PHY_ID = APIC_ID >> LOG_ID_BITS */ -int -__kmp_get_physical_id( int log_per_phy, int apic_id ) -{ - int index_lsb, index_msb, temp; - - if (log_per_phy > 1) { - index_lsb = 0; - index_msb = 31; - - temp = log_per_phy; - while ( (temp & 1) == 0 ) { - temp >>= 1; - index_lsb++; - } - - temp = log_per_phy; - while ( (temp & 0x80000000)==0 ) { - temp <<= 1; - index_msb--; - } - - /* If >1 bits were set in log_per_phy, choose next higher power of 2 */ - if (index_lsb != index_msb) index_msb++; - - return ( (int) (apic_id >> index_msb) ); - } - - return apic_id; -} +int __kmp_get_physical_id(int log_per_phy, int apic_id) { + int index_lsb, index_msb, temp; + if (log_per_phy > 1) { + index_lsb = 0; + index_msb = 31; + + temp = log_per_phy; + while ((temp & 1) == 0) { + temp >>= 1; + index_lsb++; + } + + temp = log_per_phy; + while ((temp & 0x80000000) == 0) { + temp <<= 1; + index_msb--; + } + + /* If >1 bits were set in log_per_phy, choose next higher power of 2 */ + if (index_lsb != index_msb) + index_msb++; + + return ((int)(apic_id >> index_msb)); + } + + return apic_id; +} /* * LOG_ID_BITS = ( 1 + floor( log_2( max( log_per_phy - 1, 1 )))) * APIC_ID = (PHY_ID << LOG_ID_BITS) | LOG_ID * LOG_ID = APIC_ID & (( 1 << LOG_ID_BITS ) - 1 ) */ -int -__kmp_get_logical_id( int log_per_phy, int apic_id ) -{ - unsigned current_bit; - int bits_seen; +int __kmp_get_logical_id(int log_per_phy, int apic_id) { + unsigned current_bit; + int bits_seen; - if (log_per_phy <= 1) return ( 0 ); + if (log_per_phy <= 1) + return (0); - bits_seen = 0; + bits_seen = 0; - for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) { - if ( log_per_phy & current_bit ) { - log_per_phy &= ~current_bit; - bits_seen++; - } - } + for (current_bit = 1; log_per_phy != 0; current_bit <<= 1) { + if (log_per_phy & current_bit) { + log_per_phy &= ~current_bit; + bits_seen++; + } + } - /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */ - if (bits_seen == 1) { - current_bit >>= 1; - } + /* If exactly 1 bit was set in log_per_phy, choose next lower power of 2 */ + if (bits_seen == 1) { + current_bit >>= 1; + } - return ( (int) ((current_bit - 1) & apic_id) ); + return ((int)((current_bit - 1) & apic_id)); } +static kmp_uint64 __kmp_parse_frequency( // R: Frequency in Hz. + char const *frequency // I: Float number and unit: MHz, GHz, or TGz. + ) { -static -kmp_uint64 -__kmp_parse_frequency( // R: Frequency in Hz. - char const * frequency // I: Float number and unit: MHz, GHz, or TGz. -) { + double value = 0.0; + char const *unit = NULL; + kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */ - double value = 0.0; - char const * unit = NULL; - kmp_uint64 result = 0; /* Zero is a better unknown value than all ones. */ - - if ( frequency == NULL ) { - return result; - }; // if - value = strtod( frequency, (char * *) & unit ); // strtod() does not like "char const *". - if ( 0 < value && value <= DBL_MAX ) { // Good value (not overflow, underflow, etc). - if ( strcmp( unit, "MHz" ) == 0 ) { - value = value * 1.0E+6; - } else if ( strcmp( unit, "GHz" ) == 0 ) { - value = value * 1.0E+9; - } else if ( strcmp( unit, "THz" ) == 0 ) { - value = value * 1.0E+12; - } else { // Wrong unit. 
- return result; - }; // if - result = value; - }; // if + if (frequency == NULL) { return result; + }; // if + value = strtod(frequency, + (char **)&unit); // strtod() does not like "char const *". + if (0 < value && + value <= DBL_MAX) { // Good value (not overflow, underflow, etc). + if (strcmp(unit, "MHz") == 0) { + value = value * 1.0E+6; + } else if (strcmp(unit, "GHz") == 0) { + value = value * 1.0E+9; + } else if (strcmp(unit, "THz") == 0) { + value = value * 1.0E+12; + } else { // Wrong unit. + return result; + }; // if + result = value; + }; // if + return result; }; // func __kmp_parse_cpu_frequency -void -__kmp_query_cpuid( kmp_cpuinfo_t *p ) -{ - struct kmp_cpuid buf; - int max_arg; - int log_per_phy; +void __kmp_query_cpuid(kmp_cpuinfo_t *p) { + struct kmp_cpuid buf; + int max_arg; + int log_per_phy; #ifdef KMP_DEBUG - int cflush_size; + int cflush_size; #endif - p->initialized = 1; + p->initialized = 1; - p->sse2 = 1; // Assume SSE2 by default. + p->sse2 = 1; // Assume SSE2 by default. - __kmp_x86_cpuid( 0, 0, &buf ); + __kmp_x86_cpuid(0, 0, &buf); - KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - 0, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); + KA_TRACE(trace_level, + ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", 0, + buf.eax, buf.ebx, buf.ecx, buf.edx)); - max_arg = buf.eax; + max_arg = buf.eax; - p->apic_id = -1; + p->apic_id = -1; - if (max_arg >= 1) { - int i; - kmp_uint32 t, data[ 4 ]; + if (max_arg >= 1) { + int i; + kmp_uint32 t, data[4]; - __kmp_x86_cpuid( 1, 0, &buf ); - KA_TRACE( trace_level, ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - 1, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); + __kmp_x86_cpuid(1, 0, &buf); + KA_TRACE(trace_level, + ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", + 1, buf.eax, buf.ebx, buf.ecx, buf.edx)); - { -#define get_value(reg,lo,mask) ( ( ( reg ) >> ( lo ) ) & ( mask ) ) + { +#define get_value(reg, lo, mask) (((reg) >> (lo)) & (mask)) - p->signature = buf.eax; - p->family = get_value( buf.eax, 20, 0xff ) + get_value( buf.eax, 8, 0x0f ); - p->model = ( get_value( buf.eax, 16, 0x0f ) << 4 ) + get_value( buf.eax, 4, 0x0f ); - p->stepping = get_value( buf.eax, 0, 0x0f ); + p->signature = buf.eax; + p->family = get_value(buf.eax, 20, 0xff) + get_value(buf.eax, 8, 0x0f); + p->model = + (get_value(buf.eax, 16, 0x0f) << 4) + get_value(buf.eax, 4, 0x0f); + p->stepping = get_value(buf.eax, 0, 0x0f); #undef get_value - KA_TRACE( trace_level, (" family = %d, model = %d, stepping = %d\n", p->family, p->model, p->stepping ) ); - } + KA_TRACE(trace_level, (" family = %d, model = %d, stepping = %d\n", + p->family, p->model, p->stepping)); + } - for ( t = buf.ebx, i = 0; i < 4; t >>= 8, ++i ) { - data[ i ] = (t & 0xff); - }; // for + for (t = buf.ebx, i = 0; i < 4; t >>= 8, ++i) { + data[i] = (t & 0xff); + }; // for - p->sse2 = ( buf.edx >> 26 ) & 1; + p->sse2 = (buf.edx >> 26) & 1; #ifdef KMP_DEBUG - if ( (buf.edx >> 4) & 1 ) { - /* TSC - Timestamp Counter Available */ - KA_TRACE( trace_level, (" TSC" ) ); - } - if ( (buf.edx >> 8) & 1 ) { - /* CX8 - CMPXCHG8B Instruction Available */ - KA_TRACE( trace_level, (" CX8" ) ); - } - if ( (buf.edx >> 9) & 1 ) { - /* APIC - Local APIC Present (multi-processor operation support */ - KA_TRACE( trace_level, (" APIC" ) ); - } - if ( (buf.edx >> 15) & 1 ) { - /* CMOV - Conditional MOVe Instruction Available */ - KA_TRACE( trace_level, (" CMOV" ) ); - } - if ( (buf.edx >> 18) & 1 ) { - /* PSN - Processor Serial Number 
Available */ - KA_TRACE( trace_level, (" PSN" ) ); - } - if ( (buf.edx >> 19) & 1 ) { - /* CLFULSH - Cache Flush Instruction Available */ - cflush_size = data[ 1 ] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */ - KA_TRACE( trace_level, (" CLFLUSH(%db)", cflush_size ) ); - - } - if ( (buf.edx >> 21) & 1 ) { - /* DTES - Debug Trace & EMON Store */ - KA_TRACE( trace_level, (" DTES" ) ); - } - if ( (buf.edx >> 22) & 1 ) { - /* ACPI - ACPI Support Available */ - KA_TRACE( trace_level, (" ACPI" ) ); - } - if ( (buf.edx >> 23) & 1 ) { - /* MMX - Multimedia Extensions */ - KA_TRACE( trace_level, (" MMX" ) ); - } - if ( (buf.edx >> 25) & 1 ) { - /* SSE - SSE Instructions */ - KA_TRACE( trace_level, (" SSE" ) ); - } - if ( (buf.edx >> 26) & 1 ) { - /* SSE2 - SSE2 Instructions */ - KA_TRACE( trace_level, (" SSE2" ) ); - } - if ( (buf.edx >> 27) & 1 ) { - /* SLFSNP - Self-Snooping Cache */ - KA_TRACE( trace_level, (" SLFSNP" ) ); - } + if ((buf.edx >> 4) & 1) { + /* TSC - Timestamp Counter Available */ + KA_TRACE(trace_level, (" TSC")); + } + if ((buf.edx >> 8) & 1) { + /* CX8 - CMPXCHG8B Instruction Available */ + KA_TRACE(trace_level, (" CX8")); + } + if ((buf.edx >> 9) & 1) { + /* APIC - Local APIC Present (multi-processor operation support */ + KA_TRACE(trace_level, (" APIC")); + } + if ((buf.edx >> 15) & 1) { + /* CMOV - Conditional MOVe Instruction Available */ + KA_TRACE(trace_level, (" CMOV")); + } + if ((buf.edx >> 18) & 1) { + /* PSN - Processor Serial Number Available */ + KA_TRACE(trace_level, (" PSN")); + } + if ((buf.edx >> 19) & 1) { + /* CLFULSH - Cache Flush Instruction Available */ + cflush_size = + data[1] * 8; /* Bits 15-08: CLFLUSH line size = 8 (64 bytes) */ + KA_TRACE(trace_level, (" CLFLUSH(%db)", cflush_size)); + } + if ((buf.edx >> 21) & 1) { + /* DTES - Debug Trace & EMON Store */ + KA_TRACE(trace_level, (" DTES")); + } + if ((buf.edx >> 22) & 1) { + /* ACPI - ACPI Support Available */ + KA_TRACE(trace_level, (" ACPI")); + } + if ((buf.edx >> 23) & 1) { + /* MMX - Multimedia Extensions */ + KA_TRACE(trace_level, (" MMX")); + } + if ((buf.edx >> 25) & 1) { + /* SSE - SSE Instructions */ + KA_TRACE(trace_level, (" SSE")); + } + if ((buf.edx >> 26) & 1) { + /* SSE2 - SSE2 Instructions */ + KA_TRACE(trace_level, (" SSE2")); + } + if ((buf.edx >> 27) & 1) { + /* SLFSNP - Self-Snooping Cache */ + KA_TRACE(trace_level, (" SLFSNP")); + } #endif /* KMP_DEBUG */ - if ( (buf.edx >> 28) & 1 ) { - /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */ - log_per_phy = data[ 2 ]; - p->apic_id = data[ 3 ]; /* Bits 31-24: Processor Initial APIC ID (X) */ - KA_TRACE( trace_level, (" HT(%d TPUs)", log_per_phy ) ); + if ((buf.edx >> 28) & 1) { + /* Bits 23-16: Logical Processors per Physical Processor (1 for P4) */ + log_per_phy = data[2]; + p->apic_id = data[3]; /* Bits 31-24: Processor Initial APIC ID (X) */ + KA_TRACE(trace_level, (" HT(%d TPUs)", log_per_phy)); - if( log_per_phy > 1 ) { - /* default to 1k FOR JT-enabled processors (4k on OS X*) */ + if (log_per_phy > 1) { +/* default to 1k FOR JT-enabled processors (4k on OS X*) */ #if KMP_OS_DARWIN - p->cpu_stackoffset = 4 * 1024; + p->cpu_stackoffset = 4 * 1024; #else - p->cpu_stackoffset = 1 * 1024; + p->cpu_stackoffset = 1 * 1024; #endif - } + } - p->physical_id = __kmp_get_physical_id( log_per_phy, p->apic_id ); - p->logical_id = __kmp_get_logical_id( log_per_phy, p->apic_id ); - } + p->physical_id = __kmp_get_physical_id(log_per_phy, p->apic_id); + p->logical_id = __kmp_get_logical_id(log_per_phy, p->apic_id); 
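// [Worked example, standalone; values chosen for illustration.] The two helpers
//  used just above split the initial APIC ID into a physical package id and a
//  logical id within the package. The shift width is ceil(log2(log_per_phy)):
//  for log_per_phy == 2 that is 1 bit, so an APIC ID of 5 (binary 101) yields
//  physical_id == 2 and logical_id == 1. Compactly:

#include <cstdio>

static int log_id_bits(int log_per_phy) {
  // number of low bits reserved for the logical id: ceil(log2(log_per_phy))
  if (log_per_phy <= 1)
    return 0;
  int bits = 0;
  for (int v = log_per_phy - 1; v != 0; v >>= 1)
    ++bits;
  return bits;
}

int main() {
  const int log_per_phy = 2, apic_id = 5;    // example values
  const int bits = log_id_bits(log_per_phy); // 1
  std::printf("physical=%d logical=%d\n",
              apic_id >> bits,               // 2
              apic_id & ((1 << bits) - 1));  // 1
  return 0;
}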
+ } #ifdef KMP_DEBUG - if ( (buf.edx >> 29) & 1 ) { - /* ATHROTL - Automatic Throttle Control */ - KA_TRACE( trace_level, (" ATHROTL" ) ); - } - KA_TRACE( trace_level, (" ]\n" ) ); + if ((buf.edx >> 29) & 1) { + /* ATHROTL - Automatic Throttle Control */ + KA_TRACE(trace_level, (" ATHROTL")); + } + KA_TRACE(trace_level, (" ]\n")); - for (i = 2; i <= max_arg; ++i) { - __kmp_x86_cpuid( i, 0, &buf ); - KA_TRACE( trace_level, - ( "INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", - i, buf.eax, buf.ebx, buf.ecx, buf.edx ) ); - } + for (i = 2; i <= max_arg; ++i) { + __kmp_x86_cpuid(i, 0, &buf); + KA_TRACE(trace_level, + ("INFO: CPUID %d: EAX=0x%08X EBX=0x%08X ECX=0x%08X EDX=0x%08X\n", + i, buf.eax, buf.ebx, buf.ecx, buf.edx)); + } #endif #if KMP_USE_ADAPTIVE_LOCKS - p->rtm = 0; - if (max_arg > 7) - { - /* RTM bit CPUID.07:EBX, bit 11 */ - __kmp_x86_cpuid(7, 0, &buf); - p->rtm = (buf.ebx >> 11) & 1; - KA_TRACE( trace_level, (" RTM" ) ); - } -#endif - }; // if - - { // Parse CPU brand string for frequency, saving the string for later. - int i; - kmp_cpuid_t * base = (kmp_cpuid_t *)&p->name[0]; - - // Get CPU brand string. - for ( i = 0; i < 3; ++ i ) { - __kmp_x86_cpuid( 0x80000002 + i, 0, base+i ); - }; // for - p->name[ sizeof(p->name) - 1 ] = 0; // Just in case. ;-) - KA_TRACE( trace_level, ( "cpu brand string: \"%s\"\n", &p->name[0] ) ); - - // Parse frequency. - p->frequency = __kmp_parse_frequency( strrchr( &p->name[0], ' ' ) ); - KA_TRACE( trace_level, ( "cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", p->frequency ) ); + p->rtm = 0; + if (max_arg > 7) { + /* RTM bit CPUID.07:EBX, bit 11 */ + __kmp_x86_cpuid(7, 0, &buf); + p->rtm = (buf.ebx >> 11) & 1; + KA_TRACE(trace_level, (" RTM")); } +#endif + }; // if + + { // Parse CPU brand string for frequency, saving the string for later. + int i; + kmp_cpuid_t *base = (kmp_cpuid_t *)&p->name[0]; + + // Get CPU brand string. + for (i = 0; i < 3; ++i) { + __kmp_x86_cpuid(0x80000002 + i, 0, base + i); + }; // for + p->name[sizeof(p->name) - 1] = 0; // Just in case. ;-) + KA_TRACE(trace_level, ("cpu brand string: \"%s\"\n", &p->name[0])); + + // Parse frequency. + p->frequency = __kmp_parse_frequency(strrchr(&p->name[0], ' ')); + KA_TRACE(trace_level, + ("cpu frequency from brand string: %" KMP_UINT64_SPEC "\n", + p->frequency)); + } } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ -/* ------------------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------------------ */ - -void -__kmp_expand_host_name( char *buffer, size_t size ) -{ - KMP_DEBUG_ASSERT(size >= sizeof(unknown)); +void __kmp_expand_host_name(char *buffer, size_t size) { + KMP_DEBUG_ASSERT(size >= sizeof(unknown)); #if KMP_OS_WINDOWS - { - DWORD s = size; + { + DWORD s = size; - if (! GetComputerNameA( buffer, & s )) - KMP_STRCPY_S( buffer, size, unknown ); - } + if (!GetComputerNameA(buffer, &s)) + KMP_STRCPY_S(buffer, size, unknown); + } #else - buffer[size - 2] = 0; - if (gethostname( buffer, size ) || buffer[size - 2] != 0) - KMP_STRCPY_S( buffer, size, unknown ); + buffer[size - 2] = 0; + if (gethostname(buffer, size) || buffer[size - 2] != 0) + KMP_STRCPY_S(buffer, size, unknown); #endif } /* Expand the meta characters in the filename: - * * Currently defined characters are: - * * %H the hostname * %P the number of threads used. * %I the unique identifier for this run. 
*/ -void -__kmp_expand_file_name( char *result, size_t rlen, char *pattern ) -{ - char *pos = result, *end = result + rlen - 1; - char buffer[256]; - int default_cpu_width = 1; - int snp_result; - - KMP_DEBUG_ASSERT(rlen > 0); - *end = 0; - { - int i; - for(i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width); - } +void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) { + char *pos = result, *end = result + rlen - 1; + char buffer[256]; + int default_cpu_width = 1; + int snp_result; + + KMP_DEBUG_ASSERT(rlen > 0); + *end = 0; + { + int i; + for (i = __kmp_xproc; i >= 10; i /= 10, ++default_cpu_width) + ; + } + + if (pattern != NULL) { + while (*pattern != '\0' && pos < end) { + if (*pattern != '%') { + *pos++ = *pattern++; + } else { + char *old_pattern = pattern; + int width = 1; + int cpu_width = default_cpu_width; + + ++pattern; + + if (*pattern >= '0' && *pattern <= '9') { + width = 0; + do { + width = (width * 10) + *pattern++ - '0'; + } while (*pattern >= '0' && *pattern <= '9'); + if (width < 0 || width > 1024) + width = 1; + + cpu_width = width; + } - if (pattern != NULL) { - while (*pattern != '\0' && pos < end) { - if (*pattern != '%') { - *pos++ = *pattern++; - } else { - char *old_pattern = pattern; - int width = 1; - int cpu_width = default_cpu_width; - - ++pattern; - - if (*pattern >= '0' && *pattern <= '9') { - width = 0; - do { - width = (width * 10) + *pattern++ - '0'; - } while (*pattern >= '0' && *pattern <= '9'); - if (width < 0 || width > 1024) - width = 1; - - cpu_width = width; - } - - switch (*pattern) { - case 'H': - case 'h': - { - __kmp_expand_host_name( buffer, sizeof( buffer ) ); - KMP_STRNCPY( pos, buffer, end - pos + 1); - if(*end == 0) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - } - break; - case 'P': - case 'p': - { - snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", cpu_width, __kmp_dflt_team_nth ); - if(snp_result >= 0 && snp_result <= end - pos) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - } - break; - case 'I': - case 'i': - { - pid_t id = getpid(); - snp_result = KMP_SNPRINTF( pos, end - pos + 1, "%0*d", width, id ); - if(snp_result >= 0 && snp_result <= end - pos) { - while ( *pos ) - ++pos; - ++pattern; - } else - pos = end; - break; - } - case '%': - { - *pos++ = '%'; - ++pattern; - break; - } - default: - { - *pos++ = '%'; - pattern = old_pattern + 1; - break; - } - } - } - } - /* TODO: How do we get rid of this? */ - if(*pattern != '\0') - KMP_FATAL( FileNameTooLong ); + switch (*pattern) { + case 'H': + case 'h': { + __kmp_expand_host_name(buffer, sizeof(buffer)); + KMP_STRNCPY(pos, buffer, end - pos + 1); + if (*end == 0) { + while (*pos) + ++pos; + ++pattern; + } else + pos = end; + } break; + case 'P': + case 'p': { + snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", cpu_width, + __kmp_dflt_team_nth); + if (snp_result >= 0 && snp_result <= end - pos) { + while (*pos) + ++pos; + ++pattern; + } else + pos = end; + } break; + case 'I': + case 'i': { + pid_t id = getpid(); + snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id); + if (snp_result >= 0 && snp_result <= end - pos) { + while (*pos) + ++pos; + ++pattern; + } else + pos = end; + break; + } + case '%': { + *pos++ = '%'; + ++pattern; + break; + } + default: { + *pos++ = '%'; + pattern = old_pattern + 1; + break; + } + } + } } + /* TODO: How do we get rid of this? 
*/ + if (*pattern != '\0') + KMP_FATAL(FileNameTooLong); + } - *pos = '\0'; + *pos = '\0'; } - diff --git a/openmp/runtime/src/kmp_version.cpp b/openmp/runtime/src/kmp_version.cpp index 6e68baf..57720e5 100644 --- a/openmp/runtime/src/kmp_version.cpp +++ b/openmp/runtime/src/kmp_version.cpp @@ -18,199 +18,191 @@ #include "kmp_version.h" // Replace with snapshot date YYYYMMDD for promotion build. -#define KMP_VERSION_BUILD 20140926 +#define KMP_VERSION_BUILD 20140926 // Helper macros to convert value of macro to string literal. -#define _stringer( x ) #x -#define stringer( x ) _stringer( x ) +#define _stringer(x) #x +#define stringer(x) _stringer(x) // Detect compiler. #if KMP_COMPILER_ICC - #if __INTEL_COMPILER == 1010 - #define KMP_COMPILER "Intel C++ Compiler 10.1" - #elif __INTEL_COMPILER == 1100 - #define KMP_COMPILER "Intel C++ Compiler 11.0" - #elif __INTEL_COMPILER == 1110 - #define KMP_COMPILER "Intel C++ Compiler 11.1" - #elif __INTEL_COMPILER == 1200 - #define KMP_COMPILER "Intel C++ Compiler 12.0" - #elif __INTEL_COMPILER == 1210 - #define KMP_COMPILER "Intel C++ Compiler 12.1" - #elif __INTEL_COMPILER == 1300 - #define KMP_COMPILER "Intel C++ Compiler 13.0" - #elif __INTEL_COMPILER == 1310 - #define KMP_COMPILER "Intel C++ Compiler 13.1" - #elif __INTEL_COMPILER == 1400 - #define KMP_COMPILER "Intel C++ Compiler 14.0" - #elif __INTEL_COMPILER == 1410 - #define KMP_COMPILER "Intel C++ Compiler 14.1" - #elif __INTEL_COMPILER == 1500 - #define KMP_COMPILER "Intel C++ Compiler 15.0" - #elif __INTEL_COMPILER == 1600 - #define KMP_COMPILER "Intel C++ Compiler 16.0" - #elif __INTEL_COMPILER == 1700 - #define KMP_COMPILER "Intel C++ Compiler 17.0" - #elif __INTEL_COMPILER == 9998 - #define KMP_COMPILER "Intel C++ Compiler mainline" - #elif __INTEL_COMPILER == 9999 - #define KMP_COMPILER "Intel C++ Compiler mainline" - #endif +#if __INTEL_COMPILER == 1010 +#define KMP_COMPILER "Intel C++ Compiler 10.1" +#elif __INTEL_COMPILER == 1100 +#define KMP_COMPILER "Intel C++ Compiler 11.0" +#elif __INTEL_COMPILER == 1110 +#define KMP_COMPILER "Intel C++ Compiler 11.1" +#elif __INTEL_COMPILER == 1200 +#define KMP_COMPILER "Intel C++ Compiler 12.0" +#elif __INTEL_COMPILER == 1210 +#define KMP_COMPILER "Intel C++ Compiler 12.1" +#elif __INTEL_COMPILER == 1300 +#define KMP_COMPILER "Intel C++ Compiler 13.0" +#elif __INTEL_COMPILER == 1310 +#define KMP_COMPILER "Intel C++ Compiler 13.1" +#elif __INTEL_COMPILER == 1400 +#define KMP_COMPILER "Intel C++ Compiler 14.0" +#elif __INTEL_COMPILER == 1410 +#define KMP_COMPILER "Intel C++ Compiler 14.1" +#elif __INTEL_COMPILER == 1500 +#define KMP_COMPILER "Intel C++ Compiler 15.0" +#elif __INTEL_COMPILER == 1600 +#define KMP_COMPILER "Intel C++ Compiler 16.0" +#elif __INTEL_COMPILER == 1700 +#define KMP_COMPILER "Intel C++ Compiler 17.0" +#elif __INTEL_COMPILER == 9998 +#define KMP_COMPILER "Intel C++ Compiler mainline" +#elif __INTEL_COMPILER == 9999 +#define KMP_COMPILER "Intel C++ Compiler mainline" +#endif #elif KMP_COMPILER_CLANG - #define KMP_COMPILER "Clang " stringer( __clang_major__ ) "." stringer( __clang_minor__ ) +#define KMP_COMPILER \ + "Clang " stringer(__clang_major__) "." stringer(__clang_minor__) #elif KMP_COMPILER_GCC - #define KMP_COMPILER "GCC " stringer( __GNUC__ ) "." stringer( __GNUC_MINOR__ ) +#define KMP_COMPILER "GCC " stringer(__GNUC__) "." 
stringer(__GNUC_MINOR__) #elif KMP_COMPILER_MSVC - #define KMP_COMPILER "MSVC " stringer( _MSC_FULL_VER ) +#define KMP_COMPILER "MSVC " stringer(_MSC_FULL_VER) #endif #ifndef KMP_COMPILER - #warning "Unknown compiler" - #define KMP_COMPILER "unknown compiler" +#warning "Unknown compiler" +#define KMP_COMPILER "unknown compiler" #endif // Detect librray type (perf, stub). #ifdef KMP_STUB - #define KMP_LIB_TYPE "stub" +#define KMP_LIB_TYPE "stub" #else - #define KMP_LIB_TYPE "performance" +#define KMP_LIB_TYPE "performance" #endif // KMP_LIB_TYPE // Detect link type (static, dynamic). #ifdef KMP_DYNAMIC_LIB - #define KMP_LINK_TYPE "dynamic" +#define KMP_LINK_TYPE "dynamic" #else - #define KMP_LINK_TYPE "static" +#define KMP_LINK_TYPE "static" #endif // KMP_LINK_TYPE // Finally, define strings. -#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")" +#define KMP_LIBRARY KMP_LIB_TYPE " library (" KMP_LINK_TYPE ")" #define KMP_COPYRIGHT "" int const __kmp_version_major = KMP_VERSION_MAJOR; int const __kmp_version_minor = KMP_VERSION_MINOR; int const __kmp_version_build = KMP_VERSION_BUILD; int const __kmp_openmp_version = - #if OMP_50_ENABLED - 201611; - #elif OMP_45_ENABLED - 201511; - #elif OMP_40_ENABLED - 201307; - #else - 201107; - #endif - -/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for a - specific format some changes in the recognition routine there need to - be made before this is changed. -*/ -char const __kmp_copyright[] = - KMP_VERSION_PREFIX KMP_LIBRARY - " ver. " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) - "." stringer( KMP_VERSION_BUILD ) " " - KMP_COPYRIGHT; - -char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT; -char const __kmp_version_lib_ver[] = KMP_VERSION_PREFIX "version: " stringer( KMP_VERSION_MAJOR ) "." stringer( KMP_VERSION_MINOR ) "." stringer( KMP_VERSION_BUILD ); -char const __kmp_version_lib_type[] = KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE; -char const __kmp_version_link_type[] = KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE; -char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " "no_timestamp"; +#if OMP_50_ENABLED + 201611; +#elif OMP_45_ENABLED + 201511; +#elif OMP_40_ENABLED + 201307; +#else + 201107; +#endif + +/* Do NOT change the format of this string! Intel(R) Thread Profiler checks for + a specific format some changes in the recognition routine there need to be + made before this is changed. */ +char const __kmp_copyright[] = KMP_VERSION_PREFIX KMP_LIBRARY + " ver. " stringer(KMP_VERSION_MAJOR) "." stringer( + KMP_VERSION_MINOR) "." stringer(KMP_VERSION_BUILD) " " KMP_COPYRIGHT; + +char const __kmp_version_copyright[] = KMP_VERSION_PREFIX KMP_COPYRIGHT; +char const __kmp_version_lib_ver[] = + KMP_VERSION_PREFIX "version: " stringer(KMP_VERSION_MAJOR) "." stringer( + KMP_VERSION_MINOR) "." 
stringer(KMP_VERSION_BUILD); +char const __kmp_version_lib_type[] = + KMP_VERSION_PREFIX "library type: " KMP_LIB_TYPE; +char const __kmp_version_link_type[] = + KMP_VERSION_PREFIX "link type: " KMP_LINK_TYPE; +char const __kmp_version_build_time[] = KMP_VERSION_PREFIX "build time: " + "no_timestamp"; #if KMP_MIC2 - char const __kmp_version_target_env[] = KMP_VERSION_PREFIX "target environment: MIC2"; +char const __kmp_version_target_env[] = + KMP_VERSION_PREFIX "target environment: MIC2"; #endif -char const __kmp_version_build_compiler[] = KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER; +char const __kmp_version_build_compiler[] = + KMP_VERSION_PREFIX "build compiler: " KMP_COMPILER; -// // Called at serial initialization time. -// static int __kmp_version_1_printed = FALSE; -void -__kmp_print_version_1( void ) -{ - if ( __kmp_version_1_printed ) { - return; - }; // if - __kmp_version_1_printed = TRUE; - - #ifndef KMP_STUB - kmp_str_buf_t buffer; - __kmp_str_buf_init( & buffer ); - // Print version strings skipping initial magic. - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_ver[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lib_type[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_link_type[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_time[ KMP_VERSION_MAGIC_LEN ] ); - #if KMP_MIC - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_target_env[ KMP_VERSION_MAGIC_LEN ] ); - #endif - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_build_compiler[ KMP_VERSION_MAGIC_LEN ] ); - #if defined(KMP_GOMP_COMPAT) - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_alt_comp[ KMP_VERSION_MAGIC_LEN ] ); - #endif /* defined(KMP_GOMP_COMPAT) */ - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_omp_api[ KMP_VERSION_MAGIC_LEN ] ); - __kmp_str_buf_print( & buffer, "%sdynamic error checking: %s\n", KMP_VERSION_PREF_STR, ( __kmp_env_consistency_check ? "yes" : "no" ) ); - #ifdef KMP_DEBUG - for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { - __kmp_str_buf_print( - & buffer, - "%s%s barrier branch bits: gather=%u, release=%u\n", - KMP_VERSION_PREF_STR, - __kmp_barrier_type_name[ i ], - __kmp_barrier_gather_branch_bits[ i ], - __kmp_barrier_release_branch_bits[ i ] - ); // __kmp_str_buf_print - }; // for i - for ( int i = bs_plain_barrier; i < bs_last_barrier; ++ i ) { - __kmp_str_buf_print( - & buffer, - "%s%s barrier pattern: gather=%s, release=%s\n", - KMP_VERSION_PREF_STR, - __kmp_barrier_type_name[ i ], - __kmp_barrier_pattern_name[ __kmp_barrier_gather_pattern[ i ] ], - __kmp_barrier_pattern_name[ __kmp_barrier_release_pattern[ i ] ] - ); // __kmp_str_buf_print - }; // for i - __kmp_str_buf_print( & buffer, "%s\n", & __kmp_version_lock[ KMP_VERSION_MAGIC_LEN ] ); - #endif - __kmp_str_buf_print( - & buffer, - "%sthread affinity support: %s\n", - KMP_VERSION_PREF_STR, - #if KMP_AFFINITY_SUPPORTED - ( - KMP_AFFINITY_CAPABLE() - ? - ( - __kmp_affinity_type == affinity_none - ? - "not used" - : - "yes" - ) - : - "no" - ) - #else - "no" - #endif - ); - __kmp_printf( "%s", buffer.str ); - __kmp_str_buf_free( & buffer ); - K_DIAG( 1, ( "KMP_VERSION is true\n" ) ); - #endif // KMP_STUB +void __kmp_print_version_1(void) { + if (__kmp_version_1_printed) { + return; + }; // if + __kmp_version_1_printed = TRUE; + +#ifndef KMP_STUB + kmp_str_buf_t buffer; + __kmp_str_buf_init(&buffer); + // Print version strings skipping initial magic. 
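// [Illustrative sketch, standalone; the demo_* names are stand-ins, not part of
//  the runtime.] The two-level _stringer()/stringer() macros defined above are
//  what turn the numeric version macros into the string literals embedded in
//  __kmp_version_lib_ver and __kmp_copyright: the extra level of indirection
//  forces the argument to be macro-expanded before '#' stringizes it.

#include <cstdio>

#define _demo_str(x) #x
#define demo_str(x) _demo_str(x)
#define DEMO_BUILD 20140926

int main() {
  std::printf("%s\n", _demo_str(DEMO_BUILD)); // prints "DEMO_BUILD" (no expansion)
  std::printf("%s\n", demo_str(DEMO_BUILD));  // prints "20140926"
  return 0;
}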
+ __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]); + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_lib_type[KMP_VERSION_MAGIC_LEN]); + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_link_type[KMP_VERSION_MAGIC_LEN]); + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_build_time[KMP_VERSION_MAGIC_LEN]); +#if KMP_MIC + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_target_env[KMP_VERSION_MAGIC_LEN]); +#endif + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_build_compiler[KMP_VERSION_MAGIC_LEN]); +#if defined(KMP_GOMP_COMPAT) + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_alt_comp[KMP_VERSION_MAGIC_LEN]); +#endif /* defined(KMP_GOMP_COMPAT) */ + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_omp_api[KMP_VERSION_MAGIC_LEN]); + __kmp_str_buf_print(&buffer, "%sdynamic error checking: %s\n", + KMP_VERSION_PREF_STR, + (__kmp_env_consistency_check ? "yes" : "no")); +#ifdef KMP_DEBUG + for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) { + __kmp_str_buf_print( + &buffer, "%s%s barrier branch bits: gather=%u, release=%u\n", + KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i], + __kmp_barrier_gather_branch_bits[i], + __kmp_barrier_release_branch_bits[i]); // __kmp_str_buf_print + }; // for i + for (int i = bs_plain_barrier; i < bs_last_barrier; ++i) { + __kmp_str_buf_print( + &buffer, "%s%s barrier pattern: gather=%s, release=%s\n", + KMP_VERSION_PREF_STR, __kmp_barrier_type_name[i], + __kmp_barrier_pattern_name[__kmp_barrier_gather_pattern[i]], + __kmp_barrier_pattern_name + [__kmp_barrier_release_pattern[i]]); // __kmp_str_buf_print + }; // for i + __kmp_str_buf_print(&buffer, "%s\n", + &__kmp_version_lock[KMP_VERSION_MAGIC_LEN]); +#endif + __kmp_str_buf_print( + &buffer, "%sthread affinity support: %s\n", KMP_VERSION_PREF_STR, +#if KMP_AFFINITY_SUPPORTED + (KMP_AFFINITY_CAPABLE() + ? (__kmp_affinity_type == affinity_none ? "not used" : "yes") + : "no") +#else + "no" +#endif + ); + __kmp_printf("%s", buffer.str); + __kmp_str_buf_free(&buffer); + K_DIAG(1, ("KMP_VERSION is true\n")); +#endif // KMP_STUB } // __kmp_print_version_1 -// // Called at parallel initialization time. -// static int __kmp_version_2_printed = FALSE; -void -__kmp_print_version_2( void ) { - if ( __kmp_version_2_printed ) { - return; - }; // if - __kmp_version_2_printed = TRUE; +void __kmp_print_version_2(void) { + if (__kmp_version_2_printed) { + return; + }; // if + __kmp_version_2_printed = TRUE; } // __kmp_print_version_2 // end of file // diff --git a/openmp/runtime/src/kmp_version.h b/openmp/runtime/src/kmp_version.h index 212853b..05e5287 100644 --- a/openmp/runtime/src/kmp_version.h +++ b/openmp/runtime/src/kmp_version.h @@ -17,31 +17,32 @@ #define KMP_VERSION_H #ifdef __cplusplus - extern "C" { +extern "C" { #endif // __cplusplus #ifndef KMP_VERSION_MAJOR - #error KMP_VERSION_MAJOR macro is not defined. +#error KMP_VERSION_MAJOR macro is not defined. #endif -#define KMP_VERSION_MINOR 0 -/* - Using "magic" prefix in all the version strings is rather convenient to get static version info - from binaries by using standard utilities "strings" and "grep", e. g.: +#define KMP_VERSION_MINOR 0 +/* Using "magic" prefix in all the version strings is rather convenient to get + static version info from binaries by using standard utilities "strings" and + "grep", e. g.: $ strings libomp.so | grep "@(#)" - gives clean list of all version strings in the library. 
Leading zero helps to keep version - string separate from printable characters which may occurs just before version string. -*/ -#define KMP_VERSION_MAGIC_STR "\x00@(#) " -#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR. -#define KMP_VERSION_PREF_STR "Intel(R) OMP " -#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR + gives clean list of all version strings in the library. Leading zero helps + to keep version string separate from printable characters which may occurs + just before version string. */ +#define KMP_VERSION_MAGIC_STR "\x00@(#) " +#define KMP_VERSION_MAGIC_LEN 6 // Length of KMP_VERSION_MAGIC_STR. +#define KMP_VERSION_PREF_STR "Intel(R) OMP " +#define KMP_VERSION_PREFIX KMP_VERSION_MAGIC_STR KMP_VERSION_PREF_STR /* declare all the version string constants for KMP_VERSION env. variable */ -extern int const __kmp_version_major; -extern int const __kmp_version_minor; -extern int const __kmp_version_build; -extern int const __kmp_openmp_version; -extern char const __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP. +extern int const __kmp_version_major; +extern int const __kmp_version_minor; +extern int const __kmp_version_build; +extern int const __kmp_openmp_version; +extern char const + __kmp_copyright[]; // Old variable, kept for compatibility with ITC and ITP. extern char const __kmp_version_copyright[]; extern char const __kmp_version_lib_ver[]; extern char const __kmp_version_lib_type[]; @@ -58,11 +59,11 @@ extern char const __kmp_version_ftnstdcall[]; extern char const __kmp_version_ftncdecl[]; extern char const __kmp_version_ftnextra[]; -void __kmp_print_version_1( void ); -void __kmp_print_version_2( void ); +void __kmp_print_version_1(void); +void __kmp_print_version_2(void); #ifdef __cplusplus - } // extern "C" +} // extern "C" #endif // __cplusplus #endif /* KMP_VERSION_H */ diff --git a/openmp/runtime/src/kmp_wait_release.cpp b/openmp/runtime/src/kmp_wait_release.cpp index 05b1e05..40d09ef 100644 --- a/openmp/runtime/src/kmp_wait_release.cpp +++ b/openmp/runtime/src/kmp_wait_release.cpp @@ -14,13 +14,10 @@ #include "kmp_wait_release.h" -void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - __kmp_wait_template(this_thr, flag, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj) ); +void __kmp_wait_64(kmp_info_t *this_thr, kmp_flag_64 *flag, + int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + __kmp_wait_template(this_thr, flag, + final_spin USE_ITT_BUILD_ARG(itt_sync_obj)); } -void __kmp_release_64(kmp_flag_64 *flag) { - __kmp_release_template(flag); -} +void __kmp_release_64(kmp_flag_64 *flag) { __kmp_release_template(flag); } diff --git a/openmp/runtime/src/kmp_wait_release.h b/openmp/runtime/src/kmp_wait_release.h index a3d2ba1..074696c 100644 --- a/openmp/runtime/src/kmp_wait_release.h +++ b/openmp/runtime/src/kmp_wait_release.h @@ -24,8 +24,8 @@ @defgroup WAIT_RELEASE Wait/Release operations The definitions and functions here implement the lowest level thread -synchronizations of suspending a thread and awaking it. They are used -to build higher level operations such as barriers and fork/join. +synchronizations of suspending a thread and awaking it. They are used to build +higher level operations such as barriers and fork/join. */ /*! @@ -37,581 +37,647 @@ to build higher level operations such as barriers and fork/join. * The flag_type describes the storage used for the flag. 
*/ enum flag_type { - flag32, /**< 32 bit flags */ - flag64, /**< 64 bit flags */ - flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */ + flag32, /**< 32 bit flags */ + flag64, /**< 64 bit flags */ + flag_oncore /**< special 64-bit flag for on-core barrier (hierarchical) */ }; /*! * Base class for wait/release volatile flag */ -template -class kmp_flag { - volatile P * loc; /**< Pointer to the flag storage that is modified by another thread */ - flag_type t; /**< "Type" of the flag in loc */ - public: - typedef P flag_t; - kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {} - /*! - * @result the pointer to the actual flag - */ - volatile P * get() { return loc; } - /*! - * @param new_loc in set loc to point at new_loc - */ - void set(volatile P *new_loc) { loc = new_loc; } - /*! - * @result the flag_type - */ - flag_type get_type() { return t; } - // Derived classes must provide the following: - /* - kmp_info_t * get_waiter(kmp_uint32 i); - kmp_uint32 get_num_waiters(); - bool done_check(); - bool done_check_val(P old_loc); - bool notdone_check(); - P internal_release(); - void suspend(int th_gtid); - void resume(int th_gtid); - P set_sleeping(); - P unset_sleeping(); - bool is_sleeping(); - bool is_any_sleeping(); - bool is_sleeping_val(P old_loc); - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained); - */ +template class kmp_flag { + volatile P + *loc; /**< Pointer to the flag storage that is modified by another thread + */ + flag_type t; /**< "Type" of the flag in loc */ +public: + typedef P flag_t; + kmp_flag(volatile P *p, flag_type ft) : loc(p), t(ft) {} + /*! + * @result the pointer to the actual flag + */ + volatile P *get() { return loc; } + /*! + * @param new_loc in set loc to point at new_loc + */ + void set(volatile P *new_loc) { loc = new_loc; } + /*! + * @result the flag_type + */ + flag_type get_type() { return t; } + // Derived classes must provide the following: + /* + kmp_info_t * get_waiter(kmp_uint32 i); + kmp_uint32 get_num_waiters(); + bool done_check(); + bool done_check_val(P old_loc); + bool notdone_check(); + P internal_release(); + void suspend(int th_gtid); + void resume(int th_gtid); + P set_sleeping(); + P unset_sleeping(); + bool is_sleeping(); + bool is_any_sleeping(); + bool is_sleeping_val(P old_loc); + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, + int *thread_finished + USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 + is_constrained); + */ }; -/* Spin wait loop that first does pause, then yield, then sleep. A thread that calls __kmp_wait_* - must make certain that another thread calls __kmp_release to wake it back up to prevent deadlocks! */ +/* Spin wait loop that first does pause, then yield, then sleep. A thread that + calls __kmp_wait_* must make certain that another thread calls __kmp_release + to wake it back up to prevent deadlocks! */ template static inline void -__kmp_wait_template(kmp_info_t *this_thr, C *flag, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj) ) -{ - // NOTE: We may not belong to a team at this point. - volatile typename C::flag_t *spin = flag->get(); - kmp_uint32 spins; - kmp_uint32 hibernate; - int th_gtid; - int tasks_completed = FALSE; - int oversubscribed; -#if ! 
KMP_USE_MONITOR - kmp_uint64 poll_count; - kmp_uint64 hibernate_goal; +__kmp_wait_template(kmp_info_t *this_thr, C *flag, + int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + // NOTE: We may not belong to a team at this point. + volatile typename C::flag_t *spin = flag->get(); + kmp_uint32 spins; + kmp_uint32 hibernate; + int th_gtid; + int tasks_completed = FALSE; + int oversubscribed; +#if !KMP_USE_MONITOR + kmp_uint64 poll_count; + kmp_uint64 hibernate_goal; #endif - KMP_FSYNC_SPIN_INIT(spin, NULL); - if (flag->done_check()) { - KMP_FSYNC_SPIN_ACQUIRED(spin); - return; - } - th_gtid = this_thr->th.th_info.ds.ds_gtid; - KA_TRACE(20, ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag)); + KMP_FSYNC_SPIN_INIT(spin, NULL); + if (flag->done_check()) { + KMP_FSYNC_SPIN_ACQUIRED(spin); + return; + } + th_gtid = this_thr->th.th_info.ds.ds_gtid; + KA_TRACE(20, + ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag)); #if KMP_STATS_ENABLED - stats_state_e thread_state = KMP_GET_THREAD_STATE(); + stats_state_e thread_state = KMP_GET_THREAD_STATE(); #endif #if OMPT_SUPPORT && OMPT_BLAME - ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled && - ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { - ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team){ - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; - } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); - } + ompt_state_t ompt_state = this_thr->th.ompt_thread_info.state; + if (ompt_enabled && ompt_state != ompt_state_undefined) { + if (ompt_state == ompt_state_idle) { + if (ompt_callbacks.ompt_callback(ompt_event_idle_begin)) { + ompt_callbacks.ompt_callback(ompt_event_idle_begin)(th_gtid + 1); + } + } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)) { + KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || + ompt_state == ompt_state_wait_barrier_implicit || + ompt_state == ompt_state_wait_barrier_explicit); + + ompt_lw_taskteam_t *team = + this_thr->th.th_team->t.ompt_serialized_team_info; + ompt_parallel_id_t pId; + ompt_task_id_t tId; + if (team) { + pId = team->ompt_team_info.parallel_id; + tId = team->ompt_task_info.task_id; + } else { + pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; + tId = this_thr->th.th_current_task->ompt_task_info.task_id; + } + ompt_callbacks.ompt_callback(ompt_event_wait_barrier_begin)(pId, tId); } + } #endif - // Setup for waiting - KMP_INIT_YIELD(spins); + // Setup for waiting + KMP_INIT_YIELD(spins); - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { #if KMP_USE_MONITOR - // The worker threads cannot rely on the team struct existing at this point. - // Use the bt values cached in the thread struct instead. +// The worker threads cannot rely on the team struct existing at this point. 
+// Use the bt values cached in the thread struct instead. #ifdef KMP_ADJUST_BLOCKTIME - if (__kmp_zero_bt && !this_thr->th.th_team_bt_set) - // Force immediate suspend if not set by user and more threads than available procs - hibernate = 0; - else - hibernate = this_thr->th.th_team_bt_intervals; + if (__kmp_zero_bt && !this_thr->th.th_team_bt_set) + // Force immediate suspend if not set by user and more threads than + // available procs + hibernate = 0; + else + hibernate = this_thr->th.th_team_bt_intervals; #else - hibernate = this_thr->th.th_team_bt_intervals; + hibernate = this_thr->th.th_team_bt_intervals; #endif /* KMP_ADJUST_BLOCKTIME */ - /* If the blocktime is nonzero, we want to make sure that we spin wait for the entirety - of the specified #intervals, plus up to one interval more. This increment make - certain that this thread doesn't go to sleep too soon. */ - if (hibernate != 0) - hibernate++; - - // Add in the current time value. - hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); - KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n", - th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, - hibernate - __kmp_global.g.g_time.dt.t_value)); + /* If the blocktime is nonzero, we want to make sure that we spin wait for + the entirety of the specified #intervals, plus up to one interval more. + This increment make certain that this thread doesn't go to sleep too + soon. */ + if (hibernate != 0) + hibernate++; + + // Add in the current time value. + hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value); + KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n", + th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate, + hibernate - __kmp_global.g.g_time.dt.t_value)); #else - hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals; - poll_count = 0; + hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals; + poll_count = 0; #endif // KMP_USE_MONITOR - } - - oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc); - KMP_MB(); - - // Main wait spin loop - while (flag->notdone_check()) { - int in_pool; - kmp_task_team_t * task_team = NULL; - if (__kmp_tasking_mode != tskm_immediate_exec) { - task_team = this_thr->th.th_task_team; - /* If the thread's task team pointer is NULL, it means one of 3 things: - 1) A newly-created thread is first being released by __kmp_fork_barrier(), and - its task team has not been set up yet. - 2) All tasks have been executed to completion. - 3) Tasking is off for this region. This could be because we are in a serialized region - (perhaps the outer one), or else tasking was manually disabled (KMP_TASKING=0). 
*/ - if (task_team != NULL) { - if (TCR_SYNC_4(task_team->tt.tt_active)) { - if (KMP_TASKING_ENABLED(task_team)) - flag->execute_tasks(this_thr, th_gtid, final_spin, &tasks_completed - USE_ITT_BUILD_ARG(itt_sync_obj), 0); - else - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } - else { - KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); - this_thr->th.th_task_team = NULL; - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } - } else { - this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; - } // if - } // if - - KMP_FSYNC_SPIN_PREPARE(spin); - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; + } + + oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_MB(); + + // Main wait spin loop + while (flag->notdone_check()) { + int in_pool; + kmp_task_team_t *task_team = NULL; + if (__kmp_tasking_mode != tskm_immediate_exec) { + task_team = this_thr->th.th_task_team; + /* If the thread's task team pointer is NULL, it means one of 3 things: + 1) A newly-created thread is first being released by + __kmp_fork_barrier(), and its task team has not been set up yet. + 2) All tasks have been executed to completion. + 3) Tasking is off for this region. This could be because we are in a + serialized region (perhaps the outer one), or else tasking was manually + disabled (KMP_TASKING=0). */ + if (task_team != NULL) { + if (TCR_SYNC_4(task_team->tt.tt_active)) { + if (KMP_TASKING_ENABLED(task_team)) + flag->execute_tasks( + this_thr, th_gtid, final_spin, + &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0); + else + this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; + } else { + KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)); + this_thr->th.th_task_team = NULL; + this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; } + } else { + this_thr->th.th_reap_state = KMP_SAFE_TO_REAP; + } // if + } // if + + KMP_FSYNC_SPIN_PREPARE(spin); + if (TCR_4(__kmp_global.g.g_done)) { + if (__kmp_global.g.g_abort) + __kmp_abort_thread(); + break; + } - // If we are oversubscribed, or have waited a bit (and KMP_LIBRARY=throughput), then yield - KMP_YIELD(oversubscribed); - // TODO: Should it be number of cores instead of thread contexts? Like: - // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores); - // Need performance improvement data to make the change... - KMP_YIELD_SPIN(spins); - - // Check if this thread was transferred from a team - // to the thread pool (or vice-versa) while spinning. - in_pool = !!TCR_4(this_thr->th.th_in_pool); - if (in_pool != !!this_thr->th.th_active_in_pool) { - if (in_pool) { // Recently transferred from team to pool - KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); - this_thr->th.th_active_in_pool = TRUE; - /* Here, we cannot assert that: - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= __kmp_thread_pool_nth); - __kmp_thread_pool_nth is inc/dec'd by the master thread while the fork/join - lock is held, whereas __kmp_thread_pool_active_nth is inc/dec'd asynchronously - by the workers. The two can get out of sync for brief periods of time. */ - } - else { // Recently transferred from pool to team - KMP_TEST_THEN_DEC32((kmp_int32 *) &__kmp_thread_pool_active_nth); - KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); - this_thr->th.th_active_in_pool = FALSE; - } - } + // If we are oversubscribed, or have waited a bit (and + // KMP_LIBRARY=throughput), then yield + KMP_YIELD(oversubscribed); + // TODO: Should it be number of cores instead of thread contexts? 
Like: + // KMP_YIELD(TCR_4(__kmp_nth) > __kmp_ncores); + // Need performance improvement data to make the change... + KMP_YIELD_SPIN(spins); + // Check if this thread was transferred from a team + // to the thread pool (or vice-versa) while spinning. + in_pool = !!TCR_4(this_thr->th.th_in_pool); + if (in_pool != !!this_thr->th.th_active_in_pool) { + if (in_pool) { // Recently transferred from team to pool + KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + this_thr->th.th_active_in_pool = TRUE; + /* Here, we cannot assert that: + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) <= + __kmp_thread_pool_nth); + __kmp_thread_pool_nth is inc/dec'd by the master thread while the + fork/join lock is held, whereas __kmp_thread_pool_active_nth is + inc/dec'd asynchronously by the workers. The two can get out of sync + for brief periods of time. */ + } else { // Recently transferred from pool to team + KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); + this_thr->th.th_active_in_pool = FALSE; + } + } #if KMP_STATS_ENABLED - // Check if thread has been signalled to idle state - // This indicates that the logical "join-barrier" has finished - if (this_thr->th.th_stats->isIdle() && KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) { - KMP_SET_THREAD_STATE(IDLE); - KMP_PUSH_PARTITIONED_TIMER(OMP_idle); - } + // Check if thread has been signalled to idle state + // This indicates that the logical "join-barrier" has finished + if (this_thr->th.th_stats->isIdle() && + KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) { + KMP_SET_THREAD_STATE(IDLE); + KMP_PUSH_PARTITIONED_TIMER(OMP_idle); + } #endif - // Don't suspend if KMP_BLOCKTIME is set to "infinite" - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) - continue; + // Don't suspend if KMP_BLOCKTIME is set to "infinite" + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) + continue; - // Don't suspend if there is a likelihood of new tasks being spawned. - if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks)) - continue; + // Don't suspend if there is a likelihood of new tasks being spawned. 
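The spin-then-yield-then-sleep discipline implemented by this loop can be illustrated with a small, self-contained standard-C++ analogy. This is a sketch, not libomp code: toy_flag, released, and the condition_variable stand in for the runtime's flag object and its suspend()/resume() machinery, and the spin count stands in for KMP_INIT_YIELD/KMP_YIELD_SPIN.

    // Standalone analogy of the wait/release contract (illustrative only).
    #include <atomic>
    #include <condition_variable>
    #include <mutex>
    #include <thread>

    struct toy_flag {
      std::atomic<bool> released{false};
      std::mutex m;
      std::condition_variable cv;

      void wait(int spin_iters) {
        for (int i = 0; i < spin_iters; ++i) {
          if (released.load(std::memory_order_acquire))
            return;                  // analogous to done_check() succeeding while spinning
          std::this_thread::yield(); // analogous to KMP_YIELD when oversubscribed
        }
        std::unique_lock<std::mutex> lk(m); // give up and block, like flag->suspend()
        cv.wait(lk, [this] { return released.load(std::memory_order_acquire); });
      }

      void release() { // counterpart of __kmp_release_template()
        released.store(true, std::memory_order_release);
        std::lock_guard<std::mutex> lk(m); // serialize with a waiter about to block
        cv.notify_all();
      }
    };

    int main() {
      toy_flag f;
      std::thread waiter([&] { f.wait(1000); });
      f.release(); // without this, the waiter could sleep forever -- same deadlock rule as above
      waiter.join();
      return 0;
    }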
+ if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks)) + continue; #if KMP_USE_MONITOR - // If we have waited a bit more, fall asleep - if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) - continue; + // If we have waited a bit more, fall asleep + if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate) + continue; #else - if (KMP_BLOCKING(hibernate_goal, poll_count++)) - continue; + if (KMP_BLOCKING(hibernate_goal, poll_count++)) + continue; #endif - KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); + KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid)); + flag->suspend(th_gtid); - flag->suspend(th_gtid); - - if (TCR_4(__kmp_global.g.g_done)) { - if (__kmp_global.g.g_abort) - __kmp_abort_thread(); - break; - } - else if (__kmp_tasking_mode != tskm_immediate_exec - && this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { - this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; - } - // TODO: If thread is done with work and times out, disband/free + if (TCR_4(__kmp_global.g.g_done)) { + if (__kmp_global.g.g_abort) + __kmp_abort_thread(); + break; + } else if (__kmp_tasking_mode != tskm_immediate_exec && + this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) { + this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP; } + // TODO: If thread is done with work and times out, disband/free + } #if OMPT_SUPPORT && OMPT_BLAME - if (ompt_enabled && - ompt_state != ompt_state_undefined) { - if (ompt_state == ompt_state_idle) { - if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { - ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); - } - } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { - KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || - ompt_state == ompt_state_wait_barrier_implicit || - ompt_state == ompt_state_wait_barrier_explicit); - - ompt_lw_taskteam_t* team = this_thr->th.th_team->t.ompt_serialized_team_info; - ompt_parallel_id_t pId; - ompt_task_id_t tId; - if (team){ - pId = team->ompt_team_info.parallel_id; - tId = team->ompt_task_info.task_id; - } else { - pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; - tId = this_thr->th.th_current_task->ompt_task_info.task_id; - } - ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); - } + if (ompt_enabled && ompt_state != ompt_state_undefined) { + if (ompt_state == ompt_state_idle) { + if (ompt_callbacks.ompt_callback(ompt_event_idle_end)) { + ompt_callbacks.ompt_callback(ompt_event_idle_end)(th_gtid + 1); + } + } else if (ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)) { + KMP_DEBUG_ASSERT(ompt_state == ompt_state_wait_barrier || + ompt_state == ompt_state_wait_barrier_implicit || + ompt_state == ompt_state_wait_barrier_explicit); + + ompt_lw_taskteam_t *team = + this_thr->th.th_team->t.ompt_serialized_team_info; + ompt_parallel_id_t pId; + ompt_task_id_t tId; + if (team) { + pId = team->ompt_team_info.parallel_id; + tId = team->ompt_task_info.task_id; + } else { + pId = this_thr->th.th_team->t.ompt_team_info.parallel_id; + tId = this_thr->th.th_current_task->ompt_task_info.task_id; + } + ompt_callbacks.ompt_callback(ompt_event_wait_barrier_end)(pId, tId); } + } #endif #if KMP_STATS_ENABLED - // If we were put into idle state, pop that off the state stack - if (KMP_GET_THREAD_STATE() == IDLE) { - KMP_POP_PARTITIONED_TIMER(); - KMP_SET_THREAD_STATE(thread_state); - this_thr->th.th_stats->resetIdleFlag(); - } + // If we were put into idle state, pop that off the state stack + if (KMP_GET_THREAD_STATE() == IDLE) { + 
KMP_POP_PARTITIONED_TIMER(); + KMP_SET_THREAD_STATE(thread_state); + this_thr->th.th_stats->resetIdleFlag(); + } #endif - KMP_FSYNC_SPIN_ACQUIRED(spin); + KMP_FSYNC_SPIN_ACQUIRED(spin); } -/* Release any threads specified as waiting on the flag by releasing the flag and resume the waiting thread - if indicated by the sleep bit(s). A thread that calls __kmp_wait_template must call this function to wake - up the potentially sleeping thread and prevent deadlocks! */ -template -static inline void -__kmp_release_template(C *flag) -{ +/* Release any threads specified as waiting on the flag by releasing the flag + and resume the waiting thread if indicated by the sleep bit(s). A thread that + calls __kmp_wait_template must call this function to wake up the potentially + sleeping thread and prevent deadlocks! */ +template static inline void __kmp_release_template(C *flag) { #ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; #endif - KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get())); - KMP_DEBUG_ASSERT(flag->get()); - KMP_FSYNC_RELEASING(flag->get()); - - flag->internal_release(); - - KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), *(flag->get()))); - - if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { - // Only need to check sleep stuff if infinite block time not set - if (flag->is_any_sleeping()) { // Are *any* of the threads that wait on this flag sleeping? - for (unsigned int i=0; iget_num_waiters(); ++i) { - kmp_info_t * waiter = flag->get_waiter(i); // if a sleeping waiter exists at i, sets current_waiter to i inside the flag - if (waiter) { - int wait_gtid = waiter->th.th_info.ds.ds_gtid; - // Wake up thread if needed - KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep flag(%p) set\n", - gtid, wait_gtid, flag->get())); - flag->resume(wait_gtid); // unsets flag's current_waiter when done - } - } + KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get())); + KMP_DEBUG_ASSERT(flag->get()); + KMP_FSYNC_RELEASING(flag->get()); + + flag->internal_release(); + + KF_TRACE(100, ("__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(), + *(flag->get()))); + + if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) { + // Only need to check sleep stuff if infinite block time not set. + // Are *any* threads waiting on flag sleeping? 
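The sleep-bit bookkeeping behind is_any_sleeping()/set_sleeping()/unset_sleeping() further down boils down to atomically OR-ing a designated bit in and AND-ing it out while returning the previous value. A minimal standard-C++ sketch of that idea follows; SLEEP_BIT is a stand-in value rather than the real KMP_BARRIER_SLEEP_STATE, and the runtime itself uses KMP_TEST_THEN_OR64/KMP_TEST_THEN_AND64 rather than std::atomic.

    // Illustrative sleep-bit helpers; not the runtime's implementation.
    #include <atomic>
    #include <cstdint>

    constexpr std::uint64_t SLEEP_BIT = 1ull; // stand-in bit position

    std::uint64_t set_sleeping(std::atomic<std::uint64_t> &flag) {
      return flag.fetch_or(SLEEP_BIT);   // returns the value before the bit was set
    }
    std::uint64_t unset_sleeping(std::atomic<std::uint64_t> &flag) {
      return flag.fetch_and(~SLEEP_BIT); // returns the value before the bit was cleared
    }
    bool is_sleeping_val(std::uint64_t old_val) {
      return (old_val & SLEEP_BIT) != 0; // a waiter was (or is) sleeping on this flag
    }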
+ if (flag->is_any_sleeping()) { + for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) { + // if sleeping waiter exists at i, sets current_waiter to i inside flag + kmp_info_t *waiter = flag->get_waiter(i); + if (waiter) { + int wait_gtid = waiter->th.th_info.ds.ds_gtid; + // Wake up thread if needed + KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep " + "flag(%p) set\n", + gtid, wait_gtid, flag->get())); + flag->resume(wait_gtid); // unsets flag's current_waiter when done } + } } + } } -template -struct flag_traits {}; - -template <> -struct flag_traits { - typedef kmp_uint32 flag_t; - static const flag_type t = flag32; - static inline flag_t tcr(flag_t f) { return TCR_4(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); } +template struct flag_traits {}; + +template <> struct flag_traits { + typedef kmp_uint32 flag_t; + static const flag_type t = flag32; + static inline flag_t tcr(flag_t f) { return TCR_4(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { + return KMP_TEST_THEN_ADD4_32((volatile kmp_int32 *)f); + } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_OR32((volatile kmp_int32 *)f, v); + } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_AND32((volatile kmp_int32 *)f, v); + } }; -template <> -struct flag_traits { - typedef kmp_uint64 flag_t; - static const flag_type t = flag64; - static inline flag_t tcr(flag_t f) { return TCR_8(f); } - static inline flag_t test_then_add4(volatile flag_t *f) { return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); } - static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); } - static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); } +template <> struct flag_traits { + typedef kmp_uint64 flag_t; + static const flag_type t = flag64; + static inline flag_t tcr(flag_t f) { return TCR_8(f); } + static inline flag_t test_then_add4(volatile flag_t *f) { + return KMP_TEST_THEN_ADD4_64((volatile kmp_int64 *)f); + } + static inline flag_t test_then_or(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_OR64((volatile kmp_int64 *)f, v); + } + static inline flag_t test_then_and(volatile flag_t *f, flag_t v) { + return KMP_TEST_THEN_AND64((volatile kmp_int64 *)f, v); + } }; -template -class kmp_basic_flag : public kmp_flag { - typedef flag_traits traits_type; - FlagType checker; /**< Value to compare flag to to check if flag has been released. */ - kmp_info_t * waiting_threads[1]; /**< Array of threads sleeping on this thread. */ - kmp_uint32 num_waiting_threads; /**< Number of threads sleeping on this thread. */ - public: - kmp_basic_flag(volatile FlagType *p) : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} - kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) : kmp_flag(p, traits_type::t), num_waiting_threads(1) { - waiting_threads[0] = thr; - } - kmp_basic_flag(volatile FlagType *p, FlagType c) : kmp_flag(p, traits_type::t), checker(c), num_waiting_threads(0) {} - /*! 
- * param i in index into waiting_threads - * @result the thread that is waiting at index i - */ - kmp_info_t * get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(iget())) == checker; } - /*! - * @param old_loc in old value of flag - * @result true if the flag's old value indicates it was released. - */ - bool done_check_val(FlagType old_loc) { return old_loc == checker; } - /*! - * @result true if the flag object is not yet released. - * Used in __kmp_wait_template like: - * @code - * while (flag.notdone_check()) { pause(); } - * @endcode - */ - bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } - /*! - * @result Actual flag value before release was applied. - * Trigger all waiting threads to run by modifying flag to release state. - */ - void internal_release() { - (void) traits_type::test_then_add4((volatile FlagType *)this->get()); - } - /*! - * @result Actual flag value before sleep bit(s) set. - * Notes that there is at least one thread sleeping on the flag by setting sleep bit(s). - */ - FlagType set_sleeping() { - return traits_type::test_then_or((volatile FlagType *)this->get(), KMP_BARRIER_SLEEP_STATE); - } - /*! - * @result Actual flag value before sleep bit(s) cleared. - * Notes that there are no longer threads sleeping on the flag by clearing sleep bit(s). - */ - FlagType unset_sleeping() { - return traits_type::test_then_and((volatile FlagType *)this->get(), ~KMP_BARRIER_SLEEP_STATE); - } - /*! - * @param old_loc in old value of flag - * Test whether there are threads sleeping on the flag's old value in old_loc. - */ - bool is_sleeping_val(FlagType old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } - /*! - * Test whether there are threads sleeping on the flag. - */ - bool is_sleeping() { return is_sleeping_val(*(this->get())); } - bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bs_last_barrier; } +template class kmp_basic_flag : public kmp_flag { + typedef flag_traits traits_type; + FlagType checker; /**< Value to compare flag to to check if flag has been + released. */ + kmp_info_t + *waiting_threads[1]; /**< Array of threads sleeping on this thread. */ + kmp_uint32 + num_waiting_threads; /**< Number of threads sleeping on this thread. */ +public: + kmp_basic_flag(volatile FlagType *p) + : kmp_flag(p, traits_type::t), num_waiting_threads(0) {} + kmp_basic_flag(volatile FlagType *p, kmp_info_t *thr) + : kmp_flag(p, traits_type::t), num_waiting_threads(1) { + waiting_threads[0] = thr; + } + kmp_basic_flag(volatile FlagType *p, FlagType c) + : kmp_flag(p, traits_type::t), checker(c), + num_waiting_threads(0) {} + /*! + * param i in index into waiting_threads + * @result the thread that is waiting at index i + */ + kmp_info_t *get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(i < num_waiting_threads); + return waiting_threads[i]; + } + /*! + * @result num_waiting_threads + */ + kmp_uint32 get_num_waiters() { return num_waiting_threads; } + /*! + * @param thr in the thread which is now waiting + * + * Insert a waiting thread at index 0. + */ + void set_waiter(kmp_info_t *thr) { + waiting_threads[0] = thr; + num_waiting_threads = 1; + } + /*! + * @result true if the flag object has been released. + */ + bool done_check() { return traits_type::tcr(*(this->get())) == checker; } + /*! + * @param old_loc in old value of flag + * @result true if the flag's old value indicates it was released. 
+ */ + bool done_check_val(FlagType old_loc) { return old_loc == checker; } + /*! + * @result true if the flag object is not yet released. + * Used in __kmp_wait_template like: + * @code + * while (flag.notdone_check()) { pause(); } + * @endcode + */ + bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; } + /*! + * @result Actual flag value before release was applied. + * Trigger all waiting threads to run by modifying flag to release state. + */ + void internal_release() { + (void)traits_type::test_then_add4((volatile FlagType *)this->get()); + } + /*! + * @result Actual flag value before sleep bit(s) set. + * Notes that there is at least one thread sleeping on the flag by setting + * sleep bit(s). + */ + FlagType set_sleeping() { + return traits_type::test_then_or((volatile FlagType *)this->get(), + KMP_BARRIER_SLEEP_STATE); + } + /*! + * @result Actual flag value before sleep bit(s) cleared. + * Notes that there are no longer threads sleeping on the flag by clearing + * sleep bit(s). + */ + FlagType unset_sleeping() { + return traits_type::test_then_and((volatile FlagType *)this->get(), + ~KMP_BARRIER_SLEEP_STATE); + } + /*! + * @param old_loc in old value of flag + * Test whether there are threads sleeping on the flag's old value in old_loc. + */ + bool is_sleeping_val(FlagType old_loc) { + return old_loc & KMP_BARRIER_SLEEP_STATE; + } + /*! + * Test whether there are threads sleeping on the flag. + */ + bool is_sleeping() { return is_sleeping_val(*(this->get())); } + bool is_any_sleeping() { return is_sleeping_val(*(this->get())); } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bs_last_barrier; } }; class kmp_flag_32 : public kmp_basic_flag { - public: - kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag(p) {} - kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} - kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) : kmp_basic_flag(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_32(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj)) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag32; } +public: + kmp_flag_32(volatile kmp_uint32 *p) : kmp_basic_flag(p) {} + kmp_flag_32(volatile kmp_uint32 *p, kmp_info_t *thr) + : kmp_basic_flag(p, thr) {} + kmp_flag_32(volatile kmp_uint32 *p, kmp_uint32 c) + : kmp_basic_flag(p, c) {} + void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_32( + this_thr, gtid, this, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + void wait(kmp_info_t *this_thr, + int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + __kmp_wait_template(this_thr, this, + final_spin USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { 
__kmp_release_template(this); } + flag_type get_ptr_type() { return flag32; } }; class kmp_flag_64 : public kmp_basic_flag { - public: - kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag(p) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) : kmp_basic_flag(p, thr) {} - kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) : kmp_basic_flag(p, c) {} - void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_64(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - void wait(kmp_info_t *this_thr, int final_spin - USE_ITT_BUILD_ARG(void * itt_sync_obj)) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - flag_type get_ptr_type() { return flag64; } +public: + kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag(p) {} + kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr) + : kmp_basic_flag(p, thr) {} + kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c) + : kmp_basic_flag(p, c) {} + void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_64( + this_thr, gtid, this, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + void wait(kmp_info_t *this_thr, + int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) { + __kmp_wait_template(this_thr, this, + final_spin USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { __kmp_release_template(this); } + flag_type get_ptr_type() { return flag64; } }; // Hierarchical 64-bit on-core barrier instantiation class kmp_flag_oncore : public kmp_flag { - kmp_uint64 checker; - kmp_info_t * waiting_threads[1]; - kmp_uint32 num_waiting_threads; - kmp_uint32 offset; /**< Portion of flag that is of interest for an operation. */ - bool flag_switch; /**< Indicates a switch in flag location. */ - enum barrier_type bt; /**< Barrier type. */ - kmp_info_t * this_thr; /**< Thread that may be redirected to different flag location. */ + kmp_uint64 checker; + kmp_info_t *waiting_threads[1]; + kmp_uint32 num_waiting_threads; + kmp_uint32 + offset; /**< Portion of flag that is of interest for an operation. */ + bool flag_switch; /**< Indicates a switch in flag location. */ + enum barrier_type bt; /**< Barrier type. */ + kmp_info_t *this_thr; /**< Thread that may be redirected to different flag + location. */ #if USE_ITT_BUILD - void *itt_sync_obj; /**< ITT object that must be passed to new flag location. */ + void * + itt_sync_obj; /**< ITT object that must be passed to new flag location. 
*/ #endif - unsigned char& byteref(volatile kmp_uint64* loc, size_t offset) { return ((unsigned char *)loc)[offset]; } + unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) { + return ((unsigned char *)loc)[offset]; + } + public: - kmp_flag_oncore(volatile kmp_uint64 *p) - : kmp_flag(p, flag_oncore), num_waiting_threads(0), flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) - : kmp_flag(p, flag_oncore), num_waiting_threads(0), offset(idx), flag_switch(false) {} - kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, enum barrier_type bar_t, - kmp_info_t * thr + kmp_flag_oncore(volatile kmp_uint64 *p) + : kmp_flag(p, flag_oncore), num_waiting_threads(0), + flag_switch(false) {} + kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx) + : kmp_flag(p, flag_oncore), num_waiting_threads(0), + offset(idx), flag_switch(false) {} + kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx, + enum barrier_type bar_t, kmp_info_t *thr #if USE_ITT_BUILD - , void *itt + , + void *itt #endif - ) - : kmp_flag(p, flag_oncore), checker(c), num_waiting_threads(0), offset(idx), - flag_switch(false), bt(bar_t), this_thr(thr) + ) + : kmp_flag(p, flag_oncore), checker(c), + num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t), + this_thr(thr) #if USE_ITT_BUILD - , itt_sync_obj(itt) + , + itt_sync_obj(itt) #endif - {} - kmp_info_t * get_waiter(kmp_uint32 i) { - KMP_DEBUG_ASSERT(ith.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) - flag_switch = true; - if (byteref(get(),offset) != 1 && !flag_switch) - return true; - else if (flag_switch) { - this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; - kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, (kmp_uint64)KMP_BARRIER_STATE_BUMP); - __kmp_wait_64(this_thr, &flag, TRUE + { + } + kmp_info_t *get_waiter(kmp_uint32 i) { + KMP_DEBUG_ASSERT(i < num_waiting_threads); + return waiting_threads[i]; + } + kmp_uint32 get_num_waiters() { return num_waiting_threads; } + void set_waiter(kmp_info_t *thr) { + waiting_threads[0] = thr; + num_waiting_threads = 1; + } + bool done_check_val(kmp_uint64 old_loc) { + return byteref(&old_loc, offset) == checker; + } + bool done_check() { return done_check_val(*get()); } + bool notdone_check() { + // Calculate flag_switch + if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG) + flag_switch = true; + if (byteref(get(), offset) != 1 && !flag_switch) + return true; + else if (flag_switch) { + this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING; + kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go, + (kmp_uint64)KMP_BARRIER_STATE_BUMP); + __kmp_wait_64(this_thr, &flag, TRUE #if USE_ITT_BUILD - , itt_sync_obj + , + itt_sync_obj #endif - ); - } - return false; - } - void internal_release() { - if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { - byteref(get(),offset) = 1; - } - else { - kmp_uint64 mask=0; - byteref(&mask,offset) = 1; - (void) KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask); - } + ); } - kmp_uint64 set_sleeping() { - return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), KMP_BARRIER_SLEEP_STATE); + return false; + } + void internal_release() { + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + byteref(get(), offset) = 1; + } else { + kmp_uint64 mask = 0; + byteref(&mask, offset) = 1; + (void)KMP_TEST_THEN_OR64((volatile kmp_int64 *)get(), mask); } - kmp_uint64 unset_sleeping() { - return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), ~KMP_BARRIER_SLEEP_STATE); - } - bool 
is_sleeping_val(kmp_uint64 old_loc) { return old_loc & KMP_BARRIER_SLEEP_STATE; } - bool is_sleeping() { return is_sleeping_val(*get()); } - bool is_any_sleeping() { return is_sleeping_val(*get()); } - void wait(kmp_info_t *this_thr, int final_spin) { - __kmp_wait_template(this_thr, this, final_spin - USE_ITT_BUILD_ARG(itt_sync_obj)); - } - void release() { __kmp_release_template(this); } - void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } - void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } - int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, int *thread_finished - USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32 is_constrained) { - return __kmp_execute_tasks_oncore(this_thr, gtid, this, final_spin, thread_finished - USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); - } - kmp_uint8 *get_stolen() { return NULL; } - enum barrier_type get_bt() { return bt; } - flag_type get_ptr_type() { return flag_oncore; } + } + kmp_uint64 set_sleeping() { + return KMP_TEST_THEN_OR64((kmp_int64 volatile *)get(), + KMP_BARRIER_SLEEP_STATE); + } + kmp_uint64 unset_sleeping() { + return KMP_TEST_THEN_AND64((kmp_int64 volatile *)get(), + ~KMP_BARRIER_SLEEP_STATE); + } + bool is_sleeping_val(kmp_uint64 old_loc) { + return old_loc & KMP_BARRIER_SLEEP_STATE; + } + bool is_sleeping() { return is_sleeping_val(*get()); } + bool is_any_sleeping() { return is_sleeping_val(*get()); } + void wait(kmp_info_t *this_thr, int final_spin) { + __kmp_wait_template( + this_thr, this, final_spin USE_ITT_BUILD_ARG(itt_sync_obj)); + } + void release() { __kmp_release_template(this); } + void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); } + void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); } + int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin, + int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj), + kmp_int32 is_constrained) { + return __kmp_execute_tasks_oncore( + this_thr, gtid, this, final_spin, + thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained); + } + kmp_uint8 *get_stolen() { return NULL; } + enum barrier_type get_bt() { return bt; } + flag_type get_ptr_type() { return flag_oncore; } }; -// Used to wake up threads, volatile void* flag is usually the th_sleep_loc associated -// with int gtid. +// Used to wake up threads, volatile void* flag is usually the th_sleep_loc +// associated with int gtid. static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) { - if (!flag) return; - - switch (((kmp_flag_64 *)flag)->get_type()) { - case flag32: __kmp_resume_32(gtid, NULL); break; - case flag64: __kmp_resume_64(gtid, NULL); break; - case flag_oncore: __kmp_resume_oncore(gtid, NULL); break; - } + if (!flag) + return; + + switch (((kmp_flag_64 *)flag)->get_type()) { + case flag32: + __kmp_resume_32(gtid, NULL); + break; + case flag64: + __kmp_resume_64(gtid, NULL); + break; + case flag_oncore: + __kmp_resume_oncore(gtid, NULL); + break; + } } /*! diff --git a/openmp/runtime/src/kmp_wrapper_getpid.h b/openmp/runtime/src/kmp_wrapper_getpid.h index 68c2be7..490e5cb 100644 --- a/openmp/runtime/src/kmp_wrapper_getpid.h +++ b/openmp/runtime/src/kmp_wrapper_getpid.h @@ -18,50 +18,52 @@ #if KMP_OS_UNIX - // On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard headers. 
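A self-contained illustration of the pattern this header implements (selecting an OS thread-id primitive per platform) is sketched below for the Linux branch only. my_gettid is a hypothetical name; the real header also covers Darwin via SYS_thread_selfid and Windows via GetCurrentThreadId(), as shown in the hunk that follows.

    // Linux-only sketch of the __kmp_gettid() idea (illustrative, not the header itself).
    #include <cstdio>
    #include <sys/syscall.h>
    #include <unistd.h>

    static long my_gettid(void) {
    #if defined(SYS_gettid)
      return syscall(SYS_gettid); // raw syscall; historically there was no libc wrapper
    #else
      return (long)getpid();      // fallback, mirroring the header's #warning branch
    #endif
    }

    int main() {
      std::printf("pid=%ld tid=%ld\n", (long)getpid(), my_gettid());
      return 0;
    }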
- #include - #include - #include - #if KMP_OS_DARWIN - //OS X - #define __kmp_gettid() syscall(SYS_thread_selfid) - #elif defined(SYS_gettid) - // Hopefully other Unix systems define SYS_gettid syscall for getting os thread id - #define __kmp_gettid() syscall(SYS_gettid) - #else - #warning No gettid found, use getpid instead - #define __kmp_gettid() getpid() - #endif +// On Unix-like systems (Linux* OS and OS X*) getpid() is declared in standard +// headers. +#include +#include +#include +#if KMP_OS_DARWIN +// OS X +#define __kmp_gettid() syscall(SYS_thread_selfid) +#elif defined(SYS_gettid) +// Hopefully other Unix systems define SYS_gettid syscall for getting os thread +// id +#define __kmp_gettid() syscall(SYS_gettid) +#else +#warning No gettid found, use getpid instead +#define __kmp_gettid() getpid() +#endif #elif KMP_OS_WINDOWS - // On Windows* OS _getpid() returns int (not pid_t) and is declared in "process.h". - #include - // Let us simulate Unix. - typedef int pid_t; - #define getpid _getpid - #define __kmp_gettid() GetCurrentThreadId() +// On Windows* OS _getpid() returns int (not pid_t) and is declared in +// "process.h". +#include +// Let us simulate Unix. +typedef int pid_t; +#define getpid _getpid +#define __kmp_gettid() GetCurrentThreadId() #else - #error Unknown or unsupported OS. +#error Unknown or unsupported OS. #endif -/* - TODO: All the libomp source code uses pid_t type for storing the result of getpid(), it is good. - But often it printed as "%d", that is not good, because it ignores pid_t definition (may pid_t - be longer that int?). It seems all pid prints should be rewritten as - - printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid ); +/* TODO: All the libomp source code uses pid_t type for storing the result of + getpid(), it is good. But often it printed as "%d", that is not good, because + it ignores pid_t definition (may pid_t be longer that int?). It seems all pid + prints should be rewritten as: - or (at least) as + printf( "%" KMP_UINT64_SPEC, (kmp_uint64) pid ); - printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid ); + or (at least) as - (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in "kmp_os.h".) + printf( "%" KMP_UINT32_SPEC, (kmp_uint32) pid ); -*/ + (kmp_uint32, kmp_uint64, KMP_UINT64_SPEC, and KMP_UNIT32_SPEC are defined in + "kmp_os.h".) */ #endif // KMP_WRAPPER_GETPID_H diff --git a/openmp/runtime/src/kmp_wrapper_malloc.h b/openmp/runtime/src/kmp_wrapper_malloc.h index 453d1ef..257416d 100644 --- a/openmp/runtime/src/kmp_wrapper_malloc.h +++ b/openmp/runtime/src/kmp_wrapper_malloc.h @@ -17,21 +17,18 @@ #ifndef KMP_WRAPPER_MALLOC_H #define KMP_WRAPPER_MALLOC_H -/* - This header serves for 3 purposes: - - 1. Declaring standard memory allocation rourines in OS-independent way. - 2. Passing source location info through memory allocation wrappers. - 3. Enabling native memory debugging capabilities. - - - 1. Declaring standard memory allocation rourines in OS-independent way. - ----------------------------------------------------------------------- - - On Linux* OS, alloca() function is declared in header, while on Windows* OS there is no - header, function _alloca() (note underscore!) is declared in . This header - eliminates these differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on - following routines: +/* This header serves for 3 purposes: + 1. Declaring standard memory allocation rourines in OS-independent way. + 2. Passing source location info through memory allocation wrappers. + 3. 
Enabling native memory debugging capabilities. + + 1. Declaring standard memory allocation rourines in OS-independent way. + ----------------------------------------------------------------------- + On Linux* OS, alloca() function is declared in header, while on + Windows* OS there is no header, function _alloca() (note + underscore!) is declared in . This header eliminates these + differences, so client code incluiding "kmp_wrapper_malloc.h" can rely on + following routines: malloc calloc @@ -39,60 +36,56 @@ free alloca - in OS-independent way. It also enables memory tracking capabilities in debug build. (Currently - it is available only on Windows* OS.) - - - 2. Passing source location info through memory allocation wrappers. - ------------------------------------------------------------------- - - Some tools may help debugging memory errors, for example, report memory leaks. However, memory - allocation wrappers may hinder source location. - - For example: - - void * aligned_malloc( int size ) { - void * ptr = malloc( size ); // All the memory leaks will be reported at this line. - // some adjustments... - return ptr; - }; - - ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. :-( - - To overcome the problem, information about original source location should be passed through all - the memory allocation wrappers, for example: - - void * aligned_malloc( int size, char const * file, int line ) { - void * ptr = _malloc_dbg( size, file, line ); - // some adjustments... - return ptr; - }; - - void * ptr = aligned_malloc( size, __FILE__, __LINE__ ); - - This is a good idea for debug, but passing additional arguments impacts performance. Disabling - extra arguments in release version of the software introduces too many conditional compilation, - which makes code unreadable. This header defines few macros and functions facilitating it: - - void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { - void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); - // some adjustments... - return ptr; - }; - #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) - // Use macro instead of direct call to function. - - void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be reported at this line. - - - 3. Enabling native memory debugging capabilities. - ------------------------------------------------- - - Some platforms may offer memory debugging capabilities. For example, debug version of Microsoft - RTL tracks all memory allocations and can report memory leaks. This header enables this, and - makes report more useful (see "Passing source location info through memory allocation - wrappers"). - + in OS-independent way. It also enables memory tracking capabilities in debug + build. (Currently it is available only on Windows* OS.) + + 2. Passing source location info through memory allocation wrappers. + ------------------------------------------------------------------- + Some tools may help debugging memory errors, for example, report memory + leaks. However, memory allocation wrappers may hinder source location. + For example: + + void * aligned_malloc( int size ) { + void * ptr = malloc( size ); // All the memory leaks will be reported at + // this line. + // some adjustments... + return ptr; + }; + + ptr = aligned_malloc( size ); // Memory leak will *not* be detected here. 
:-( + + To overcome the problem, information about original source location should + be passed through all the memory allocation wrappers, for example: + + void * aligned_malloc( int size, char const * file, int line ) { + void * ptr = _malloc_dbg( size, file, line ); + // some adjustments... + return ptr; + }; + void * ptr = aligned_malloc( size, __FILE__, __LINE__ ); + + This is a good idea for debug, but passing additional arguments impacts + performance. Disabling extra arguments in release version of the software + introduces too many conditional compilation, which makes code unreadable. + This header defines few macros and functions facilitating it: + + void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { + void * ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); + // some adjustments... + return ptr; + }; + #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) + // Use macro instead of direct call to function. + + void * ptr = aligned_malloc( size ); // Bingo! Memory leak will be + // reported at this line. + + 3. Enabling native memory debugging capabilities. + ------------------------------------------------- + Some platforms may offer memory debugging capabilities. For example, debug + version of Microsoft RTL tracks all memory allocations and can report memory + leaks. This header enables this, and makes report more useful (see "Passing + source location info through memory allocation wrappers"). */ #include @@ -101,102 +94,101 @@ // Include alloca() declaration. #if KMP_OS_WINDOWS - #include // Windows* OS: _alloca() declared in "malloc.h". - #define alloca _alloca // Allow to use alloca() with no underscore. +#include // Windows* OS: _alloca() declared in "malloc.h". +#define alloca _alloca // Allow to use alloca() with no underscore. #elif KMP_OS_FREEBSD || KMP_OS_NETBSD - // Declared in "stdlib.h". +// Declared in "stdlib.h". #elif KMP_OS_UNIX - #include // Linux* OS and OS X*: alloc() declared in "alloca". +#include // Linux* OS and OS X*: alloc() declared in "alloca". #else - #error Unknown or unsupported OS. +#error Unknown or unsupported OS. #endif -/* - KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in function declaration. - KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass parameters to underlying - levels. - KMP_SRC_LOC_CURR -- Source location arguments describing current location, to be used at - top-level. - - Typical usage: - - void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { - // Note: Comma is missed before KMP_SRC_LOC_DECL. - KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) ); - ... - } - #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) - // Use macro instead of direct call to function -- macro passes info about current - // source location to the func. +/* KMP_SRC_LOC_DECL -- Declaring source location paramemters, to be used in + function declaration. + KMP_SRC_LOC_PARM -- Source location paramemters, to be used to pass + parameters to underlying levels. + KMP_SRC_LOC_CURR -- Source location arguments describing current location, + to be used at top-level. + + Typical usage: + void * _aligned_malloc( int size KMP_SRC_LOC_DECL ) { + // Note: Comma is missed before KMP_SRC_LOC_DECL. + KE_TRACE( 25, ( "called from %s:%d\n", KMP_SRC_LOC_PARM ) ); + ... + } + #define aligned_malloc( size ) _aligned_malloc( (size) KMP_SRC_LOC_CURR ) + // Use macro instead of direct call to function -- macro passes info + // about current source location to the func. 
*/ #if KMP_DEBUG - #define KMP_SRC_LOC_DECL , char const * _file_, int _line_ - #define KMP_SRC_LOC_PARM , _file_, _line_ - #define KMP_SRC_LOC_CURR , __FILE__, __LINE__ +#define KMP_SRC_LOC_DECL , char const *_file_, int _line_ +#define KMP_SRC_LOC_PARM , _file_, _line_ +#define KMP_SRC_LOC_CURR , __FILE__, __LINE__ #else - #define KMP_SRC_LOC_DECL - #define KMP_SRC_LOC_PARM - #define KMP_SRC_LOC_CURR +#define KMP_SRC_LOC_DECL +#define KMP_SRC_LOC_PARM +#define KMP_SRC_LOC_CURR #endif // KMP_DEBUG -/* - malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) with accepts extra - arguments (source location info) in debug mode. They should be used in place of malloc() and - free(), this allows enabling native memory debugging capabilities (if any). - - Typical usage: - - ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); - // Inside memory allocation wrapper, or - ptr = malloc_src_loc( size KMP_SRC_LOC_CURR ); - // Outside of memory allocation wrapper. - +/* malloc_src_loc() and free_src_loc() are pseudo-functions (really macros) + with accepts extra arguments (source location info) in debug mode. They + should be used in place of malloc() and free(), this allows enabling native + memory debugging capabilities (if any). + Typical usage: + ptr = malloc_src_loc( size KMP_SRC_LOC_PARM ); + // Inside memory allocation wrapper, or + ptr = malloc_src_loc( size KMP_SRC_LOC_CURR ); + // Outside of memory allocation wrapper. */ -#define malloc_src_loc( args ) _malloc_src_loc( args ) -#define free_src_loc( args ) _free_src_loc( args ) - /* - Depending on build mode (debug or release), malloc_src_loc is declared with 1 or 3 - parameters, but calls to malloc_src_loc() are always the same: +#define malloc_src_loc(args) _malloc_src_loc(args) +#define free_src_loc(args) _free_src_loc(args) +/* Depending on build mode (debug or release), malloc_src_loc is declared with + 1 or 3 parameters, but calls to malloc_src_loc() are always the same: - ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR + ... malloc_src_loc( size KMP_SRC_LOC_PARM ); // or KMP_SRC_LOC_CURR - Compiler issues warning/error "too few arguments in macro invocation". Declaring two - macroses, malloc_src_loc() and _malloc_src_loc() overcomes the problem. - */ + Compiler issues warning/error "too few arguments in macro invocation". + Declaring two macros, malloc_src_loc() and _malloc_src_loc(), overcomes the + problem. */ #if KMP_DEBUG - #if KMP_OS_WINDOWS && _DEBUG - // KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined. +#if KMP_OS_WINDOWS && _DEBUG +// KMP_DEBUG != _DEBUG. MS debug RTL is available only if _DEBUG is defined. - // Windows* OS has native memory debugging capabilities. Enable them. +// Windows* OS has native memory debugging capabilities. Enable them. 
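The KMP_SRC_LOC_* pattern described above (extra __FILE__/__LINE__ arguments that exist only in debug builds) can be shown standalone as follows. The names here are hypothetical, chosen for illustration (MY_DEBUG, SRC_LOC_*, aligned_malloc_impl); the runtime's own names are KMP_SRC_LOC_DECL/PARM/CURR and _malloc_src_loc as in this hunk.

    // Standalone sketch of debug-only source-location threading through an allocator wrapper.
    #include <cstdio>
    #include <cstdlib>

    #ifdef MY_DEBUG
    #define SRC_LOC_DECL , const char *file_, int line_
    #define SRC_LOC_PARM , file_, line_
    #define SRC_LOC_CURR , __FILE__, __LINE__
    #else
    #define SRC_LOC_DECL
    #define SRC_LOC_PARM
    #define SRC_LOC_CURR
    #endif

    static void *aligned_malloc_impl(std::size_t size SRC_LOC_DECL) {
    #ifdef MY_DEBUG
      std::printf("alloc %zu bytes at %s:%d\n", size, file_, line_); // reports keep the call site
    #endif
      return std::malloc(size); // alignment adjustments omitted in this sketch
    }
    #define aligned_malloc(size) aligned_malloc_impl((size) SRC_LOC_CURR)

    int main() {
      void *p = aligned_malloc(64); // with MY_DEBUG defined, this line is what gets reported
      std::free(p);
      return 0;
    }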
- #include +#include - #define KMP_MEM_BLOCK _CLIENT_BLOCK - #define malloc( size ) _malloc_dbg( (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define calloc( num, size ) _calloc_dbg( (num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define realloc( ptr, size ) _realloc_dbg( (ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__ ) - #define free( ptr ) _free_dbg( (ptr), KMP_MEM_BLOCK ) +#define KMP_MEM_BLOCK _CLIENT_BLOCK +#define malloc(size) _malloc_dbg((size), KMP_MEM_BLOCK, __FILE__, __LINE__) +#define calloc(num, size) \ + _calloc_dbg((num), (size), KMP_MEM_BLOCK, __FILE__, __LINE__) +#define realloc(ptr, size) \ + _realloc_dbg((ptr), (size), KMP_MEM_BLOCK, __FILE__, __LINE__) +#define free(ptr) _free_dbg((ptr), KMP_MEM_BLOCK) - #define _malloc_src_loc( size, file, line ) _malloc_dbg( (size), KMP_MEM_BLOCK, (file), (line) ) - #define _free_src_loc( ptr, file, line ) _free_dbg( (ptr), KMP_MEM_BLOCK ) +#define _malloc_src_loc(size, file, line) \ + _malloc_dbg((size), KMP_MEM_BLOCK, (file), (line)) +#define _free_src_loc(ptr, file, line) _free_dbg((ptr), KMP_MEM_BLOCK) - #else +#else - // Linux* OS, OS X*, or non-debug Windows* OS. +// Linux* OS, OS X*, or non-debug Windows* OS. - #define _malloc_src_loc( size, file, line ) malloc( (size) ) - #define _free_src_loc( ptr, file, line ) free( (ptr) ) +#define _malloc_src_loc(size, file, line) malloc((size)) +#define _free_src_loc(ptr, file, line) free((ptr)) - #endif +#endif #else - // In release build malloc_src_loc() and free_src_loc() do not have extra parameters. - #define _malloc_src_loc( size ) malloc( (size) ) - #define _free_src_loc( ptr ) free( (ptr) ) +// In release build malloc_src_loc() and free_src_loc() do not have extra +// parameters. +#define _malloc_src_loc(size) malloc((size)) +#define _free_src_loc(ptr) free((ptr)) #endif // KMP_DEBUG diff --git a/openmp/runtime/src/ompt-event-specific.h b/openmp/runtime/src/ompt-event-specific.h index fdf1213..baeb67c 100644 --- a/openmp/runtime/src/ompt-event-specific.h +++ b/openmp/runtime/src/ompt-event-specific.h @@ -1,5 +1,5 @@ -#ifndef __OMPT_EVENT_SPECIFIC_H__ -#define __OMPT_EVENT_SPECIFIC_H__ +#ifndef __OMPT_EVENT_SPECIFIC_H__ +#define __OMPT_EVENT_SPECIFIC_H__ /****************************************************************************** * File: ompt-event-specific.h @@ -10,10 +10,9 @@ * and the level of their implementation by a runtime system. *****************************************************************************/ -#define _ompt_tokenpaste_helper(x,y) x ## y -#define _ompt_tokenpaste(x,y) _ompt_tokenpaste_helper(x,y) -#define ompt_event_implementation_status(e) _ompt_tokenpaste(e,_implemented) - +#define _ompt_tokenpaste_helper(x, y) x##y +#define _ompt_tokenpaste(x, y) _ompt_tokenpaste_helper(x, y) +#define ompt_event_implementation_status(e) _ompt_tokenpaste(e, _implemented) /*---------------------------------------------------------------------------- | Specify whether an event may occur or not, and whether event callbacks @@ -23,130 +22,132 @@ | the OMPT TR. They are exposed to tools through ompt_set_callback. 
+--------------------------------------------------------------------------*/ -#define ompt_event_NEVER ompt_set_result_event_never_occurs -#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback -#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some -#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always +#define ompt_event_NEVER ompt_set_result_event_never_occurs +#define ompt_event_UNIMPLEMENTED ompt_set_result_event_may_occur_no_callback +#define ompt_event_MAY_CONVENIENT ompt_set_result_event_may_occur_callback_some +#define ompt_event_MAY_ALWAYS ompt_set_result_event_may_occur_callback_always #if OMPT_TRACE -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_MAY_ALWAYS #else -#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED +#define ompt_event_MAY_ALWAYS_TRACE ompt_event_UNIMPLEMENTED #endif #if OMPT_BLAME -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_MAY_ALWAYS #else -#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED +#define ompt_event_MAY_ALWAYS_BLAME ompt_event_UNIMPLEMENTED #endif /*---------------------------------------------------------------------------- | Mandatory Events +--------------------------------------------------------------------------*/ -#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS - -#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS +#define ompt_event_parallel_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_parallel_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS -#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS +#define ompt_event_task_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_task_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_control_implemented ompt_event_MAY_ALWAYS +#define ompt_event_thread_begin_implemented ompt_event_MAY_ALWAYS +#define ompt_event_thread_end_implemented ompt_event_MAY_ALWAYS -#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS +#define ompt_event_control_implemented ompt_event_MAY_ALWAYS +#define ompt_event_runtime_shutdown_implemented ompt_event_MAY_ALWAYS /*---------------------------------------------------------------------------- | Optional Events (blame shifting) +--------------------------------------------------------------------------*/ -#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME - -#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_idle_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_idle_end_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_barrier_begin_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_wait_barrier_end_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED +#define 
ompt_event_wait_taskwait_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskwait_end_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_nest_lock_last_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME -#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_wait_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_taskgroup_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_release_lock_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_nest_lock_last_implemented \ + ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_critical_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_atomic_implemented ompt_event_MAY_ALWAYS_BLAME +#define ompt_event_release_ordered_implemented ompt_event_MAY_ALWAYS_BLAME /*---------------------------------------------------------------------------- | Optional Events (synchronous events) +--------------------------------------------------------------------------*/ -#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_implicit_task_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_implicit_task_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_initial_task_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_initial_task_end_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_task_switch_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_loop_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_loop_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_sections_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_sections_end_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_in_block_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_in_block_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_single_others_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_workshare_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_workshare_end_implemented ompt_event_UNIMPLEMENTED -#define 
ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_master_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_master_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_barrier_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_barrier_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_taskwait_begin_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_taskwait_end_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_taskgroup_begin_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_taskgroup_end_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_release_nest_lock_prev_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_release_nest_lock_prev_implemented \ + ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_nest_lock_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_wait_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_wait_ordered_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_first_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_nest_lock_next_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED -#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_nest_lock_first_implemented \ + ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_nest_lock_next_implemented \ + ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_critical_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_acquired_atomic_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_acquired_ordered_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_init_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_init_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_destroy_nest_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_destroy_lock_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_destroy_nest_lock_implemented 
ompt_event_MAY_ALWAYS_TRACE -#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_flush_implemented ompt_event_UNIMPLEMENTED #if OMP_40_ENABLED -# define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE -# define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_task_dependences_implemented ompt_event_MAY_ALWAYS_TRACE +#define ompt_event_task_dependence_pair_implemented ompt_event_MAY_ALWAYS_TRACE #else -# define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED -# define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_task_dependences_implemented ompt_event_UNIMPLEMENTED +#define ompt_event_task_dependence_pair_implemented ompt_event_UNIMPLEMENTED #endif /* OMP_40_ENABLED */ #endif diff --git a/openmp/runtime/src/ompt-general.cpp b/openmp/runtime/src/ompt-general.cpp index 37c1c22..fa66d9c 100644 --- a/openmp/runtime/src/ompt-general.cpp +++ b/openmp/runtime/src/ompt-general.cpp @@ -9,16 +9,12 @@ #include #include - - /***************************************************************************** * ompt include files ****************************************************************************/ #include "ompt-specific.cpp" - - /***************************************************************************** * macros ****************************************************************************/ @@ -34,32 +30,25 @@ #define OMPT_STR_MATCH(haystack, needle) (!strcasecmp(haystack, needle)) #endif - /***************************************************************************** * types ****************************************************************************/ typedef struct { - const char *state_name; - ompt_state_t state_id; + const char *state_name; + ompt_state_t state_id; } ompt_state_info_t; - enum tool_setting_e { - omp_tool_error, - omp_tool_unset, - omp_tool_disabled, - omp_tool_enabled + omp_tool_error, + omp_tool_unset, + omp_tool_disabled, + omp_tool_enabled }; - -typedef void (*ompt_initialize_t) ( - ompt_function_lookup_t ompt_fn_lookup, - const char *version, - unsigned int ompt_version -); - - +typedef void (*ompt_initialize_t)(ompt_function_lookup_t ompt_fn_lookup, + const char *version, + unsigned int ompt_version); /***************************************************************************** * global variables @@ -68,16 +57,14 @@ typedef void (*ompt_initialize_t) ( int ompt_enabled = 0; ompt_state_info_t ompt_state_info[] = { -#define ompt_state_macro(state, code) { # state, state }, +#define ompt_state_macro(state, code) {#state, state}, FOREACH_OMPT_STATE(ompt_state_macro) #undef ompt_state_macro }; ompt_callbacks_t ompt_callbacks; -static ompt_initialize_t ompt_initialize_fn = NULL; - - +static ompt_initialize_t ompt_initialize_fn = NULL; /***************************************************************************** * forward declarations @@ -87,7 +74,6 @@ static ompt_interface_fn_t ompt_fn_lookup(const char *s); OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); - /***************************************************************************** * initialization and finalization (private operations) ****************************************************************************/ @@ -102,13 +88,11 @@ OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void); * NULL is returned and OMPT won't be enabled */ #if OMPT_HAVE_WEAK_ATTRIBUTE _OMP_EXTERN -__attribute__ (( weak )) -ompt_initialize_t ompt_tool() -{ +__attribute__((weak)) 
ompt_initialize_t ompt_tool() { #if OMPT_DEBUG - printf("ompt_tool() is called from the RTL\n"); + printf("ompt_tool() is called from the RTL\n"); #endif - return NULL; + return NULL; } #elif OMPT_HAVE_PSAPI @@ -120,162 +104,155 @@ ompt_initialize_t ompt_tool() // The number of loaded modules to start enumeration with EnumProcessModules() #define NUM_MODULES 128 -static -ompt_initialize_t ompt_tool_windows() -{ - int i; - DWORD needed, new_size; - HMODULE *modules; - HANDLE process = GetCurrentProcess(); - modules = (HMODULE*)malloc( NUM_MODULES * sizeof(HMODULE) ); - ompt_initialize_t (*ompt_tool_p)() = NULL; +static ompt_initialize_t ompt_tool_windows() { + int i; + DWORD needed, new_size; + HMODULE *modules; + HANDLE process = GetCurrentProcess(); + modules = (HMODULE *)malloc(NUM_MODULES * sizeof(HMODULE)); + ompt_initialize_t (*ompt_tool_p)() = NULL; #if OMPT_DEBUG - printf("ompt_tool_windows(): looking for ompt_tool\n"); + printf("ompt_tool_windows(): looking for ompt_tool\n"); #endif - if (!EnumProcessModules( process, modules, NUM_MODULES * sizeof(HMODULE), - &needed)) { - // Regardless of the error reason use the stub initialization function - free(modules); - return NULL; - } - // Check if NUM_MODULES is enough to list all modules - new_size = needed / sizeof(HMODULE); - if (new_size > NUM_MODULES) { + if (!EnumProcessModules(process, modules, NUM_MODULES * sizeof(HMODULE), + &needed)) { + // Regardless of the error reason use the stub initialization function + free(modules); + return NULL; + } + // Check if NUM_MODULES is enough to list all modules + new_size = needed / sizeof(HMODULE); + if (new_size > NUM_MODULES) { #if OMPT_DEBUG printf("ompt_tool_windows(): resize buffer to %d bytes\n", needed); #endif - modules = (HMODULE*)realloc( modules, needed ); - // If resizing failed use the stub function. - if (!EnumProcessModules(process, modules, needed, &needed)) { - free(modules); - return NULL; - } + modules = (HMODULE *)realloc(modules, needed); + // If resizing failed use the stub function. 
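    // (That is, if the retry below also fails, NULL is returned and the
    // runtime behaves as if the weak ompt_tool() stub had been used:
    // ompt_initialize_fn stays NULL and OMPT remains disabled.)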
+ if (!EnumProcessModules(process, modules, needed, &needed)) { + free(modules); + return NULL; } - for (i = 0; i < new_size; ++i) { - (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); - if (ompt_tool_p) { + } + for (i = 0; i < new_size; ++i) { + (FARPROC &)ompt_tool_p = GetProcAddress(modules[i], "ompt_tool"); + if (ompt_tool_p) { #if OMPT_DEBUG - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool found in module %s\n", - modName); + TCHAR modName[MAX_PATH]; + if (GetModuleFileName(modules[i], modName, MAX_PATH)) + printf("ompt_tool_windows(): ompt_tool found in module %s\n", modName); #endif - free(modules); - return ompt_tool_p(); - } + free(modules); + return ompt_tool_p(); + } #if OMPT_DEBUG - else { - TCHAR modName[MAX_PATH]; - if (GetModuleFileName(modules[i], modName, MAX_PATH)) - printf("ompt_tool_windows(): ompt_tool not found in module %s\n", - modName); - } -#endif + else { + TCHAR modName[MAX_PATH]; + if (GetModuleFileName(modules[i], modName, MAX_PATH)) + printf("ompt_tool_windows(): ompt_tool not found in module %s\n", + modName); } - free(modules); - return NULL; +#endif + } + free(modules); + return NULL; } #else -# error Either __attribute__((weak)) or psapi.dll are required for OMPT support +#error Either __attribute__((weak)) or psapi.dll are required for OMPT support #endif // OMPT_HAVE_WEAK_ATTRIBUTE -void ompt_pre_init() -{ - //-------------------------------------------------- - // Execute the pre-initialization logic only once. - //-------------------------------------------------- - static int ompt_pre_initialized = 0; +void ompt_pre_init() { + //-------------------------------------------------- + // Execute the pre-initialization logic only once. + //-------------------------------------------------- + static int ompt_pre_initialized = 0; - if (ompt_pre_initialized) return; + if (ompt_pre_initialized) + return; - ompt_pre_initialized = 1; + ompt_pre_initialized = 1; - //-------------------------------------------------- - // Use a tool iff a tool is enabled and available. - //-------------------------------------------------- - const char *ompt_env_var = getenv("OMP_TOOL"); - tool_setting_e tool_setting = omp_tool_error; + //-------------------------------------------------- + // Use a tool iff a tool is enabled and available. 
+ //-------------------------------------------------- + const char *ompt_env_var = getenv("OMP_TOOL"); + tool_setting_e tool_setting = omp_tool_error; - if (!ompt_env_var || !strcmp(ompt_env_var, "")) - tool_setting = omp_tool_unset; - else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) - tool_setting = omp_tool_disabled; - else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) - tool_setting = omp_tool_enabled; + if (!ompt_env_var || !strcmp(ompt_env_var, "")) + tool_setting = omp_tool_unset; + else if (OMPT_STR_MATCH(ompt_env_var, "disabled")) + tool_setting = omp_tool_disabled; + else if (OMPT_STR_MATCH(ompt_env_var, "enabled")) + tool_setting = omp_tool_enabled; #if OMPT_DEBUG - printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); + printf("ompt_pre_init(): tool_setting = %d\n", tool_setting); #endif - switch(tool_setting) { - case omp_tool_disabled: - break; - - case omp_tool_unset: - case omp_tool_enabled: - ompt_initialize_fn = ompt_tool(); - if (ompt_initialize_fn) { - ompt_enabled = 1; - } - break; - - case omp_tool_error: - fprintf(stderr, - "Warning: OMP_TOOL has invalid value \"%s\".\n" - " legal values are (NULL,\"\",\"disabled\"," - "\"enabled\").\n", ompt_env_var); - break; + switch (tool_setting) { + case omp_tool_disabled: + break; + + case omp_tool_unset: + case omp_tool_enabled: + ompt_initialize_fn = ompt_tool(); + if (ompt_initialize_fn) { + ompt_enabled = 1; } + break; + + case omp_tool_error: + fprintf(stderr, "Warning: OMP_TOOL has invalid value \"%s\".\n" + " legal values are (NULL,\"\",\"disabled\"," + "\"enabled\").\n", + ompt_env_var); + break; + } #if OMPT_DEBUG - printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); + printf("ompt_pre_init(): ompt_enabled = %d\n", ompt_enabled); #endif } +void ompt_post_init() { + //-------------------------------------------------- + // Execute the post-initialization logic only once. + //-------------------------------------------------- + static int ompt_post_initialized = 0; -void ompt_post_init() -{ - //-------------------------------------------------- - // Execute the post-initialization logic only once. - //-------------------------------------------------- - static int ompt_post_initialized = 0; - - if (ompt_post_initialized) return; + if (ompt_post_initialized) + return; - ompt_post_initialized = 1; + ompt_post_initialized = 1; - //-------------------------------------------------- - // Initialize the tool if so indicated. - //-------------------------------------------------- - if (ompt_enabled) { - ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), - OMPT_VERSION); + //-------------------------------------------------- + // Initialize the tool if so indicated. 
+ //-------------------------------------------------- + if (ompt_enabled) { + ompt_initialize_fn(ompt_fn_lookup, ompt_get_runtime_version(), + OMPT_VERSION); - ompt_thread_t *root_thread = ompt_get_thread(); + ompt_thread_t *root_thread = ompt_get_thread(); - ompt_set_thread_state(root_thread, ompt_state_overhead); + ompt_set_thread_state(root_thread, ompt_state_overhead); - if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { - ompt_callbacks.ompt_callback(ompt_event_thread_begin) - (ompt_thread_initial, ompt_get_thread_id()); - } - - ompt_set_thread_state(root_thread, ompt_state_work_serial); + if (ompt_callbacks.ompt_callback(ompt_event_thread_begin)) { + ompt_callbacks.ompt_callback(ompt_event_thread_begin)( + ompt_thread_initial, ompt_get_thread_id()); } -} + ompt_set_thread_state(root_thread, ompt_state_work_serial); + } +} -void ompt_fini() -{ - if (ompt_enabled) { - if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { - ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); - } +void ompt_fini() { + if (ompt_enabled) { + if (ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)) { + ompt_callbacks.ompt_callback(ompt_event_runtime_shutdown)(); } + } - ompt_enabled = 0; + ompt_enabled = 0; } - /***************************************************************************** * interface operations ****************************************************************************/ @@ -285,148 +262,122 @@ void ompt_fini() ****************************************************************************/ OMPT_API_ROUTINE int ompt_enumerate_state(int current_state, int *next_state, - const char **next_state_name) -{ - const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); - int i = 0; - - for (i = 0; i < len - 1; i++) { - if (ompt_state_info[i].state_id == current_state) { - *next_state = ompt_state_info[i+1].state_id; - *next_state_name = ompt_state_info[i+1].state_name; - return 1; - } + const char **next_state_name) { + const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t); + int i = 0; + + for (i = 0; i < len - 1; i++) { + if (ompt_state_info[i].state_id == current_state) { + *next_state = ompt_state_info[i + 1].state_id; + *next_state_name = ompt_state_info[i + 1].state_name; + return 1; } + } - return 0; + return 0; } - - /***************************************************************************** * callbacks ****************************************************************************/ -OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) -{ - switch (evid) { +OMPT_API_ROUTINE int ompt_set_callback(ompt_event_t evid, ompt_callback_t cb) { + switch (evid) { #define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callbacks.ompt_callback(event_name) = (callback_type) cb; \ - } \ - return ompt_event_implementation_status(event_name); + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callbacks.ompt_callback(event_name) = (callback_type)cb; \ + } \ + return ompt_event_implementation_status(event_name); FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro - default: return ompt_set_result_registration_error; - } + default: + return ompt_set_result_registration_error; + } } - -OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) -{ - switch (evid) { +OMPT_API_ROUTINE int ompt_get_callback(ompt_event_t evid, ompt_callback_t *cb) { + switch (evid) { 
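// As in ompt_set_callback() above, FOREACH_OMPT_EVENT expands ompt_event_macro
// once per event, so this switch gains one case per event name; for an
// implemented event the registered callback, if any, is returned through *cb.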
#define ompt_event_macro(event_name, callback_type, event_id) \ - case event_name: \ - if (ompt_event_implementation_status(event_name)) { \ - ompt_callback_t mycb = \ - (ompt_callback_t) ompt_callbacks.ompt_callback(event_name); \ - if (mycb) { \ - *cb = mycb; \ - return ompt_get_callback_success; \ - } \ - } \ - return ompt_get_callback_failure; + case event_name: \ + if (ompt_event_implementation_status(event_name)) { \ + ompt_callback_t mycb = \ + (ompt_callback_t)ompt_callbacks.ompt_callback(event_name); \ + if (mycb) { \ + *cb = mycb; \ + return ompt_get_callback_success; \ + } \ + } \ + return ompt_get_callback_failure; FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro - default: return ompt_get_callback_failure; - } + default: + return ompt_get_callback_failure; + } } - /***************************************************************************** * parallel regions ****************************************************************************/ -OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) -{ - return __ompt_get_parallel_id_internal(ancestor_level); +OMPT_API_ROUTINE ompt_parallel_id_t ompt_get_parallel_id(int ancestor_level) { + return __ompt_get_parallel_id_internal(ancestor_level); } - -OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) -{ - return __ompt_get_parallel_team_size_internal(ancestor_level); +OMPT_API_ROUTINE int ompt_get_parallel_team_size(int ancestor_level) { + return __ompt_get_parallel_team_size_internal(ancestor_level); } - -OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) -{ - return __ompt_get_parallel_function_internal(ancestor_level); +OMPT_API_ROUTINE void *ompt_get_parallel_function(int ancestor_level) { + return __ompt_get_parallel_function_internal(ancestor_level); } +OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) { + ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); -OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *ompt_wait_id) -{ - ompt_state_t thread_state = __ompt_get_state_internal(ompt_wait_id); - - if (thread_state == ompt_state_undefined) { - thread_state = ompt_state_work_serial; - } + if (thread_state == ompt_state_undefined) { + thread_state = ompt_state_work_serial; + } - return thread_state; + return thread_state; } - - /***************************************************************************** * threads ****************************************************************************/ - -OMPT_API_ROUTINE void *ompt_get_idle_frame() -{ - return __ompt_get_idle_frame_internal(); +OMPT_API_ROUTINE void *ompt_get_idle_frame() { + return __ompt_get_idle_frame_internal(); } - - /***************************************************************************** * tasks ****************************************************************************/ - -OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) -{ - return __ompt_get_thread_id_internal(); +OMPT_API_ROUTINE ompt_thread_id_t ompt_get_thread_id(void) { + return __ompt_get_thread_id_internal(); } -OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) -{ - return __ompt_get_task_id_internal(depth); +OMPT_API_ROUTINE ompt_task_id_t ompt_get_task_id(int depth) { + return __ompt_get_task_id_internal(depth); } - -OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) -{ - return __ompt_get_task_frame_internal(depth); +OMPT_API_ROUTINE ompt_frame_t *ompt_get_task_frame(int depth) { + return __ompt_get_task_frame_internal(depth); } - -OMPT_API_ROUTINE 
void *ompt_get_task_function(int depth) -{ - return __ompt_get_task_function_internal(depth); +OMPT_API_ROUTINE void *ompt_get_task_function(int depth) { + return __ompt_get_task_function_internal(depth); } - /***************************************************************************** * placeholders ****************************************************************************/ @@ -440,96 +391,76 @@ OMPT_API_ROUTINE void *ompt_get_task_function(int depth) extern "C" { #endif - -OMPT_API_PLACEHOLDER void ompt_idle(void) -{ - // This function is a placeholder used to represent the calling context of - // idle OpenMP worker threads. It is not meant to be invoked. - assert(0); +OMPT_API_PLACEHOLDER void ompt_idle(void) { + // This function is a placeholder used to represent the calling context of + // idle OpenMP worker threads. It is not meant to be invoked. + assert(0); } - -OMPT_API_PLACEHOLDER void ompt_overhead(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads working in the OpenMP runtime. It is not meant to be invoked. - assert(0); +OMPT_API_PLACEHOLDER void ompt_overhead(void) { + // This function is a placeholder used to represent the OpenMP context of + // threads working in the OpenMP runtime. It is not meant to be invoked. + assert(0); } - -OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a barrier in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); +OMPT_API_PLACEHOLDER void ompt_barrier_wait(void) { + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a barrier in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); } - -OMPT_API_PLACEHOLDER void ompt_task_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a task in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); +OMPT_API_PLACEHOLDER void ompt_task_wait(void) { + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a task in the OpenMP runtime. It is not meant + // to be invoked. + assert(0); } - -OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) -{ - // This function is a placeholder used to represent the OpenMP context of - // threads waiting for a mutex in the OpenMP runtime. It is not meant - // to be invoked. - assert(0); +OMPT_API_PLACEHOLDER void ompt_mutex_wait(void) { + // This function is a placeholder used to represent the OpenMP context of + // threads waiting for a mutex in the OpenMP runtime. It is not meant + // to be invoked. 
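  // (A tool would typically obtain this placeholder's address through the
  // lookup callback passed to its initializer, e.g. lookup("ompt_mutex_wait"),
  // and use it only as a synthetic context address; it is never executed,
  // hence the assert below.)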
+ assert(0); } #ifdef __cplusplus }; #endif - /***************************************************************************** * compatability ****************************************************************************/ -OMPT_API_ROUTINE int ompt_get_ompt_version() -{ - return OMPT_VERSION; -} - - +OMPT_API_ROUTINE int ompt_get_ompt_version() { return OMPT_VERSION; } /***************************************************************************** * application-facing API ****************************************************************************/ - /*---------------------------------------------------------------------------- | control ---------------------------------------------------------------------------*/ -_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) -{ - if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { - ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); - } +_OMP_EXTERN void ompt_control(uint64_t command, uint64_t modifier) { + if (ompt_enabled && ompt_callbacks.ompt_callback(ompt_event_control)) { + ompt_callbacks.ompt_callback(ompt_event_control)(command, modifier); + } } - - /***************************************************************************** * API inquiry for tool ****************************************************************************/ -static ompt_interface_fn_t ompt_fn_lookup(const char *s) -{ +static ompt_interface_fn_t ompt_fn_lookup(const char *s) { -#define ompt_interface_fn(fn) \ - if (strcmp(s, #fn) == 0) return (ompt_interface_fn_t) fn; +#define ompt_interface_fn(fn) \ + if (strcmp(s, #fn) == 0) \ + return (ompt_interface_fn_t)fn; - FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) + FOREACH_OMPT_INQUIRY_FN(ompt_interface_fn) - FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) + FOREACH_OMPT_PLACEHOLDER_FN(ompt_interface_fn) - return (ompt_interface_fn_t) 0; + return (ompt_interface_fn_t)0; } diff --git a/openmp/runtime/src/ompt-internal.h b/openmp/runtime/src/ompt-internal.h index 42da9d8..44929efc 100644 --- a/openmp/runtime/src/ompt-internal.h +++ b/openmp/runtime/src/ompt-internal.h @@ -1,79 +1,71 @@ #ifndef __OMPT_INTERNAL_H__ #define __OMPT_INTERNAL_H__ -#include "ompt.h" #include "ompt-event-specific.h" +#include "ompt.h" #define OMPT_VERSION 1 #define _OMP_EXTERN extern "C" -#define OMPT_INVOKER(x) \ +#define OMPT_INVOKER(x) \ ((x == fork_context_gnu) ? 
ompt_invoker_program : ompt_invoker_runtime) - -#define ompt_callback(e) e ## _callback - +#define ompt_callback(e) e##_callback typedef struct ompt_callbacks_s { -#define ompt_event_macro(event, callback, eventid) callback ompt_callback(event); +#define ompt_event_macro(event, callback, eventid) \ + callback ompt_callback(event); - FOREACH_OMPT_EVENT(ompt_event_macro) + FOREACH_OMPT_EVENT(ompt_event_macro) #undef ompt_event_macro } ompt_callbacks_t; - - typedef struct { - ompt_frame_t frame; - void* function; - ompt_task_id_t task_id; + ompt_frame_t frame; + void *function; + ompt_task_id_t task_id; #if OMP_40_ENABLED - int ndeps; - ompt_task_dependence_t *deps; + int ndeps; + ompt_task_dependence_t *deps; #endif /* OMP_40_ENABLED */ } ompt_task_info_t; - typedef struct { - ompt_parallel_id_t parallel_id; - void *microtask; + ompt_parallel_id_t parallel_id; + void *microtask; } ompt_team_info_t; - typedef struct ompt_lw_taskteam_s { - ompt_team_info_t ompt_team_info; - ompt_task_info_t ompt_task_info; - struct ompt_lw_taskteam_s *parent; + ompt_team_info_t ompt_team_info; + ompt_task_info_t ompt_task_info; + struct ompt_lw_taskteam_s *parent; } ompt_lw_taskteam_t; - typedef struct ompt_parallel_info_s { - ompt_task_id_t parent_task_id; /* id of parent task */ - ompt_parallel_id_t parallel_id; /* id of parallel region */ - ompt_frame_t *parent_task_frame; /* frame data of parent task */ - void *parallel_function; /* pointer to outlined function */ + ompt_task_id_t parent_task_id; /* id of parent task */ + ompt_parallel_id_t parallel_id; /* id of parallel region */ + ompt_frame_t *parent_task_frame; /* frame data of parent task */ + void *parallel_function; /* pointer to outlined function */ } ompt_parallel_info_t; - typedef struct { - ompt_state_t state; - ompt_wait_id_t wait_id; - void *idle_frame; + ompt_state_t state; + ompt_wait_id_t wait_id; + void *idle_frame; } ompt_thread_info_t; - extern ompt_callbacks_t ompt_callbacks; #if OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE #if USE_FAST_MEMORY -# define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate -# define KMP_OMPT_DEPS_FREE __kmp_fast_free -# else -# define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc -# define KMP_OMPT_DEPS_FREE __kmp_thread_free -# endif +#define KMP_OMPT_DEPS_ALLOC __kmp_fast_allocate +#define KMP_OMPT_DEPS_FREE __kmp_fast_free +#else +#define KMP_OMPT_DEPS_ALLOC __kmp_thread_malloc +#define KMP_OMPT_DEPS_FREE __kmp_thread_free +#endif #endif /* OMP_40_ENABLED && OMPT_SUPPORT && OMPT_TRACE */ #ifdef __cplusplus diff --git a/openmp/runtime/src/ompt-specific.cpp b/openmp/runtime/src/ompt-specific.cpp index 9a962f4..9e0d1eef 100644 --- a/openmp/runtime/src/ompt-specific.cpp +++ b/openmp/runtime/src/ompt-specific.cpp @@ -10,7 +10,7 @@ // macros //****************************************************************************** -#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t) (id >=0) ? id + 1: 0) +#define GTID_TO_OMPT_THREAD_ID(id) ((ompt_thread_id_t)(id >= 0) ? id + 1 : 0) #define LWT_FROM_TEAM(team) (team)->t.ompt_serialized_team_info; @@ -26,10 +26,10 @@ // when using fetch_and_add to generate the IDs, there isn't any reason to waste // bits for thread id. 
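// Illustration (assuming, say, OMPT_THREAD_ID_BITS == 16): with the disabled
// scheme below, a fetched counter value of 3 on thread 5 would yield the ID
// (3 << 16) | 5 == 0x30005, whereas the scheme actually used simply returns
// the fetch-and-add result and ignores the thread id.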
#if 0 -#define NEXT_ID(id_ptr,tid) \ +#define NEXT_ID(id_ptr, tid) \ ((KMP_TEST_THEN_INC64(id_ptr) << OMPT_THREAD_ID_BITS) | (tid)) #else -#define NEXT_ID(id_ptr,tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) +#define NEXT_ID(id_ptr, tid) (KMP_TEST_THEN_INC64((volatile kmp_int64 *)id_ptr)) #endif //****************************************************************************** @@ -43,89 +43,87 @@ // kept consistent //---------------------------------------------------------- -ompt_team_info_t * -__ompt_get_teaminfo(int depth, int *size) -{ - kmp_info_t *thr = ompt_get_thread(); +ompt_team_info_t *__ompt_get_teaminfo(int depth, int *size) { + kmp_info_t *thr = ompt_get_thread(); - if (thr) { - kmp_team *team = thr->th.th_team; - if (team == NULL) return NULL; + if (thr) { + kmp_team *team = thr->th.th_team; + if (team == NULL) + return NULL; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(team); - while(depth > 0) { - // next lightweight team (if any) - if (lwt) lwt = lwt->parent; + while (depth > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && team) { - team=team->t.t_parent; - if (team) { - lwt = LWT_FROM_TEAM(team); - } - } - - depth--; + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && team) { + team = team->t.t_parent; + if (team) { + lwt = LWT_FROM_TEAM(team); } + } - if (lwt) { - // lightweight teams have one task - if (size) *size = 1; + depth--; + } - // return team info for lightweight team - return &lwt->ompt_team_info; - } else if (team) { - // extract size from heavyweight team - if (size) *size = team->t.t_nproc; + if (lwt) { + // lightweight teams have one task + if (size) + *size = 1; - // return team info for heavyweight team - return &team->t.ompt_team_info; - } + // return team info for lightweight team + return &lwt->ompt_team_info; + } else if (team) { + // extract size from heavyweight team + if (size) + *size = team->t.t_nproc; + + // return team info for heavyweight team + return &team->t.ompt_team_info; } + } - return NULL; + return NULL; } - -ompt_task_info_t * -__ompt_get_taskinfo(int depth) -{ - ompt_task_info_t *info = NULL; - kmp_info_t *thr = ompt_get_thread(); - - if (thr) { - kmp_taskdata_t *taskdata = thr->th.th_current_task; - ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); - - while (depth > 0) { - // next lightweight team (if any) - if (lwt) lwt = lwt->parent; - - // next heavyweight team (if any) after - // lightweight teams are exhausted - if (!lwt && taskdata) { - taskdata = taskdata->td_parent; - if (taskdata) { - lwt = LWT_FROM_TEAM(taskdata->td_team); - } - } - depth--; +ompt_task_info_t *__ompt_get_taskinfo(int depth) { + ompt_task_info_t *info = NULL; + kmp_info_t *thr = ompt_get_thread(); + + if (thr) { + kmp_taskdata_t *taskdata = thr->th.th_current_task; + ompt_lw_taskteam_t *lwt = LWT_FROM_TEAM(taskdata->td_team); + + while (depth > 0) { + // next lightweight team (if any) + if (lwt) + lwt = lwt->parent; + + // next heavyweight team (if any) after + // lightweight teams are exhausted + if (!lwt && taskdata) { + taskdata = taskdata->td_parent; + if (taskdata) { + lwt = LWT_FROM_TEAM(taskdata->td_team); } + } + depth--; + } - if (lwt) { - info = &lwt->ompt_task_info; - } else if (taskdata) { - info = &taskdata->ompt_task_info; - } + if (lwt) { + info = &lwt->ompt_task_info; + } else if (taskdata) { + info = 
&taskdata->ompt_task_info; } + } - return info; + return info; } - - //****************************************************************************** // interface operations //****************************************************************************** @@ -134,204 +132,151 @@ __ompt_get_taskinfo(int depth) // thread support //---------------------------------------------------------- -ompt_parallel_id_t -__ompt_thread_id_new() -{ - static uint64_t ompt_thread_id = 1; - return NEXT_ID(&ompt_thread_id, 0); +ompt_parallel_id_t __ompt_thread_id_new() { + static uint64_t ompt_thread_id = 1; + return NEXT_ID(&ompt_thread_id, 0); } -void -__ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) -{ - ompt_callbacks.ompt_callback(ompt_event_thread_begin)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); +void __ompt_thread_begin(ompt_thread_type_t thread_type, int gtid) { + ompt_callbacks.ompt_callback(ompt_event_thread_begin)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); } - -void -__ompt_thread_end(ompt_thread_type_t thread_type, int gtid) -{ - ompt_callbacks.ompt_callback(ompt_event_thread_end)( - thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); +void __ompt_thread_end(ompt_thread_type_t thread_type, int gtid) { + ompt_callbacks.ompt_callback(ompt_event_thread_end)( + thread_type, GTID_TO_OMPT_THREAD_ID(gtid)); } +ompt_thread_id_t __ompt_get_thread_id_internal() { + // FIXME: until we have a better way of assigning ids, use __kmp_get_gtid + // since the return value might be negative, we need to test that before + // assigning it to an ompt_thread_id_t, which is unsigned. + int id = __kmp_get_gtid(); + assert(id >= 0); -ompt_thread_id_t -__ompt_get_thread_id_internal() -{ - // FIXME - // until we have a better way of assigning ids, use __kmp_get_gtid - // since the return value might be negative, we need to test that before - // assigning it to an ompt_thread_id_t, which is unsigned. - int id = __kmp_get_gtid(); - assert(id >= 0); - - return GTID_TO_OMPT_THREAD_ID(id); + return GTID_TO_OMPT_THREAD_ID(id); } //---------------------------------------------------------- // state support //---------------------------------------------------------- -void -__ompt_thread_assign_wait_id(void *variable) -{ - int gtid = __kmp_gtid_get_specific(); - kmp_info_t *ti = ompt_get_thread_gtid(gtid); +void __ompt_thread_assign_wait_id(void *variable) { + int gtid = __kmp_gtid_get_specific(); + kmp_info_t *ti = ompt_get_thread_gtid(gtid); - ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t) variable; + ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; } -ompt_state_t -__ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) -{ - kmp_info_t *ti = ompt_get_thread(); +ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *ompt_wait_id) { + kmp_info_t *ti = ompt_get_thread(); - if (ti) { - if (ompt_wait_id) - *ompt_wait_id = ti->th.ompt_thread_info.wait_id; - return ti->th.ompt_thread_info.state; - } - return ompt_state_undefined; + if (ti) { + if (ompt_wait_id) + *ompt_wait_id = ti->th.ompt_thread_info.wait_id; + return ti->th.ompt_thread_info.state; + } + return ompt_state_undefined; } //---------------------------------------------------------- // idle frame support //---------------------------------------------------------- -void * -__ompt_get_idle_frame_internal(void) -{ - kmp_info_t *ti = ompt_get_thread(); - return ti ? ti->th.ompt_thread_info.idle_frame : NULL; +void *__ompt_get_idle_frame_internal(void) { + kmp_info_t *ti = ompt_get_thread(); + return ti ? 
ti->th.ompt_thread_info.idle_frame : NULL; } - //---------------------------------------------------------- // parallel region support //---------------------------------------------------------- -ompt_parallel_id_t -__ompt_parallel_id_new(int gtid) -{ - static uint64_t ompt_parallel_id = 1; - return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; +ompt_parallel_id_t __ompt_parallel_id_new(int gtid) { + static uint64_t ompt_parallel_id = 1; + return gtid >= 0 ? NEXT_ID(&ompt_parallel_id, gtid) : 0; } - -void * -__ompt_get_parallel_function_internal(int depth) -{ - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - void *function = info ? info->microtask : NULL; - return function; +void *__ompt_get_parallel_function_internal(int depth) { + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + void *function = info ? info->microtask : NULL; + return function; } - -ompt_parallel_id_t -__ompt_get_parallel_id_internal(int depth) -{ - ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); - ompt_parallel_id_t id = info ? info->parallel_id : 0; - return id; +ompt_parallel_id_t __ompt_get_parallel_id_internal(int depth) { + ompt_team_info_t *info = __ompt_get_teaminfo(depth, NULL); + ompt_parallel_id_t id = info ? info->parallel_id : 0; + return id; } - -int -__ompt_get_parallel_team_size_internal(int depth) -{ - // initialize the return value with the error value. - // if there is a team at the specified depth, the default - // value will be overwritten the size of that team. - int size = -1; - (void) __ompt_get_teaminfo(depth, &size); - return size; +int __ompt_get_parallel_team_size_internal(int depth) { + // initialize the return value with the error value. + // if there is a team at the specified depth, the default + // value will be overwritten the size of that team. 
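  // (__ompt_get_teaminfo() stores 1 for a lightweight team, or t_nproc for a
  // regular team, through its size parameter; if there is no team at this
  // depth, the -1 below is returned unchanged.)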
+ int size = -1; + (void)__ompt_get_teaminfo(depth, &size); + return size; } - //---------------------------------------------------------- // lightweight task team support //---------------------------------------------------------- -void -__ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, - int gtid, void *microtask, - ompt_parallel_id_t ompt_pid) -{ - lwt->ompt_team_info.parallel_id = ompt_pid; - lwt->ompt_team_info.microtask = microtask; - lwt->ompt_task_info.task_id = 0; - lwt->ompt_task_info.frame.reenter_runtime_frame = NULL; - lwt->ompt_task_info.frame.exit_runtime_frame = NULL; - lwt->ompt_task_info.function = NULL; - lwt->parent = 0; +void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, + void *microtask, ompt_parallel_id_t ompt_pid) { + lwt->ompt_team_info.parallel_id = ompt_pid; + lwt->ompt_team_info.microtask = microtask; + lwt->ompt_task_info.task_id = 0; + lwt->ompt_task_info.frame.reenter_runtime_frame = NULL; + lwt->ompt_task_info.frame.exit_runtime_frame = NULL; + lwt->ompt_task_info.function = NULL; + lwt->parent = 0; } - -void -__ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) -{ - ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; - lwt->parent = my_parent; - thr->th.th_team->t.ompt_serialized_team_info = lwt; +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, kmp_info_t *thr) { + ompt_lw_taskteam_t *my_parent = thr->th.th_team->t.ompt_serialized_team_info; + lwt->parent = my_parent; + thr->th.th_team->t.ompt_serialized_team_info = lwt; } - -ompt_lw_taskteam_t * -__ompt_lw_taskteam_unlink(kmp_info_t *thr) -{ - ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; - if (lwtask) thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; - return lwtask; +ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(kmp_info_t *thr) { + ompt_lw_taskteam_t *lwtask = thr->th.th_team->t.ompt_serialized_team_info; + if (lwtask) + thr->th.th_team->t.ompt_serialized_team_info = lwtask->parent; + return lwtask; } - //---------------------------------------------------------- // task support //---------------------------------------------------------- -ompt_task_id_t -__ompt_task_id_new(int gtid) -{ - static uint64_t ompt_task_id = 1; - return NEXT_ID(&ompt_task_id, gtid); +ompt_task_id_t __ompt_task_id_new(int gtid) { + static uint64_t ompt_task_id = 1; + return NEXT_ID(&ompt_task_id, gtid); } - -ompt_task_id_t -__ompt_get_task_id_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_task_id_t task_id = info ? info->task_id : 0; - return task_id; +ompt_task_id_t __ompt_get_task_id_internal(int depth) { + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_task_id_t task_id = info ? info->task_id : 0; + return task_id; } - -void * -__ompt_get_task_function_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - void *function = info ? info->function : NULL; - return function; +void *__ompt_get_task_function_internal(int depth) { + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + void *function = info ? info->function : NULL; + return function; } - -ompt_frame_t * -__ompt_get_task_frame_internal(int depth) -{ - ompt_task_info_t *info = __ompt_get_taskinfo(depth); - ompt_frame_t *frame = info ? frame = &info->frame : NULL; - return frame; +ompt_frame_t *__ompt_get_task_frame_internal(int depth) { + ompt_task_info_t *info = __ompt_get_taskinfo(depth); + ompt_frame_t *frame = info ? 
frame = &info->frame : NULL; + return frame; } - //---------------------------------------------------------- // team support //---------------------------------------------------------- -void -__ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) -{ - team->t.ompt_team_info.parallel_id = ompt_pid; +void __ompt_team_assign_id(kmp_team_t *team, ompt_parallel_id_t ompt_pid) { + team->t.ompt_team_info.parallel_id = ompt_pid; } diff --git a/openmp/runtime/src/ompt-specific.h b/openmp/runtime/src/ompt-specific.h index c8b50fb..b4f09a4 100644 --- a/openmp/runtime/src/ompt-specific.h +++ b/openmp/runtime/src/ompt-specific.h @@ -9,8 +9,6 @@ typedef kmp_info_t ompt_thread_t; - - /***************************************************************************** * forward declarations ****************************************************************************/ @@ -22,9 +20,9 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr, int gtid, void *microtask, ompt_parallel_id_t ompt_pid); -void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); +void __ompt_lw_taskteam_link(ompt_lw_taskteam_t *lwt, ompt_thread_t *thr); -ompt_lw_taskteam_t * __ompt_lw_taskteam_unlink(ompt_thread_t *thr); +ompt_lw_taskteam_t *__ompt_lw_taskteam_unlink(ompt_thread_t *thr); ompt_parallel_id_t __ompt_parallel_id_new(int gtid); ompt_task_id_t __ompt_task_id_new(int gtid); @@ -43,8 +41,6 @@ ompt_task_id_t __ompt_get_task_id_internal(int depth); ompt_frame_t *__ompt_get_task_frame_internal(int depth); - - /***************************************************************************** * macros ****************************************************************************/ @@ -53,38 +49,25 @@ ompt_frame_t *__ompt_get_task_frame_internal(int depth); #define OMPT_HAVE_PSAPI KMP_HAVE_PSAPI #define OMPT_STR_MATCH(haystack, needle) __kmp_str_match(haystack, 0, needle) - - //****************************************************************************** // inline functions //****************************************************************************** -inline ompt_thread_t * -ompt_get_thread_gtid(int gtid) -{ - return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL; +inline ompt_thread_t *ompt_get_thread_gtid(int gtid) { + return (gtid >= 0) ? __kmp_thread_from_gtid(gtid) : NULL; } - -inline ompt_thread_t * -ompt_get_thread() -{ - int gtid = __kmp_get_gtid(); - return ompt_get_thread_gtid(gtid); +inline ompt_thread_t *ompt_get_thread() { + int gtid = __kmp_get_gtid(); + return ompt_get_thread_gtid(gtid); } - -inline void -ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) -{ - thread->th.ompt_thread_info.state = state; +inline void ompt_set_thread_state(ompt_thread_t *thread, ompt_state_t state) { + thread->th.ompt_thread_info.state = state; } - -inline const char * -ompt_get_runtime_version() -{ - return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; +inline const char *ompt_get_runtime_version() { + return &__kmp_version_lib_ver[KMP_VERSION_MAGIC_LEN]; } #endif diff --git a/openmp/runtime/src/tsan_annotations.cpp b/openmp/runtime/src/tsan_annotations.cpp index 9d05555..2629788 100644 --- a/openmp/runtime/src/tsan_annotations.cpp +++ b/openmp/runtime/src/tsan_annotations.cpp @@ -3,7 +3,6 @@ * race detection in OpenMP programs. 
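 * The weak, empty annotation functions defined here are presumably intended to
 * be overridden by the real entry points when the program runs under a race
 * detector such as ThreadSanitizer; otherwise they compile to no-ops.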
*/ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -18,46 +17,92 @@ #include typedef unsigned long uptr; -typedef signed long sptr; +typedef signed long sptr; -extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f, int l, uptr cv) {} -extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f, int l, uptr cv) {} -extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f, int l, uptr mu) {} -extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l, uptr cv, uptr lock) {} -extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l, uptr m) {} -extern "C" __attribute__((weak)) void AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {} -extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f, int l, uptr m) {} -extern "C" __attribute__((weak)) void AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {} -extern "C" __attribute__((weak)) void AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {} -extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l, uptr mem) {} -extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size) {} -extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l, uptr mem) {} -extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateEnableRaceDetection( const char *f, int l, int enable) {} -extern "C" __attribute__((weak)) void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu) {} -extern "C" __attribute__((weak)) void AnnotatePCQGet( const char *f, int l, uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQPut( const char *f, int l, uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQDestroy( const char *f, int l, uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotatePCQCreate( const char *f, int l, uptr pcq) {} -extern "C" __attribute__((weak)) void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc) {} -extern "C" __attribute__((weak)) void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc) {} -extern "C" __attribute__((weak)) void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f, int l) {} -extern "C" __attribute__((weak)) void AnnotatePublishMemoryRange( const char *f, int l, uptr addr, uptr size) {} -extern "C" __attribute__((weak)) void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size) {} -extern "C" __attribute__((weak)) void AnnotateThreadName( const char *f, int l, char *name) {} -extern "C" 
__attribute__((weak)) void WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {} -extern "C" __attribute__((weak)) void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc) {} -extern "C" __attribute__((weak)) int RunningOnValgrind() {return 0;} -extern "C" __attribute__((weak)) double ValgrindSlowdown(void) {return 0;} -extern "C" __attribute__((weak)) const char __attribute__((weak))* ThreadSanitizerQuery(const char *query) {return 0;} -extern "C" __attribute__((weak)) void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {} +extern "C" __attribute__((weak)) void AnnotateHappensBefore(const char *f, + int l, uptr addr) {} +extern "C" __attribute__((weak)) void AnnotateHappensAfter(const char *f, int l, + uptr addr) {} +extern "C" __attribute__((weak)) void AnnotateCondVarSignal(const char *f, + int l, uptr cv) {} +extern "C" __attribute__((weak)) void AnnotateCondVarSignalAll(const char *f, + int l, uptr cv) { +} +extern "C" __attribute__((weak)) void AnnotateMutexIsNotPHB(const char *f, + int l, uptr mu) {} +extern "C" __attribute__((weak)) void AnnotateCondVarWait(const char *f, int l, + uptr cv, uptr lock) {} +extern "C" __attribute__((weak)) void AnnotateRWLockCreate(const char *f, int l, + uptr m) {} +extern "C" __attribute__((weak)) void +AnnotateRWLockCreateStatic(const char *f, int l, uptr m) {} +extern "C" __attribute__((weak)) void AnnotateRWLockDestroy(const char *f, + int l, uptr m) {} +extern "C" __attribute__((weak)) void +AnnotateRWLockAcquired(const char *f, int l, uptr m, uptr is_w) {} +extern "C" __attribute__((weak)) void +AnnotateRWLockReleased(const char *f, int l, uptr m, uptr is_w) {} +extern "C" __attribute__((weak)) void AnnotateTraceMemory(const char *f, int l, + uptr mem) {} +extern "C" __attribute__((weak)) void AnnotateFlushState(const char *f, int l) { +} +extern "C" __attribute__((weak)) void AnnotateNewMemory(const char *f, int l, + uptr mem, uptr size) {} +extern "C" __attribute__((weak)) void AnnotateNoOp(const char *f, int l, + uptr mem) {} +extern "C" __attribute__((weak)) void AnnotateFlushExpectedRaces(const char *f, + int l) {} +extern "C" __attribute__((weak)) void +AnnotateEnableRaceDetection(const char *f, int l, int enable) {} +extern "C" __attribute__((weak)) void +AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu) {} +extern "C" __attribute__((weak)) void AnnotatePCQGet(const char *f, int l, + uptr pcq) {} +extern "C" __attribute__((weak)) void AnnotatePCQPut(const char *f, int l, + uptr pcq) {} +extern "C" __attribute__((weak)) void AnnotatePCQDestroy(const char *f, int l, + uptr pcq) {} +extern "C" __attribute__((weak)) void AnnotatePCQCreate(const char *f, int l, + uptr pcq) {} +extern "C" __attribute__((weak)) void AnnotateExpectRace(const char *f, int l, + uptr mem, char *desc) { +} +extern "C" __attribute__((weak)) void +AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, char *desc) { +} +extern "C" __attribute__((weak)) void AnnotateBenignRace(const char *f, int l, + uptr mem, char *desc) { +} +extern "C" __attribute__((weak)) void AnnotateIgnoreReadsBegin(const char *f, + int l) {} +extern "C" __attribute__((weak)) void AnnotateIgnoreReadsEnd(const char *f, + int l) {} +extern "C" __attribute__((weak)) void AnnotateIgnoreWritesBegin(const char *f, + int l) {} +extern "C" __attribute__((weak)) void AnnotateIgnoreWritesEnd(const char *f, + int l) {} +extern "C" 
__attribute__((weak)) void AnnotateIgnoreSyncBegin(const char *f, + int l) {} +extern "C" __attribute__((weak)) void AnnotateIgnoreSyncEnd(const char *f, + int l) {} +extern "C" __attribute__((weak)) void +AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size) {} +extern "C" __attribute__((weak)) void +AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size) {} +extern "C" __attribute__((weak)) void AnnotateThreadName(const char *f, int l, + char *name) {} +extern "C" __attribute__((weak)) void +WTFAnnotateHappensBefore(const char *f, int l, uptr addr) {} +extern "C" __attribute__((weak)) void +WTFAnnotateHappensAfter(const char *f, int l, uptr addr) {} +extern "C" __attribute__((weak)) void +WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz, + char *desc) {} +extern "C" __attribute__((weak)) int RunningOnValgrind() { return 0; } +extern "C" __attribute__((weak)) double ValgrindSlowdown(void) { return 0; } +extern "C" __attribute__((weak)) const char __attribute__((weak)) * + ThreadSanitizerQuery(const char *query) { + return 0; +} +extern "C" __attribute__((weak)) void +AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz) {} diff --git a/openmp/runtime/src/tsan_annotations.h b/openmp/runtime/src/tsan_annotations.h index cacd7ec..9abbfaf 100644 --- a/openmp/runtime/src/tsan_annotations.h +++ b/openmp/runtime/src/tsan_annotations.h @@ -4,7 +4,6 @@ * race detection in OpenMP programs. */ - //===----------------------------------------------------------------------===// // // The LLVM Compiler Infrastructure @@ -21,7 +20,7 @@ /* types as used in tsan/rtl/tsan_interface_ann.cc */ typedef unsigned long uptr; -typedef signed long sptr; +typedef signed long sptr; #ifdef __cplusplus extern "C" { @@ -44,30 +43,32 @@ void AnnotateFlushState(const char *f, int l); void AnnotateNewMemory(const char *f, int l, uptr mem, uptr size); void AnnotateNoOp(const char *f, int l, uptr mem); void AnnotateFlushExpectedRaces(const char *f, int l); -void AnnotateEnableRaceDetection( const char *f, int l, int enable); -void AnnotateMutexIsUsedAsCondVar( const char *f, int l, uptr mu); -void AnnotatePCQGet( const char *f, int l, uptr pcq); -void AnnotatePCQPut( const char *f, int l, uptr pcq); -void AnnotatePCQDestroy( const char *f, int l, uptr pcq); -void AnnotatePCQCreate( const char *f, int l, uptr pcq); -void AnnotateExpectRace( const char *f, int l, uptr mem, char *desc); -void AnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr size, char *desc); -void AnnotateBenignRace( const char *f, int l, uptr mem, char *desc); +void AnnotateEnableRaceDetection(const char *f, int l, int enable); +void AnnotateMutexIsUsedAsCondVar(const char *f, int l, uptr mu); +void AnnotatePCQGet(const char *f, int l, uptr pcq); +void AnnotatePCQPut(const char *f, int l, uptr pcq); +void AnnotatePCQDestroy(const char *f, int l, uptr pcq); +void AnnotatePCQCreate(const char *f, int l, uptr pcq); +void AnnotateExpectRace(const char *f, int l, uptr mem, char *desc); +void AnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr size, + char *desc); +void AnnotateBenignRace(const char *f, int l, uptr mem, char *desc); void AnnotateIgnoreReadsBegin(const char *f, int l); void AnnotateIgnoreReadsEnd(const char *f, int l); void AnnotateIgnoreWritesBegin(const char *f, int l); void AnnotateIgnoreWritesEnd(const char *f, int l); void AnnotateIgnoreSyncBegin(const char *f, int l); void AnnotateIgnoreSyncEnd(const char *f, int l); -void AnnotatePublishMemoryRange( const 
char *f, int l, uptr addr, uptr size); -void AnnotateUnpublishMemoryRange( const char *f, int l, uptr addr, uptr size); -void AnnotateThreadName( const char *f, int l, char *name); +void AnnotatePublishMemoryRange(const char *f, int l, uptr addr, uptr size); +void AnnotateUnpublishMemoryRange(const char *f, int l, uptr addr, uptr size); +void AnnotateThreadName(const char *f, int l, char *name); void WTFAnnotateHappensBefore(const char *f, int l, uptr addr); void WTFAnnotateHappensAfter(const char *f, int l, uptr addr); -void WTFAnnotateBenignRaceSized( const char *f, int l, uptr mem, uptr sz, char *desc); +void WTFAnnotateBenignRaceSized(const char *f, int l, uptr mem, uptr sz, + char *desc); int RunningOnValgrind(); double ValgrindSlowdown(void); -const char * ThreadSanitizerQuery(const char *query); +const char *ThreadSanitizerQuery(const char *query); void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz); #ifdef __cplusplus @@ -75,17 +76,27 @@ void AnnotateMemoryIsInitialized(const char *f, int l, uptr mem, uptr sz); #endif #ifdef TSAN_SUPPORT -#define ANNOTATE_HAPPENS_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_HAPPENS_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_IGNORE_WRITES_BEGIN() AnnotateIgnoreWritesBegin(__FILE__, __LINE__) +#define ANNOTATE_HAPPENS_AFTER(addr) \ + AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_HAPPENS_BEFORE(addr) \ + AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_IGNORE_WRITES_BEGIN() \ + AnnotateIgnoreWritesBegin(__FILE__, __LINE__) #define ANNOTATE_IGNORE_WRITES_END() AnnotateIgnoreWritesEnd(__FILE__, __LINE__) -#define ANNOTATE_RWLOCK_CREATE(lck) AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck) -#define ANNOTATE_RWLOCK_RELEASED(lck) AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1) -#define ANNOTATE_RWLOCK_ACQUIRED(lck) AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1) -#define ANNOTATE_BARRIER_BEGIN(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_BARRIER_END(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_REDUCE_AFTER(addr) AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) -#define ANNOTATE_REDUCE_BEFORE(addr) AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_RWLOCK_CREATE(lck) \ + AnnotateRWLockCreate(__FILE__, __LINE__, (uptr)lck) +#define ANNOTATE_RWLOCK_RELEASED(lck) \ + AnnotateRWLockAcquired(__FILE__, __LINE__, (uptr)lck, 1) +#define ANNOTATE_RWLOCK_ACQUIRED(lck) \ + AnnotateRWLockReleased(__FILE__, __LINE__, (uptr)lck, 1) +#define ANNOTATE_BARRIER_BEGIN(addr) \ + AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_BARRIER_END(addr) \ + AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_REDUCE_AFTER(addr) \ + AnnotateHappensAfter(__FILE__, __LINE__, (uptr)addr) +#define ANNOTATE_REDUCE_BEFORE(addr) \ + AnnotateHappensBefore(__FILE__, __LINE__, (uptr)addr) #else #define ANNOTATE_HAPPENS_AFTER(addr) #define ANNOTATE_HAPPENS_BEFORE(addr) diff --git a/openmp/runtime/src/z_Linux_asm.s b/openmp/runtime/src/z_Linux_asm.s index 11fd023..c9a5526 100644 --- a/openmp/runtime/src/z_Linux_asm.s +++ b/openmp/runtime/src/z_Linux_asm.s @@ -21,7 +21,6 @@ #if KMP_ARCH_X86 || KMP_ARCH_X86_64 # if KMP_MIC -// // the 'delay r16/r32/r64' should be used instead of the 'pause'. 
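For illustration only (not part of this patch): when TSAN_SUPPORT is defined, the ANNOTATE_* macros in tsan_annotations.h above expand to calls into the ThreadSanitizer annotation interface, and the weak no-op definitions in tsan_annotations.cpp keep those symbols resolvable when no TSan runtime is linked. A minimal sketch of how a hand-rolled synchronization point might be annotated; the lock type and function names below are hypothetical, not taken from the runtime:

    // Hypothetical spin lock annotated for ThreadSanitizer; my_lock_t,
    // my_release() and my_acquire() are illustrative names only.
    #include "tsan_annotations.h"

    struct my_lock_t { volatile int flag; };

    void my_release(my_lock_t *lck) {
      ANNOTATE_HAPPENS_BEFORE(lck); // writes before this point are ordered...
      __sync_synchronize();
      lck->flag = 0;
    }

    void my_acquire(my_lock_t *lck) {
      while (!__sync_bool_compare_and_swap(&lck->flag, 0, 1))
        ;
      ANNOTATE_HAPPENS_AFTER(lck); // ...before reads after this matching point
    }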
// The delay operation has the effect of removing the current thread from // the round-robin HT mechanism, and therefore speeds up the issue rate of @@ -70,9 +69,10 @@ KMP_PREFIX_UNDERSCORE($0): .endmacro # else // KMP_OS_DARWIN -# define KMP_PREFIX_UNDERSCORE(x) x // no extra underscore for Linux* OS symbols +# define KMP_PREFIX_UNDERSCORE(x) x //no extra underscore for Linux* OS symbols // Format labels so that they don't override function names in gdb's backtraces -// MIC assembler doesn't accept .L syntax, the L works fine there (as well as on OS X*) +// MIC assembler doesn't accept .L syntax, the L works fine there (as well as +// on OS X*) # if KMP_MIC # define KMP_LABEL(x) L_##x // local label # else @@ -163,12 +163,10 @@ KMP_PREFIX_UNDERSCORE(\proc): #ifdef KMP_GOMP_COMPAT -// // Support for unnamed common blocks. // // Because the symbol ".gomp_critical_user_" contains a ".", we have to // put this stuff in assembly. -// # if KMP_ARCH_X86 # if KMP_OS_DARWIN @@ -221,14 +219,12 @@ __kmp_unnamed_critical_addr: // microtasking routines specifically written for IA-32 architecture // running Linux* OS // ----------------------------------------------------------------------- -// .ident "Intel Corporation" .data ALIGN 4 // void // __kmp_x86_pause( void ); -// .text PROC __kmp_x86_pause @@ -238,10 +234,9 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_x86_pause -// // void // __kmp_x86_cpuid( int mode, int mode2, void *cpuid_buffer ); -// + PROC __kmp_x86_cpuid pushl %ebp @@ -253,7 +248,7 @@ __kmp_unnamed_critical_addr: movl 8(%ebp), %eax movl 12(%ebp), %ecx - cpuid // Query the CPUID for the current processor + cpuid // Query the CPUID for the current processor movl 16(%ebp), %edi movl %eax, 0(%edi) @@ -275,10 +270,8 @@ __kmp_unnamed_critical_addr: # if !KMP_ASM_INTRINS //------------------------------------------------------------------------ -// // kmp_int32 // __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -// PROC __kmp_test_then_add32 @@ -291,7 +284,6 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_test_then_add32 //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed8 // // kmp_int32 @@ -302,7 +294,6 @@ __kmp_unnamed_critical_addr: // d: 8(%esp) // // return: %al - PROC __kmp_xchg_fixed8 movl 4(%esp), %ecx // "p" @@ -316,7 +307,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed16 // // kmp_int16 @@ -326,7 +316,6 @@ __kmp_unnamed_critical_addr: // p: 4(%esp) // d: 8(%esp) // return: %ax - PROC __kmp_xchg_fixed16 movl 4(%esp), %ecx // "p" @@ -340,7 +329,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed32 // // kmp_int32 @@ -351,7 +339,6 @@ __kmp_unnamed_critical_addr: // d: 8(%esp) // // return: %eax - PROC __kmp_xchg_fixed32 movl 4(%esp), %ecx // "p" @@ -364,11 +351,8 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_xchg_fixed32 -// // kmp_int8 // __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// - PROC __kmp_compare_and_store8 movl 4(%esp), %ecx @@ -382,11 +366,8 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store8 -// // kmp_int16 -// __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// - +// __kmp_compare_and_store16(volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv); PROC __kmp_compare_and_store16 movl 4(%esp), %ecx @@ -400,11 +381,8 @@ 
__kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store16 -// // kmp_int32 -// __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// - +// __kmp_compare_and_store32(volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv); PROC __kmp_compare_and_store32 movl 4(%esp), %ecx @@ -412,16 +390,14 @@ __kmp_unnamed_critical_addr: movl 12(%esp), %edx lock cmpxchgl %edx,(%ecx) - sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction + sete %al // if %eax == (%ecx) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction ret DEBUG_INFO __kmp_compare_and_store32 -// // kmp_int32 -// __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// +// __kmp_compare_and_store64(volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 s ); PROC __kmp_compare_and_store64 pushl %ebp @@ -435,8 +411,8 @@ __kmp_unnamed_critical_addr: movl 24(%ebp), %ecx // "sv" high order word lock cmpxchg8b (%edi) - sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 - and $1, %eax // sign extend previous instruction + sete %al // if %edx:eax == (%edi) set %al = 1 else set %al = 0 + and $1, %eax // sign extend previous instruction popl %edi popl %ebx movl %ebp, %esp @@ -445,11 +421,8 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store64 -// // kmp_int8 -// __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -// - +// __kmp_compare_and_store_ret8(volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv); PROC __kmp_compare_and_store_ret8 movl 4(%esp), %ecx @@ -461,11 +434,9 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store_ret8 -// // kmp_int16 -// __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -// - +// __kmp_compare_and_store_ret16(volatile kmp_int16 *p, kmp_int16 cv, +// kmp_int16 sv); PROC __kmp_compare_and_store_ret16 movl 4(%esp), %ecx @@ -477,11 +448,9 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store_ret16 -// // kmp_int32 -// __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -// - +// __kmp_compare_and_store_ret32(volatile kmp_int32 *p, kmp_int32 cv, +// kmp_int32 sv); PROC __kmp_compare_and_store_ret32 movl 4(%esp), %ecx @@ -493,10 +462,9 @@ __kmp_unnamed_critical_addr: DEBUG_INFO __kmp_compare_and_store_ret32 -// // kmp_int64 -// __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -// +// __kmp_compare_and_store_ret64(volatile kmp_int64 *p, kmp_int64 cv, +// kmp_int64 sv); PROC __kmp_compare_and_store_ret64 pushl %ebp @@ -520,7 +488,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_real32 // // kmp_real32 @@ -531,8 +498,6 @@ __kmp_unnamed_critical_addr: // data: 8(%esp) // // return: %eax - - PROC __kmp_xchg_real32 pushl %ebp @@ -565,7 +530,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_load_x87_fpu_control_word // // void @@ -573,8 +537,6 @@ __kmp_unnamed_critical_addr: // // parameters: // p: 4(%esp) -// - PROC __kmp_load_x87_fpu_control_word movl 4(%esp), %eax @@ -585,7 +547,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_store_x87_fpu_control_word // // void @@ -593,8 +554,6 @@ __kmp_unnamed_critical_addr: // // parameters: // p: 4(%esp) -// - PROC 
__kmp_store_x87_fpu_control_word movl 4(%esp), %eax @@ -605,14 +564,10 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // FUNCTION __kmp_clear_x87_fpu_status_word // // void // __kmp_clear_x87_fpu_status_word(); -// -// - PROC __kmp_clear_x87_fpu_status_word fnclex @@ -622,7 +577,6 @@ __kmp_unnamed_critical_addr: //------------------------------------------------------------------------ -// // typedef void (*microtask_t)( int *gtid, int *tid, ... ); // // int @@ -714,7 +668,6 @@ KMP_LABEL(invoke_3): DEBUG_INFO __kmp_hardware_timestamp // -- End __kmp_hardware_timestamp -// ----------------------------------------------------------------------- #endif /* KMP_ARCH_X86 */ @@ -732,9 +685,9 @@ KMP_LABEL(invoke_3): .data ALIGN 4 -// To prevent getting our code into .data section .text added to every routine definition for x86_64. +// To prevent getting our code into .data section .text added to every routine +// definition for x86_64. //------------------------------------------------------------------------ -// // FUNCTION __kmp_x86_cpuid // // void @@ -744,7 +697,6 @@ KMP_LABEL(invoke_3): // mode: %edi // mode2: %esi // cpuid_buffer: %rdx - .text PROC __kmp_x86_cpuid @@ -774,7 +726,6 @@ KMP_LABEL(invoke_3): # if !KMP_ASM_INTRINS //------------------------------------------------------------------------ -// // FUNCTION __kmp_test_then_add32 // // kmp_int32 @@ -785,7 +736,6 @@ KMP_LABEL(invoke_3): // d: %esi // // return: %eax - .text PROC __kmp_test_then_add32 @@ -798,7 +748,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_test_then_add64 // // kmp_int64 @@ -808,7 +757,6 @@ KMP_LABEL(invoke_3): // p: %rdi // d: %rsi // return: %rax - .text PROC __kmp_test_then_add64 @@ -821,7 +769,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed8 // // kmp_int32 @@ -832,7 +779,6 @@ KMP_LABEL(invoke_3): // d: %sil // // return: %al - .text PROC __kmp_xchg_fixed8 @@ -846,7 +792,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed16 // // kmp_int16 @@ -856,7 +801,6 @@ KMP_LABEL(invoke_3): // p: %rdi // d: %si // return: %ax - .text PROC __kmp_xchg_fixed16 @@ -870,7 +814,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed32 // // kmp_int32 @@ -881,7 +824,6 @@ KMP_LABEL(invoke_3): // d: %esi // // return: %eax - .text PROC __kmp_xchg_fixed32 @@ -895,7 +837,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_fixed64 // // kmp_int64 @@ -905,7 +846,6 @@ KMP_LABEL(invoke_3): // p: %rdi // d: %rsi // return: %rax - .text PROC __kmp_xchg_fixed64 @@ -919,7 +859,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store8 // // kmp_int8 @@ -931,7 +870,6 @@ KMP_LABEL(invoke_3): // sv: %edx // // return: %eax - .text PROC __kmp_compare_and_store8 @@ -946,7 +884,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store16 // // kmp_int16 @@ -958,7 +895,6 @@ KMP_LABEL(invoke_3): // sv: %dx // // return: %eax - .text PROC __kmp_compare_and_store16 @@ -973,7 +909,6 @@ KMP_LABEL(invoke_3): 
//------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store32 // // kmp_int32 @@ -985,7 +920,6 @@ KMP_LABEL(invoke_3): // sv: %edx // // return: %eax - .text PROC __kmp_compare_and_store32 @@ -1000,7 +934,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store64 // // kmp_int32 @@ -1011,7 +944,6 @@ KMP_LABEL(invoke_3): // cv: %rsi // sv: %rdx // return: %eax - .text PROC __kmp_compare_and_store64 @@ -1025,7 +957,6 @@ KMP_LABEL(invoke_3): DEBUG_INFO __kmp_compare_and_store64 //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store_ret8 // // kmp_int8 @@ -1037,7 +968,6 @@ KMP_LABEL(invoke_3): // sv: %edx // // return: %eax - .text PROC __kmp_compare_and_store_ret8 @@ -1050,7 +980,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store_ret16 // // kmp_int16 @@ -1062,7 +991,6 @@ KMP_LABEL(invoke_3): // sv: %dx // // return: %eax - .text PROC __kmp_compare_and_store_ret16 @@ -1075,7 +1003,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store_ret32 // // kmp_int32 @@ -1087,7 +1014,6 @@ KMP_LABEL(invoke_3): // sv: %edx // // return: %eax - .text PROC __kmp_compare_and_store_ret32 @@ -1100,7 +1026,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_compare_and_store_ret64 // // kmp_int64 @@ -1111,7 +1036,6 @@ KMP_LABEL(invoke_3): // cv: %rsi // sv: %rdx // return: %eax - .text PROC __kmp_compare_and_store_ret64 @@ -1130,7 +1054,6 @@ KMP_LABEL(invoke_3): # if !KMP_ASM_INTRINS //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_real32 // // kmp_real32 @@ -1141,7 +1064,6 @@ KMP_LABEL(invoke_3): // data: %xmm0 (lower 4 bytes) // // return: %xmm0 (lower 4 bytes) - .text PROC __kmp_xchg_real32 @@ -1158,7 +1080,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_xchg_real64 // // kmp_real64 @@ -1168,8 +1089,6 @@ KMP_LABEL(invoke_3): // addr: %rdi // data: %xmm0 (lower 8 bytes) // return: %xmm0 (lower 8 bytes) -// - .text PROC __kmp_xchg_real64 @@ -1190,7 +1109,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_load_x87_fpu_control_word // // void @@ -1198,8 +1116,6 @@ KMP_LABEL(invoke_3): // // parameters: // p: %rdi -// - .text PROC __kmp_load_x87_fpu_control_word @@ -1210,7 +1126,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_store_x87_fpu_control_word // // void @@ -1218,8 +1133,6 @@ KMP_LABEL(invoke_3): // // parameters: // p: %rdi -// - .text PROC __kmp_store_x87_fpu_control_word @@ -1230,14 +1143,10 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // FUNCTION __kmp_clear_x87_fpu_status_word // // void // __kmp_clear_x87_fpu_status_word(); -// -// - .text PROC __kmp_clear_x87_fpu_status_word @@ -1256,7 +1165,6 @@ KMP_LABEL(invoke_3): //------------------------------------------------------------------------ -// // typedef void (*microtask_t)( int *gtid, int *tid, ... 
); // // int @@ -1267,8 +1175,7 @@ KMP_LABEL(invoke_3): // return 1; // } // -// note: -// at call to pkfn must have %rsp 128-byte aligned for compiler +// note: at call to pkfn must have %rsp 128-byte aligned for compiler // // parameters: // %rdi: pkfn @@ -1291,8 +1198,6 @@ KMP_LABEL(invoke_3): // %rbx: used to hold pkfn address, and zero constant, callee-save // // return: %eax (always 1/TRUE) -// - __gtid = -16 __tid = -24 @@ -1442,13 +1347,10 @@ KMP_LABEL(kmp_1_exit): // -- End __kmp_hardware_timestamp //------------------------------------------------------------------------ -// // FUNCTION __kmp_bsr32 // // int // __kmp_bsr32( int ); -// - .text PROC __kmp_bsr32 diff --git a/openmp/runtime/src/z_Linux_util.cpp b/openmp/runtime/src/z_Linux_util.cpp index 857b147..2f83c02 100644 --- a/openmp/runtime/src/z_Linux_util.cpp +++ b/openmp/runtime/src/z_Linux_util.cpp @@ -14,59 +14,56 @@ #include "kmp.h" -#include "kmp_wrapper_getpid.h" -#include "kmp_itt.h" -#include "kmp_str.h" +#include "kmp_affinity.h" #include "kmp_i18n.h" -#include "kmp_lock.h" #include "kmp_io.h" +#include "kmp_itt.h" +#include "kmp_lock.h" #include "kmp_stats.h" +#include "kmp_str.h" #include "kmp_wait_release.h" -#include "kmp_affinity.h" +#include "kmp_wrapper_getpid.h" #if !KMP_OS_FREEBSD && !KMP_OS_NETBSD -# include +#include #endif -#include -#include // HUGE_VAL. -#include -#include +#include // HUGE_VAL. #include #include +#include +#include +#include #if KMP_OS_LINUX && !KMP_OS_CNK -# include -# if KMP_USE_FUTEX -// We should really include , but that causes compatibility problems on different -// Linux* OS distributions that either require that you include (or break when you try to include) -// . -// Since all we need is the two macros below (which are part of the kernel ABI, so can't change) -// we just define the constants here and don't include -# ifndef FUTEX_WAIT -# define FUTEX_WAIT 0 -# endif -# ifndef FUTEX_WAKE -# define FUTEX_WAKE 1 -# endif -# endif +#include +#if KMP_USE_FUTEX +// We should really include , but that causes compatibility problems on +// different Linux* OS distributions that either require that you include (or +// break when you try to include) . Since all we need is the two +// macros below (which are part of the kernel ABI, so can't change) we just +// define the constants here and don't include +#ifndef FUTEX_WAIT +#define FUTEX_WAIT 0 +#endif +#ifndef FUTEX_WAKE +#define FUTEX_WAKE 1 +#endif +#endif #elif KMP_OS_DARWIN -# include -# include +#include +#include #elif KMP_OS_FREEBSD -# include +#include #endif -#include #include +#include #include #include "tsan_annotations.h" -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - struct kmp_sys_timer { - struct timespec start; + struct timespec start; }; // Convert timespec to nanoseconds. 
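For context on the FUTEX_WAIT/FUTEX_WAKE constants defined above (an illustrative sketch, not code from the patch): the runtime only ever issues raw futex syscalls, so those two opcode values are all it needs from the kernel ABI. Roughly:

    #include <errno.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // Minimal futex wrappers. FUTEX_WAIT blocks while *addr still equals
    // 'expected' (re-checked atomically by the kernel); FUTEX_WAKE wakes up
    // to 'count' threads blocked on addr.
    static long my_futex_wait(int *addr, int expected) {
      return syscall(__NR_futex, addr, 0 /* FUTEX_WAIT */, expected, NULL, NULL, 0);
    }
    static long my_futex_wake(int *addr, int count) {
      return syscall(__NR_futex, addr, 1 /* FUTEX_WAKE */, count, NULL, NULL, 0);
    }

This is also the shape of the capability probe in __kmp_futex_determine_capable() further down: a FUTEX_WAKE on a dummy location either succeeds or fails with something other than ENOSYS when futexes are available.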
@@ -75,2444 +72,2229 @@ struct kmp_sys_timer { static struct kmp_sys_timer __kmp_sys_timer_data; #if KMP_HANDLE_SIGNALS - typedef void (* sig_func_t )( int ); - STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[ NSIG ]; - static sigset_t __kmp_sigset; +typedef void (*sig_func_t)(int); +STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG]; +static sigset_t __kmp_sigset; #endif -static int __kmp_init_runtime = FALSE; +static int __kmp_init_runtime = FALSE; static int __kmp_fork_count = 0; -static pthread_condattr_t __kmp_suspend_cond_attr; +static pthread_condattr_t __kmp_suspend_cond_attr; static pthread_mutexattr_t __kmp_suspend_mutex_attr; -static kmp_cond_align_t __kmp_wait_cv; -static kmp_mutex_align_t __kmp_wait_mx; +static kmp_cond_align_t __kmp_wait_cv; +static kmp_mutex_align_t __kmp_wait_mx; kmp_uint64 __kmp_ticks_per_msec = 1000000; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #ifdef DEBUG_SUSPEND -static void -__kmp_print_cond( char *buffer, kmp_cond_align_t *cond ) -{ - KMP_SNPRINTF( buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))", - cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock, - cond->c_cond.__c_waiting ); +static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) { + KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))", + cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock, + cond->c_cond.__c_waiting); } #endif -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +#if (KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) -#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED) +/* Affinity support */ -/* - * Affinity support - */ +void __kmp_affinity_bind_thread(int which) { + KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), + "Illegal set affinity operation when not capable"); -void -__kmp_affinity_bind_thread( int which ) -{ - KMP_ASSERT2(KMP_AFFINITY_CAPABLE(), - "Illegal set affinity operation when not capable"); - - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(which, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(which, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); } -/* - * Determine if we can access affinity functionality on this version of +/* Determine if we can access affinity functionality on this version of * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set - * __kmp_affin_mask_size to the appropriate value (0 means not capable). - */ -void -__kmp_affinity_determine_capable(const char *env_var) -{ - // - // Check and see if the OS supports thread affinity. + * __kmp_affin_mask_size to the appropriate value (0 means not capable). */ +void __kmp_affinity_determine_capable(const char *env_var) { +// Check and see if the OS supports thread affinity. + +#define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024) + + int gCode; + int sCode; + unsigned char *buf; + buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT); + + // If Linux* OS: + // If the syscall fails or returns a suggestion for the size, + // then we don't have to search for an appropriate size. 
+ gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf); + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "initial getaffinity call returned %d errno = %d\n", + gCode, errno)); + + // if ((gCode < 0) && (errno == ENOSYS)) + if (gCode < 0) { + // System call not supported + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), + err_code, __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + if (gCode > 0) { // Linux* OS only + // The optimal situation: the OS returns the size of the buffer it expects. // - -# define KMP_CPU_SET_SIZE_LIMIT (1024*1024) - - int gCode; - int sCode; - unsigned char *buf; - buf = ( unsigned char * ) KMP_INTERNAL_MALLOC( KMP_CPU_SET_SIZE_LIMIT ); - - // If Linux* OS: - // If the syscall fails or returns a suggestion for the size, - // then we don't have to search for an appropriate size. - gCode = syscall( __NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "initial getaffinity call returned %d errno = %d\n", - gCode, errno)); - - //if ((gCode < 0) && (errno == ENOSYS)) - if (gCode < 0) { - // - // System call not supported - // - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( GetAffSysCallNotSupported, env_var ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } + // A verification of correct behavior is that Isetaffinity on a NULL + // buffer with the same size fails with errno set to EFAULT. + sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL); + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "setaffinity for mask size %d returned %d errno = %d\n", + gCode, sCode, errno)); + if (sCode < 0) { + if (errno == ENOSYS) { + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var), + err_code, __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } } KMP_AFFINITY_DISABLE(); KMP_INTERNAL_FREE(buf); + } + if (errno == EFAULT) { + KMP_AFFINITY_ENABLE(gCode); + KA_TRACE(10, ("__kmp_affinity_determine_capable: " + "affinity supported (mask size %d)\n", + (int)__kmp_affin_mask_size)); + KMP_INTERNAL_FREE(buf); return; + } } - if (gCode > 0) { // Linux* OS only - // The optimal situation: the OS returns the size of the buffer - // it expects. - // - // A verification of correct behavior is that Isetaffinity on a NULL - // buffer with the same size fails with errno set to EFAULT. 
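To spell out the verification trick used here (an illustrative sketch, not the patch's code): when the initial sched_getaffinity probe returns a positive value, that value is the kernel's preferred mask size, and it is validated by calling sched_setaffinity with a NULL mask of exactly that size. With a correct size the call can only fail because of the bad pointer, i.e. with errno == EFAULT:

    #include <errno.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    // 'suggested' is the positive return value of the initial
    // sched_getaffinity probe. A NULL mask can never succeed, so EFAULT
    // means the size itself was accepted; any other errno means it was not.
    static bool mask_size_is_valid(long suggested) {
      long rc = syscall(__NR_sched_setaffinity, 0, suggested, NULL);
      return rc < 0 && errno == EFAULT;
    }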
- sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( SetAffSysCallNotSupported, env_var ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "affinity supported (mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } - } - } + } - // - // Call the getaffinity system call repeatedly with increasing set sizes - // until we succeed, or reach an upper bound on the search. - // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "searching for proper set size\n")); - int size; - for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) { - gCode = syscall( __NR_sched_getaffinity, 0, size, buf ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "getaffinity for mask size %d returned %d errno = %d\n", size, - gCode, errno)); - - if (gCode < 0) { - if ( errno == ENOSYS ) - { - // - // We shouldn't get here - // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", - size)); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( GetAffSysCallNotSupported, env_var ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - continue; - } + // Call the getaffinity system call repeatedly with increasing set sizes + // until we succeed, or reach an upper bound on the search. 
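The fallback search described in this comment is a simple doubling probe, condensed below for readability (illustrative only; it reuses the headers and mask_size_is_valid() from the previous sketch, and 'limit' and 'buf' stand in for KMP_CPU_SET_SIZE_LIMIT and the buffer allocated above):

    // Try mask sizes 1, 2, 4, ... bytes until the kernel accepts one and the
    // accepted size passes the EFAULT cross-check, or the limit is reached.
    static long find_affinity_mask_size(unsigned char *buf, long limit) {
      for (long size = 1; size <= limit; size *= 2) {
        long rc = syscall(__NR_sched_getaffinity, 0, size, buf);
        if (rc < 0)
          continue; // this size was rejected; try the next power of two
        if (mask_size_is_valid(rc))
          return rc; // becomes __kmp_affin_mask_size
      }
      return 0; // no usable size found: affinity support gets disabled
    }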
+ KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "searching for proper set size\n")); + int size; + for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) { + gCode = syscall(__NR_sched_getaffinity, 0, size, buf); + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "getaffinity for mask size %d returned %d errno = %d\n", + size, gCode, errno)); - sCode = syscall( __NR_sched_setaffinity, 0, gCode, NULL ); - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "setaffinity for mask size %d returned %d errno = %d\n", - gCode, sCode, errno)); - if (sCode < 0) { - if (errno == ENOSYS) { // Linux* OS only - // - // We shouldn't get here - // - KA_TRACE(30, ( "__kmp_affinity_determine_capable: " - "inconsistent OS call behavior: errno == ENOSYS for mask size %d\n", - size)); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - int error = errno; - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( SetAffSysCallNotSupported, env_var ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } - KMP_AFFINITY_DISABLE(); - KMP_INTERNAL_FREE(buf); - return; - } - if (errno == EFAULT) { - KMP_AFFINITY_ENABLE(gCode); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "affinity supported (mask size %d)\n", - (int)__kmp_affin_mask_size)); - KMP_INTERNAL_FREE(buf); - return; - } + if (gCode < 0) { + if (errno == ENOSYS) { + // We shouldn't get here + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "inconsistent OS call behavior: errno == ENOSYS for mask " + "size %d\n", + size)); + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var), + err_code, __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + continue; } - //int error = errno; // save uncaught error code - KMP_INTERNAL_FREE(buf); - // errno = error; // restore uncaught error code, will be printed at the next KMP_WARNING below - // - // Affinity is not supported - // - KMP_AFFINITY_DISABLE(); - KA_TRACE(10, ( "__kmp_affinity_determine_capable: " - "cannot determine mask size - affinity not supported\n")); - if (__kmp_affinity_verbose || (__kmp_affinity_warnings - && (__kmp_affinity_type != affinity_none) - && (__kmp_affinity_type != affinity_default) - && (__kmp_affinity_type != affinity_disabled))) { - KMP_WARNING( AffCantGetMaskSize, env_var ); + sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL); + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "setaffinity for mask size %d returned %d errno = %d\n", + gCode, sCode, errno)); + if (sCode < 0) { + if (errno == ENOSYS) { // Linux* OS only + // We shouldn't get here + KA_TRACE(30, ("__kmp_affinity_determine_capable: " + "inconsistent OS call behavior: errno == ENOSYS for mask " + "size %d\n", + size)); + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && + (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_disabled))) { + int error = errno; + 
kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var), + err_code, __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + } + KMP_AFFINITY_DISABLE(); + KMP_INTERNAL_FREE(buf); + return; + } + if (errno == EFAULT) { + KMP_AFFINITY_ENABLE(gCode); + KA_TRACE(10, ("__kmp_affinity_determine_capable: " + "affinity supported (mask size %d)\n", + (int)__kmp_affin_mask_size)); + KMP_INTERNAL_FREE(buf); + return; + } } + } + // save uncaught error code + // int error = errno; + KMP_INTERNAL_FREE(buf); + // restore uncaught error code, will be printed at the next KMP_WARNING below + // errno = error; + + // Affinity is not supported + KMP_AFFINITY_DISABLE(); + KA_TRACE(10, ("__kmp_affinity_determine_capable: " + "cannot determine mask size - affinity not supported\n")); + if (__kmp_affinity_verbose || + (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) && + (__kmp_affinity_type != affinity_default) && + (__kmp_affinity_type != affinity_disabled))) { + KMP_WARNING(AffCantGetMaskSize, env_var); + } } #endif // KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #if KMP_USE_FUTEX -int -__kmp_futex_determine_capable() -{ - int loc = 0; - int rc = syscall( __NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0 ); - int retval = ( rc == 0 ) || ( errno != ENOSYS ); +int __kmp_futex_determine_capable() { + int loc = 0; + int rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0); + int retval = (rc == 0) || (errno != ENOSYS); - KA_TRACE(10, ( "__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, - errno ) ); - KA_TRACE(10, ( "__kmp_futex_determine_capable: futex syscall%s supported\n", - retval ? "" : " not" ) ); + KA_TRACE(10, + ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno)); + KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n", + retval ? "" : " not")); - return retval; + return retval; } #endif // KMP_USE_FUTEX -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS) +/* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to + use compare_and_store for these routines */ -#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) -/* - * Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - * use compare_and_store for these routines - */ +kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int8 old_value, new_value; -kmp_int8 -__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; + old_value = TCR_1(*p); + new_value = old_value | d; - old_value = TCR_1( *p ); + while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); new_value = old_value | d; - - while ( ! 
KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value | d; - } - return old_value; + } + return old_value; } -kmp_int8 -__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; +kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int8 old_value, new_value; - old_value = TCR_1( *p ); - new_value = old_value & d; + old_value = TCR_1(*p); + new_value = old_value & d; - while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value & d; - } - return old_value; + while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); + new_value = old_value & d; + } + return old_value; } -kmp_int32 -__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; +kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 d) { + kmp_int32 old_value, new_value; - old_value = TCR_4( *p ); - new_value = old_value | d; + old_value = TCR_4(*p); + new_value = old_value | d; - while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value | d; - } - return old_value; + while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_4(*p); + new_value = old_value | d; + } + return old_value; } -kmp_int32 -__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; +kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 d) { + kmp_int32 old_value, new_value; - old_value = TCR_4( *p ); - new_value = old_value & d; + old_value = TCR_4(*p); + new_value = old_value & d; - while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value & d; - } - return old_value; + while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_4(*p); + new_value = old_value & d; + } + return old_value; } -# if KMP_ARCH_X86 -kmp_int8 -__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; +#if KMP_ARCH_X86 +kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int8 old_value, new_value; - old_value = TCR_1( *p ); - new_value = old_value + d; + old_value = TCR_1(*p); + new_value = old_value + d; - while ( ! KMP_COMPARE_AND_STORE_REL8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value + d; - } - return old_value; + while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); + new_value = old_value + d; + } + return old_value; } -kmp_int64 -__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; +kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; - old_value = TCR_8( *p ); - new_value = old_value + d; + old_value = TCR_8(*p); + new_value = old_value + d; - while ( ! 
KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value + d; - } - return old_value; + while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); + new_value = old_value + d; + } + return old_value; } -# endif /* KMP_ARCH_X86 */ +#endif /* KMP_ARCH_X86 */ -kmp_int64 -__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; +kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; - old_value = TCR_8( *p ); + old_value = TCR_8(*p); + new_value = old_value | d; + while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); new_value = old_value | d; - while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value | d; - } - return old_value; + } + return old_value; } -kmp_int64 -__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; +kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; - old_value = TCR_8( *p ); + old_value = TCR_8(*p); + new_value = old_value & d; + while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); new_value = old_value & d; - while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value & d; - } - return old_value; + } + return old_value; } #endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */ -void -__kmp_terminate_thread( int gtid ) -{ - int status; - kmp_info_t *th = __kmp_threads[ gtid ]; - - if ( !th ) return; - - #ifdef KMP_CANCEL_THREADS - KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); - status = pthread_cancel( th->th.th_info.ds.ds_thread ); - if ( status != 0 && status != ESRCH ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantTerminateWorkerThread ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - #endif - __kmp_yield( TRUE ); -} // +void __kmp_terminate_thread(int gtid) { + int status; + kmp_info_t *th = __kmp_threads[gtid]; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ + if (!th) + return; -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +#ifdef KMP_CANCEL_THREADS + KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid)); + status = pthread_cancel(th->th.th_info.ds.ds_thread); + if (status != 0 && status != ESRCH) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status), + __kmp_msg_null); + }; // if +#endif + __kmp_yield(TRUE); +} // -/* - * Set thread stack info according to values returned by - * pthread_getattr_np(). - * If values are unreasonable, assume call failed and use - * incremental stack refinement method instead. - * Returns TRUE if the stack parameters could be determined exactly, - * FALSE if incremental refinement is necessary. - */ -static kmp_int32 -__kmp_set_stack_info( int gtid, kmp_info_t *th ) -{ - int stack_data; +/* Set thread stack info according to values returned by pthread_getattr_np(). + If values are unreasonable, assume call failed and use incremental stack + refinement method instead. 
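As an aside illustrating the exact-determination path this comment describes (a hypothetical standalone helper, not the runtime's code):

    #ifndef _GNU_SOURCE
    #define _GNU_SOURCE // pthread_getattr_np() is a GNU extension
    #endif
    #include <pthread.h>
    #include <stddef.h>

    // Query the calling thread's stack extent; FreeBSD/NetBSD use
    // pthread_attr_get_np() instead, as the code below shows. Returns 1 on
    // success, 0 if the caller should fall back to incremental refinement.
    static int query_own_stack(void **stack_top, size_t *stack_size) {
      pthread_attr_t attr;
      void *low = NULL;
      size_t sz = 0;
      if (pthread_getattr_np(pthread_self(), &attr) != 0)
        return 0;
      pthread_attr_getstack(&attr, &low, &sz); // low address and size
      pthread_attr_destroy(&attr);
      if (low == NULL || sz == 0)
        return 0;
      *stack_top = (char *)low + sz; // the runtime stores the top as ds_stackbase
      *stack_size = sz;
      return 1;
    }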
Returns TRUE if the stack parameters could be + determined exactly, FALSE if incremental refinement is necessary. */ +static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) { + int stack_data; #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - /* Linux* OS only -- no pthread_getattr_np support on OS X* */ - pthread_attr_t attr; - int status; - size_t size = 0; - void * addr = 0; - - /* Always do incremental stack refinement for ubermaster threads since the initial - thread stack range can be reduced by sibling thread creation so pthread_attr_getstack - may cause thread gtid aliasing */ - if ( ! KMP_UBER_GTID(gtid) ) { - - /* Fetch the real thread attributes */ - status = pthread_attr_init( &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_init", status ); + /* Linux* OS only -- no pthread_getattr_np support on OS X* */ + pthread_attr_t attr; + int status; + size_t size = 0; + void *addr = 0; + + /* Always do incremental stack refinement for ubermaster threads since the + initial thread stack range can be reduced by sibling thread creation so + pthread_attr_getstack may cause thread gtid aliasing */ + if (!KMP_UBER_GTID(gtid)) { + + /* Fetch the real thread attributes */ + status = pthread_attr_init(&attr); + KMP_CHECK_SYSFAIL("pthread_attr_init", status); #if KMP_OS_FREEBSD || KMP_OS_NETBSD - status = pthread_attr_get_np( pthread_self(), &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_get_np", status ); + status = pthread_attr_get_np(pthread_self(), &attr); + KMP_CHECK_SYSFAIL("pthread_attr_get_np", status); #else - status = pthread_getattr_np( pthread_self(), &attr ); - KMP_CHECK_SYSFAIL( "pthread_getattr_np", status ); + status = pthread_getattr_np(pthread_self(), &attr); + KMP_CHECK_SYSFAIL("pthread_getattr_np", status); #endif - status = pthread_attr_getstack( &attr, &addr, &size ); - KMP_CHECK_SYSFAIL( "pthread_attr_getstack", status ); - KA_TRACE( 60, ( "__kmp_set_stack_info: T#%d pthread_attr_getstack returned size: %lu, " - "low addr: %p\n", - gtid, size, addr )); - - status = pthread_attr_destroy( &attr ); - KMP_CHECK_SYSFAIL( "pthread_attr_destroy", status ); - } + status = pthread_attr_getstack(&attr, &addr, &size); + KMP_CHECK_SYSFAIL("pthread_attr_getstack", status); + KA_TRACE(60, + ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:" + " %lu, low addr: %p\n", + gtid, size, addr)); + status = pthread_attr_destroy(&attr); + KMP_CHECK_SYSFAIL("pthread_attr_destroy", status); + } - if ( size != 0 && addr != 0 ) { /* was stack parameter determination successful? */ - /* Store the correct base and size */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size)); - TCW_PTR(th->th.th_info.ds.ds_stacksize, size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - return TRUE; - } + if (size != 0 && addr != 0) { // was stack parameter determination successful? 
+ /* Store the correct base and size */ + TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size)); + TCW_PTR(th->th.th_info.ds.ds_stacksize, size); + TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); + return TRUE; + } #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */ - /* Use incremental refinement starting from initial conservative estimate */ - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_PTR(th -> th.th_info.ds.ds_stackbase, &stack_data); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - return FALSE; + /* Use incremental refinement starting from initial conservative estimate */ + TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); + TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); + TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); + return FALSE; } -static void* -__kmp_launch_worker( void *thr ) -{ - int status, old_type, old_state; +static void *__kmp_launch_worker(void *thr) { + int status, old_type, old_state; #ifdef KMP_BLOCK_SIGNALS - sigset_t new_set, old_set; + sigset_t new_set, old_set; #endif /* KMP_BLOCK_SIGNALS */ - void *exit_val; + void *exit_val; #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - void * volatile padding = 0; + void *volatile padding = 0; #endif - int gtid; + int gtid; - gtid = ((kmp_info_t*)thr) -> th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific( gtid ); + gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid; + __kmp_gtid_set_specific(gtid); #ifdef KMP_TDATA_GTID - __kmp_gtid = gtid; + __kmp_gtid = gtid; #endif #if KMP_STATS_ENABLED - // set __thread local index to point to thread-specific stats - __kmp_stats_thread_ptr = ((kmp_info_t*)thr)->th.th_stats; - KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); - KMP_SET_THREAD_STATE(IDLE); - KMP_INIT_PARTITIONED_TIMERS(OMP_idle); + // set __thread local index to point to thread-specific stats + __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats; + KMP_START_EXPLICIT_TIMER(OMP_worker_thread_life); + KMP_SET_THREAD_STATE(IDLE); + KMP_INIT_PARTITIONED_TIMERS(OMP_idle); #endif #if USE_ITT_BUILD - __kmp_itt_thread_name( gtid ); + __kmp_itt_thread_name(gtid); #endif /* USE_ITT_BUILD */ #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_set_init_mask( gtid, FALSE ); + __kmp_affinity_set_init_mask(gtid, FALSE); #endif #ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); - KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); - /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? */ - status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); + status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type); + KMP_CHECK_SYSFAIL("pthread_setcanceltype", status); + // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? + status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state); + KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Set the FP control regs to be a copy of - // the parallel initialization thread's. - // - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_load_mxcsr( &__kmp_init_mxcsr ); + // Set FP control regs to be a copy of the parallel initialization thread's. 
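For reference, the "FP control regs" copied here are the x87 control word and the SSE MXCSR register, captured once at library initialization and replayed into each new worker so that rounding and exception masks are uniform across threads. A rough x86-only sketch using intrinsics and inline asm (the runtime's own helpers are the assembly routines in z_Linux_asm.s shown earlier; the global names below are stand-ins, not the real ones):

    #include <xmmintrin.h> // _mm_getcsr / _mm_setcsr

    // Stand-ins for __kmp_init_x87_fpu_control_word / __kmp_init_mxcsr.
    static unsigned short g_init_x87_cw;
    static unsigned int g_init_mxcsr;

    static void capture_fp_state(void) {
      __asm__ __volatile__("fnstcw %0" : "=m"(g_init_x87_cw)); // x87 control word
      g_init_mxcsr = _mm_getcsr();                             // SSE control/status
    }

    static void copy_fp_state_to_this_thread(void) {
      __asm__ __volatile__("fnclex");                          // clear pending x87 exceptions
      __asm__ __volatile__("fldcw %0" : : "m"(g_init_x87_cw)); // restore control word
      _mm_setcsr(g_init_mxcsr);                                // restore MXCSR
    }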
+ __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); + __kmp_load_mxcsr(&__kmp_init_mxcsr); #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ #ifdef KMP_BLOCK_SIGNALS - status = sigfillset( & new_set ); - KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); - status = pthread_sigmask( SIG_BLOCK, & new_set, & old_set ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); + status = sigfillset(&new_set); + KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status); + status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set); + KMP_CHECK_SYSFAIL("pthread_sigmask", status); #endif /* KMP_BLOCK_SIGNALS */ #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - if ( __kmp_stkoffset > 0 && gtid > 0 ) { - padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); - } + if (__kmp_stkoffset > 0 && gtid > 0) { + padding = KMP_ALLOCA(gtid * __kmp_stkoffset); + } #endif - KMP_MB(); - __kmp_set_stack_info( gtid, (kmp_info_t*)thr ); + KMP_MB(); + __kmp_set_stack_info(gtid, (kmp_info_t *)thr); - __kmp_check_stack_overlap( (kmp_info_t*)thr ); + __kmp_check_stack_overlap((kmp_info_t *)thr); - exit_val = __kmp_launch_thread( (kmp_info_t *) thr ); + exit_val = __kmp_launch_thread((kmp_info_t *)thr); #ifdef KMP_BLOCK_SIGNALS - status = pthread_sigmask( SIG_SETMASK, & old_set, NULL ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); + status = pthread_sigmask(SIG_SETMASK, &old_set, NULL); + KMP_CHECK_SYSFAIL("pthread_sigmask", status); #endif /* KMP_BLOCK_SIGNALS */ - return exit_val; + return exit_val; } #if KMP_USE_MONITOR /* The monitor thread controls all of the threads in the complex */ -static void* -__kmp_launch_monitor( void *thr ) -{ - int status, old_type, old_state; +static void *__kmp_launch_monitor(void *thr) { + int status, old_type, old_state; #ifdef KMP_BLOCK_SIGNALS - sigset_t new_set; + sigset_t new_set; #endif /* KMP_BLOCK_SIGNALS */ - struct timespec interval; - int yield_count; - int yield_cycles = 0; + struct timespec interval; + int yield_count; + int yield_cycles = 0; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 10, ("__kmp_launch_monitor: #1 launched\n" ) ); + KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n")); - /* register us as the monitor thread */ - __kmp_gtid_set_specific( KMP_GTID_MONITOR ); + /* register us as the monitor thread */ + __kmp_gtid_set_specific(KMP_GTID_MONITOR); #ifdef KMP_TDATA_GTID - __kmp_gtid = KMP_GTID_MONITOR; + __kmp_gtid = KMP_GTID_MONITOR; #endif - KMP_MB(); + KMP_MB(); #if USE_ITT_BUILD - __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. + // Instruct Intel(R) Threading Tools to ignore monitor thread. + __kmp_itt_thread_ignore(); #endif /* USE_ITT_BUILD */ - __kmp_set_stack_info( ((kmp_info_t*)thr)->th.th_info.ds.ds_gtid, (kmp_info_t*)thr ); + __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid, + (kmp_info_t *)thr); - __kmp_check_stack_overlap( (kmp_info_t*)thr ); + __kmp_check_stack_overlap((kmp_info_t *)thr); #ifdef KMP_CANCEL_THREADS - status = pthread_setcanceltype( PTHREAD_CANCEL_ASYNCHRONOUS, & old_type ); - KMP_CHECK_SYSFAIL( "pthread_setcanceltype", status ); - /* josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? 
*/ - status = pthread_setcancelstate( PTHREAD_CANCEL_ENABLE, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); + status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type); + KMP_CHECK_SYSFAIL("pthread_setcanceltype", status); + // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads? + status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state); + KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); #endif - #if KMP_REAL_TIME_FIX - // This is a potential fix which allows application with real-time scheduling policy work. - // However, decision about the fix is not made yet, so it is disabled by default. - { // Are program started with real-time scheduling policy? - int sched = sched_getscheduler( 0 ); - if ( sched == SCHED_FIFO || sched == SCHED_RR ) { - // Yes, we are a part of real-time application. Try to increase the priority of the - // monitor. - struct sched_param param; - int max_priority = sched_get_priority_max( sched ); - int rc; - KMP_WARNING( RealTimeSchedNotSupported ); - sched_getparam( 0, & param ); - if ( param.sched_priority < max_priority ) { - param.sched_priority += 1; - rc = sched_setscheduler( 0, sched, & param ); - if ( rc != 0 ) { - int error = errno; - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantChangeMonitorPriority ), - err_code, - KMP_MSG( MonitorWillStarve ), - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - }; // if - } else { - // We cannot abort here, because number of CPUs may be enough for all the threads, - // including the monitor thread, so application could potentially work... - __kmp_msg( - kmp_ms_warning, - KMP_MSG( RunningAtMaxPriority ), - KMP_MSG( MonitorWillStarve ), - KMP_HNT( RunningAtMaxPriority ), - __kmp_msg_null - ); - }; // if +#if KMP_REAL_TIME_FIX + // This is a potential fix which allows application with real-time scheduling + // policy work. However, decision about the fix is not made yet, so it is + // disabled by default. + { // Are program started with real-time scheduling policy? + int sched = sched_getscheduler(0); + if (sched == SCHED_FIFO || sched == SCHED_RR) { + // Yes, we are a part of real-time application. Try to increase the + // priority of the monitor. + struct sched_param param; + int max_priority = sched_get_priority_max(sched); + int rc; + KMP_WARNING(RealTimeSchedNotSupported); + sched_getparam(0, ¶m); + if (param.sched_priority < max_priority) { + param.sched_priority += 1; + rc = sched_setscheduler(0, sched, ¶m); + if (rc != 0) { + int error = errno; + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority), + err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } }; // if - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); // AC: free thread that waits for monitor started - } - #endif // KMP_REAL_TIME_FIX + } else { + // We cannot abort here, because number of CPUs may be enough for all + // the threads, including the monitor thread, so application could + // potentially work... 
+ __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority), + KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority), + __kmp_msg_null); + }; // if + }; // if + // AC: free thread that waits for monitor started + TCW_4(__kmp_global.g.g_time.dt.t_value, 0); + } +#endif // KMP_REAL_TIME_FIX - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - if ( __kmp_monitor_wakeups == 1 ) { - interval.tv_sec = 1; - interval.tv_nsec = 0; - } else { - interval.tv_sec = 0; - interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups); - } + if (__kmp_monitor_wakeups == 1) { + interval.tv_sec = 1; + interval.tv_nsec = 0; + } else { + interval.tv_sec = 0; + interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups); + } - KA_TRACE( 10, ("__kmp_launch_monitor: #2 monitor\n" ) ); + KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n")); - if (__kmp_yield_cycle) { - __kmp_yielding_on = 0; /* Start out with yielding shut off */ - yield_count = __kmp_yield_off_count; - } else { - __kmp_yielding_on = 1; /* Yielding is on permanently */ - } + if (__kmp_yield_cycle) { + __kmp_yielding_on = 0; /* Start out with yielding shut off */ + yield_count = __kmp_yield_off_count; + } else { + __kmp_yielding_on = 1; /* Yielding is on permanently */ + } - while( ! TCR_4( __kmp_global.g.g_done ) ) { - struct timespec now; - struct timeval tval; + while (!TCR_4(__kmp_global.g.g_done)) { + struct timespec now; + struct timeval tval; - /* This thread monitors the state of the system */ + /* This thread monitors the state of the system */ - KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); + KA_TRACE(15, ("__kmp_launch_monitor: update\n")); - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &now ); + status = gettimeofday(&tval, NULL); + KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); + TIMEVAL_TO_TIMESPEC(&tval, &now); - now.tv_sec += interval.tv_sec; - now.tv_nsec += interval.tv_nsec; + now.tv_sec += interval.tv_sec; + now.tv_nsec += interval.tv_nsec; - if (now.tv_nsec >= KMP_NSEC_PER_SEC) { - now.tv_sec += 1; - now.tv_nsec -= KMP_NSEC_PER_SEC; - } + if (now.tv_nsec >= KMP_NSEC_PER_SEC) { + now.tv_sec += 1; + now.tv_nsec -= KMP_NSEC_PER_SEC; + } - status = pthread_mutex_lock( & __kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); - // AC: the monitor should not fall asleep if g_done has been set - if ( !TCR_4(__kmp_global.g.g_done) ) { // check once more under mutex - status = pthread_cond_timedwait( &__kmp_wait_cv.c_cond, &__kmp_wait_mx.m_mutex, &now ); - if ( status != 0 ) { - if ( status != ETIMEDOUT && status != EINTR ) { - KMP_SYSFAIL( "pthread_cond_timedwait", status ); - }; - }; + status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); + // AC: the monitor should not fall asleep if g_done has been set + if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex + status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond, + &__kmp_wait_mx.m_mutex, &now); + if (status != 0) { + if (status != ETIMEDOUT && status != EINTR) { + KMP_SYSFAIL("pthread_cond_timedwait", status); }; - status = pthread_mutex_unlock( & __kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - - if (__kmp_yield_cycle) { - yield_cycles++; - if ( (yield_cycles % yield_count) == 0 ) { - if (__kmp_yielding_on) { - __kmp_yielding_on = 0; /* Turn it off now */ - yield_count = __kmp_yield_off_count; - } else { 
- __kmp_yielding_on = 1; /* Turn it on now */ - yield_count = __kmp_yield_on_count; - } - yield_cycles = 0; - } + }; + }; + status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + + if (__kmp_yield_cycle) { + yield_cycles++; + if ((yield_cycles % yield_count) == 0) { + if (__kmp_yielding_on) { + __kmp_yielding_on = 0; /* Turn it off now */ + yield_count = __kmp_yield_off_count; } else { - __kmp_yielding_on = 1; + __kmp_yielding_on = 1; /* Turn it on now */ + yield_count = __kmp_yield_on_count; } + yield_cycles = 0; + } + } else { + __kmp_yielding_on = 1; + } - TCW_4( __kmp_global.g.g_time.dt.t_value, - TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); + TCW_4(__kmp_global.g.g_time.dt.t_value, + TCR_4(__kmp_global.g.g_time.dt.t_value) + 1); - KMP_MB(); /* Flush all pending memory write invalidates. */ - } + KMP_MB(); /* Flush all pending memory write invalidates. */ + } - KA_TRACE( 10, ("__kmp_launch_monitor: #3 cleanup\n" ) ); + KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n")); #ifdef KMP_BLOCK_SIGNALS - status = sigfillset( & new_set ); - KMP_CHECK_SYSFAIL_ERRNO( "sigfillset", status ); - status = pthread_sigmask( SIG_UNBLOCK, & new_set, NULL ); - KMP_CHECK_SYSFAIL( "pthread_sigmask", status ); + status = sigfillset(&new_set); + KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status); + status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL); + KMP_CHECK_SYSFAIL("pthread_sigmask", status); #endif /* KMP_BLOCK_SIGNALS */ - KA_TRACE( 10, ("__kmp_launch_monitor: #4 finished\n" ) ); - - if( __kmp_global.g.g_abort != 0 ) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ + KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n")); - int gtid; + if (__kmp_global.g.g_abort != 0) { + /* now we need to terminate the worker threads */ + /* the value of t_abort is the signal we caught */ - KA_TRACE( 10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", __kmp_global.g.g_abort ) ); + int gtid; - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. */ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread( gtid ); + KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n", + __kmp_global.g.g_abort)); - __kmp_cleanup(); + /* terminate the OpenMP worker threads */ + /* TODO this is not valid for sibling threads!! + * the uber master might not be 0 anymore.. 
*/ + for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) + __kmp_terminate_thread(gtid); - KA_TRACE( 10, ("__kmp_launch_monitor: #6 raise sig=%d\n", __kmp_global.g.g_abort ) ); + __kmp_cleanup(); - if (__kmp_global.g.g_abort > 0) - raise( __kmp_global.g.g_abort ); + KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n", + __kmp_global.g.g_abort)); - } + if (__kmp_global.g.g_abort > 0) + raise(__kmp_global.g.g_abort); + } - KA_TRACE( 10, ("__kmp_launch_monitor: #7 exit\n" ) ); + KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n")); - return thr; + return thr; } #endif // KMP_USE_MONITOR -void -__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) -{ - pthread_t handle; - pthread_attr_t thread_attr; - int status; - +void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { + pthread_t handle; + pthread_attr_t thread_attr; + int status; - th->th.th_info.ds.ds_gtid = gtid; + th->th.th_info.ds.ds_gtid = gtid; #if KMP_STATS_ENABLED - // sets up worker thread stats - __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid); - - // th->th.th_stats is used to transfer thread specific stats-pointer to __kmp_launch_worker - // So when thread is created (goes into __kmp_launch_worker) it will - // set it's __thread local pointer to th->th.th_stats - if(!KMP_UBER_GTID(gtid)) { - th->th.th_stats = __kmp_stats_list->push_back(gtid); - } else { - // For root threads, the __kmp_stats_thread_ptr is set in __kmp_register_root(), so - // set the th->th.th_stats field to it. - th->th.th_stats = __kmp_stats_thread_ptr; - } - __kmp_release_tas_lock(&__kmp_stats_lock, gtid); + // sets up worker thread stats + __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid); + + // th->th.th_stats is used to transfer thread-specific stats-pointer to + // __kmp_launch_worker. So when thread is created (goes into + // __kmp_launch_worker) it will set its __thread local pointer to + // th->th.th_stats + if (!KMP_UBER_GTID(gtid)) { + th->th.th_stats = __kmp_stats_list->push_back(gtid); + } else { + // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(), + // so set the th->th.th_stats field to it. + th->th.th_stats = __kmp_stats_thread_ptr; + } + __kmp_release_tas_lock(&__kmp_stats_lock, gtid); #endif // KMP_STATS_ENABLED - if ( KMP_UBER_GTID(gtid) ) { - KA_TRACE( 10, ("__kmp_create_worker: uber thread (%d)\n", gtid ) ); - th -> th.th_info.ds.ds_thread = pthread_self(); - __kmp_set_stack_info( gtid, th ); - __kmp_check_stack_overlap( th ); - return; - }; // if + if (KMP_UBER_GTID(gtid)) { + KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid)); + th->th.th_info.ds.ds_thread = pthread_self(); + __kmp_set_stack_info(gtid, th); + __kmp_check_stack_overlap(th); + return; + }; // if - KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); + KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid)); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ #ifdef KMP_THREAD_ATTR - status = pthread_attr_init( &thread_attr ); - if ( status != 0 ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantInitThreadAttrs ), KMP_ERR( status ), __kmp_msg_null); - }; // if - status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); - if ( status != 0 ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetWorkerState ), KMP_ERR( status ), __kmp_msg_null); - }; // if + status = pthread_attr_init(&thread_attr); + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), + __kmp_msg_null); + }; // if + status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetWorkerState), KMP_ERR(status), + __kmp_msg_null); + }; // if + + /* Set stack size for this thread now. + The multiple of 2 is there because on some machines, requesting an unusual + stacksize causes the thread to have an offset before the dummy alloca() + takes place to create the offset. Since we want the user to have a + sufficient stacksize AND support a stack offset, we alloca() twice the + offset so that the upcoming alloca() does not eliminate any premade offset, + and also gives the user the stack space they requested for all threads */ + stack_size += gtid * __kmp_stkoffset * 2; + + KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " + "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", + gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size)); - /* Set stack size for this thread now. - * The multiple of 2 is there because on some machines, requesting an unusual stacksize - * causes the thread to have an offset before the dummy alloca() takes place to create the - * offset. Since we want the user to have a sufficient stacksize AND support a stack offset, we - * alloca() twice the offset so that the upcoming alloca() does not eliminate any premade - * offset, and also gives the user the stack space they requested for all threads */ - stack_size += gtid * __kmp_stkoffset * 2; - - KA_TRACE( 10, ( "__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) ); - -# ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_setstacksize( & thread_attr, stack_size ); -# ifdef KMP_BACKUP_STKSIZE - if ( status != 0 ) { - if ( ! 
__kmp_env_stksize ) { - stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset; - __kmp_stksize = KMP_BACKUP_STKSIZE; - KA_TRACE( 10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " - "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu " - "bytes\n", - gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size ) - ); - status = pthread_attr_setstacksize( &thread_attr, stack_size ); - }; // if - }; // if -# endif /* KMP_BACKUP_STKSIZE */ - if ( status != 0 ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetWorkerStackSize, stack_size ), KMP_ERR( status ), - KMP_HNT( ChangeWorkerStackSize ), __kmp_msg_null); +#ifdef _POSIX_THREAD_ATTR_STACKSIZE + status = pthread_attr_setstacksize(&thread_attr, stack_size); +#ifdef KMP_BACKUP_STKSIZE + if (status != 0) { + if (!__kmp_env_stksize) { + stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset; + __kmp_stksize = KMP_BACKUP_STKSIZE; + KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, " + "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu " + "bytes\n", + gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size)); + status = pthread_attr_setstacksize(&thread_attr, stack_size); }; // if -# endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + }; // if +#endif /* KMP_BACKUP_STKSIZE */ + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetWorkerStackSize, stack_size), + KMP_ERR(status), KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null); + }; // if +#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ #endif /* KMP_THREAD_ATTR */ - status = pthread_create( & handle, & thread_attr, __kmp_launch_worker, (void *) th ); - if ( status != 0 || ! handle ) { // ??? Why do we check handle?? + status = + pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th); + if (status != 0 || !handle) { // ??? Why do we check handle?? 
#ifdef _POSIX_THREAD_ATTR_STACKSIZE - if ( status == EINVAL ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetWorkerStackSize, stack_size ), KMP_ERR( status ), - KMP_HNT( IncreaseWorkerStackSize ), __kmp_msg_null); - }; - if ( status == ENOMEM ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantSetWorkerStackSize, stack_size ), KMP_ERR( status ), - KMP_HNT( DecreaseWorkerStackSize ), __kmp_msg_null); - }; + if (status == EINVAL) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetWorkerStackSize, stack_size), + KMP_ERR(status), KMP_HNT(IncreaseWorkerStackSize), + __kmp_msg_null); + }; + if (status == ENOMEM) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetWorkerStackSize, stack_size), + KMP_ERR(status), KMP_HNT(DecreaseWorkerStackSize), + __kmp_msg_null); + }; #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if ( status == EAGAIN ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( NoResourcesForWorkerThread ), KMP_ERR( status ), - KMP_HNT( Decrease_NUM_THREADS ), __kmp_msg_null); - }; // if - KMP_SYSFAIL( "pthread_create", status ); + if (status == EAGAIN) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(NoResourcesForWorkerThread), + KMP_ERR(status), KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null); }; // if + KMP_SYSFAIL("pthread_create", status); + }; // if - th->th.th_info.ds.ds_thread = handle; + th->th.th_info.ds.ds_thread = handle; #ifdef KMP_THREAD_ATTR - status = pthread_attr_destroy( & thread_attr ); - if ( status ) { - kmp_msg_t err_code = KMP_ERR( status ); - __kmp_msg(kmp_ms_warning, KMP_MSG( CantDestroyThreadAttrs ), err_code, __kmp_msg_null); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - }; // if + status = pthread_attr_destroy(&thread_attr); + if (status) { + kmp_msg_t err_code = KMP_ERR(status); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + }; // if #endif /* KMP_THREAD_ATTR */ - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); + KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid)); } // __kmp_create_worker - #if KMP_USE_MONITOR -void -__kmp_create_monitor( kmp_info_t *th ) -{ - pthread_t handle; - pthread_attr_t thread_attr; - size_t size; - int status; - int auto_adj_size = FALSE; - - if( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - // We don't need monitor thread in case of MAX_BLOCKTIME - KA_TRACE( 10, ("__kmp_create_monitor: skipping monitor thread because of MAX blocktime\n" ) ); - th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op - th->th.th_info.ds.ds_gtid = 0; - return; - } - KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; - #if KMP_REAL_TIME_FIX - TCW_4( __kmp_global.g.g_time.dt.t_value, -1 ); // Will use it for synchronization a bit later. 
- #else - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); - #endif // KMP_REAL_TIME_FIX - - #ifdef KMP_THREAD_ATTR - if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - auto_adj_size = TRUE; - } - status = pthread_attr_init( &thread_attr ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantInitThreadAttrs ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if - status = pthread_attr_setdetachstate( & thread_attr, PTHREAD_CREATE_JOINABLE ); - if ( status != 0 ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorState ), - KMP_ERR( status ), - __kmp_msg_null - ); - }; // if +void __kmp_create_monitor(kmp_info_t *th) { + pthread_t handle; + pthread_attr_t thread_attr; + size_t size; + int status; + int auto_adj_size = FALSE; + + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + // We don't need monitor thread in case of MAX_BLOCKTIME + KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of " + "MAX blocktime\n")); + th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op + th->th.th_info.ds.ds_gtid = 0; + return; + } + KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n")); + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; + th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; +#if KMP_REAL_TIME_FIX + TCW_4(__kmp_global.g.g_time.dt.t_value, + -1); // Will use it for synchronization a bit later. +#else + TCW_4(__kmp_global.g.g_time.dt.t_value, 0); +#endif // KMP_REAL_TIME_FIX + +#ifdef KMP_THREAD_ATTR + if (__kmp_monitor_stksize == 0) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + auto_adj_size = TRUE; + } + status = pthread_attr_init(&thread_attr); + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), + __kmp_msg_null); + }; // if + status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetMonitorState), KMP_ERR(status), + __kmp_msg_null); + }; // if + +#ifdef _POSIX_THREAD_ATTR_STACKSIZE + status = pthread_attr_getstacksize(&thread_attr, &size); + KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status); +#else + size = __kmp_sys_min_stksize; +#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ +#endif /* KMP_THREAD_ATTR */ + + if (__kmp_monitor_stksize == 0) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + } + if (__kmp_monitor_stksize < __kmp_sys_min_stksize) { + __kmp_monitor_stksize = __kmp_sys_min_stksize; + } - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - status = pthread_attr_getstacksize( & thread_attr, & size ); - KMP_CHECK_SYSFAIL( "pthread_attr_getstacksize", status ); - #else - size = __kmp_sys_min_stksize; - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - #endif /* KMP_THREAD_ATTR */ + KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes," + "requested stacksize = %lu bytes\n", + size, __kmp_monitor_stksize)); - if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; +retry: + +/* Set stack size for this thread now. 
*/ +#ifdef _POSIX_THREAD_ATTR_STACKSIZE + KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,", + __kmp_monitor_stksize)); + status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize); + if (status != 0) { + if (auto_adj_size) { + __kmp_monitor_stksize *= 2; + goto retry; } - if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; + kmp_msg_t err_code = KMP_ERR(status); + __kmp_msg(kmp_ms_warning, // should this be fatal? BB + KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize), + err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); } + }; // if +#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - KA_TRACE( 10, ( "__kmp_create_monitor: default stacksize = %lu bytes," - "requested stacksize = %lu bytes\n", - size, __kmp_monitor_stksize ) ); - - retry: - - /* Set stack size for this thread now. */ - - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - KA_TRACE( 10, ( "__kmp_create_monitor: setting stacksize = %lu bytes,", - __kmp_monitor_stksize ) ); - status = pthread_attr_setstacksize( & thread_attr, __kmp_monitor_stksize ); - if ( status != 0 ) { - if ( auto_adj_size ) { - __kmp_monitor_stksize *= 2; - goto retry; - } - kmp_msg_t err_code = KMP_ERR( status ); - __kmp_msg( - kmp_ms_warning, // should this be fatal? BB - KMP_MSG( CantSetMonitorStackSize, (long int) __kmp_monitor_stksize ), - err_code, - KMP_HNT( ChangeMonitorStackSize ), - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - }; // if - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - - status = pthread_create( &handle, & thread_attr, __kmp_launch_monitor, (void *) th ); - - if ( status != 0 ) { - #ifdef _POSIX_THREAD_ATTR_STACKSIZE - if ( status == EINVAL ) { - if ( auto_adj_size && ( __kmp_monitor_stksize < (size_t)0x40000000 ) ) { - __kmp_monitor_stksize *= 2; - goto retry; - } - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), - KMP_ERR( status ), - KMP_HNT( IncreaseMonitorStackSize ), - __kmp_msg_null - ); - }; // if - if ( status == ENOMEM ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetMonitorStackSize, __kmp_monitor_stksize ), - KMP_ERR( status ), - KMP_HNT( DecreaseMonitorStackSize ), - __kmp_msg_null - ); - }; // if - #endif /* _POSIX_THREAD_ATTR_STACKSIZE */ - if ( status == EAGAIN ) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( NoResourcesForMonitorThread ), - KMP_ERR( status ), - KMP_HNT( DecreaseNumberOfThreadsInUse ), - __kmp_msg_null - ); - }; // if - KMP_SYSFAIL( "pthread_create", status ); + status = + pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th); + + if (status != 0) { +#ifdef _POSIX_THREAD_ATTR_STACKSIZE + if (status == EINVAL) { + if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) { + __kmp_monitor_stksize *= 2; + goto retry; + } + __kmp_msg( + kmp_ms_fatal, KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize), + KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize), __kmp_msg_null); }; // if + if (status == ENOMEM) { + __kmp_msg( + kmp_ms_fatal, KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize), + KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize), __kmp_msg_null); + }; // if +#endif /* _POSIX_THREAD_ATTR_STACKSIZE */ + if (status == EAGAIN) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(NoResourcesForMonitorThread), + KMP_ERR(status), KMP_HNT(DecreaseNumberOfThreadsInUse), + __kmp_msg_null); + }; // if + 
KMP_SYSFAIL("pthread_create", status); + }; // if - th->th.th_info.ds.ds_thread = handle; - - #if KMP_REAL_TIME_FIX - // Wait for the monitor thread is really started and set its *priority*. - KMP_DEBUG_ASSERT( sizeof( kmp_uint32 ) == sizeof( __kmp_global.g.g_time.dt.t_value ) ); - __kmp_wait_yield_4( - (kmp_uint32 volatile *) & __kmp_global.g.g_time.dt.t_value, -1, & __kmp_neq_4, NULL - ); - #endif // KMP_REAL_TIME_FIX - - #ifdef KMP_THREAD_ATTR - status = pthread_attr_destroy( & thread_attr ); - if ( status != 0 ) { - kmp_msg_t err_code = KMP_ERR( status ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantDestroyThreadAttrs ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - }; // if - #endif + th->th.th_info.ds.ds_thread = handle; + +#if KMP_REAL_TIME_FIX + // Wait for the monitor thread is really started and set its *priority*. + KMP_DEBUG_ASSERT(sizeof(kmp_uint32) == + sizeof(__kmp_global.g.g_time.dt.t_value)); + __kmp_wait_yield_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, + -1, &__kmp_neq_4, NULL); +#endif // KMP_REAL_TIME_FIX + +#ifdef KMP_THREAD_ATTR + status = pthread_attr_destroy(&thread_attr); + if (status != 0) { + kmp_msg_t err_code = KMP_ERR(status); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); + } + }; // if +#endif - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 10, ( "__kmp_create_monitor: monitor created %#.8lx\n", th->th.th_info.ds.ds_thread ) ); + KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n", + th->th.th_info.ds.ds_thread)); } // __kmp_create_monitor #endif // KMP_USE_MONITOR -void -__kmp_exit_thread( - int exit_status -) { - pthread_exit( (void *)(intptr_t) exit_status ); +void __kmp_exit_thread(int exit_status) { + pthread_exit((void *)(intptr_t)exit_status); } // __kmp_exit_thread #if KMP_USE_MONITOR void __kmp_resume_monitor(); -void -__kmp_reap_monitor( kmp_info_t *th ) -{ - int status; - void *exit_val; - - KA_TRACE( 10, ("__kmp_reap_monitor: try to reap monitor thread with handle %#.8lx\n", - th->th.th_info.ds.ds_thread ) ); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. - KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); - if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_reap_monitor: monitor did not start, returning\n") ); - return; - }; // if +void __kmp_reap_monitor(kmp_info_t *th) { + int status; + void *exit_val; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle" + " %#.8lx\n", + th->th.th_info.ds.ds_thread)); + // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. + // If both tid and gtid are 0, it means the monitor did not ever start. + // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. 
+ KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid); + if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) { + KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n")); + return; + }; // if - /* First, check to see whether the monitor thread exists to wake it up. This is - to avoid performance problem when the monitor sleeps during blocktime-size - interval */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - status = pthread_kill( th->th.th_info.ds.ds_thread, 0 ); - if (status != ESRCH) { - __kmp_resume_monitor(); // Wake up the monitor thread - } - KA_TRACE( 10, ("__kmp_reap_monitor: try to join with monitor\n") ); - status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); - if (exit_val != th) { - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( ReapMonitorError ), - KMP_ERR( status ), - __kmp_msg_null - ); - } + /* First, check to see whether the monitor thread exists to wake it up. This + is to avoid performance problem when the monitor sleeps during + blocktime-size interval */ - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; + status = pthread_kill(th->th.th_info.ds.ds_thread, 0); + if (status != ESRCH) { + __kmp_resume_monitor(); // Wake up the monitor thread + } + KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n")); + status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val); + if (exit_val != th) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(ReapMonitorError), KMP_ERR(status), + __kmp_msg_null); + } - KA_TRACE( 10, ("__kmp_reap_monitor: done reaping monitor thread with handle %#.8lx\n", - th->th.th_info.ds.ds_thread ) ); + th->th.th_info.ds.ds_tid = KMP_GTID_DNE; + th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle" + " %#.8lx\n", + th->th.th_info.ds.ds_thread)); + KMP_MB(); /* Flush all pending memory write invalidates. */ } #endif // KMP_USE_MONITOR -void -__kmp_reap_worker( kmp_info_t *th ) -{ - int status; - void *exit_val; +void __kmp_reap_worker(kmp_info_t *th) { + int status; + void *exit_val; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - KA_TRACE( 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid ) ); + KA_TRACE( + 10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid)); - status = pthread_join( th->th.th_info.ds.ds_thread, & exit_val); + status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val); #ifdef KMP_DEBUG - /* Don't expose these to the user until we understand when they trigger */ - if ( status != 0 ) { - __kmp_msg(kmp_ms_fatal, KMP_MSG( ReapWorkerError ), KMP_ERR( status ), __kmp_msg_null); - } - if ( exit_val != th ) { - KA_TRACE( 10, ( "__kmp_reap_worker: worker T#%d did not reap properly, exit_val = %p\n", - th->th.th_info.ds.ds_gtid, exit_val ) ); - } + /* Don't expose these to the user until we understand when they trigger */ + if (status != 0) { + __kmp_msg(kmp_ms_fatal, KMP_MSG(ReapWorkerError), KMP_ERR(status), + __kmp_msg_null); + } + if (exit_val != th) { + KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, " + "exit_val = %p\n", + th->th.th_info.ds.ds_gtid, exit_val)); + } #endif /* KMP_DEBUG */ - KA_TRACE( 10, ("__kmp_reap_worker: done reaping T#%d\n", th->th.th_info.ds.ds_gtid ) ); + KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n", + th->th.th_info.ds.ds_gtid)); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ } - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #if KMP_HANDLE_SIGNALS - -static void -__kmp_null_handler( int signo ) -{ - // Do nothing, for doing SIG_IGN-type actions. +static void __kmp_null_handler(int signo) { + // Do nothing, for doing SIG_IGN-type actions. } // __kmp_null_handler - -static void -__kmp_team_handler( int signo ) -{ - if ( __kmp_global.g.g_abort == 0 ) { - /* Stage 1 signal handler, let's shut down all of the threads */ - #ifdef KMP_DEBUG - __kmp_debug_printf( "__kmp_team_handler: caught signal = %d\n", signo ); - #endif - switch ( signo ) { - case SIGHUP : - case SIGINT : - case SIGQUIT : - case SIGILL : - case SIGABRT : - case SIGFPE : - case SIGBUS : - case SIGSEGV : - #ifdef SIGSYS - case SIGSYS : - #endif - case SIGTERM : - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer( ); - }; // if - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_abort, signo ); - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_done, TRUE ); - KMP_MB(); // Flush all pending memory write invalidates. - break; - default: - #ifdef KMP_DEBUG - __kmp_debug_printf( "__kmp_team_handler: unknown signal type" ); - #endif - break; - }; // switch - }; // if +static void __kmp_team_handler(int signo) { + if (__kmp_global.g.g_abort == 0) { +/* Stage 1 signal handler, let's shut down all of the threads */ +#ifdef KMP_DEBUG + __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo); +#endif + switch (signo) { + case SIGHUP: + case SIGINT: + case SIGQUIT: + case SIGILL: + case SIGABRT: + case SIGFPE: + case SIGBUS: + case SIGSEGV: +#ifdef SIGSYS + case SIGSYS: +#endif + case SIGTERM: + if (__kmp_debug_buf) { + __kmp_dump_debug_buffer(); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4(__kmp_global.g.g_abort, signo); + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4(__kmp_global.g.g_done, TRUE); + KMP_MB(); // Flush all pending memory write invalidates. 
+ break; + default: +#ifdef KMP_DEBUG + __kmp_debug_printf("__kmp_team_handler: unknown signal type"); +#endif + break; + }; // switch + }; // if } // __kmp_team_handler - -static -void __kmp_sigaction( int signum, const struct sigaction * act, struct sigaction * oldact ) { - int rc = sigaction( signum, act, oldact ); - KMP_CHECK_SYSFAIL_ERRNO( "sigaction", rc ); +static void __kmp_sigaction(int signum, const struct sigaction *act, + struct sigaction *oldact) { + int rc = sigaction(signum, act, oldact); + KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc); } - -static void -__kmp_install_one_handler( int sig, sig_func_t handler_func, int parallel_init ) -{ - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE( 60, ( "__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init ) ); - if ( parallel_init ) { - struct sigaction new_action; - struct sigaction old_action; - new_action.sa_handler = handler_func; - new_action.sa_flags = 0; - sigfillset( & new_action.sa_mask ); - __kmp_sigaction( sig, & new_action, & old_action ); - if ( old_action.sa_handler == __kmp_sighldrs[ sig ].sa_handler ) { - sigaddset( & __kmp_sigset, sig ); - } else { - // Restore/keep user's handler if one previously installed. - __kmp_sigaction( sig, & old_action, NULL ); - }; // if +static void __kmp_install_one_handler(int sig, sig_func_t handler_func, + int parallel_init) { + KMP_MB(); // Flush all pending memory write invalidates. + KB_TRACE(60, + ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init)); + if (parallel_init) { + struct sigaction new_action; + struct sigaction old_action; + new_action.sa_handler = handler_func; + new_action.sa_flags = 0; + sigfillset(&new_action.sa_mask); + __kmp_sigaction(sig, &new_action, &old_action); + if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) { + sigaddset(&__kmp_sigset, sig); } else { - // Save initial/system signal handlers to see if user handlers installed. - __kmp_sigaction( sig, NULL, & __kmp_sighldrs[ sig ] ); + // Restore/keep user's handler if one previously installed. + __kmp_sigaction(sig, &old_action, NULL); }; // if - KMP_MB(); // Flush all pending memory write invalidates. + } else { + // Save initial/system signal handlers to see if user handlers installed. + __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. } // __kmp_install_one_handler - -static void -__kmp_remove_one_handler( int sig ) -{ - KB_TRACE( 60, ( "__kmp_remove_one_handler( %d )\n", sig ) ); - if ( sigismember( & __kmp_sigset, sig ) ) { - struct sigaction old; - KMP_MB(); // Flush all pending memory write invalidates. - __kmp_sigaction( sig, & __kmp_sighldrs[ sig ], & old ); - if ( ( old.sa_handler != __kmp_team_handler ) && ( old.sa_handler != __kmp_null_handler ) ) { - // Restore the users signal handler. - KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); - __kmp_sigaction( sig, & old, NULL ); - }; // if - sigdelset( & __kmp_sigset, sig ); - KMP_MB(); // Flush all pending memory write invalidates. +static void __kmp_remove_one_handler(int sig) { + KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig)); + if (sigismember(&__kmp_sigset, sig)) { + struct sigaction old; + KMP_MB(); // Flush all pending memory write invalidates. + __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old); + if ((old.sa_handler != __kmp_team_handler) && + (old.sa_handler != __kmp_null_handler)) { + // Restore the users signal handler. 
+ KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, " + "restoring: sig=%d\n", + sig)); + __kmp_sigaction(sig, &old, NULL); }; // if + sigdelset(&__kmp_sigset, sig); + KMP_MB(); // Flush all pending memory write invalidates. + }; // if } // __kmp_remove_one_handler - -void -__kmp_install_signals( int parallel_init ) -{ - KB_TRACE( 10, ( "__kmp_install_signals( %d )\n", parallel_init ) ); - if ( __kmp_handle_signals || ! parallel_init ) { - // If ! parallel_init, we do not install handlers, just save original handlers. - // Let us do it even __handle_signals is 0. - sigemptyset( & __kmp_sigset ); - __kmp_install_one_handler( SIGHUP, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGQUIT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGBUS, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); - #ifdef SIGSYS - __kmp_install_one_handler( SIGSYS, __kmp_team_handler, parallel_init ); - #endif // SIGSYS - __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); - #ifdef SIGPIPE - __kmp_install_one_handler( SIGPIPE, __kmp_team_handler, parallel_init ); - #endif // SIGPIPE - }; // if +void __kmp_install_signals(int parallel_init) { + KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init)); + if (__kmp_handle_signals || !parallel_init) { + // If ! parallel_init, we do not install handlers, just save original + // handlers. Let us do it even __handle_signals is 0. 
+ sigemptyset(&__kmp_sigset); + __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init); +#ifdef SIGSYS + __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init); +#endif // SIGSYS + __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init); +#ifdef SIGPIPE + __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init); +#endif // SIGPIPE + }; // if } // __kmp_install_signals - -void -__kmp_remove_signals( void ) -{ - int sig; - KB_TRACE( 10, ( "__kmp_remove_signals()\n" ) ); - for ( sig = 1; sig < NSIG; ++ sig ) { - __kmp_remove_one_handler( sig ); - }; // for sig +void __kmp_remove_signals(void) { + int sig; + KB_TRACE(10, ("__kmp_remove_signals()\n")); + for (sig = 1; sig < NSIG; ++sig) { + __kmp_remove_one_handler(sig); + }; // for sig } // __kmp_remove_signals - #endif // KMP_HANDLE_SIGNALS -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_enable( int new_state ) -{ - #ifdef KMP_CANCEL_THREADS - int status, old_state; - status = pthread_setcancelstate( new_state, & old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); - KMP_DEBUG_ASSERT( old_state == PTHREAD_CANCEL_DISABLE ); - #endif +void __kmp_enable(int new_state) { +#ifdef KMP_CANCEL_THREADS + int status, old_state; + status = pthread_setcancelstate(new_state, &old_state); + KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); + KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE); +#endif } -void -__kmp_disable( int * old_state ) -{ - #ifdef KMP_CANCEL_THREADS - int status; - status = pthread_setcancelstate( PTHREAD_CANCEL_DISABLE, old_state ); - KMP_CHECK_SYSFAIL( "pthread_setcancelstate", status ); - #endif +void __kmp_disable(int *old_state) { +#ifdef KMP_CANCEL_THREADS + int status; + status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state); + KMP_CHECK_SYSFAIL("pthread_setcancelstate", status); +#endif } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static void -__kmp_atfork_prepare (void) -{ - /* nothing to do */ +static void __kmp_atfork_prepare(void) { /* nothing to do */ } -static void -__kmp_atfork_parent (void) -{ - /* nothing to do */ +static void __kmp_atfork_parent(void) { /* nothing to do */ } -/* - Reset the library so execution in the child starts "all over again" with - clean data structures in initial states. Don't worry about freeing memory - allocated by parent, just abandon it to be safe. -*/ -static void -__kmp_atfork_child (void) -{ - /* TODO make sure this is done right for nested/sibling */ - // ATT: Memory leaks are here? TODO: Check it and fix. - /* KMP_ASSERT( 0 ); */ - - ++__kmp_fork_count; - - __kmp_init_runtime = FALSE; +/* Reset the library so execution in the child starts "all over again" with + clean data structures in initial states. 
Don't worry about freeing memory + allocated by parent, just abandon it to be safe. */ +static void __kmp_atfork_child(void) { + /* TODO make sure this is done right for nested/sibling */ + // ATT: Memory leaks are here? TODO: Check it and fix. + /* KMP_ASSERT( 0 ); */ + + ++__kmp_fork_count; + + __kmp_init_runtime = FALSE; #if KMP_USE_MONITOR - __kmp_init_monitor = 0; + __kmp_init_monitor = 0; #endif - __kmp_init_parallel = FALSE; - __kmp_init_middle = FALSE; - __kmp_init_serial = FALSE; - TCW_4(__kmp_init_gtid, FALSE); - __kmp_init_common = FALSE; - - TCW_4(__kmp_init_user_locks, FALSE); -#if ! KMP_USE_DYNAMIC_LOCK - __kmp_user_lock_table.used = 1; - __kmp_user_lock_table.allocated = 0; - __kmp_user_lock_table.table = NULL; - __kmp_lock_blocks = NULL; + __kmp_init_parallel = FALSE; + __kmp_init_middle = FALSE; + __kmp_init_serial = FALSE; + TCW_4(__kmp_init_gtid, FALSE); + __kmp_init_common = FALSE; + + TCW_4(__kmp_init_user_locks, FALSE); +#if !KMP_USE_DYNAMIC_LOCK + __kmp_user_lock_table.used = 1; + __kmp_user_lock_table.allocated = 0; + __kmp_user_lock_table.table = NULL; + __kmp_lock_blocks = NULL; #endif - __kmp_all_nth = 0; - TCW_4(__kmp_nth, 0); + __kmp_all_nth = 0; + TCW_4(__kmp_nth, 0); - /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate here - so threadprivate doesn't use stale data */ - KA_TRACE( 10, ( "__kmp_atfork_child: checking cache address list %p\n", - __kmp_threadpriv_cache_list ) ); + /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate + here so threadprivate doesn't use stale data */ + KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n", + __kmp_threadpriv_cache_list)); - while ( __kmp_threadpriv_cache_list != NULL ) { + while (__kmp_threadpriv_cache_list != NULL) { - if ( *__kmp_threadpriv_cache_list -> addr != NULL ) { - KC_TRACE( 50, ( "__kmp_atfork_child: zeroing cache at address %p\n", - &(*__kmp_threadpriv_cache_list -> addr) ) ); + if (*__kmp_threadpriv_cache_list->addr != NULL) { + KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n", + &(*__kmp_threadpriv_cache_list->addr))); - *__kmp_threadpriv_cache_list -> addr = NULL; - } - __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list -> next; + *__kmp_threadpriv_cache_list->addr = NULL; } + __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next; + } - __kmp_init_runtime = FALSE; - - /* reset statically initialized locks */ - __kmp_init_bootstrap_lock( &__kmp_initz_lock ); - __kmp_init_bootstrap_lock( &__kmp_stdio_lock ); - __kmp_init_bootstrap_lock( &__kmp_console_lock ); - - /* This is necessary to make sure no stale data is left around */ - /* AC: customers complain that we use unsafe routines in the atfork - handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen - in dynamic_link when check the presence of shared tbbmalloc library. - Suggestion is to make the library initialization lazier, similar - to what done for __kmpc_begin(). */ - // TODO: synchronize all static initializations with regular library - // startup; look at kmp_global.cpp and etc. - //__kmp_internal_begin (); + __kmp_init_runtime = FALSE; + + /* reset statically initialized locks */ + __kmp_init_bootstrap_lock(&__kmp_initz_lock); + __kmp_init_bootstrap_lock(&__kmp_stdio_lock); + __kmp_init_bootstrap_lock(&__kmp_console_lock); + + /* This is necessary to make sure no stale data is left around */ + /* AC: customers complain that we use unsafe routines in the atfork + handler. Mathworks: dlsym() is unsafe. 
We call dlsym and dlopen + in dynamic_link when check the presence of shared tbbmalloc library. + Suggestion is to make the library initialization lazier, similar + to what done for __kmpc_begin(). */ + // TODO: synchronize all static initializations with regular library + // startup; look at kmp_global.cpp and etc. + //__kmp_internal_begin (); +} +void __kmp_register_atfork(void) { + if (__kmp_need_register_atfork) { + int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent, + __kmp_atfork_child); + KMP_CHECK_SYSFAIL("pthread_atfork", status); + __kmp_need_register_atfork = FALSE; + } } -void -__kmp_register_atfork(void) { - if ( __kmp_need_register_atfork ) { - int status = pthread_atfork( __kmp_atfork_prepare, __kmp_atfork_parent, __kmp_atfork_child ); - KMP_CHECK_SYSFAIL( "pthread_atfork", status ); - __kmp_need_register_atfork = FALSE; - } +void __kmp_suspend_initialize(void) { + int status; + status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr); + KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status); + status = pthread_condattr_init(&__kmp_suspend_cond_attr); + KMP_CHECK_SYSFAIL("pthread_condattr_init", status); } -void -__kmp_suspend_initialize( void ) -{ +static void __kmp_suspend_initialize_thread(kmp_info_t *th) { + ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count); + if (th->th.th_suspend_init_count <= __kmp_fork_count) { + /* this means we haven't initialized the suspension pthread objects for this + thread in this instance of the process */ int status; - status = pthread_mutexattr_init( &__kmp_suspend_mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); - status = pthread_condattr_init( &__kmp_suspend_cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_condattr_init", status ); + status = pthread_cond_init(&th->th.th_suspend_cv.c_cond, + &__kmp_suspend_cond_attr); + KMP_CHECK_SYSFAIL("pthread_cond_init", status); + status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex, + &__kmp_suspend_mutex_attr); + KMP_CHECK_SYSFAIL("pthread_mutex_init", status); + *(volatile int *)&th->th.th_suspend_init_count = __kmp_fork_count + 1; + ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count); + }; } -static void -__kmp_suspend_initialize_thread( kmp_info_t *th ) -{ - ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count); - if ( th->th.th_suspend_init_count <= __kmp_fork_count ) { - /* this means we haven't initialized the suspension pthread objects for this thread - in this instance of the process */ - int status; - status = pthread_cond_init( &th->th.th_suspend_cv.c_cond, &__kmp_suspend_cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); - status = pthread_mutex_init( &th->th.th_suspend_mx.m_mutex, & __kmp_suspend_mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); - *(volatile int*)&th->th.th_suspend_init_count = __kmp_fork_count + 1; - ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count); +void __kmp_suspend_uninitialize_thread(kmp_info_t *th) { + if (th->th.th_suspend_init_count > __kmp_fork_count) { + /* this means we have initialize the suspension pthread objects for this + thread in this instance of the process */ + int status; + + status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond); + if (status != 0 && status != EBUSY) { + KMP_SYSFAIL("pthread_cond_destroy", status); }; + status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex); + if (status != 0 && status != EBUSY) { + KMP_SYSFAIL("pthread_mutex_destroy", status); + }; + --th->th.th_suspend_init_count; + KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == 
__kmp_fork_count); + } } -void -__kmp_suspend_uninitialize_thread( kmp_info_t *th ) -{ - if(th->th.th_suspend_init_count > __kmp_fork_count) { - /* this means we have initialize the suspension pthread objects for this thread - in this instance of the process */ - int status; - - status = pthread_cond_destroy( &th->th.th_suspend_cv.c_cond ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_cond_destroy", status ); - }; - status = pthread_mutex_destroy( &th->th.th_suspend_mx.m_mutex ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_mutex_destroy", status ); - }; - --th->th.th_suspend_init_count; - KMP_DEBUG_ASSERT(th->th.th_suspend_init_count == __kmp_fork_count); - } -} /* This routine puts the calling thread to sleep after setting the - * sleep bit for the indicated flag variable to true. - */ + sleep bit for the indicated flag variable to true. */ template -static inline void __kmp_suspend_template( int th_gtid, C *flag ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend); - kmp_info_t *th = __kmp_threads[th_gtid]; - int status; - typename C::flag_t old_spin; - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, flag->get() ) ); - - __kmp_suspend_initialize_thread( th ); - - status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); - - KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n", - th_gtid, flag->get() ) ); - - /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread - gets called first? - */ - old_spin = flag->set_sleeping(); - - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x, was %x\n", - th_gtid, flag->get(), *(flag->get()), old_spin ) ); - - if ( flag->done_check_val(old_spin) ) { - old_spin = flag->unset_sleeping(); - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for spin(%p)\n", - th_gtid, flag->get()) ); - } else { - /* Encapsulate in a loop as the documentation states that this may - * "with low probability" return when the condition variable has - * not been signaled or broadcast - */ - int deactivated = FALSE; - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - while ( flag->is_sleeping() ) { +static inline void __kmp_suspend_template(int th_gtid, C *flag) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend); + kmp_info_t *th = __kmp_threads[th_gtid]; + int status; + typename C::flag_t old_spin; + + KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid, + flag->get())); + + __kmp_suspend_initialize_thread(th); + + status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); + + KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n", + th_gtid, flag->get())); + + /* TODO: shouldn't this use release semantics to ensure that + __kmp_suspend_initialize_thread gets called first? 
*/ + old_spin = flag->set_sleeping(); + + KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x," + " was %x\n", + th_gtid, flag->get(), *(flag->get()), old_spin)); + + if (flag->done_check_val(old_spin)) { + old_spin = flag->unset_sleeping(); + KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit " + "for spin(%p)\n", + th_gtid, flag->get())); + } else { + /* Encapsulate in a loop as the documentation states that this may + "with low probability" return when the condition variable has + not been signaled or broadcast */ + int deactivated = FALSE; + TCW_PTR(th->th.th_sleep_loc, (void *)flag); + + while (flag->is_sleeping()) { #ifdef DEBUG_SUSPEND - char buffer[128]; - __kmp_suspend_count++; - __kmp_print_cond( buffer, &th->th.th_suspend_cv ); - __kmp_printf( "__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, buffer ); + char buffer[128]; + __kmp_suspend_count++; + __kmp_print_cond(buffer, &th->th.th_suspend_cv); + __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid, + buffer); #endif - // Mark the thread as no longer active (only in the first iteration of the loop). - if ( ! deactivated ) { - th->th.th_active = FALSE; - if ( th->th.th_active_in_pool ) { - th->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } - deactivated = TRUE; - } + // Mark the thread as no longer active (only in the first iteration of the + // loop). + if (!deactivated) { + th->th.th_active = FALSE; + if (th->th.th_active_in_pool) { + th->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); + } + deactivated = TRUE; + } #if USE_SUSPEND_TIMEOUT - struct timespec now; - struct timeval tval; - int msecs; - - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &now ); - - msecs = (4*__kmp_dflt_blocktime) + 200; - now.tv_sec += msecs / 1000; - now.tv_nsec += (msecs % 1000)*1000; - - KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_timedwait\n", - th_gtid ) ); - status = pthread_cond_timedwait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex, & now ); + struct timespec now; + struct timeval tval; + int msecs; + + status = gettimeofday(&tval, NULL); + KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); + TIMEVAL_TO_TIMESPEC(&tval, &now); + + msecs = (4 * __kmp_dflt_blocktime) + 200; + now.tv_sec += msecs / 1000; + now.tv_nsec += (msecs % 1000) * 1000; + + KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform " + "pthread_cond_timedwait\n", + th_gtid)); + status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond, + &th->th.th_suspend_mx.m_mutex, &now); #else - KF_TRACE( 15, ( "__kmp_suspend_template: T#%d about to perform pthread_cond_wait\n", - th_gtid ) ); - status = pthread_cond_wait( &th->th.th_suspend_cv.c_cond, &th->th.th_suspend_mx.m_mutex ); + KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform" + " pthread_cond_wait\n", + th_gtid)); + status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond, + &th->th.th_suspend_mx.m_mutex); #endif - if ( (status != 0) && (status != EINTR) && (status != ETIMEDOUT) ) { - KMP_SYSFAIL( "pthread_cond_wait", status ); - } + if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) { + KMP_SYSFAIL("pthread_cond_wait", status); + } #ifdef KMP_DEBUG - if (status == ETIMEDOUT) { - 
if ( flag->is_sleeping() ) { - KF_TRACE( 100, ( "__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid ) ); - } else { - KF_TRACE( 2, ( "__kmp_suspend_template: T#%d timeout wakeup, sleep bit not set!\n", - th_gtid ) ); - } - } else if ( flag->is_sleeping() ) { - KF_TRACE( 100, ( "__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid ) ); - } -#endif - } // while - - // Mark the thread as active again (if it was previous marked as inactive) - if ( deactivated ) { - th->th.th_active = TRUE; - if ( TCR_4(th->th.th_in_pool) ) { - KMP_TEST_THEN_INC32( (kmp_int32 *) &__kmp_thread_pool_active_nth ); - th->th.th_active_in_pool = TRUE; - } + if (status == ETIMEDOUT) { + if (flag->is_sleeping()) { + KF_TRACE(100, + ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid)); + } else { + KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit " + "not set!\n", + th_gtid)); } + } else if (flag->is_sleeping()) { + KF_TRACE(100, + ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid)); + } +#endif + } // while + + // Mark the thread as active again (if it was previous marked as inactive) + if (deactivated) { + th->th.th_active = TRUE; + if (TCR_4(th->th.th_in_pool)) { + KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + th->th.th_active_in_pool = TRUE; + } } - + } #ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &th->th.th_suspend_cv); - __kmp_printf( "__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, buffer ); - } + { + char buffer[128]; + __kmp_print_cond(buffer, &th->th.th_suspend_cv); + __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid, + buffer); + } #endif - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid)); } void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } - /* This routine signals the thread specified by target_gtid to wake up - * after setting the sleep bit indicated by the flag argument to FALSE. - * The target thread must already have called __kmp_suspend_template() - */ + after setting the sleep bit indicated by the flag argument to FALSE. + The target thread must already have called __kmp_suspend_template() */ template -static inline void __kmp_resume_template( int target_gtid, C *flag ) -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); - kmp_info_t *th = __kmp_threads[target_gtid]; - int status; +static inline void __kmp_resume_template(int target_gtid, C *flag) { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); + kmp_info_t *th = __kmp_threads[target_gtid]; + int status; #ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; + int gtid = TCR_4(__kmp_init_gtid) ? 
__kmp_get_gtid() : -1; #endif - KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); - KMP_DEBUG_ASSERT( gtid != target_gtid ); + KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", + gtid, target_gtid)); + KMP_DEBUG_ASSERT(gtid != target_gtid); - __kmp_suspend_initialize_thread( th ); + __kmp_suspend_initialize_thread(th); - status = pthread_mutex_lock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); + status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); - if (!flag) { // coming from __kmp_null_resume_wrapper - flag = (C *)th->th.th_sleep_loc; - } + if (!flag) { // coming from __kmp_null_resume_wrapper + flag = (C *)th->th.th_sleep_loc; + } - // First, check if the flag is null or its type has changed. If so, someone else woke it up. - if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p)\n", - gtid, target_gtid, NULL ) ); - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - return; + // First, check if the flag is null or its type has changed. If so, someone + // else woke it up. + if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type + // simply shows what + // flag was cast to + KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " + "awake: flag(%p)\n", + gtid, target_gtid, NULL)); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + return; + } else { // if multiple threads are sleeping, flag should be internally + // referring to a specific thread here + typename C::flag_t old_spin = flag->unset_sleeping(); + if (!flag->is_sleeping_val(old_spin)) { + KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " + "awake: flag(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *flag->get())); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + return; } - else { // if multiple threads are sleeping, flag should be internally referring to a specific thread here - typename C::flag_t old_spin = flag->unset_sleeping(); - if ( ! 
flag->is_sleeping_val(old_spin) ) { - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - return; - } - KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *flag->get() ) ); - } - TCW_PTR(th->th.th_sleep_loc, NULL); - + KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset " + "sleep bit for flag's loc(%p): " + "%u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *flag->get())); + } + TCW_PTR(th->th.th_sleep_loc, NULL); #ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &th->th.th_suspend_cv ); - __kmp_printf( "__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, target_gtid, buffer ); - } + { + char buffer[128]; + __kmp_print_cond(buffer, &th->th.th_suspend_cv); + __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid, + target_gtid, buffer); + } #endif - - status = pthread_cond_signal( &th->th.th_suspend_cv.c_cond ); - KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); - status = pthread_mutex_unlock( &th->th.th_suspend_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", - gtid, target_gtid ) ); + status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond); + KMP_CHECK_SYSFAIL("pthread_cond_signal", status); + status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up" + " for T#%d\n", + gtid, target_gtid)); } void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } #if KMP_USE_MONITOR -void -__kmp_resume_monitor() -{ - KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); - int status; +void __kmp_resume_monitor() { + KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume); + int status; #ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; - KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", - gtid, KMP_GTID_MONITOR ) ); - KMP_DEBUG_ASSERT( gtid != KMP_GTID_MONITOR ); + int gtid = TCR_4(__kmp_init_gtid) ? 
__kmp_get_gtid() : -1; + KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid, + KMP_GTID_MONITOR)); + KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR); #endif - status = pthread_mutex_lock( &__kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_lock", status ); + status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_lock", status); #ifdef DEBUG_SUSPEND - { - char buffer[128]; - __kmp_print_cond( buffer, &__kmp_wait_cv.c_cond ); - __kmp_printf( "__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, KMP_GTID_MONITOR, buffer ); - } + { + char buffer[128]; + __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond); + __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid, + KMP_GTID_MONITOR, buffer); + } #endif - status = pthread_cond_signal( &__kmp_wait_cv.c_cond ); - KMP_CHECK_SYSFAIL( "pthread_cond_signal", status ); - status = pthread_mutex_unlock( &__kmp_wait_mx.m_mutex ); - KMP_CHECK_SYSFAIL( "pthread_mutex_unlock", status ); - KF_TRACE( 30, ( "__kmp_resume_monitor: T#%d exiting after signaling wake up for T#%d\n", - gtid, KMP_GTID_MONITOR ) ); + status = pthread_cond_signal(&__kmp_wait_cv.c_cond); + KMP_CHECK_SYSFAIL("pthread_cond_signal", status); + status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex); + KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status); + KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up" + " for T#%d\n", + gtid, KMP_GTID_MONITOR)); } #endif // KMP_USE_MONITOR -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_yield( int cond ) -{ - if (!cond) - return; +void __kmp_yield(int cond) { + if (!cond) + return; #if KMP_USE_MONITOR - if (!__kmp_yielding_on) - return; + if (!__kmp_yielding_on) + return; #else - if (__kmp_yield_cycle && !KMP_YIELD_NOW()) - return; + if (__kmp_yield_cycle && !KMP_YIELD_NOW()) + return; #endif - sched_yield(); + sched_yield(); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_gtid_set_specific( int gtid ) -{ - if( __kmp_init_gtid ) { - int status; - status = pthread_setspecific( __kmp_gtid_threadprivate_key, (void*)(intptr_t)(gtid+1) ); - KMP_CHECK_SYSFAIL( "pthread_setspecific", status ); - } else { - KA_TRACE( 50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n" ) ); - } +void __kmp_gtid_set_specific(int gtid) { + if (__kmp_init_gtid) { + int status; + status = pthread_setspecific(__kmp_gtid_threadprivate_key, + (void *)(intptr_t)(gtid + 1)); + KMP_CHECK_SYSFAIL("pthread_setspecific", status); + } else { + KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n")); + } } -int -__kmp_gtid_get_specific() -{ - int gtid; - if ( !__kmp_init_gtid ) { - KA_TRACE( 50, ("__kmp_gtid_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); - return KMP_GTID_SHUTDOWN; - } - gtid = (int)(size_t)pthread_getspecific( __kmp_gtid_threadprivate_key ); - if ( gtid == 0 ) { - gtid = KMP_GTID_DNE; - } - else { - gtid--; - } - KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", - __kmp_gtid_threadprivate_key, gtid )); - return gtid; +int __kmp_gtid_get_specific() { + int gtid; + if (!__kmp_init_gtid) { + KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning " + "KMP_GTID_SHUTDOWN\n")); + return KMP_GTID_SHUTDOWN; + } + gtid = 
(int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key); + if (gtid == 0) { + gtid = KMP_GTID_DNE; + } else { + gtid--; + } + KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", + __kmp_gtid_threadprivate_key, gtid)); + return gtid; } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -double -__kmp_read_cpu_time( void ) -{ - /*clock_t t;*/ - struct tms buffer; +double __kmp_read_cpu_time(void) { + /*clock_t t;*/ + struct tms buffer; - /*t =*/ times( & buffer ); + /*t =*/times(&buffer); - return (buffer.tms_utime + buffer.tms_cutime) / (double) CLOCKS_PER_SEC; + return (buffer.tms_utime + buffer.tms_cutime) / (double)CLOCKS_PER_SEC; } -int -__kmp_read_system_info( struct kmp_sys_info *info ) -{ - int status; - struct rusage r_usage; - - memset( info, 0, sizeof( *info ) ); - - status = getrusage( RUSAGE_SELF, &r_usage); - KMP_CHECK_SYSFAIL_ERRNO( "getrusage", status ); - - info->maxrss = r_usage.ru_maxrss; /* the maximum resident set size utilized (in kilobytes) */ - info->minflt = r_usage.ru_minflt; /* the number of page faults serviced without any I/O */ - info->majflt = r_usage.ru_majflt; /* the number of page faults serviced that required I/O */ - info->nswap = r_usage.ru_nswap; /* the number of times a process was "swapped" out of memory */ - info->inblock = r_usage.ru_inblock; /* the number of times the file system had to perform input */ - info->oublock = r_usage.ru_oublock; /* the number of times the file system had to perform output */ - info->nvcsw = r_usage.ru_nvcsw; /* the number of times a context switch was voluntarily */ - info->nivcsw = r_usage.ru_nivcsw; /* the number of times a context switch was forced */ - - return (status != 0); +int __kmp_read_system_info(struct kmp_sys_info *info) { + int status; + struct rusage r_usage; + + memset(info, 0, sizeof(*info)); + + status = getrusage(RUSAGE_SELF, &r_usage); + KMP_CHECK_SYSFAIL_ERRNO("getrusage", status); + + // The maximum resident set size utilized (in kilobytes) + info->maxrss = r_usage.ru_maxrss; + // The number of page faults serviced without any I/O + info->minflt = r_usage.ru_minflt; + // The number of page faults serviced that required I/O + info->majflt = r_usage.ru_majflt; + // The number of times a process was "swapped" out of memory + info->nswap = r_usage.ru_nswap; + // The number of times the file system had to perform input + info->inblock = r_usage.ru_inblock; + // The number of times the file system had to perform output + info->oublock = r_usage.ru_oublock; + // The number of times a context switch was voluntarily + info->nvcsw = r_usage.ru_nvcsw; + // The number of times a context switch was forced + info->nivcsw = r_usage.ru_nivcsw; + + return (status != 0); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_read_system_time( double *delta ) -{ - double t_ns; - struct timeval tval; - struct timespec stop; - int status; - - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &stop ); - t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start); - *delta = (t_ns * 1e-9); +void __kmp_read_system_time(double *delta) { + double t_ns; + struct timeval tval; + struct timespec stop; + int status; + + status = gettimeofday(&tval, NULL); + KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); + 
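// A minimal standalone sketch of the gtid TLS convention used by
// __kmp_gtid_set_specific()/__kmp_gtid_get_specific() above: the runtime
// stores gtid + 1, so a pthread_getspecific() result of 0 (an unset key) can
// be told apart from a legitimate gtid of 0. The names gtid_key, gtid_store
// and gtid_load below are hypothetical, not part of the runtime.

#include <pthread.h>
#include <stddef.h>
#include <stdint.h>

static pthread_key_t gtid_key; // assumed created once at startup

static void gtid_key_init(void) {
  // No destructor in this sketch; the real runtime registers
  // __kmp_internal_end_dest so thread exit can be observed.
  pthread_key_create(&gtid_key, NULL);
}

static void gtid_store(int gtid) {
  // Shift by one so "no value" (NULL/0) never collides with gtid 0.
  pthread_setspecific(gtid_key, (void *)(intptr_t)(gtid + 1));
}

static int gtid_load(void) {
  int v = (int)(intptr_t)pthread_getspecific(gtid_key);
  return (v == 0) ? -1 /* no gtid recorded for this thread */ : v - 1;
}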
TIMEVAL_TO_TIMESPEC(&tval, &stop); + t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start); + *delta = (t_ns * 1e-9); } -void -__kmp_clear_system_time( void ) -{ - struct timeval tval; - int status; - status = gettimeofday( &tval, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - TIMEVAL_TO_TIMESPEC( &tval, &__kmp_sys_timer_data.start ); +void __kmp_clear_system_time(void) { + struct timeval tval; + int status; + status = gettimeofday(&tval, NULL); + KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); + TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #ifdef BUILD_TV -void -__kmp_tv_threadprivate_store( kmp_info_t *th, void *global_addr, void *thread_addr ) -{ - struct tv_data *p; +void __kmp_tv_threadprivate_store(kmp_info_t *th, void *global_addr, + void *thread_addr) { + struct tv_data *p; - p = (struct tv_data *) __kmp_allocate( sizeof( *p ) ); + p = (struct tv_data *)__kmp_allocate(sizeof(*p)); - p->u.tp.global_addr = global_addr; - p->u.tp.thread_addr = thread_addr; + p->u.tp.global_addr = global_addr; + p->u.tp.thread_addr = thread_addr; - p->type = (void *) 1; + p->type = (void *)1; - p->next = th->th.th_local.tv_data; - th->th.th_local.tv_data = p; + p->next = th->th.th_local.tv_data; + th->th.th_local.tv_data = p; - if ( p->next == 0 ) { - int rc = pthread_setspecific( __kmp_tv_key, p ); - KMP_CHECK_SYSFAIL( "pthread_setspecific", rc ); - } + if (p->next == 0) { + int rc = pthread_setspecific(__kmp_tv_key, p); + KMP_CHECK_SYSFAIL("pthread_setspecific", rc); + } } #endif /* BUILD_TV */ -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -static int -__kmp_get_xproc( void ) { - - int r = 0; +static int __kmp_get_xproc(void) { - #if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD + int r = 0; - r = sysconf( _SC_NPROCESSORS_ONLN ); +#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD - #elif KMP_OS_DARWIN + r = sysconf(_SC_NPROCESSORS_ONLN); - // Bug C77011 High "OpenMP Threads and number of active cores". +#elif KMP_OS_DARWIN - // Find the number of available CPUs. - kern_return_t rc; - host_basic_info_data_t info; - mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT; - rc = host_info( mach_host_self(), HOST_BASIC_INFO, (host_info_t) & info, & num ); - if ( rc == 0 && num == HOST_BASIC_INFO_COUNT ) { - // Cannot use KA_TRACE() here because this code works before trace support is - // initialized. - r = info.avail_cpus; - } else { - KMP_WARNING( CantGetNumAvailCPU ); - KMP_INFORM( AssumedNumCPU ); - }; // if + // Bug C77011 High "OpenMP Threads and number of active cores". + + // Find the number of available CPUs. + kern_return_t rc; + host_basic_info_data_t info; + mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT; + rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num); + if (rc == 0 && num == HOST_BASIC_INFO_COUNT) { +// Cannot use KA_TRACE() here because this code works before trace support is +// initialized. + r = info.avail_cpus; + } else { + KMP_WARNING(CantGetNumAvailCPU); + KMP_INFORM(AssumedNumCPU); + }; // if - #else +#else - #error "Unknown or unsupported OS." +#error "Unknown or unsupported OS." - #endif +#endif - return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */ + return r > 0 ? 
r : 2; /* guess value of 2 if OS told us 0 */ } // __kmp_get_xproc -int -__kmp_read_from_file( char const *path, char const *format, ... ) -{ - int result; - va_list args; +int __kmp_read_from_file(char const *path, char const *format, ...) { + int result; + va_list args; - va_start(args, format); - FILE *f = fopen(path, "rb"); - if ( f == NULL ) - return 0; - result = vfscanf(f, format, args); - fclose(f); + va_start(args, format); + FILE *f = fopen(path, "rb"); + if (f == NULL) + return 0; + result = vfscanf(f, format, args); + fclose(f); - return result; + return result; } -void -__kmp_runtime_initialize( void ) -{ - int status; - pthread_mutexattr_t mutex_attr; - pthread_condattr_t cond_attr; +void __kmp_runtime_initialize(void) { + int status; + pthread_mutexattr_t mutex_attr; + pthread_condattr_t cond_attr; - if ( __kmp_init_runtime ) { - return; - }; // if + if (__kmp_init_runtime) { + return; + }; // if - #if ( KMP_ARCH_X86 || KMP_ARCH_X86_64 ) - if ( ! __kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( &__kmp_cpuinfo ); - }; // if - #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) + if (!__kmp_cpuinfo.initialized) { + __kmp_query_cpuid(&__kmp_cpuinfo); + }; // if +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - __kmp_xproc = __kmp_get_xproc(); + __kmp_xproc = __kmp_get_xproc(); - if ( sysconf( _SC_THREADS ) ) { + if (sysconf(_SC_THREADS)) { - /* Query the maximum number of threads */ - __kmp_sys_max_nth = sysconf( _SC_THREAD_THREADS_MAX ); - if ( __kmp_sys_max_nth == -1 ) { - /* Unlimited threads for NPTL */ - __kmp_sys_max_nth = INT_MAX; - } - else if ( __kmp_sys_max_nth <= 1 ) { - /* Can't tell, just use PTHREAD_THREADS_MAX */ - __kmp_sys_max_nth = KMP_MAX_NTH; - } + /* Query the maximum number of threads */ + __kmp_sys_max_nth = sysconf(_SC_THREAD_THREADS_MAX); + if (__kmp_sys_max_nth == -1) { + /* Unlimited threads for NPTL */ + __kmp_sys_max_nth = INT_MAX; + } else if (__kmp_sys_max_nth <= 1) { + /* Can't tell, just use PTHREAD_THREADS_MAX */ + __kmp_sys_max_nth = KMP_MAX_NTH; + } - /* Query the minimum stack size */ - __kmp_sys_min_stksize = sysconf( _SC_THREAD_STACK_MIN ); - if ( __kmp_sys_min_stksize <= 1 ) { - __kmp_sys_min_stksize = KMP_MIN_STKSIZE; - } + /* Query the minimum stack size */ + __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN); + if (__kmp_sys_min_stksize <= 1) { + __kmp_sys_min_stksize = KMP_MIN_STKSIZE; } + } - /* Set up minimum number of threads to switch to TLS gtid */ - __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; + /* Set up minimum number of threads to switch to TLS gtid */ + __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; - #ifdef BUILD_TV - { - int rc = pthread_key_create( & __kmp_tv_key, 0 ); - KMP_CHECK_SYSFAIL( "pthread_key_create", rc ); - } - #endif - - status = pthread_key_create( &__kmp_gtid_threadprivate_key, __kmp_internal_end_dest ); - KMP_CHECK_SYSFAIL( "pthread_key_create", status ); - status = pthread_mutexattr_init( & mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutexattr_init", status ); - status = pthread_mutex_init( & __kmp_wait_mx.m_mutex, & mutex_attr ); - KMP_CHECK_SYSFAIL( "pthread_mutex_init", status ); - status = pthread_condattr_init( & cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_condattr_init", status ); - status = pthread_cond_init( & __kmp_wait_cv.c_cond, & cond_attr ); - KMP_CHECK_SYSFAIL( "pthread_cond_init", status ); +#ifdef BUILD_TV + { + int rc = pthread_key_create(&__kmp_tv_key, 0); + KMP_CHECK_SYSFAIL("pthread_key_create", rc); + } +#endif + + status = 
pthread_key_create(&__kmp_gtid_threadprivate_key, + __kmp_internal_end_dest); + KMP_CHECK_SYSFAIL("pthread_key_create", status); + status = pthread_mutexattr_init(&mutex_attr); + KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status); + status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr); + KMP_CHECK_SYSFAIL("pthread_mutex_init", status); + status = pthread_condattr_init(&cond_attr); + KMP_CHECK_SYSFAIL("pthread_condattr_init", status); + status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr); + KMP_CHECK_SYSFAIL("pthread_cond_init", status); #if USE_ITT_BUILD - __kmp_itt_initialize(); + __kmp_itt_initialize(); #endif /* USE_ITT_BUILD */ - __kmp_init_runtime = TRUE; + __kmp_init_runtime = TRUE; } -void -__kmp_runtime_destroy( void ) -{ - int status; +void __kmp_runtime_destroy(void) { + int status; - if ( ! __kmp_init_runtime ) { - return; // Nothing to do. - }; + if (!__kmp_init_runtime) { + return; // Nothing to do. + }; #if USE_ITT_BUILD - __kmp_itt_destroy(); + __kmp_itt_destroy(); #endif /* USE_ITT_BUILD */ - status = pthread_key_delete( __kmp_gtid_threadprivate_key ); - KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); - #ifdef BUILD_TV - status = pthread_key_delete( __kmp_tv_key ); - KMP_CHECK_SYSFAIL( "pthread_key_delete", status ); - #endif + status = pthread_key_delete(__kmp_gtid_threadprivate_key); + KMP_CHECK_SYSFAIL("pthread_key_delete", status); +#ifdef BUILD_TV + status = pthread_key_delete(__kmp_tv_key); + KMP_CHECK_SYSFAIL("pthread_key_delete", status); +#endif - status = pthread_mutex_destroy( & __kmp_wait_mx.m_mutex ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_mutex_destroy", status ); - } - status = pthread_cond_destroy( & __kmp_wait_cv.c_cond ); - if ( status != 0 && status != EBUSY ) { - KMP_SYSFAIL( "pthread_cond_destroy", status ); - } - #if KMP_AFFINITY_SUPPORTED - __kmp_affinity_uninitialize(); - #endif + status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex); + if (status != 0 && status != EBUSY) { + KMP_SYSFAIL("pthread_mutex_destroy", status); + } + status = pthread_cond_destroy(&__kmp_wait_cv.c_cond); + if (status != 0 && status != EBUSY) { + KMP_SYSFAIL("pthread_cond_destroy", status); + } +#if KMP_AFFINITY_SUPPORTED + __kmp_affinity_uninitialize(); +#endif - __kmp_init_runtime = FALSE; + __kmp_init_runtime = FALSE; } - /* Put the thread to sleep for a time period */ /* NOTE: not currently used anywhere */ -void -__kmp_thread_sleep( int millis ) -{ - sleep( ( millis + 500 ) / 1000 ); -} +void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); } /* Calculate the elapsed wall clock time for the user */ -void -__kmp_elapsed( double *t ) -{ - int status; -# ifdef FIX_SGI_CLOCK - struct timespec ts; - - status = clock_gettime( CLOCK_PROCESS_CPUTIME_ID, &ts ); - KMP_CHECK_SYSFAIL_ERRNO( "clock_gettime", status ); - *t = (double) ts.tv_nsec * (1.0 / (double) KMP_NSEC_PER_SEC) + - (double) ts.tv_sec; -# else - struct timeval tv; - - status = gettimeofday( & tv, NULL ); - KMP_CHECK_SYSFAIL_ERRNO( "gettimeofday", status ); - *t = (double) tv.tv_usec * (1.0 / (double) KMP_USEC_PER_SEC) + - (double) tv.tv_sec; -# endif +void __kmp_elapsed(double *t) { + int status; +#ifdef FIX_SGI_CLOCK + struct timespec ts; + + status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); + KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status); + *t = + (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec; +#else + struct timeval tv; + + status = gettimeofday(&tv, NULL); + KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status); + 
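// A standalone sketch of the wall-clock reading done by __kmp_elapsed() in
// its gettimeofday() branch: the timeval is folded into a double number of
// seconds, assuming KMP_USEC_PER_SEC is 1000000. The helper name
// wall_seconds is hypothetical.

#include <stddef.h>
#include <sys/time.h>

static double wall_seconds(void) {
  struct timeval tv;
  if (gettimeofday(&tv, NULL) != 0)
    return -1.0; // the runtime aborts via KMP_CHECK_SYSFAIL_ERRNO instead
  return (double)tv.tv_sec + (double)tv.tv_usec * 1.0e-6;
}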
*t = + (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec; +#endif } /* Calculate the elapsed wall clock tick for the user */ -void -__kmp_elapsed_tick( double *t ) -{ - *t = 1 / (double) CLOCKS_PER_SEC; -} +void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; } /* Return the current time stamp in nsec */ -kmp_uint64 -__kmp_now_nsec() -{ - struct timeval t; - gettimeofday(&t, NULL); - return KMP_NSEC_PER_SEC*t.tv_sec + 1000*t.tv_usec; +kmp_uint64 __kmp_now_nsec() { + struct timeval t; + gettimeofday(&t, NULL); + return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec; } #if KMP_ARCH_X86 || KMP_ARCH_X86_64 /* Measure clock ticks per millisecond */ -void -__kmp_initialize_system_tick() -{ - kmp_uint64 delay = 100000; // 50~100 usec on most machines. - kmp_uint64 nsec = __kmp_now_nsec(); - kmp_uint64 goal = __kmp_hardware_timestamp() + delay; - kmp_uint64 now; - while ((now = __kmp_hardware_timestamp()) < goal); - __kmp_ticks_per_msec = (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec)); +void __kmp_initialize_system_tick() { + kmp_uint64 delay = 100000; // 50~100 usec on most machines. + kmp_uint64 nsec = __kmp_now_nsec(); + kmp_uint64 goal = __kmp_hardware_timestamp() + delay; + kmp_uint64 now; + while ((now = __kmp_hardware_timestamp()) < goal) + ; + __kmp_ticks_per_msec = + (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec)); } #endif -/* - Determine whether the given address is mapped into the current address space. -*/ +/* Determine whether the given address is mapped into the current address + space. */ -int -__kmp_is_address_mapped( void * addr ) { +int __kmp_is_address_mapped(void *addr) { - int found = 0; - int rc; + int found = 0; + int rc; - #if KMP_OS_LINUX || KMP_OS_FREEBSD +#if KMP_OS_LINUX || KMP_OS_FREEBSD - /* - On Linux* OS, read the /proc//maps pseudo-file to get all the address ranges mapped - into the address space. - */ + /* On Linux* OS, read the /proc//maps pseudo-file to get all the address + ranges mapped into the address space. */ - char * name = __kmp_str_format( "/proc/%d/maps", getpid() ); - FILE * file = NULL; + char *name = __kmp_str_format("/proc/%d/maps", getpid()); + FILE *file = NULL; - file = fopen( name, "r" ); - KMP_ASSERT( file != NULL ); + file = fopen(name, "r"); + KMP_ASSERT(file != NULL); - for ( ; ; ) { + for (;;) { - void * beginning = NULL; - void * ending = NULL; - char perms[ 5 ]; + void *beginning = NULL; + void *ending = NULL; + char perms[5]; - rc = fscanf( file, "%p-%p %4s %*[^\n]\n", & beginning, & ending, perms ); - if ( rc == EOF ) { - break; - }; // if - KMP_ASSERT( rc == 3 && KMP_STRLEN( perms ) == 4 ); // Make sure all fields are read. - - // Ending address is not included in the region, but beginning is. - if ( ( addr >= beginning ) && ( addr < ending ) ) { - perms[ 2 ] = 0; // 3th and 4th character does not matter. - if ( strcmp( perms, "rw" ) == 0 ) { - // Memory we are looking for should be readable and writable. - found = 1; - }; // if - break; - }; // if + rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms); + if (rc == EOF) { + break; + }; // if + KMP_ASSERT(rc == 3 && + KMP_STRLEN(perms) == 4); // Make sure all fields are read. + + // Ending address is not included in the region, but beginning is. + if ((addr >= beginning) && (addr < ending)) { + perms[2] = 0; // 3th and 4th character does not matter. + if (strcmp(perms, "rw") == 0) { + // Memory we are looking for should be readable and writable. 
+ found = 1; + }; // if + break; + }; // if - }; // forever - - // Free resources. - fclose( file ); - KMP_INTERNAL_FREE( name ); - - #elif KMP_OS_DARWIN - - /* - On OS X*, /proc pseudo filesystem is not available. Try to read memory using vm - interface. - */ - - int buffer; - vm_size_t count; - rc = - vm_read_overwrite( - mach_task_self(), // Task to read memory of. - (vm_address_t)( addr ), // Address to read from. - 1, // Number of bytes to be read. - (vm_address_t)( & buffer ), // Address of buffer to save read bytes in. - & count // Address of var to save number of read bytes in. - ); - if ( rc == 0 ) { - // Memory successfully read. - found = 1; - }; // if + }; // forever - #elif KMP_OS_FREEBSD || KMP_OS_NETBSD + // Free resources. + fclose(file); + KMP_INTERNAL_FREE(name); - // FIXME(FreeBSD, NetBSD): Implement this - found = 1; +#elif KMP_OS_DARWIN + + /* On OS X*, /proc pseudo filesystem is not available. Try to read memory + using vm interface. */ + + int buffer; + vm_size_t count; + rc = vm_read_overwrite( + mach_task_self(), // Task to read memory of. + (vm_address_t)(addr), // Address to read from. + 1, // Number of bytes to be read. + (vm_address_t)(&buffer), // Address of buffer to save read bytes in. + &count // Address of var to save number of read bytes in. + ); + if (rc == 0) { + // Memory successfully read. + found = 1; + }; // if - #else +#elif KMP_OS_FREEBSD || KMP_OS_NETBSD - #error "Unknown or unsupported OS" + // FIXME(FreeBSD, NetBSD): Implement this + found = 1; - #endif +#else + +#error "Unknown or unsupported OS" + +#endif - return found; + return found; } // __kmp_is_address_mapped #ifdef USE_LOAD_BALANCE - -# if KMP_OS_DARWIN +#if KMP_OS_DARWIN // The function returns the rounded value of the system load average // during given time interval which depends on the value of // __kmp_load_balance_interval variable (default is 60 sec, other values // may be 300 sec or 900 sec). // It returns -1 in case of error. -int -__kmp_get_load_balance( int max ) -{ - double averages[3]; - int ret_avg = 0; - - int res = getloadavg( averages, 3 ); - - //Check __kmp_load_balance_interval to determine which of averages to use. - // getloadavg() may return the number of samples less than requested that is - // less than 3. - if ( __kmp_load_balance_interval < 180 && ( res >= 1 ) ) { - ret_avg = averages[0];// 1 min - } else if ( ( __kmp_load_balance_interval >= 180 - && __kmp_load_balance_interval < 600 ) && ( res >= 2 ) ) { - ret_avg = averages[1];// 5 min - } else if ( ( __kmp_load_balance_interval >= 600 ) && ( res == 3 ) ) { - ret_avg = averages[2];// 15 min - } else {// Error occurred - return -1; - } +int __kmp_get_load_balance(int max) { + double averages[3]; + int ret_avg = 0; + + int res = getloadavg(averages, 3); + + // Check __kmp_load_balance_interval to determine which of averages to use. + // getloadavg() may return the number of samples less than requested that is + // less than 3. + if (__kmp_load_balance_interval < 180 && (res >= 1)) { + ret_avg = averages[0]; // 1 min + } else if ((__kmp_load_balance_interval >= 180 && + __kmp_load_balance_interval < 600) && + (res >= 2)) { + ret_avg = averages[1]; // 5 min + } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) { + ret_avg = averages[2]; // 15 min + } else { // Error occurred + return -1; + } - return ret_avg; + return ret_avg; } -# else // Linux* OS - -// The fuction returns number of running (not sleeping) threads, or -1 in case of error. 
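// A standalone sketch of the getloadavg()-based branch just above (the
// KMP_OS_DARWIN path of __kmp_get_load_balance): pick the 1, 5 or 15 minute
// load average depending on the sampling interval, and report failure when
// getloadavg() returns fewer samples than that choice needs. The parameter
// interval_sec stands in for __kmp_load_balance_interval and is hypothetical.

#include <stdlib.h>

static int load_balance_from_loadavg(int interval_sec) {
  double averages[3];
  int res = getloadavg(averages, 3); // may return fewer than 3 samples
  if (interval_sec < 180 && res >= 1)
    return (int)averages[0]; // 1 min average
  if (interval_sec >= 180 && interval_sec < 600 && res >= 2)
    return (int)averages[1]; // 5 min average
  if (interval_sec >= 600 && res == 3)
    return (int)averages[2]; // 15 min average
  return -1; // error, or not enough samples for the requested interval
}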
-// Error could be reported if Linux* OS kernel too old (without "/proc" support). -// Counting running threads stops if max running threads encountered. -int -__kmp_get_load_balance( int max ) -{ - static int permanent_error = 0; +#else // Linux* OS - static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ - static double glb_call_time = 0; /* Thread balance algorithm call time */ +// The fuction returns number of running (not sleeping) threads, or -1 in case +// of error. Error could be reported if Linux* OS kernel too old (without +// "/proc" support). Counting running threads stops if max running threads +// encountered. +int __kmp_get_load_balance(int max) { + static int permanent_error = 0; + static int glb_running_threads = 0; // Saved count of the running threads for + // the thread balance algortihm + static double glb_call_time = 0; /* Thread balance algorithm call time */ - int running_threads = 0; // Number of running threads in the system. + int running_threads = 0; // Number of running threads in the system. - DIR * proc_dir = NULL; // Handle of "/proc/" directory. - struct dirent * proc_entry = NULL; + DIR *proc_dir = NULL; // Handle of "/proc/" directory. + struct dirent *proc_entry = NULL; - kmp_str_buf_t task_path; // "/proc//task//" path. - DIR * task_dir = NULL; // Handle of "/proc//task//" directory. - struct dirent * task_entry = NULL; - int task_path_fixed_len; + kmp_str_buf_t task_path; // "/proc//task//" path. + DIR *task_dir = NULL; // Handle of "/proc//task//" directory. + struct dirent *task_entry = NULL; + int task_path_fixed_len; - kmp_str_buf_t stat_path; // "/proc//task//stat" path. - int stat_file = -1; - int stat_path_fixed_len; + kmp_str_buf_t stat_path; // "/proc//task//stat" path. + int stat_file = -1; + int stat_path_fixed_len; - int total_processes = 0; // Total number of processes in system. - int total_threads = 0; // Total number of threads in system. + int total_processes = 0; // Total number of processes in system. + int total_threads = 0; // Total number of threads in system. - double call_time = 0.0; + double call_time = 0.0; - __kmp_str_buf_init( & task_path ); - __kmp_str_buf_init( & stat_path ); + __kmp_str_buf_init(&task_path); + __kmp_str_buf_init(&stat_path); - __kmp_elapsed( & call_time ); - - if ( glb_call_time && - ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { - running_threads = glb_running_threads; - goto finish; - } - - glb_call_time = call_time; - - // Do not spend time on scanning "/proc/" if we have a permanent error. - if ( permanent_error ) { - running_threads = -1; - goto finish; - }; // if + __kmp_elapsed(&call_time); - if ( max <= 0 ) { - max = INT_MAX; - }; // if - - // Open "/proc/" directory. - proc_dir = opendir( "/proc" ); - if ( proc_dir == NULL ) { - // Cannot open "/prroc/". Probably the kernel does not support it. Return an error now and - // in subsequent calls. - running_threads = -1; - permanent_error = 1; - goto finish; - }; // if + if (glb_call_time && + (call_time - glb_call_time < __kmp_load_balance_interval)) { + running_threads = glb_running_threads; + goto finish; + } - // Initialize fixed part of task_path. This part will not change. - __kmp_str_buf_cat( & task_path, "/proc/", 6 ); - task_path_fixed_len = task_path.used; // Remember number of used characters. - - proc_entry = readdir( proc_dir ); - while ( proc_entry != NULL ) { - // Proc entry is a directory and name starts with a digit. Assume it is a process' - // directory. 
- if ( proc_entry->d_type == DT_DIR && isdigit( proc_entry->d_name[ 0 ] ) ) { - - ++ total_processes; - // Make sure init process is the very first in "/proc", so we can replace - // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes == 1. - // We are going to check that total_processes == 1 => d_name == "1" is true (where - // "=>" is implication). Since C++ does not have => operator, let us replace it with its - // equivalent: a => b == ! a || b. - KMP_DEBUG_ASSERT( total_processes != 1 || strcmp( proc_entry->d_name, "1" ) == 0 ); - - // Construct task_path. - task_path.used = task_path_fixed_len; // Reset task_path to "/proc/". - __kmp_str_buf_cat( & task_path, proc_entry->d_name, KMP_STRLEN( proc_entry->d_name ) ); - __kmp_str_buf_cat( & task_path, "/task", 5 ); - - task_dir = opendir( task_path.str ); - if ( task_dir == NULL ) { - // Process can finish between reading "/proc/" directory entry and opening process' - // "task/" directory. So, in general case we should not complain, but have to skip - // this process and read the next one. - // But on systems with no "task/" support we will spend lot of time to scan "/proc/" - // tree again and again without any benefit. "init" process (its pid is 1) should - // exist always, so, if we cannot open "/proc/1/task/" directory, it means "task/" - // is not supported by kernel. Report an error now and in the future. - if ( strcmp( proc_entry->d_name, "1" ) == 0 ) { - running_threads = -1; - permanent_error = 1; - goto finish; - }; // if + glb_call_time = call_time; + + // Do not spend time on scanning "/proc/" if we have a permanent error. + if (permanent_error) { + running_threads = -1; + goto finish; + }; // if + + if (max <= 0) { + max = INT_MAX; + }; // if + + // Open "/proc/" directory. + proc_dir = opendir("/proc"); + if (proc_dir == NULL) { + // Cannot open "/prroc/". Probably the kernel does not support it. Return an + // error now and in subsequent calls. + running_threads = -1; + permanent_error = 1; + goto finish; + }; // if + + // Initialize fixed part of task_path. This part will not change. + __kmp_str_buf_cat(&task_path, "/proc/", 6); + task_path_fixed_len = task_path.used; // Remember number of used characters. + + proc_entry = readdir(proc_dir); + while (proc_entry != NULL) { + // Proc entry is a directory and name starts with a digit. Assume it is a + // process' directory. + if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) { + + ++total_processes; + // Make sure init process is the very first in "/proc", so we can replace + // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes == + // 1. We are going to check that total_processes == 1 => d_name == "1" is + // true (where "=>" is implication). Since C++ does not have => operator, + // let us replace it with its equivalent: a => b == ! a || b. + KMP_DEBUG_ASSERT(total_processes != 1 || + strcmp(proc_entry->d_name, "1") == 0); + + // Construct task_path. + task_path.used = task_path_fixed_len; // Reset task_path to "/proc/". + __kmp_str_buf_cat(&task_path, proc_entry->d_name, + KMP_STRLEN(proc_entry->d_name)); + __kmp_str_buf_cat(&task_path, "/task", 5); + + task_dir = opendir(task_path.str); + if (task_dir == NULL) { + // Process can finish between reading "/proc/" directory entry and + // opening process' "task/" directory. So, in general case we should not + // complain, but have to skip this process and read the next one. 
But on + // systems with no "task/" support we will spend lot of time to scan + // "/proc/" tree again and again without any benefit. "init" process + // (its pid is 1) should exist always, so, if we cannot open + // "/proc/1/task/" directory, it means "task/" is not supported by + // kernel. Report an error now and in the future. + if (strcmp(proc_entry->d_name, "1") == 0) { + running_threads = -1; + permanent_error = 1; + goto finish; + }; // if + } else { + // Construct fixed part of stat file path. + __kmp_str_buf_clear(&stat_path); + __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used); + __kmp_str_buf_cat(&stat_path, "/", 1); + stat_path_fixed_len = stat_path.used; + + task_entry = readdir(task_dir); + while (task_entry != NULL) { + // It is a directory and name starts with a digit. + if (proc_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) { + ++total_threads; + + // Consruct complete stat file path. Easiest way would be: + // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, + // task_entry->d_name ); + // but seriae of __kmp_str_buf_cat works a bit faster. + stat_path.used = + stat_path_fixed_len; // Reset stat path to its fixed part. + __kmp_str_buf_cat(&stat_path, task_entry->d_name, + KMP_STRLEN(task_entry->d_name)); + __kmp_str_buf_cat(&stat_path, "/stat", 5); + + // Note: Low-level API (open/read/close) is used. High-level API + // (fopen/fclose) works ~ 30 % slower. + stat_file = open(stat_path.str, O_RDONLY); + if (stat_file == -1) { + // We cannot report an error because task (thread) can terminate + // just before reading this file. } else { - // Construct fixed part of stat file path. - __kmp_str_buf_clear( & stat_path ); - __kmp_str_buf_cat( & stat_path, task_path.str, task_path.used ); - __kmp_str_buf_cat( & stat_path, "/", 1 ); - stat_path_fixed_len = stat_path.used; - - task_entry = readdir( task_dir ); - while ( task_entry != NULL ) { - // It is a directory and name starts with a digit. - if ( proc_entry->d_type == DT_DIR && isdigit( task_entry->d_name[ 0 ] ) ) { - - ++ total_threads; - - // Consruct complete stat file path. Easiest way would be: - // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str, task_entry->d_name ); - // but seriae of __kmp_str_buf_cat works a bit faster. - stat_path.used = stat_path_fixed_len; // Reset stat path to its fixed part. - __kmp_str_buf_cat( & stat_path, task_entry->d_name, KMP_STRLEN( task_entry->d_name ) ); - __kmp_str_buf_cat( & stat_path, "/stat", 5 ); - - // Note: Low-level API (open/read/close) is used. High-level API - // (fopen/fclose) works ~ 30 % slower. - stat_file = open( stat_path.str, O_RDONLY ); - if ( stat_file == -1 ) { - // We cannot report an error because task (thread) can terminate just - // before reading this file. - } else { - /* - Content of "stat" file looks like: - - 24285 (program) S ... - - It is a single line (if program name does not include fanny - symbols). First number is a thread id, then name of executable file - name in paretheses, then state of the thread. We need just thread - state. - - Good news: Length of program name is 15 characters max. Longer - names are truncated. - - Thus, we need rather short buffer: 15 chars for program name + - 2 parenthesis, + 3 spaces + ~7 digits of pid = 37. - - Bad news: Program name may contain special symbols like space, - closing parenthesis, or even new line. This makes parsing "stat" - file not 100 % reliable. In case of fanny program names parsing - may fail (report incorrect thread state). 
- - Parsing "status" file looks more promissing (due to different - file structure and escaping special symbols) but reading and - parsing of "status" file works slower. - - -- ln - */ - char buffer[ 65 ]; - int len; - len = read( stat_file, buffer, sizeof( buffer ) - 1 ); - if ( len >= 0 ) { - buffer[ len ] = 0; - // Using scanf: - // sscanf( buffer, "%*d (%*s) %c ", & state ); - // looks very nice, but searching for a closing parenthesis works a - // bit faster. - char * close_parent = strstr( buffer, ") " ); - if ( close_parent != NULL ) { - char state = * ( close_parent + 2 ); - if ( state == 'R' ) { - ++ running_threads; - if ( running_threads >= max ) { - goto finish; - }; // if - }; // if - }; // if - }; // if - close( stat_file ); - stat_file = -1; - }; // if + /* Content of "stat" file looks like: + 24285 (program) S ... + + It is a single line (if program name does not include funny + symbols). First number is a thread id, then name of executable + file name in paretheses, then state of the thread. We need just + thread state. + + Good news: Length of program name is 15 characters max. Longer + names are truncated. + + Thus, we need rather short buffer: 15 chars for program name + + 2 parenthesis, + 3 spaces + ~7 digits of pid = 37. + + Bad news: Program name may contain special symbols like space, + closing parenthesis, or even new line. This makes parsing + "stat" file not 100 % reliable. In case of fanny program names + parsing may fail (report incorrect thread state). + + Parsing "status" file looks more promissing (due to different + file structure and escaping special symbols) but reading and + parsing of "status" file works slower. + -- ln + */ + char buffer[65]; + int len; + len = read(stat_file, buffer, sizeof(buffer) - 1); + if (len >= 0) { + buffer[len] = 0; + // Using scanf: + // sscanf( buffer, "%*d (%*s) %c ", & state ); + // looks very nice, but searching for a closing parenthesis + // works a bit faster. + char *close_parent = strstr(buffer, ") "); + if (close_parent != NULL) { + char state = *(close_parent + 2); + if (state == 'R') { + ++running_threads; + if (running_threads >= max) { + goto finish; }; // if - task_entry = readdir( task_dir ); - }; // while - closedir( task_dir ); - task_dir = NULL; + }; // if + }; // if + }; // if + close(stat_file); + stat_file = -1; }; // if - }; // if - proc_entry = readdir( proc_dir ); - }; // while - - // - // There _might_ be a timing hole where the thread executing this - // code get skipped in the load balance, and running_threads is 0. - // Assert in the debug builds only!!! - // - KMP_DEBUG_ASSERT( running_threads > 0 ); - if ( running_threads <= 0 ) { - running_threads = 1; - } + }; // if + task_entry = readdir(task_dir); + }; // while + closedir(task_dir); + task_dir = NULL; + }; // if + }; // if + proc_entry = readdir(proc_dir); + }; // while + + // There _might_ be a timing hole where the thread executing this + // code get skipped in the load balance, and running_threads is 0. + // Assert in the debug builds only!!! + KMP_DEBUG_ASSERT(running_threads > 0); + if (running_threads <= 0) { + running_threads = 1; + } - finish: // Clean up and exit. - if ( proc_dir != NULL ) { - closedir( proc_dir ); - }; // if - __kmp_str_buf_free( & task_path ); - if ( task_dir != NULL ) { - closedir( task_dir ); - }; // if - __kmp_str_buf_free( & stat_path ); - if ( stat_file != -1 ) { - close( stat_file ); - }; // if +finish: // Clean up and exit. 
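// A simplified, self-contained sketch of the /proc scan implemented above:
// count threads of the current process whose state field in
// /proc/self/task/<tid>/stat is 'R' (running). It omits the per-process walk,
// result caching and permanent-error latching of the real
// __kmp_get_load_balance(); the name count_running_threads_self is
// hypothetical.

#include <ctype.h>
#include <dirent.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

static int count_running_threads_self(void) {
  DIR *task_dir = opendir("/proc/self/task");
  if (task_dir == NULL)
    return -1; // no "task/" support in this kernel's /proc
  int running = 0;
  struct dirent *entry;
  while ((entry = readdir(task_dir)) != NULL) {
    if (!isdigit((unsigned char)entry->d_name[0]))
      continue; // skip "." and ".."
    char path[64];
    snprintf(path, sizeof(path), "/proc/self/task/%s/stat", entry->d_name);
    int fd = open(path, O_RDONLY); // low-level I/O, as in the code above
    if (fd == -1)
      continue; // thread may have exited between readdir() and open()
    char buffer[65];
    int len = read(fd, buffer, sizeof(buffer) - 1);
    close(fd);
    if (len < 0)
      continue;
    buffer[len] = 0;
    // The state is the character after ") ", i.e. after the parenthesized
    // program name -- the same parsing shortcut used above.
    char *close_paren = strstr(buffer, ") ");
    if (close_paren != NULL && close_paren[2] == 'R')
      ++running;
  }
  closedir(task_dir);
  return running;
}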
+ if (proc_dir != NULL) { + closedir(proc_dir); + }; // if + __kmp_str_buf_free(&task_path); + if (task_dir != NULL) { + closedir(task_dir); + }; // if + __kmp_str_buf_free(&stat_path); + if (stat_file != -1) { + close(stat_file); + }; // if - glb_running_threads = running_threads; + glb_running_threads = running_threads; - return running_threads; + return running_threads; } // __kmp_get_load_balance -# endif // KMP_OS_DARWIN +#endif // KMP_OS_DARWIN #endif // USE_LOAD_BALANCE @@ -2520,15 +2302,13 @@ __kmp_get_load_balance( int max ) // we really only need the case with 1 argument, because CLANG always build // a struct of pointers to shared variables referenced in the outlined function -int -__kmp_invoke_microtask( microtask_t pkfn, - int gtid, int tid, - int argc, void *p_argv[] +int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc, + void *p_argv[] #if OMPT_SUPPORT - , void **exit_frame_ptr + , + void **exit_frame_ptr #endif -) -{ + ) { #if OMPT_SUPPORT *exit_frame_ptr = __builtin_frame_address(0); #endif @@ -2612,4 +2392,3 @@ __kmp_invoke_microtask( microtask_t pkfn, #endif // end of file // - diff --git a/openmp/runtime/src/z_Windows_NT-586_asm.asm b/openmp/runtime/src/z_Windows_NT-586_asm.asm index a4f9a38..eace718 100644 --- a/openmp/runtime/src/z_Windows_NT-586_asm.asm +++ b/openmp/runtime/src/z_Windows_NT-586_asm.asm @@ -42,13 +42,10 @@ endif ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_x86_pause ; ; void ; __kmp_x86_pause( void ) -; - PUBLIC ___kmp_x86_pause _p$ = 4 _d$ = 8 @@ -64,13 +61,10 @@ ___kmp_x86_pause ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_x86_cpuid ; ; void ; __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p ); -; - PUBLIC ___kmp_x86_cpuid _TEXT SEGMENT ALIGN 16 @@ -115,13 +109,10 @@ ___kmp_x86_cpuid ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_test_then_add32 ; ; kmp_int32 ; __kmp_test_then_add32( volatile kmp_int32 *p, kmp_int32 d ); -; - PUBLIC ___kmp_test_then_add32 _p$ = 4 _d$ = 8 @@ -138,13 +129,10 @@ ___kmp_test_then_add32 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store8 ; ; kmp_int8 ; __kmp_compare_and_store8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; - PUBLIC ___kmp_compare_and_store8 _TEXT SEGMENT ALIGN 16 @@ -166,13 +154,10 @@ ___kmp_compare_and_store8 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store16 ; ; kmp_int16 ; __kmp_compare_and_store16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; - PUBLIC ___kmp_compare_and_store16 _TEXT SEGMENT ALIGN 16 @@ -194,13 +179,10 @@ ___kmp_compare_and_store16 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store32 ; ; kmp_int32 ; __kmp_compare_and_store32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; - PUBLIC ___kmp_compare_and_store32 _TEXT SEGMENT ALIGN 16 @@ -222,13 +204,10 @@ ___kmp_compare_and_store32 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store64 ; ; kmp_int32 ; __kmp_compare_and_store64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; - PUBLIC ___kmp_compare_and_store64 _TEXT SEGMENT ALIGN 16 @@ -262,13 +241,10 @@ ___kmp_compare_and_store64 ENDP _TEXT 
ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed8 ; ; kmp_int8 ; __kmp_xchg_fixed8( volatile kmp_int8 *p, kmp_int8 d ); -; - PUBLIC ___kmp_xchg_fixed8 _TEXT SEGMENT ALIGN 16 @@ -286,13 +262,10 @@ ___kmp_xchg_fixed8 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed16 ; ; kmp_int16 ; __kmp_xchg_fixed16( volatile kmp_int16 *p, kmp_int16 d ); -; - PUBLIC ___kmp_xchg_fixed16 _TEXT SEGMENT ALIGN 16 @@ -310,13 +283,10 @@ ___kmp_xchg_fixed16 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed32 ; ; kmp_int32 ; __kmp_xchg_fixed32( volatile kmp_int32 *p, kmp_int32 d ); -; - PUBLIC ___kmp_xchg_fixed32 _TEXT SEGMENT ALIGN 16 @@ -335,13 +305,10 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_real32 ; ; kmp_real32 ; __kmp_xchg_real32( volatile kmp_real32 *p, kmp_real32 d ); -; - PUBLIC ___kmp_xchg_real32 _TEXT SEGMENT ALIGN 16 @@ -378,13 +345,10 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store_ret8 ; ; kmp_int8 ; __kmp_compare_and_store_ret8( volatile kmp_int8 *p, kmp_int8 cv, kmp_int8 sv ); -; - PUBLIC ___kmp_compare_and_store_ret8 _TEXT SEGMENT ALIGN 16 @@ -404,13 +368,10 @@ ___kmp_compare_and_store_ret8 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store_ret16 ; ; kmp_int16 ; __kmp_compare_and_store_ret16( volatile kmp_int16 *p, kmp_int16 cv, kmp_int16 sv ); -; - PUBLIC ___kmp_compare_and_store_ret16 _TEXT SEGMENT ALIGN 16 @@ -430,13 +391,10 @@ ___kmp_compare_and_store_ret16 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store_ret32 ; ; kmp_int32 ; __kmp_compare_and_store_ret32( volatile kmp_int32 *p, kmp_int32 cv, kmp_int32 sv ); -; - PUBLIC ___kmp_compare_and_store_ret32 _TEXT SEGMENT ALIGN 16 @@ -456,13 +414,10 @@ ___kmp_compare_and_store_ret32 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_compare_and_store_ret64 ; ; kmp_int64 ; __kmp_compare_and_store_ret64( volatile kmp_int64 *p, kmp_int64 cv, kmp_int64 sv ); -; - PUBLIC ___kmp_compare_and_store_ret64 _TEXT SEGMENT ALIGN 16 @@ -494,7 +449,6 @@ ___kmp_compare_and_store_ret64 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_load_x87_fpu_control_word ; ; void @@ -502,7 +456,6 @@ _TEXT ENDS ; ; parameters: ; p: 4(%esp) - PUBLIC ___kmp_load_x87_fpu_control_word _TEXT SEGMENT ALIGN 16 @@ -518,7 +471,6 @@ ___kmp_load_x87_fpu_control_word ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_store_x87_fpu_control_word ; ; void @@ -526,7 +478,6 @@ _TEXT ENDS ; ; parameters: ; p: 4(%esp) - PUBLIC ___kmp_store_x87_fpu_control_word _TEXT SEGMENT ALIGN 16 @@ -542,13 +493,10 @@ ___kmp_store_x87_fpu_control_word ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_clear_x87_fpu_status_word ; ; void ; __kmp_clear_x87_fpu_status_word(); -; - PUBLIC ___kmp_clear_x87_fpu_status_word _TEXT SEGMENT ALIGN 16 @@ -563,7 +511,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION 
___kmp_invoke_microtask ; ; typedef void (*microtask_t)( int *gtid, int *tid, ... ); @@ -572,8 +519,6 @@ _TEXT ENDS ; __kmp_invoke_microtask( microtask_t pkfn, ; int gtid, int tid, ; int argc, void *p_argv[] ) -; - PUBLIC ___kmp_invoke_microtask _TEXT SEGMENT ALIGN 16 @@ -677,7 +622,6 @@ endif ifdef _M_AMD64 ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_x86_cpuid ; ; void @@ -687,7 +631,6 @@ ifdef _M_AMD64 ; mode: ecx ; mode2: edx ; cpuid_buffer: r8 - PUBLIC __kmp_x86_cpuid _TEXT SEGMENT ALIGN 16 @@ -722,7 +665,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_test_then_add32 ; ; kmp_int32 @@ -733,7 +675,6 @@ _TEXT ENDS ; d: edx ; ; return: eax - PUBLIC __kmp_test_then_add32 _TEXT SEGMENT ALIGN 16 @@ -748,7 +689,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_test_then_add64 ; ; kmp_int32 @@ -759,7 +699,6 @@ _TEXT ENDS ; d: rdx ; ; return: rax - PUBLIC __kmp_test_then_add64 _TEXT SEGMENT ALIGN 16 @@ -774,7 +713,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store8 ; ; kmp_int8 @@ -785,7 +723,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store8 _TEXT SEGMENT ALIGN 16 @@ -804,7 +741,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store16 ; ; kmp_int16 @@ -815,7 +751,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store16 _TEXT SEGMENT ALIGN 16 @@ -834,7 +769,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store32 ; ; kmp_int32 @@ -845,7 +779,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store32 _TEXT SEGMENT ALIGN 16 @@ -864,7 +797,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store64 ; ; kmp_int32 @@ -875,7 +807,6 @@ _TEXT ENDS ; sv: r8 ; ; return: eax - PUBLIC __kmp_compare_and_store64 _TEXT SEGMENT ALIGN 16 @@ -894,7 +825,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed8 ; ; kmp_int8 @@ -905,7 +835,6 @@ _TEXT ENDS ; d: dl ; ; return: al - PUBLIC __kmp_xchg_fixed8 _TEXT SEGMENT ALIGN 16 @@ -921,7 +850,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed16 ; ; kmp_int16 @@ -932,7 +860,6 @@ _TEXT ENDS ; d: dx ; ; return: ax - PUBLIC __kmp_xchg_fixed16 _TEXT SEGMENT ALIGN 16 @@ -948,7 +875,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed32 ; ; kmp_int32 @@ -959,7 +885,6 @@ _TEXT ENDS ; d: edx ; ; return: eax - PUBLIC __kmp_xchg_fixed32 _TEXT SEGMENT ALIGN 16 @@ -974,7 +899,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION ___kmp_xchg_fixed64 ; ; kmp_int64 @@ -985,7 +909,6 @@ _TEXT ENDS ; d: rdx ; ; return: rax - PUBLIC __kmp_xchg_fixed64 _TEXT SEGMENT ALIGN 16 @@ -1000,7 +923,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store_ret8 ; ; kmp_int8 @@ -1011,7 +933,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store_ret8 _TEXT SEGMENT ALIGN 16 @@ -1030,7 +951,6 @@ _TEXT ENDS 
;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store_ret16 ; ; kmp_int16 @@ -1041,7 +961,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store_ret16 _TEXT SEGMENT ALIGN 16 @@ -1058,7 +977,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store_ret32 ; ; kmp_int32 @@ -1069,7 +987,6 @@ _TEXT ENDS ; sv: r8d ; ; return: eax - PUBLIC __kmp_compare_and_store_ret32 _TEXT SEGMENT ALIGN 16 @@ -1086,7 +1003,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store_ret64 ; ; kmp_int64 @@ -1097,7 +1013,6 @@ _TEXT ENDS ; sv: r8 ; ; return: rax - PUBLIC __kmp_compare_and_store_ret64 _TEXT SEGMENT ALIGN 16 @@ -1114,7 +1029,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_compare_and_store_loop8 ; ; kmp_int8 @@ -1125,7 +1039,6 @@ _TEXT ENDS ; sv: r8d ; ; return: al - PUBLIC __kmp_compare_and_store_loop8 _TEXT SEGMENT ALIGN 16 @@ -1153,7 +1066,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_xchg_real32 ; ; kmp_real32 @@ -1164,7 +1076,6 @@ _TEXT ENDS ; d: xmm1 (lower 4 bytes) ; ; return: xmm0 (lower 4 bytes) - PUBLIC __kmp_xchg_real32 _TEXT SEGMENT ALIGN 16 @@ -1182,7 +1093,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_xchg_real64 ; ; kmp_real64 @@ -1193,7 +1103,6 @@ _TEXT ENDS ; d: xmm1 (lower 8 bytes) ; ; return: xmm0 (lower 8 bytes) - PUBLIC __kmp_xchg_real64 _TEXT SEGMENT ALIGN 16 @@ -1210,7 +1119,6 @@ __kmp_xchg_real64 ENDP _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_load_x87_fpu_control_word ; ; void @@ -1218,8 +1126,6 @@ _TEXT ENDS ; ; parameters: ; p: rcx -; - PUBLIC __kmp_load_x87_fpu_control_word _TEXT SEGMENT ALIGN 16 @@ -1233,7 +1139,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_store_x87_fpu_control_word ; ; void @@ -1241,8 +1146,6 @@ _TEXT ENDS ; ; parameters: ; p: rcx -; - PUBLIC __kmp_store_x87_fpu_control_word _TEXT SEGMENT ALIGN 16 @@ -1256,13 +1159,10 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_clear_x87_fpu_status_word ; ; void ; __kmp_clear_x87_fpu_status_word() -; - PUBLIC __kmp_clear_x87_fpu_status_word _TEXT SEGMENT ALIGN 16 @@ -1276,7 +1176,6 @@ _TEXT ENDS ;------------------------------------------------------------------------ -; ; FUNCTION __kmp_invoke_microtask ; ; typedef void (*microtask_t)( int *gtid, int *tid, ... 
); @@ -1307,8 +1206,6 @@ _TEXT ENDS ; r10: used to hold pkfn function pointer argument ; ; return: eax (always 1/TRUE) -; - $_pkfn = 16 $_gtid = 24 $_tid = 32 diff --git a/openmp/runtime/src/z_Windows_NT-586_util.cpp b/openmp/runtime/src/z_Windows_NT-586_util.cpp index d7697d5..4e21f7e 100644 --- a/openmp/runtime/src/z_Windows_NT-586_util.cpp +++ b/openmp/runtime/src/z_Windows_NT-586_util.cpp @@ -17,147 +17,118 @@ #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to - * use compare_and_store for these routines - */ + use compare_and_store for these routines */ -kmp_int8 -__kmp_test_then_or8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; +kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int8 old_value, new_value; - old_value = TCR_1( *p ); - new_value = old_value | d; + old_value = TCR_1(*p); + new_value = old_value | d; - while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value | d; - } - return old_value; + while (!__kmp_compare_and_store8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); + new_value = old_value | d; + } + return old_value; } -kmp_int8 -__kmp_test_then_and8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int8 old_value, new_value; +kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int8 old_value, new_value; - old_value = TCR_1( *p ); - new_value = old_value & d; + old_value = TCR_1(*p); + new_value = old_value & d; - while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value & d; - } - return old_value; + while (!__kmp_compare_and_store8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); + new_value = old_value & d; + } + return old_value; } -kmp_int32 -__kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; +kmp_int32 __kmp_test_then_or32(volatile kmp_int32 *p, kmp_int32 d) { + kmp_int32 old_value, new_value; - old_value = TCR_4( *p ); - new_value = old_value | d; + old_value = TCR_4(*p); + new_value = old_value | d; - while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value | d; - } - return old_value; + while (!__kmp_compare_and_store32(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_4(*p); + new_value = old_value | d; + } + return old_value; } -kmp_int32 -__kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d ) -{ - kmp_int32 old_value, new_value; +kmp_int32 __kmp_test_then_and32(volatile kmp_int32 *p, kmp_int32 d) { + kmp_int32 old_value, new_value; - old_value = TCR_4( *p ); - new_value = old_value & d; + old_value = TCR_4(*p); + new_value = old_value & d; - while ( ! 
__kmp_compare_and_store32 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_4( *p ); - new_value = old_value & d; - } - return old_value; + while (!__kmp_compare_and_store32(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_4(*p); + new_value = old_value & d; + } + return old_value; } -kmp_int8 -__kmp_test_then_add8( volatile kmp_int8 *p, kmp_int8 d ) -{ - kmp_int64 old_value, new_value; +kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) { + kmp_int64 old_value, new_value; - old_value = TCR_1( *p ); + old_value = TCR_1(*p); + new_value = old_value + d; + while (!__kmp_compare_and_store8(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_1(*p); new_value = old_value + d; - while ( ! __kmp_compare_and_store8 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_1( *p ); - new_value = old_value + d; - } - return old_value; + } + return old_value; } #if KMP_ARCH_X86 -kmp_int64 -__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; - - old_value = TCR_8( *p ); +kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; + + old_value = TCR_8(*p); + new_value = old_value + d; + while (!__kmp_compare_and_store64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); new_value = old_value + d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value + d; - } - return old_value; + } + return old_value; } #endif /* KMP_ARCH_X86 */ -kmp_int64 -__kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; +kmp_int64 __kmp_test_then_or64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; - old_value = TCR_8( *p ); + old_value = TCR_8(*p); + new_value = old_value | d; + while (!__kmp_compare_and_store64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); new_value = old_value | d; - while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value | d; - } - - return old_value; + } + + return old_value; } -kmp_int64 -__kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d ) -{ - kmp_int64 old_value, new_value; +kmp_int64 __kmp_test_then_and64(volatile kmp_int64 *p, kmp_int64 d) { + kmp_int64 old_value, new_value; - old_value = TCR_8( *p ); + old_value = TCR_8(*p); + new_value = old_value & d; + while (!__kmp_compare_and_store64(p, old_value, new_value)) { + KMP_CPU_PAUSE(); + old_value = TCR_8(*p); new_value = old_value & d; - while ( ! 
__kmp_compare_and_store64 ( p, old_value, new_value ) ) - { - KMP_CPU_PAUSE(); - old_value = TCR_8( *p ); - new_value = old_value & d; - } - - return old_value; + } + + return old_value; } #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - diff --git a/openmp/runtime/src/z_Windows_NT_util.cpp b/openmp/runtime/src/z_Windows_NT_util.cpp index aa1edac..4854d9d 100644 --- a/openmp/runtime/src/z_Windows_NT_util.cpp +++ b/openmp/runtime/src/z_Windows_NT_util.cpp @@ -14,114 +14,115 @@ #include "kmp.h" -#include "kmp_itt.h" +#include "kmp_affinity.h" #include "kmp_i18n.h" #include "kmp_io.h" +#include "kmp_itt.h" #include "kmp_wait_release.h" -#include "kmp_affinity.h" /* This code is related to NtQuerySystemInformation() function. This function is used in the Load balance algorithm for OMP_DYNAMIC=true to find the number of running threads in the system. */ +#include // UNICODE_STRING #include -#include // UNICODE_STRING enum SYSTEM_INFORMATION_CLASS { - SystemProcessInformation = 5 + SystemProcessInformation = 5 }; // SYSTEM_INFORMATION_CLASS struct CLIENT_ID { - HANDLE UniqueProcess; - HANDLE UniqueThread; + HANDLE UniqueProcess; + HANDLE UniqueThread; }; // struct CLIENT_ID enum THREAD_STATE { - StateInitialized, - StateReady, - StateRunning, - StateStandby, - StateTerminated, - StateWait, - StateTransition, - StateUnknown + StateInitialized, + StateReady, + StateRunning, + StateStandby, + StateTerminated, + StateWait, + StateTransition, + StateUnknown }; // enum THREAD_STATE struct VM_COUNTERS { - SIZE_T PeakVirtualSize; - SIZE_T VirtualSize; - ULONG PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; - SIZE_T PrivatePageCount; + SIZE_T PeakVirtualSize; + SIZE_T VirtualSize; + ULONG PageFaultCount; + SIZE_T PeakWorkingSetSize; + SIZE_T WorkingSetSize; + SIZE_T QuotaPeakPagedPoolUsage; + SIZE_T QuotaPagedPoolUsage; + SIZE_T QuotaPeakNonPagedPoolUsage; + SIZE_T QuotaNonPagedPoolUsage; + SIZE_T PagefileUsage; + SIZE_T PeakPagefileUsage; + SIZE_T PrivatePageCount; }; // struct VM_COUNTERS struct SYSTEM_THREAD { - LARGE_INTEGER KernelTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER CreateTime; - ULONG WaitTime; - LPVOID StartAddress; - CLIENT_ID ClientId; - DWORD Priority; - LONG BasePriority; - ULONG ContextSwitchCount; - THREAD_STATE State; - ULONG WaitReason; + LARGE_INTEGER KernelTime; + LARGE_INTEGER UserTime; + LARGE_INTEGER CreateTime; + ULONG WaitTime; + LPVOID StartAddress; + CLIENT_ID ClientId; + DWORD Priority; + LONG BasePriority; + ULONG ContextSwitchCount; + THREAD_STATE State; + ULONG WaitReason; }; // SYSTEM_THREAD -KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, KernelTime ) == 0 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, KernelTime) == 0); #if KMP_ARCH_X86 - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 28 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 52 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 28); +KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 52); #else - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, StartAddress ) == 32 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_THREAD, State ) == 68 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, StartAddress) == 32); 
+KMP_BUILD_ASSERT(offsetof(SYSTEM_THREAD, State) == 68); #endif struct SYSTEM_PROCESS_INFORMATION { - ULONG NextEntryOffset; - ULONG NumberOfThreads; - LARGE_INTEGER Reserved[ 3 ]; - LARGE_INTEGER CreateTime; - LARGE_INTEGER UserTime; - LARGE_INTEGER KernelTime; - UNICODE_STRING ImageName; - DWORD BasePriority; - HANDLE ProcessId; - HANDLE ParentProcessId; - ULONG HandleCount; - ULONG Reserved2[ 2 ]; - VM_COUNTERS VMCounters; - IO_COUNTERS IOCounters; - SYSTEM_THREAD Threads[ 1 ]; + ULONG NextEntryOffset; + ULONG NumberOfThreads; + LARGE_INTEGER Reserved[3]; + LARGE_INTEGER CreateTime; + LARGE_INTEGER UserTime; + LARGE_INTEGER KernelTime; + UNICODE_STRING ImageName; + DWORD BasePriority; + HANDLE ProcessId; + HANDLE ParentProcessId; + ULONG HandleCount; + ULONG Reserved2[2]; + VM_COUNTERS VMCounters; + IO_COUNTERS IOCounters; + SYSTEM_THREAD Threads[1]; }; // SYSTEM_PROCESS_INFORMATION -typedef SYSTEM_PROCESS_INFORMATION * PSYSTEM_PROCESS_INFORMATION; +typedef SYSTEM_PROCESS_INFORMATION *PSYSTEM_PROCESS_INFORMATION; -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, NextEntryOffset ) == 0 ); -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, CreateTime ) == 32 ); -KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ImageName ) == 56 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, NextEntryOffset) == 0); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, CreateTime) == 32); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ImageName) == 56); #if KMP_ARCH_X86 - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 68 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 76 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 88 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 136 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 184 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 68); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 76); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 88); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 136); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 184); #else - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, ProcessId ) == 80 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, HandleCount ) == 96 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, VMCounters ) == 112 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, IOCounters ) == 208 ); - KMP_BUILD_ASSERT( offsetof( SYSTEM_PROCESS_INFORMATION, Threads ) == 256 ); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, ProcessId) == 80); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, HandleCount) == 96); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, VMCounters) == 112); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, IOCounters) == 208); +KMP_BUILD_ASSERT(offsetof(SYSTEM_PROCESS_INFORMATION, Threads) == 256); #endif -typedef NTSTATUS (NTAPI *NtQuerySystemInformation_t)( SYSTEM_INFORMATION_CLASS, PVOID, ULONG, PULONG ); +typedef NTSTATUS(NTAPI *NtQuerySystemInformation_t)(SYSTEM_INFORMATION_CLASS, + PVOID, ULONG, PULONG); NtQuerySystemInformation_t NtQuerySystemInformation = NULL; HMODULE ntdll = NULL; @@ -130,17 +131,14 @@ HMODULE ntdll = NULL; static HMODULE kernel32 = NULL; -/* ----------------------------------------------------------------------------------- */ -/* 
----------------------------------------------------------------------------------- */ - #if KMP_HANDLE_SIGNALS - typedef void (* sig_func_t )( int ); - static sig_func_t __kmp_sighldrs[ NSIG ]; - static int __kmp_siginstalled[ NSIG ]; +typedef void (*sig_func_t)(int); +static sig_func_t __kmp_sighldrs[NSIG]; +static int __kmp_siginstalled[NSIG]; #endif #if KMP_USE_MONITOR -static HANDLE __kmp_monitor_ev; +static HANDLE __kmp_monitor_ev; #endif static kmp_int64 __kmp_win32_time; double __kmp_win32_tick; @@ -148,1625 +146,1433 @@ double __kmp_win32_tick; int __kmp_init_runtime = FALSE; CRITICAL_SECTION __kmp_win32_section; -void -__kmp_win32_mutex_init( kmp_win32_mutex_t *mx ) -{ - InitializeCriticalSection( & mx->cs ); +void __kmp_win32_mutex_init(kmp_win32_mutex_t *mx) { + InitializeCriticalSection(&mx->cs); #if USE_ITT_BUILD - __kmp_itt_system_object_created( & mx->cs, "Critical Section" ); + __kmp_itt_system_object_created(&mx->cs, "Critical Section"); #endif /* USE_ITT_BUILD */ } -void -__kmp_win32_mutex_destroy( kmp_win32_mutex_t *mx ) -{ - DeleteCriticalSection( & mx->cs ); +void __kmp_win32_mutex_destroy(kmp_win32_mutex_t *mx) { + DeleteCriticalSection(&mx->cs); } -void -__kmp_win32_mutex_lock( kmp_win32_mutex_t *mx ) -{ - EnterCriticalSection( & mx->cs ); +void __kmp_win32_mutex_lock(kmp_win32_mutex_t *mx) { + EnterCriticalSection(&mx->cs); } -void -__kmp_win32_mutex_unlock( kmp_win32_mutex_t *mx ) -{ - LeaveCriticalSection( & mx->cs ); +void __kmp_win32_mutex_unlock(kmp_win32_mutex_t *mx) { + LeaveCriticalSection(&mx->cs); } -void -__kmp_win32_cond_init( kmp_win32_cond_t *cv ) -{ - cv->waiters_count_ = 0; - cv->wait_generation_count_ = 0; - cv->release_count_ = 0; - - /* Initialize the critical section */ - __kmp_win32_mutex_init( & cv->waiters_count_lock_ ); - - /* Create a manual-reset event. */ - cv->event_ = CreateEvent( NULL, // no security - TRUE, // manual-reset - FALSE, // non-signaled initially - NULL ); // unnamed +void __kmp_win32_cond_init(kmp_win32_cond_t *cv) { + cv->waiters_count_ = 0; + cv->wait_generation_count_ = 0; + cv->release_count_ = 0; + + /* Initialize the critical section */ + __kmp_win32_mutex_init(&cv->waiters_count_lock_); + + /* Create a manual-reset event. 
*/ + cv->event_ = CreateEvent(NULL, // no security + TRUE, // manual-reset + FALSE, // non-signaled initially + NULL); // unnamed #if USE_ITT_BUILD - __kmp_itt_system_object_created( cv->event_, "Event" ); + __kmp_itt_system_object_created(cv->event_, "Event"); #endif /* USE_ITT_BUILD */ } -void -__kmp_win32_cond_destroy( kmp_win32_cond_t *cv ) -{ - __kmp_win32_mutex_destroy( & cv->waiters_count_lock_ ); - __kmp_free_handle( cv->event_ ); - memset( cv, '\0', sizeof( *cv ) ); +void __kmp_win32_cond_destroy(kmp_win32_cond_t *cv) { + __kmp_win32_mutex_destroy(&cv->waiters_count_lock_); + __kmp_free_handle(cv->event_); + memset(cv, '\0', sizeof(*cv)); } /* TODO associate cv with a team instead of a thread so as to optimize - * the case where we wake up a whole team */ + the case where we wake up a whole team */ -void -__kmp_win32_cond_wait( kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, kmp_info_t *th, int need_decrease_load ) -{ - int my_generation; - int last_waiter; +void __kmp_win32_cond_wait(kmp_win32_cond_t *cv, kmp_win32_mutex_t *mx, + kmp_info_t *th, int need_decrease_load) { + int my_generation; + int last_waiter; - /* Avoid race conditions */ - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + /* Avoid race conditions */ + __kmp_win32_mutex_lock(&cv->waiters_count_lock_); - /* Increment count of waiters */ - cv->waiters_count_++; + /* Increment count of waiters */ + cv->waiters_count_++; - /* Store current generation in our activation record. */ - my_generation = cv->wait_generation_count_; + /* Store current generation in our activation record. */ + my_generation = cv->wait_generation_count_; - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); - __kmp_win32_mutex_unlock( mx ); + __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); + __kmp_win32_mutex_unlock(mx); - for (;;) { - int wait_done; + for (;;) { + int wait_done; - /* Wait until the event is signaled */ - WaitForSingleObject( cv->event_, INFINITE ); + /* Wait until the event is signaled */ + WaitForSingleObject(cv->event_, INFINITE); - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + __kmp_win32_mutex_lock(&cv->waiters_count_lock_); - /* Exit the loop when the event_> is signaled and - * there are still waiting threads from this - * that haven't been released from this wait yet. */ - wait_done = ( cv->release_count_ > 0 ) && - ( cv->wait_generation_count_ != my_generation ); + /* Exit the loop when the event_> is signaled and there are still + waiting threads from this that haven't been released + from this wait yet. 
*/ + wait_done = (cv->release_count_ > 0) && + (cv->wait_generation_count_ != my_generation); - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_); + __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); - /* there used to be a semicolon after the if statement, - * it looked like a bug, so i removed it */ - if( wait_done ) - break; - } + /* there used to be a semicolon after the if statement, it looked like a + bug, so i removed it */ + if (wait_done) + break; + } - __kmp_win32_mutex_lock( mx ); - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); + __kmp_win32_mutex_lock(mx); + __kmp_win32_mutex_lock(&cv->waiters_count_lock_); - cv->waiters_count_--; - cv->release_count_--; + cv->waiters_count_--; + cv->release_count_--; - last_waiter = ( cv->release_count_ == 0 ); + last_waiter = (cv->release_count_ == 0); - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); + __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); - if( last_waiter ) { - /* We're the last waiter to be notified, so reset the manual event. */ - ResetEvent( cv->event_ ); - } + if (last_waiter) { + /* We're the last waiter to be notified, so reset the manual event. */ + ResetEvent(cv->event_); + } } -void -__kmp_win32_cond_broadcast( kmp_win32_cond_t *cv ) -{ - __kmp_win32_mutex_lock( &cv->waiters_count_lock_ ); +void __kmp_win32_cond_broadcast(kmp_win32_cond_t *cv) { + __kmp_win32_mutex_lock(&cv->waiters_count_lock_); - if( cv->waiters_count_ > 0 ) { - SetEvent( cv->event_ ); - /* Release all the threads in this generation. */ + if (cv->waiters_count_ > 0) { + SetEvent(cv->event_); + /* Release all the threads in this generation. */ - cv->release_count_ = cv->waiters_count_; + cv->release_count_ = cv->waiters_count_; - /* Start a new generation. */ - cv->wait_generation_count_++; - } + /* Start a new generation. */ + cv->wait_generation_count_++; + } - __kmp_win32_mutex_unlock( &cv->waiters_count_lock_ ); + __kmp_win32_mutex_unlock(&cv->waiters_count_lock_); } -void -__kmp_win32_cond_signal( kmp_win32_cond_t *cv ) -{ - __kmp_win32_cond_broadcast( cv ); +void __kmp_win32_cond_signal(kmp_win32_cond_t *cv) { + __kmp_win32_cond_broadcast(cv); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_enable( int new_state ) -{ - if (__kmp_init_runtime) - LeaveCriticalSection( & __kmp_win32_section ); +void __kmp_enable(int new_state) { + if (__kmp_init_runtime) + LeaveCriticalSection(&__kmp_win32_section); } -void -__kmp_disable( int *old_state ) -{ - *old_state = 0; +void __kmp_disable(int *old_state) { + *old_state = 0; - if (__kmp_init_runtime) - EnterCriticalSection( & __kmp_win32_section ); + if (__kmp_init_runtime) + EnterCriticalSection(&__kmp_win32_section); } -void -__kmp_suspend_initialize( void ) -{ - /* do nothing */ +void __kmp_suspend_initialize(void) { /* do nothing */ } -static void -__kmp_suspend_initialize_thread( kmp_info_t *th ) -{ - if ( ! 
TCR_4( th->th.th_suspend_init ) ) { - /* this means we haven't initialized the suspension pthread objects for this thread - in this instance of the process */ - __kmp_win32_cond_init( &th->th.th_suspend_cv ); - __kmp_win32_mutex_init( &th->th.th_suspend_mx ); - TCW_4( th->th.th_suspend_init, TRUE ); - } +static void __kmp_suspend_initialize_thread(kmp_info_t *th) { + if (!TCR_4(th->th.th_suspend_init)) { + /* this means we haven't initialized the suspension pthread objects for this + thread in this instance of the process */ + __kmp_win32_cond_init(&th->th.th_suspend_cv); + __kmp_win32_mutex_init(&th->th.th_suspend_mx); + TCW_4(th->th.th_suspend_init, TRUE); + } } -void -__kmp_suspend_uninitialize_thread( kmp_info_t *th ) -{ - if ( TCR_4( th->th.th_suspend_init ) ) { - /* this means we have initialize the suspension pthread objects for this thread - in this instance of the process */ - __kmp_win32_cond_destroy( & th->th.th_suspend_cv ); - __kmp_win32_mutex_destroy( & th->th.th_suspend_mx ); - TCW_4( th->th.th_suspend_init, FALSE ); - } +void __kmp_suspend_uninitialize_thread(kmp_info_t *th) { + if (TCR_4(th->th.th_suspend_init)) { + /* this means we have initialize the suspension pthread objects for this + thread in this instance of the process */ + __kmp_win32_cond_destroy(&th->th.th_suspend_cv); + __kmp_win32_mutex_destroy(&th->th.th_suspend_mx); + TCW_4(th->th.th_suspend_init, FALSE); + } } /* This routine puts the calling thread to sleep after setting the - * sleep bit for the indicated flag variable to true. - */ + sleep bit for the indicated flag variable to true. */ template -static inline void __kmp_suspend_template( int th_gtid, C *flag ) -{ - kmp_info_t *th = __kmp_threads[th_gtid]; - int status; - typename C::flag_t old_spin; - - KF_TRACE( 30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", th_gtid, flag->get() ) ); - - __kmp_suspend_initialize_thread( th ); - __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); - - KF_TRACE( 10, ( "__kmp_suspend_template: T#%d setting sleep bit for flag's loc(%p)\n", - th_gtid, flag->get() ) ); - - /* TODO: shouldn't this use release semantics to ensure that __kmp_suspend_initialize_thread - gets called first? - */ - old_spin = flag->set_sleeping(); - - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d set sleep bit for flag's loc(%p)==%d\n", - th_gtid, flag->get(), *(flag->get()) ) ); - - if ( flag->done_check_val(old_spin) ) { - old_spin = flag->unset_sleeping(); - KF_TRACE( 5, ( "__kmp_suspend_template: T#%d false alarm, reset sleep bit for flag's loc(%p)\n", - th_gtid, flag->get()) ); - } else { +static inline void __kmp_suspend_template(int th_gtid, C *flag) { + kmp_info_t *th = __kmp_threads[th_gtid]; + int status; + typename C::flag_t old_spin; + + KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag's loc(%p)\n", + th_gtid, flag->get())); + + __kmp_suspend_initialize_thread(th); + __kmp_win32_mutex_lock(&th->th.th_suspend_mx); + + KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for flag's" + " loc(%p)\n", + th_gtid, flag->get())); + + /* TODO: shouldn't this use release semantics to ensure that + __kmp_suspend_initialize_thread gets called first? 
*/ + old_spin = flag->set_sleeping(); + + KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for flag's" + " loc(%p)==%d\n", + th_gtid, flag->get(), *(flag->get()))); + + if (flag->done_check_val(old_spin)) { + old_spin = flag->unset_sleeping(); + KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit " + "for flag's loc(%p)\n", + th_gtid, flag->get())); + } else { #ifdef DEBUG_SUSPEND - __kmp_suspend_count++; + __kmp_suspend_count++; #endif - /* Encapsulate in a loop as the documentation states that this may - * "with low probability" return when the condition variable has - * not been signaled or broadcast - */ - int deactivated = FALSE; - TCW_PTR(th->th.th_sleep_loc, (void *)flag); - while ( flag->is_sleeping() ) { - KF_TRACE( 15, ("__kmp_suspend_template: T#%d about to perform kmp_win32_cond_wait()\n", - th_gtid ) ); - // Mark the thread as no longer active (only in the first iteration of the loop). - if ( ! deactivated ) { - th->th.th_active = FALSE; - if ( th->th.th_active_in_pool ) { - th->th.th_active_in_pool = FALSE; - KMP_TEST_THEN_DEC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - KMP_DEBUG_ASSERT( TCR_4(__kmp_thread_pool_active_nth) >= 0 ); - } - deactivated = TRUE; - - __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); - } - else { - __kmp_win32_cond_wait( &th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, 0 ); - } + /* Encapsulate in a loop as the documentation states that this may "with + low probability" return when the condition variable has not been signaled + or broadcast */ + int deactivated = FALSE; + TCW_PTR(th->th.th_sleep_loc, (void *)flag); + while (flag->is_sleeping()) { + KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform " + "kmp_win32_cond_wait()\n", + th_gtid)); + // Mark the thread as no longer active (only in the first iteration of the + // loop). 
+ if (!deactivated) { + th->th.th_active = FALSE; + if (th->th.th_active_in_pool) { + th->th.th_active_in_pool = FALSE; + KMP_TEST_THEN_DEC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0); + } + deactivated = TRUE; + + __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, + 0); + } else { + __kmp_win32_cond_wait(&th->th.th_suspend_cv, &th->th.th_suspend_mx, 0, + 0); + } #ifdef KMP_DEBUG - if( flag->is_sleeping() ) { - KF_TRACE( 100, ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid )); - } + if (flag->is_sleeping()) { + KF_TRACE(100, + ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid)); + } #endif /* KMP_DEBUG */ - } // while + } // while - // Mark the thread as active again (if it was previous marked as inactive) - if ( deactivated ) { - th->th.th_active = TRUE; - if ( TCR_4(th->th.th_in_pool) ) { - KMP_TEST_THEN_INC32( - (kmp_int32 *) &__kmp_thread_pool_active_nth ); - th->th.th_active_in_pool = TRUE; - } - } + // Mark the thread as active again (if it was previous marked as inactive) + if (deactivated) { + th->th.th_active = TRUE; + if (TCR_4(th->th.th_in_pool)) { + KMP_TEST_THEN_INC32((kmp_int32 *)&__kmp_thread_pool_active_nth); + th->th.th_active_in_pool = TRUE; + } } + } - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); - KF_TRACE( 30, ("__kmp_suspend_template: T#%d exit\n", th_gtid ) ); + KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid)); } void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) { - __kmp_suspend_template(th_gtid, flag); + __kmp_suspend_template(th_gtid, flag); } - /* This routine signals the thread specified by target_gtid to wake up - * after setting the sleep bit indicated by the flag argument to FALSE - */ + after setting the sleep bit indicated by the flag argument to FALSE */ template -static inline void __kmp_resume_template( int target_gtid, C *flag ) -{ - kmp_info_t *th = __kmp_threads[target_gtid]; - int status; +static inline void __kmp_resume_template(int target_gtid, C *flag) { + kmp_info_t *th = __kmp_threads[target_gtid]; + int status; #ifdef KMP_DEBUG - int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; + int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1; #endif - KF_TRACE( 30, ( "__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", gtid, target_gtid ) ); + KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n", + gtid, target_gtid)); - __kmp_suspend_initialize_thread( th ); - __kmp_win32_mutex_lock( &th->th.th_suspend_mx ); + __kmp_suspend_initialize_thread(th); + __kmp_win32_mutex_lock(&th->th.th_suspend_mx); - if (!flag) { // coming from __kmp_null_resume_wrapper - flag = (C *)th->th.th_sleep_loc; - } + if (!flag) { // coming from __kmp_null_resume_wrapper + flag = (C *)th->th.th_sleep_loc; + } - // First, check if the flag is null or its type has changed. If so, someone else woke it up. 
- if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type simply shows what flag was cast to - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p)\n", - gtid, target_gtid, NULL ) ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - return; - } - else { - typename C::flag_t old_spin = flag->unset_sleeping(); - if ( !flag->is_sleeping_val(old_spin) ) { - KF_TRACE( 5, ( "__kmp_resume_template: T#%d exiting, thread T#%d already awake: flag's loc(%p): " - "%u => %u\n", - gtid, target_gtid, flag->get(), old_spin, *(flag->get()) ) ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); - return; - } + // First, check if the flag is null or its type has changed. If so, someone + // else woke it up. + if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type + // simply shows what + // flag was cast to + KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " + "awake: flag's loc(%p)\n", + gtid, target_gtid, NULL)); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); + return; + } else { + typename C::flag_t old_spin = flag->unset_sleeping(); + if (!flag->is_sleeping_val(old_spin)) { + KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already " + "awake: flag's loc(%p): %u => %u\n", + gtid, target_gtid, flag->get(), old_spin, *(flag->get()))); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); + return; } - TCW_PTR(th->th.th_sleep_loc, NULL); - - KF_TRACE( 5, ( "__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep bit for flag's loc(%p)\n", - gtid, target_gtid, flag->get() ) ); + } + TCW_PTR(th->th.th_sleep_loc, NULL); + KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset sleep " + "bit for flag's loc(%p)\n", + gtid, target_gtid, flag->get())); - __kmp_win32_cond_signal( &th->th.th_suspend_cv ); - __kmp_win32_mutex_unlock( &th->th.th_suspend_mx ); + __kmp_win32_cond_signal(&th->th.th_suspend_cv); + __kmp_win32_mutex_unlock(&th->th.th_suspend_mx); - KF_TRACE( 30, ( "__kmp_resume_template: T#%d exiting after signaling wake up for T#%d\n", - gtid, target_gtid ) ); + KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up" + " for T#%d\n", + gtid, target_gtid)); } void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) { - __kmp_resume_template(target_gtid, flag); + __kmp_resume_template(target_gtid, flag); } - -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_yield( int cond ) -{ - if (cond) - Sleep(0); +void __kmp_yield(int cond) { + if (cond) + Sleep(0); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_gtid_set_specific( int gtid ) -{ - if( __kmp_init_gtid ) { - KA_TRACE( 50, ("__kmp_gtid_set_specific: T#%d key:%d\n", - gtid, __kmp_gtid_threadprivate_key )); - if( ! 
TlsSetValue( __kmp_gtid_threadprivate_key, (LPVOID)(gtid+1)) ) - KMP_FATAL( TLSSetValueFailed ); - } else { - KA_TRACE( 50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n" ) ); - } +void __kmp_gtid_set_specific(int gtid) { + if (__kmp_init_gtid) { + KA_TRACE(50, ("__kmp_gtid_set_specific: T#%d key:%d\n", gtid, + __kmp_gtid_threadprivate_key)); + if (!TlsSetValue(__kmp_gtid_threadprivate_key, (LPVOID)(gtid + 1))) + KMP_FATAL(TLSSetValueFailed); + } else { + KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n")); + } } -int -__kmp_gtid_get_specific() -{ - int gtid; - if( !__kmp_init_gtid ) { - KA_TRACE( 50, ("__kmp_gtid_get_specific: runtime shutdown, returning KMP_GTID_SHUTDOWN\n" ) ); - return KMP_GTID_SHUTDOWN; - } - gtid = (int)(kmp_intptr_t)TlsGetValue( __kmp_gtid_threadprivate_key ); - if ( gtid == 0 ) { - gtid = KMP_GTID_DNE; - } - else { - gtid--; - } - KA_TRACE( 50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", - __kmp_gtid_threadprivate_key, gtid )); - return gtid; +int __kmp_gtid_get_specific() { + int gtid; + if (!__kmp_init_gtid) { + KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning " + "KMP_GTID_SHUTDOWN\n")); + return KMP_GTID_SHUTDOWN; + } + gtid = (int)(kmp_intptr_t)TlsGetValue(__kmp_gtid_threadprivate_key); + if (gtid == 0) { + gtid = KMP_GTID_DNE; + } else { + gtid--; + } + KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n", + __kmp_gtid_threadprivate_key, gtid)); + return gtid; } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void -__kmp_affinity_bind_thread( int proc ) -{ - if (__kmp_num_proc_groups > 1) { - // - // Form the GROUP_AFFINITY struct directly, rather than filling - // out a bit vector and calling __kmp_set_system_affinity(). - // - GROUP_AFFINITY ga; - KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups - * CHAR_BIT * sizeof(DWORD_PTR)))); - ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); - ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); - ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; - - KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); - if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { - DWORD error = GetLastError(); - if (__kmp_affinity_verbose) { // AC: continue silently if not verbose - kmp_msg_t err_code = KMP_ERR( error ); - __kmp_msg( - kmp_ms_warning, - KMP_MSG( CantSetThreadAffMask ), - err_code, - __kmp_msg_null - ); - if (__kmp_generate_warnings == kmp_warnings_off) { - __kmp_str_free(&err_code.str); - } - } +void __kmp_affinity_bind_thread(int proc) { + if (__kmp_num_proc_groups > 1) { + // Form the GROUP_AFFINITY struct directly, rather than filling + // out a bit vector and calling __kmp_set_system_affinity(). 
+ GROUP_AFFINITY ga; + KMP_DEBUG_ASSERT((proc >= 0) && (proc < (__kmp_num_proc_groups * CHAR_BIT * + sizeof(DWORD_PTR)))); + ga.Group = proc / (CHAR_BIT * sizeof(DWORD_PTR)); + ga.Mask = (unsigned long long)1 << (proc % (CHAR_BIT * sizeof(DWORD_PTR))); + ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0; + + KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL); + if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) { + DWORD error = GetLastError(); + if (__kmp_affinity_verbose) { // AC: continue silently if not verbose + kmp_msg_t err_code = KMP_ERR(error); + __kmp_msg(kmp_ms_warning, KMP_MSG(CantSetThreadAffMask), err_code, + __kmp_msg_null); + if (__kmp_generate_warnings == kmp_warnings_off) { + __kmp_str_free(&err_code.str); } - } else { - kmp_affin_mask_t *mask; - KMP_CPU_ALLOC_ON_STACK(mask); - KMP_CPU_ZERO(mask); - KMP_CPU_SET(proc, mask); - __kmp_set_system_affinity(mask, TRUE); - KMP_CPU_FREE_FROM_STACK(mask); + } } + } else { + kmp_affin_mask_t *mask; + KMP_CPU_ALLOC_ON_STACK(mask); + KMP_CPU_ZERO(mask); + KMP_CPU_SET(proc, mask); + __kmp_set_system_affinity(mask, TRUE); + KMP_CPU_FREE_FROM_STACK(mask); + } } -void -__kmp_affinity_determine_capable( const char *env_var ) -{ - // - // All versions of Windows* OS (since Win '95) support SetThreadAffinityMask(). - // +void __kmp_affinity_determine_capable(const char *env_var) { +// All versions of Windows* OS (since Win '95) support SetThreadAffinityMask(). #if KMP_GROUP_AFFINITY - KMP_AFFINITY_ENABLE(__kmp_num_proc_groups*sizeof(DWORD_PTR)); + KMP_AFFINITY_ENABLE(__kmp_num_proc_groups * sizeof(DWORD_PTR)); #else - KMP_AFFINITY_ENABLE(sizeof(DWORD_PTR)); + KMP_AFFINITY_ENABLE(sizeof(DWORD_PTR)); #endif - KA_TRACE( 10, ( - "__kmp_affinity_determine_capable: " - "Windows* OS affinity interface functional (mask size = %" KMP_SIZE_T_SPEC ").\n", - __kmp_affin_mask_size - ) ); + KA_TRACE(10, ("__kmp_affinity_determine_capable: " + "Windows* OS affinity interface functional (mask size = " + "%" KMP_SIZE_T_SPEC ").\n", + __kmp_affin_mask_size)); } -double -__kmp_read_cpu_time( void ) -{ - FILETIME CreationTime, ExitTime, KernelTime, UserTime; - int status; - double cpu_time; +double __kmp_read_cpu_time(void) { + FILETIME CreationTime, ExitTime, KernelTime, UserTime; + int status; + double cpu_time; - cpu_time = 0; + cpu_time = 0; - status = GetProcessTimes( GetCurrentProcess(), &CreationTime, - &ExitTime, &KernelTime, &UserTime ); + status = GetProcessTimes(GetCurrentProcess(), &CreationTime, &ExitTime, + &KernelTime, &UserTime); - if (status) { - double sec = 0; + if (status) { + double sec = 0; - sec += KernelTime.dwHighDateTime; - sec += UserTime.dwHighDateTime; + sec += KernelTime.dwHighDateTime; + sec += UserTime.dwHighDateTime; - /* Shift left by 32 bits */ - sec *= (double) (1 << 16) * (double) (1 << 16); + /* Shift left by 32 bits */ + sec *= (double)(1 << 16) * (double)(1 << 16); - sec += KernelTime.dwLowDateTime; - sec += UserTime.dwLowDateTime; + sec += KernelTime.dwLowDateTime; + sec += UserTime.dwLowDateTime; - cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; - } + cpu_time += (sec * 100.0) / KMP_NSEC_PER_SEC; + } - return cpu_time; + return cpu_time; } -int -__kmp_read_system_info( struct kmp_sys_info *info ) -{ - info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ - info->minflt = 0; /* the number of page faults serviced without any I/O */ - info->majflt = 0; /* the number of page faults serviced that required I/O */ - info->nswap = 0; /* the number of times a process was "swapped" 
out of memory */ - info->inblock = 0; /* the number of times the file system had to perform input */ - info->oublock = 0; /* the number of times the file system had to perform output */ - info->nvcsw = 0; /* the number of times a context switch was voluntarily */ - info->nivcsw = 0; /* the number of times a context switch was forced */ - - return 1; +int __kmp_read_system_info(struct kmp_sys_info *info) { + info->maxrss = 0; /* the maximum resident set size utilized (in kilobytes) */ + info->minflt = 0; /* the number of page faults serviced without any I/O */ + info->majflt = 0; /* the number of page faults serviced that required I/O */ + info->nswap = 0; // the number of times a process was "swapped" out of memory + info->inblock = 0; // the number of times the file system had to perform input + info->oublock = 0; // number of times the file system had to perform output + info->nvcsw = 0; /* the number of times a context switch was voluntarily */ + info->nivcsw = 0; /* the number of times a context switch was forced */ + + return 1; } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - - -void -__kmp_runtime_initialize( void ) -{ - SYSTEM_INFO info; - kmp_str_buf_t path; - UINT path_size; +void __kmp_runtime_initialize(void) { + SYSTEM_INFO info; + kmp_str_buf_t path; + UINT path_size; - if ( __kmp_init_runtime ) { - return; - }; + if (__kmp_init_runtime) { + return; + }; #if KMP_DYNAMIC_LIB - /* Pin dynamic library for the lifetime of application */ - { - // First, turn off error message boxes - UINT err_mode = SetErrorMode (SEM_FAILCRITICALERRORS); - HMODULE h; - BOOL ret = GetModuleHandleEx( GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS - |GET_MODULE_HANDLE_EX_FLAG_PIN, - (LPCTSTR)&__kmp_serial_initialize, &h); - KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded"); - SetErrorMode (err_mode); // Restore error mode - KA_TRACE( 10, ("__kmp_runtime_initialize: dynamic library pinned\n") ); - } + /* Pin dynamic library for the lifetime of application */ + { + // First, turn off error message boxes + UINT err_mode = SetErrorMode(SEM_FAILCRITICALERRORS); + HMODULE h; + BOOL ret = GetModuleHandleEx(GET_MODULE_HANDLE_EX_FLAG_FROM_ADDRESS | + GET_MODULE_HANDLE_EX_FLAG_PIN, + (LPCTSTR)&__kmp_serial_initialize, &h); + KMP_DEBUG_ASSERT2(h && ret, "OpenMP RTL cannot find itself loaded"); + SetErrorMode(err_mode); // Restore error mode + KA_TRACE(10, ("__kmp_runtime_initialize: dynamic library pinned\n")); + } #endif - InitializeCriticalSection( & __kmp_win32_section ); + InitializeCriticalSection(&__kmp_win32_section); #if USE_ITT_BUILD - __kmp_itt_system_object_created( & __kmp_win32_section, "Critical Section" ); + __kmp_itt_system_object_created(&__kmp_win32_section, "Critical Section"); #endif /* USE_ITT_BUILD */ - __kmp_initialize_system_tick(); - - #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) - if ( ! __kmp_cpuinfo.initialized ) { - __kmp_query_cpuid( & __kmp_cpuinfo ); - }; // if - #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - - /* Set up minimum number of threads to switch to TLS gtid */ - #if KMP_OS_WINDOWS && ! defined KMP_DYNAMIC_LIB - // Windows* OS, static library. - /* - New thread may use stack space previously used by another thread, currently terminated. - On Windows* OS, in case of static linking, we do not know the moment of thread termination, - and our structures (__kmp_threads and __kmp_root arrays) are still keep info about dead - threads. 
This leads to problem in __kmp_get_global_thread_id() function: it wrongly - finds gtid (by searching through stack addresses of all known threads) for unregistered - foreign tread. - - Setting __kmp_tls_gtid_min to 0 workarounds this problem: __kmp_get_global_thread_id() - does not search through stacks, but get gtid from TLS immediately. - - --ln - */ - __kmp_tls_gtid_min = 0; - #else - __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; - #endif - - /* for the static library */ - if ( !__kmp_gtid_threadprivate_key ) { - __kmp_gtid_threadprivate_key = TlsAlloc(); - if( __kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES ) { - KMP_FATAL( TLSOutOfIndexes ); - } - } + __kmp_initialize_system_tick(); +#if (KMP_ARCH_X86 || KMP_ARCH_X86_64) + if (!__kmp_cpuinfo.initialized) { + __kmp_query_cpuid(&__kmp_cpuinfo); + }; // if +#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - // - // Load ntdll.dll. - // - /* - Simple - GetModuleHandle( "ntdll.dl" ) - is not suitable due to security issue (see - http://www.microsoft.com/technet/security/advisory/2269637.mspx). We have to specify full - path to the library. - */ - __kmp_str_buf_init( & path ); - path_size = GetSystemDirectory( path.str, path.size ); - KMP_DEBUG_ASSERT( path_size > 0 ); - if ( path_size >= path.size ) { - // - // Buffer is too short. Expand the buffer and try again. - // - __kmp_str_buf_reserve( & path, path_size ); - path_size = GetSystemDirectory( path.str, path.size ); - KMP_DEBUG_ASSERT( path_size > 0 ); - }; // if - if ( path_size > 0 && path_size < path.size ) { - // - // Now we have system directory name in the buffer. - // Append backslash and name of dll to form full path, - // - path.used = path_size; - __kmp_str_buf_print( & path, "\\%s", "ntdll.dll" ); - - // - // Now load ntdll using full path. - // - ntdll = GetModuleHandle( path.str ); - } +/* Set up minimum number of threads to switch to TLS gtid */ +#if KMP_OS_WINDOWS && !defined KMP_DYNAMIC_LIB + // Windows* OS, static library. + /* New thread may use stack space previously used by another thread, + currently terminated. On Windows* OS, in case of static linking, we do not + know the moment of thread termination, and our structures (__kmp_threads + and __kmp_root arrays) are still keep info about dead threads. This leads + to problem in __kmp_get_global_thread_id() function: it wrongly finds gtid + (by searching through stack addresses of all known threads) for + unregistered foreign tread. + + Setting __kmp_tls_gtid_min to 0 workarounds this problem: + __kmp_get_global_thread_id() does not search through stacks, but get gtid + from TLS immediately. + --ln + */ + __kmp_tls_gtid_min = 0; +#else + __kmp_tls_gtid_min = KMP_TLS_GTID_MIN; +#endif - KMP_DEBUG_ASSERT( ntdll != NULL ); - if ( ntdll != NULL ) { - NtQuerySystemInformation = (NtQuerySystemInformation_t) GetProcAddress( ntdll, "NtQuerySystemInformation" ); + /* for the static library */ + if (!__kmp_gtid_threadprivate_key) { + __kmp_gtid_threadprivate_key = TlsAlloc(); + if (__kmp_gtid_threadprivate_key == TLS_OUT_OF_INDEXES) { + KMP_FATAL(TLSOutOfIndexes); } - KMP_DEBUG_ASSERT( NtQuerySystemInformation != NULL ); + } + + // Load ntdll.dll. + /* Simple GetModuleHandle( "ntdll.dl" ) is not suitable due to security issue + (see http://www.microsoft.com/technet/security/advisory/2269637.mspx). We + have to specify full path to the library. */ + __kmp_str_buf_init(&path); + path_size = GetSystemDirectory(path.str, path.size); + KMP_DEBUG_ASSERT(path_size > 0); + if (path_size >= path.size) { + // Buffer is too short. 
Expand the buffer and try again. + __kmp_str_buf_reserve(&path, path_size); + path_size = GetSystemDirectory(path.str, path.size); + KMP_DEBUG_ASSERT(path_size > 0); + }; // if + if (path_size > 0 && path_size < path.size) { + // Now we have system directory name in the buffer. + // Append backslash and name of dll to form full path, + path.used = path_size; + __kmp_str_buf_print(&path, "\\%s", "ntdll.dll"); + + // Now load ntdll using full path. + ntdll = GetModuleHandle(path.str); + } + + KMP_DEBUG_ASSERT(ntdll != NULL); + if (ntdll != NULL) { + NtQuerySystemInformation = (NtQuerySystemInformation_t)GetProcAddress( + ntdll, "NtQuerySystemInformation"); + } + KMP_DEBUG_ASSERT(NtQuerySystemInformation != NULL); #if KMP_GROUP_AFFINITY - // - // Load kernel32.dll. - // Same caveat - must use full system path name. - // - if ( path_size > 0 && path_size < path.size ) { - // - // Truncate the buffer back to just the system path length, - // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". - // - path.used = path_size; - __kmp_str_buf_print( & path, "\\%s", "kernel32.dll" ); - - // - // Load kernel32.dll using full path. - // - kernel32 = GetModuleHandle( path.str ); - KA_TRACE( 10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str ) ); - - // - // Load the function pointers to kernel32.dll routines - // that may or may not exist on this system. - // - if ( kernel32 != NULL ) { - __kmp_GetActiveProcessorCount = (kmp_GetActiveProcessorCount_t) GetProcAddress( kernel32, "GetActiveProcessorCount" ); - __kmp_GetActiveProcessorGroupCount = (kmp_GetActiveProcessorGroupCount_t) GetProcAddress( kernel32, "GetActiveProcessorGroupCount" ); - __kmp_GetThreadGroupAffinity = (kmp_GetThreadGroupAffinity_t) GetProcAddress( kernel32, "GetThreadGroupAffinity" ); - __kmp_SetThreadGroupAffinity = (kmp_SetThreadGroupAffinity_t) GetProcAddress( kernel32, "SetThreadGroupAffinity" ); - - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount = %p\n", __kmp_GetActiveProcessorCount ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorGroupCount = %p\n", __kmp_GetActiveProcessorGroupCount ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity = %p\n", __kmp_GetThreadGroupAffinity ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity = %p\n", __kmp_SetThreadGroupAffinity ) ); - KA_TRACE( 10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", sizeof(kmp_affin_mask_t) ) ); - - // - // See if group affinity is supported on this system. - // If so, calculate the #groups and #procs. - // - // Group affinity was introduced with Windows* 7 OS and - // Windows* Server 2008 R2 OS. - // - if ( ( __kmp_GetActiveProcessorCount != NULL ) - && ( __kmp_GetActiveProcessorGroupCount != NULL ) - && ( __kmp_GetThreadGroupAffinity != NULL ) - && ( __kmp_SetThreadGroupAffinity != NULL ) - && ( ( __kmp_num_proc_groups - = __kmp_GetActiveProcessorGroupCount() ) > 1 ) ) { - // - // Calculate the total number of active OS procs. - // - int i; - - KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); - - __kmp_xproc = 0; - - for ( i = 0; i < __kmp_num_proc_groups; i++ ) { - DWORD size = __kmp_GetActiveProcessorCount( i ); - __kmp_xproc += size; - KA_TRACE( 10, ("__kmp_runtime_initialize: proc group %d size = %d\n", i, size ) ); - } - } - else { - KA_TRACE( 10, ("__kmp_runtime_initialize: %d processor groups detected\n", __kmp_num_proc_groups ) ); - } + // Load kernel32.dll. 
+ // Same caveat - must use full system path name. + if (path_size > 0 && path_size < path.size) { + // Truncate the buffer back to just the system path length, + // discarding "\\ntdll.dll", and replacing it with "kernel32.dll". + path.used = path_size; + __kmp_str_buf_print(&path, "\\%s", "kernel32.dll"); + + // Load kernel32.dll using full path. + kernel32 = GetModuleHandle(path.str); + KA_TRACE(10, ("__kmp_runtime_initialize: kernel32.dll = %s\n", path.str)); + + // Load the function pointers to kernel32.dll routines + // that may or may not exist on this system. + if (kernel32 != NULL) { + __kmp_GetActiveProcessorCount = + (kmp_GetActiveProcessorCount_t)GetProcAddress( + kernel32, "GetActiveProcessorCount"); + __kmp_GetActiveProcessorGroupCount = + (kmp_GetActiveProcessorGroupCount_t)GetProcAddress( + kernel32, "GetActiveProcessorGroupCount"); + __kmp_GetThreadGroupAffinity = + (kmp_GetThreadGroupAffinity_t)GetProcAddress( + kernel32, "GetThreadGroupAffinity"); + __kmp_SetThreadGroupAffinity = + (kmp_SetThreadGroupAffinity_t)GetProcAddress( + kernel32, "SetThreadGroupAffinity"); + + KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_GetActiveProcessorCount" + " = %p\n", + __kmp_GetActiveProcessorCount)); + KA_TRACE(10, ("__kmp_runtime_initialize: " + "__kmp_GetActiveProcessorGroupCount = %p\n", + __kmp_GetActiveProcessorGroupCount)); + KA_TRACE(10, ("__kmp_runtime_initialize:__kmp_GetThreadGroupAffinity" + " = %p\n", + __kmp_GetThreadGroupAffinity)); + KA_TRACE(10, ("__kmp_runtime_initialize: __kmp_SetThreadGroupAffinity" + " = %p\n", + __kmp_SetThreadGroupAffinity)); + KA_TRACE(10, ("__kmp_runtime_initialize: sizeof(kmp_affin_mask_t) = %d\n", + sizeof(kmp_affin_mask_t))); + + // See if group affinity is supported on this system. + // If so, calculate the #groups and #procs. + // + // Group affinity was introduced with Windows* 7 OS and + // Windows* Server 2008 R2 OS. + if ((__kmp_GetActiveProcessorCount != NULL) && + (__kmp_GetActiveProcessorGroupCount != NULL) && + (__kmp_GetThreadGroupAffinity != NULL) && + (__kmp_SetThreadGroupAffinity != NULL) && + ((__kmp_num_proc_groups = __kmp_GetActiveProcessorGroupCount()) > + 1)) { + // Calculate the total number of active OS procs. + int i; + + KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups" + " detected\n", + __kmp_num_proc_groups)); + + __kmp_xproc = 0; + + for (i = 0; i < __kmp_num_proc_groups; i++) { + DWORD size = __kmp_GetActiveProcessorCount(i); + __kmp_xproc += size; + KA_TRACE(10, ("__kmp_runtime_initialize: proc group %d size = %d\n", + i, size)); } + } else { + KA_TRACE(10, ("__kmp_runtime_initialize: %d processor groups" + " detected\n", + __kmp_num_proc_groups)); + } } - if ( __kmp_num_proc_groups <= 1 ) { - GetSystemInfo( & info ); - __kmp_xproc = info.dwNumberOfProcessors; - } -#else - GetSystemInfo( & info ); + } + if (__kmp_num_proc_groups <= 1) { + GetSystemInfo(&info); __kmp_xproc = info.dwNumberOfProcessors; + } +#else + GetSystemInfo(&info); + __kmp_xproc = info.dwNumberOfProcessors; #endif /* KMP_GROUP_AFFINITY */ - // - // If the OS said there were 0 procs, take a guess and use a value of 2. - // This is done for Linux* OS, also. Do we need error / warning? - // - if ( __kmp_xproc <= 0 ) { - __kmp_xproc = 2; - } + // If the OS said there were 0 procs, take a guess and use a value of 2. + // This is done for Linux* OS, also. Do we need error / warning? 
+ if (__kmp_xproc <= 0) { + __kmp_xproc = 2; + } - KA_TRACE( 5, ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc) ); + KA_TRACE(5, + ("__kmp_runtime_initialize: total processors = %d\n", __kmp_xproc)); - __kmp_str_buf_free( & path ); + __kmp_str_buf_free(&path); #if USE_ITT_BUILD - __kmp_itt_initialize(); + __kmp_itt_initialize(); #endif /* USE_ITT_BUILD */ - __kmp_init_runtime = TRUE; + __kmp_init_runtime = TRUE; } // __kmp_runtime_initialize -void -__kmp_runtime_destroy( void ) -{ - if ( ! __kmp_init_runtime ) { - return; - } +void __kmp_runtime_destroy(void) { + if (!__kmp_init_runtime) { + return; + } #if USE_ITT_BUILD - __kmp_itt_destroy(); + __kmp_itt_destroy(); #endif /* USE_ITT_BUILD */ - /* we can't DeleteCriticalsection( & __kmp_win32_section ); */ - /* due to the KX_TRACE() commands */ - KA_TRACE( 40, ("__kmp_runtime_destroy\n" )); + /* we can't DeleteCriticalsection( & __kmp_win32_section ); */ + /* due to the KX_TRACE() commands */ + KA_TRACE(40, ("__kmp_runtime_destroy\n")); - if( __kmp_gtid_threadprivate_key ) { - TlsFree( __kmp_gtid_threadprivate_key ); - __kmp_gtid_threadprivate_key = 0; - } + if (__kmp_gtid_threadprivate_key) { + TlsFree(__kmp_gtid_threadprivate_key); + __kmp_gtid_threadprivate_key = 0; + } - __kmp_affinity_uninitialize(); - DeleteCriticalSection( & __kmp_win32_section ); + __kmp_affinity_uninitialize(); + DeleteCriticalSection(&__kmp_win32_section); - ntdll = NULL; - NtQuerySystemInformation = NULL; + ntdll = NULL; + NtQuerySystemInformation = NULL; #if KMP_ARCH_X86_64 - kernel32 = NULL; - __kmp_GetActiveProcessorCount = NULL; - __kmp_GetActiveProcessorGroupCount = NULL; - __kmp_GetThreadGroupAffinity = NULL; - __kmp_SetThreadGroupAffinity = NULL; + kernel32 = NULL; + __kmp_GetActiveProcessorCount = NULL; + __kmp_GetActiveProcessorGroupCount = NULL; + __kmp_GetThreadGroupAffinity = NULL; + __kmp_SetThreadGroupAffinity = NULL; #endif // KMP_ARCH_X86_64 - __kmp_init_runtime = FALSE; + __kmp_init_runtime = FALSE; } +void __kmp_terminate_thread(int gtid) { + kmp_info_t *th = __kmp_threads[gtid]; -void -__kmp_terminate_thread( int gtid ) -{ - kmp_info_t *th = __kmp_threads[ gtid ]; - - if( !th ) return; + if (!th) + return; - KA_TRACE( 10, ("__kmp_terminate_thread: kill (%d)\n", gtid ) ); + KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid)); - if (TerminateThread( th->th.th_info.ds.ds_thread, (DWORD) -1) == FALSE) { - /* It's OK, the thread may have exited already */ - } - __kmp_free_handle( th->th.th_info.ds.ds_thread ); + if (TerminateThread(th->th.th_info.ds.ds_thread, (DWORD)-1) == FALSE) { + /* It's OK, the thread may have exited already */ + } + __kmp_free_handle(th->th.th_info.ds.ds_thread); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ +void __kmp_clear_system_time(void) { + BOOL status; + LARGE_INTEGER time; + status = QueryPerformanceCounter(&time); + __kmp_win32_time = (kmp_int64)time.QuadPart; +} -void -__kmp_clear_system_time( void ) -{ +void __kmp_initialize_system_tick(void) { + { BOOL status; - LARGE_INTEGER time; - status = QueryPerformanceCounter( & time ); - __kmp_win32_time = (kmp_int64) time.QuadPart; -} + LARGE_INTEGER freq; -void -__kmp_initialize_system_tick( void ) -{ - { - BOOL status; - LARGE_INTEGER freq; - - status = QueryPerformanceFrequency( & freq ); - if (! 
status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "QueryPerformanceFrequency()" ), - KMP_ERR( error ), - __kmp_msg_null - ); + status = QueryPerformanceFrequency(&freq); + if (!status) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, + KMP_MSG(FunctionError, "QueryPerformanceFrequency()"), + KMP_ERR(error), __kmp_msg_null); - } - else { - __kmp_win32_tick = ((double) 1.0) / (double) freq.QuadPart; - } + } else { + __kmp_win32_tick = ((double)1.0) / (double)freq.QuadPart; } + } } /* Calculate the elapsed wall clock time for the user */ -void -__kmp_elapsed( double *t ) -{ - BOOL status; - LARGE_INTEGER now; - status = QueryPerformanceCounter( & now ); - *t = ((double) now.QuadPart) * __kmp_win32_tick; +void __kmp_elapsed(double *t) { + BOOL status; + LARGE_INTEGER now; + status = QueryPerformanceCounter(&now); + *t = ((double)now.QuadPart) * __kmp_win32_tick; } /* Calculate the elapsed wall clock tick for the user */ -void -__kmp_elapsed_tick( double *t ) -{ - *t = __kmp_win32_tick; -} +void __kmp_elapsed_tick(double *t) { *t = __kmp_win32_tick; } -void -__kmp_read_system_time( double *delta ) -{ - if (delta != NULL) { - BOOL status; - LARGE_INTEGER now; +void __kmp_read_system_time(double *delta) { + if (delta != NULL) { + BOOL status; + LARGE_INTEGER now; - status = QueryPerformanceCounter( & now ); + status = QueryPerformanceCounter(&now); - *delta = ((double) (((kmp_int64) now.QuadPart) - __kmp_win32_time)) - * __kmp_win32_tick; - } + *delta = ((double)(((kmp_int64)now.QuadPart) - __kmp_win32_time)) * + __kmp_win32_tick; + } } /* Return the current time stamp in nsec */ -kmp_uint64 -__kmp_now_nsec() -{ - LARGE_INTEGER now; - QueryPerformanceCounter(&now); - return 1e9 * __kmp_win32_tick * now.QuadPart; +kmp_uint64 __kmp_now_nsec() { + LARGE_INTEGER now; + QueryPerformanceCounter(&now); + return 1e9 * __kmp_win32_tick * now.QuadPart; } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - -void * __stdcall -__kmp_launch_worker( void *arg ) -{ - volatile void *stack_data; - void *exit_val; - void *padding = 0; - kmp_info_t *this_thr = (kmp_info_t *) arg; - int gtid; +void *__stdcall __kmp_launch_worker(void *arg) { + volatile void *stack_data; + void *exit_val; + void *padding = 0; + kmp_info_t *this_thr = (kmp_info_t *)arg; + int gtid; - gtid = this_thr->th.th_info.ds.ds_gtid; - __kmp_gtid_set_specific( gtid ); + gtid = this_thr->th.th_info.ds.ds_gtid; + __kmp_gtid_set_specific(gtid); #ifdef KMP_TDATA_GTID - #error "This define causes problems with LoadLibrary() + declspec(thread) " \ +#error "This define causes problems with LoadLibrary() + declspec(thread) " \ "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ "reference: http://support.microsoft.com/kb/118816" - //__kmp_gtid = gtid; +//__kmp_gtid = gtid; #endif #if USE_ITT_BUILD - __kmp_itt_thread_name( gtid ); + __kmp_itt_thread_name(gtid); #endif /* USE_ITT_BUILD */ - __kmp_affinity_set_init_mask( gtid, FALSE ); + __kmp_affinity_set_init_mask(gtid, FALSE); #if KMP_ARCH_X86 || KMP_ARCH_X86_64 - // - // Set the FP control regs to be a copy of - // the parallel initialization thread's. - // - __kmp_clear_x87_fpu_status_word(); - __kmp_load_x87_fpu_control_word( &__kmp_init_x87_fpu_control_word ); - __kmp_load_mxcsr( &__kmp_init_mxcsr ); + // Set FP control regs to be a copy of the parallel initialization thread's. 
+ __kmp_clear_x87_fpu_status_word(); + __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word); + __kmp_load_mxcsr(&__kmp_init_mxcsr); #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ - if ( __kmp_stkoffset > 0 && gtid > 0 ) { - padding = KMP_ALLOCA( gtid * __kmp_stkoffset ); - } + if (__kmp_stkoffset > 0 && gtid > 0) { + padding = KMP_ALLOCA(gtid * __kmp_stkoffset); + } - KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); - this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); + KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); + this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); + TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); - if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid - TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); - KMP_ASSERT( this_thr -> th.th_info.ds.ds_stackgrow == FALSE ); - __kmp_check_stack_overlap( this_thr ); - } - KMP_MB(); - exit_val = __kmp_launch_thread( this_thr ); - KMP_FSYNC_RELEASING( &this_thr -> th.th_info.ds.ds_alive ); - TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); - KMP_MB(); - return exit_val; + if (TCR_4(__kmp_gtid_mode) < + 2) { // check stack only if it is used to get gtid + TCW_PTR(this_thr->th.th_info.ds.ds_stackbase, &stack_data); + KMP_ASSERT(this_thr->th.th_info.ds.ds_stackgrow == FALSE); + __kmp_check_stack_overlap(this_thr); + } + KMP_MB(); + exit_val = __kmp_launch_thread(this_thr); + KMP_FSYNC_RELEASING(&this_thr->th.th_info.ds.ds_alive); + TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); + KMP_MB(); + return exit_val; } #if KMP_USE_MONITOR /* The monitor thread controls all of the threads in the complex */ -void * __stdcall -__kmp_launch_monitor( void *arg ) -{ - DWORD wait_status; - kmp_thread_t monitor; - int status; - int interval; - kmp_info_t *this_thr = (kmp_info_t *) arg; - - KMP_DEBUG_ASSERT(__kmp_init_monitor); - TCW_4( __kmp_init_monitor, 2 ); // AC: Signal the library that monitor has started - // TODO: hide "2" in enum (like {true,false,started}) - this_thr -> th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - TCW_4( this_thr -> th.th_info.ds.ds_alive, TRUE ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - KA_TRACE( 10, ("__kmp_launch_monitor: launched\n" ) ); - - monitor = GetCurrentThread(); - - /* set thread priority */ - status = SetThreadPriority( monitor, THREAD_PRIORITY_HIGHEST ); - if (! status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadPriority ), - KMP_ERR( error ), - __kmp_msg_null - ); - } +void *__stdcall __kmp_launch_monitor(void *arg) { + DWORD wait_status; + kmp_thread_t monitor; + int status; + int interval; + kmp_info_t *this_thr = (kmp_info_t *)arg; + + KMP_DEBUG_ASSERT(__kmp_init_monitor); + TCW_4(__kmp_init_monitor, 2); // AC: Signal library that monitor has started + // TODO: hide "2" in enum (like {true,false,started}) + this_thr->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); + TCW_4(this_thr->th.th_info.ds.ds_alive, TRUE); + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + KA_TRACE(10, ("__kmp_launch_monitor: launched\n")); + + monitor = GetCurrentThread(); + + /* set thread priority */ + status = SetThreadPriority(monitor, THREAD_PRIORITY_HIGHEST); + if (!status) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadPriority), KMP_ERR(error), + __kmp_msg_null); + } - /* register us as monitor */ - __kmp_gtid_set_specific( KMP_GTID_MONITOR ); + /* register us as monitor */ + __kmp_gtid_set_specific(KMP_GTID_MONITOR); #ifdef KMP_TDATA_GTID - #error "This define causes problems with LoadLibrary() + declspec(thread) " \ +#error "This define causes problems with LoadLibrary() + declspec(thread) " \ "on Windows* OS. See CQ50564, tests kmp_load_library*.c and this MSDN " \ "reference: http://support.microsoft.com/kb/118816" - //__kmp_gtid = KMP_GTID_MONITOR; +//__kmp_gtid = KMP_GTID_MONITOR; #endif #if USE_ITT_BUILD - __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore monitor thread. + __kmp_itt_thread_ignore(); // Instruct Intel(R) Threading Tools to ignore +// monitor thread. #endif /* USE_ITT_BUILD */ - KMP_MB(); /* Flush all pending memory write invalidates. */ - - interval = ( 1000 / __kmp_monitor_wakeups ); /* in milliseconds */ + KMP_MB(); /* Flush all pending memory write invalidates. */ - while (! TCR_4(__kmp_global.g.g_done)) { - /* This thread monitors the state of the system */ + interval = (1000 / __kmp_monitor_wakeups); /* in milliseconds */ - KA_TRACE( 15, ( "__kmp_launch_monitor: update\n" ) ); + while (!TCR_4(__kmp_global.g.g_done)) { + /* This thread monitors the state of the system */ - wait_status = WaitForSingleObject( __kmp_monitor_ev, interval ); + KA_TRACE(15, ("__kmp_launch_monitor: update\n")); - if (wait_status == WAIT_TIMEOUT) { - TCW_4( __kmp_global.g.g_time.dt.t_value, - TCR_4( __kmp_global.g.g_time.dt.t_value ) + 1 ); - } + wait_status = WaitForSingleObject(__kmp_monitor_ev, interval); - KMP_MB(); /* Flush all pending memory write invalidates. */ + if (wait_status == WAIT_TIMEOUT) { + TCW_4(__kmp_global.g.g_time.dt.t_value, + TCR_4(__kmp_global.g.g_time.dt.t_value) + 1); } - KA_TRACE( 10, ("__kmp_launch_monitor: finished\n" ) ); - - status = SetThreadPriority( monitor, THREAD_PRIORITY_NORMAL ); - if (! status) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetThreadPriority ), - KMP_ERR( error ), - __kmp_msg_null - ); - } + KMP_MB(); /* Flush all pending memory write invalidates. */ + } - if (__kmp_global.g.g_abort != 0) { - /* now we need to terminate the worker threads */ - /* the value of t_abort is the signal we caught */ + KA_TRACE(10, ("__kmp_launch_monitor: finished\n")); - int gtid; + status = SetThreadPriority(monitor, THREAD_PRIORITY_NORMAL); + if (!status) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadPriority), KMP_ERR(error), + __kmp_msg_null); + } + + if (__kmp_global.g.g_abort != 0) { + /* now we need to terminate the worker threads */ + /* the value of t_abort is the signal we caught */ + int gtid; - KA_TRACE( 10, ("__kmp_launch_monitor: terminate sig=%d\n", (__kmp_global.g.g_abort) ) ); + KA_TRACE(10, ("__kmp_launch_monitor: terminate sig=%d\n", + (__kmp_global.g.g_abort))); - /* terminate the OpenMP worker threads */ - /* TODO this is not valid for sibling threads!! - * the uber master might not be 0 anymore.. */ - for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) - __kmp_terminate_thread( gtid ); + /* terminate the OpenMP worker threads */ + /* TODO this is not valid for sibling threads!! 
+ * the uber master might not be 0 anymore.. */ + for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid) + __kmp_terminate_thread(gtid); - __kmp_cleanup(); + __kmp_cleanup(); - Sleep( 0 ); + Sleep(0); - KA_TRACE( 10, ("__kmp_launch_monitor: raise sig=%d\n", (__kmp_global.g.g_abort) ) ); + KA_TRACE(10, + ("__kmp_launch_monitor: raise sig=%d\n", __kmp_global.g.g_abort)); - if (__kmp_global.g.g_abort > 0) { - raise( __kmp_global.g.g_abort ); - } + if (__kmp_global.g.g_abort > 0) { + raise(__kmp_global.g.g_abort); } + } - TCW_4( this_thr -> th.th_info.ds.ds_alive, FALSE ); + TCW_4(this_thr->th.th_info.ds.ds_alive, FALSE); - KMP_MB(); - return arg; + KMP_MB(); + return arg; } #endif -void -__kmp_create_worker( int gtid, kmp_info_t *th, size_t stack_size ) -{ - kmp_thread_t handle; - DWORD idThread; - - KA_TRACE( 10, ("__kmp_create_worker: try to create thread (%d)\n", gtid ) ); - - th->th.th_info.ds.ds_gtid = gtid; - - if ( KMP_UBER_GTID(gtid) ) { - int stack_data; - - /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for other threads to use. - Is it appropriate to just use GetCurrentThread? When should we close this handle? When - unregistering the root? - */ - { - BOOL rc; - rc = DuplicateHandle( - GetCurrentProcess(), - GetCurrentThread(), - GetCurrentProcess(), - &th->th.th_info.ds.ds_thread, - 0, - FALSE, - DUPLICATE_SAME_ACCESS - ); - KMP_ASSERT( rc ); - KA_TRACE( 10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, handle = %" KMP_UINTPTR_SPEC "\n", - (LPVOID)th, - th->th.th_info.ds.ds_thread ) ); - th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); - } - if ( TCR_4(__kmp_gtid_mode) < 2 ) { // check stack only if it is used to get gtid - /* we will dynamically update the stack range if gtid_mode == 1 */ - TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); - TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); - TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); - __kmp_check_stack_overlap( th ); - } - } - else { - KMP_MB(); /* Flush all pending memory write invalidates. */ - - /* Set stack size for this thread now. */ - KA_TRACE( 10, ( "__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC - " bytes\n", stack_size ) ); - - stack_size += gtid * __kmp_stkoffset; - - TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); - TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - - KA_TRACE( 10, ( "__kmp_create_worker: (before) stack_size = %" - KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, " - "&idThread = %p\n", - (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, - (LPVOID) th, &idThread ) ); - - handle = CreateThread( NULL, (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) __kmp_launch_worker, - (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); - - KA_TRACE( 10, ( "__kmp_create_worker: (after) stack_size = %" - KMP_SIZE_T_SPEC - " bytes, &__kmp_launch_worker = %p, th = %p, " - "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n", - (SIZE_T) stack_size, - (LPTHREAD_START_ROUTINE) & __kmp_launch_worker, - (LPVOID) th, idThread, handle ) ); - - if ( handle == 0 ) { - DWORD error = GetLastError(); - __kmp_msg(kmp_ms_fatal, KMP_MSG( CantCreateThread ), KMP_ERR( error ), __kmp_msg_null); - } else { - th->th.th_info.ds.ds_thread = handle; - } +void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) { + kmp_thread_t handle; + DWORD idThread; - KMP_MB(); /* Flush all pending memory write invalidates. 
*/ - } + KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid)); - KA_TRACE( 10, ("__kmp_create_worker: done creating thread (%d)\n", gtid ) ); -} + th->th.th_info.ds.ds_gtid = gtid; -int -__kmp_still_running(kmp_info_t *th) { - return (WAIT_TIMEOUT == WaitForSingleObject( th->th.th_info.ds.ds_thread, 0)); -} + if (KMP_UBER_GTID(gtid)) { + int stack_data; -#if KMP_USE_MONITOR -void -__kmp_create_monitor( kmp_info_t *th ) -{ - kmp_thread_t handle; - DWORD idThread; - int ideal, new_ideal; - - if( __kmp_dflt_blocktime == KMP_MAX_BLOCKTIME ) { - // We don't need monitor thread in case of MAX_BLOCKTIME - KA_TRACE( 10, ("__kmp_create_monitor: skipping monitor thread because of MAX blocktime\n" ) ); - th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op - th->th.th_info.ds.ds_gtid = 0; - TCW_4( __kmp_init_monitor, 2 ); // Signal to stop waiting for monitor creation - return; + /* TODO: GetCurrentThread() returns a pseudo-handle that is unsuitable for + other threads to use. Is it appropriate to just use GetCurrentThread? + When should we close this handle? When unregistering the root? */ + { + BOOL rc; + rc = DuplicateHandle(GetCurrentProcess(), GetCurrentThread(), + GetCurrentProcess(), &th->th.th_info.ds.ds_thread, 0, + FALSE, DUPLICATE_SAME_ACCESS); + KMP_ASSERT(rc); + KA_TRACE(10, (" __kmp_create_worker: ROOT Handle duplicated, th = %p, " + "handle = %" KMP_UINTPTR_SPEC "\n", + (LPVOID)th, th->th.th_info.ds.ds_thread)); + th->th.th_info.ds.ds_thread_id = GetCurrentThreadId(); } - KA_TRACE( 10, ("__kmp_create_monitor: try to create monitor\n" ) ); - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - __kmp_monitor_ev = CreateEvent( NULL, TRUE, FALSE, NULL ); - if ( __kmp_monitor_ev == NULL ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCreateEvent ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if -#if USE_ITT_BUILD - __kmp_itt_system_object_created( __kmp_monitor_ev, "Event" ); -#endif /* USE_ITT_BUILD */ + if (TCR_4(__kmp_gtid_mode) < 2) { // check stack only if used to get gtid + /* we will dynamically update the stack range if gtid_mode == 1 */ + TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); + TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); + TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE); + __kmp_check_stack_overlap(th); + } + } else { + KMP_MB(); /* Flush all pending memory write invalidates. */ - th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; - th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; + /* Set stack size for this thread now. */ + KA_TRACE(10, + ("__kmp_create_worker: stack_size = %" KMP_SIZE_T_SPEC " bytes\n", + stack_size)); - // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how - // to automatically expand stacksize based on CreateThread error code. 
- if ( __kmp_monitor_stksize == 0 ) { - __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; - } - if ( __kmp_monitor_stksize < __kmp_sys_min_stksize ) { - __kmp_monitor_stksize = __kmp_sys_min_stksize; - } + stack_size += gtid * __kmp_stkoffset; - KA_TRACE( 10, ("__kmp_create_monitor: requested stacksize = %d bytes\n", - (int) __kmp_monitor_stksize ) ); + TCW_PTR(th->th.th_info.ds.ds_stacksize, stack_size); + TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); - TCW_4( __kmp_global.g.g_time.dt.t_value, 0 ); + KA_TRACE(10, + ("__kmp_create_worker: (before) stack_size = %" KMP_SIZE_T_SPEC + " bytes, &__kmp_launch_worker = %p, th = %p, &idThread = %p\n", + (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, + (LPVOID)th, &idThread)); + + handle = CreateThread( + NULL, (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)__kmp_launch_worker, + (LPVOID)th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); + + KA_TRACE(10, + ("__kmp_create_worker: (after) stack_size = %" KMP_SIZE_T_SPEC + " bytes, &__kmp_launch_worker = %p, th = %p, " + "idThread = %u, handle = %" KMP_UINTPTR_SPEC "\n", + (SIZE_T)stack_size, (LPTHREAD_START_ROUTINE)&__kmp_launch_worker, + (LPVOID)th, idThread, handle)); - handle = CreateThread( NULL, (SIZE_T) __kmp_monitor_stksize, - (LPTHREAD_START_ROUTINE) __kmp_launch_monitor, - (LPVOID) th, STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread ); if (handle == 0) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCreateThread ), - KMP_ERR( error ), - __kmp_msg_null - ); + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantCreateThread), KMP_ERR(error), + __kmp_msg_null); + } else { + th->th.th_info.ds.ds_thread = handle; } - else - th->th.th_info.ds.ds_thread = handle; - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ + } - KA_TRACE( 10, ("__kmp_create_monitor: monitor created %p\n", - (void *) th->th.th_info.ds.ds_thread ) ); + KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid)); } -#endif -/* - Check to see if thread is still alive. - - NOTE: The ExitProcess(code) system call causes all threads to Terminate - with a exit_val = code. Because of this we can not rely on - exit_val having any particular value. So this routine may - return STILL_ALIVE in exit_val even after the thread is dead. 
-*/ - -int -__kmp_is_thread_alive( kmp_info_t * th, DWORD *exit_val ) -{ - DWORD rc; - rc = GetExitCodeThread( th->th.th_info.ds.ds_thread, exit_val ); - if ( rc == 0 ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "GetExitCodeThread()" ), - KMP_ERR( error ), - __kmp_msg_null - ); - }; // if - return ( *exit_val == STILL_ACTIVE ); +int __kmp_still_running(kmp_info_t *th) { + return (WAIT_TIMEOUT == WaitForSingleObject(th->th.th_info.ds.ds_thread, 0)); } +#if KMP_USE_MONITOR +void __kmp_create_monitor(kmp_info_t *th) { + kmp_thread_t handle; + DWORD idThread; + int ideal, new_ideal; + + if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) { + // We don't need monitor thread in case of MAX_BLOCKTIME + KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of " + "MAX blocktime\n")); + th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op + th->th.th_info.ds.ds_gtid = 0; + TCW_4(__kmp_init_monitor, 2); // Signal to stop waiting for monitor creation + return; + } + KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n")); -void -__kmp_exit_thread( - int exit_status -) { - ExitThread( exit_status ); -} // __kmp_exit_thread + KMP_MB(); /* Flush all pending memory write invalidates. */ -/* - This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). -*/ -static void -__kmp_reap_common( kmp_info_t * th ) -{ - DWORD exit_val; + __kmp_monitor_ev = CreateEvent(NULL, TRUE, FALSE, NULL); + if (__kmp_monitor_ev == NULL) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantCreateEvent), KMP_ERR(error), + __kmp_msg_null); + }; // if +#if USE_ITT_BUILD + __kmp_itt_system_object_created(__kmp_monitor_ev, "Event"); +#endif /* USE_ITT_BUILD */ - KMP_MB(); /* Flush all pending memory write invalidates. */ + th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR; + th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR; - KA_TRACE( 10, ( "__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid ) ); + // FIXME - on Windows* OS, if __kmp_monitor_stksize = 0, figure out how + // to automatically expand stacksize based on CreateThread error code. + if (__kmp_monitor_stksize == 0) { + __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE; + } + if (__kmp_monitor_stksize < __kmp_sys_min_stksize) { + __kmp_monitor_stksize = __kmp_sys_min_stksize; + } - /* - 2006-10-19: + KA_TRACE(10, ("__kmp_create_monitor: requested stacksize = %d bytes\n", + (int)__kmp_monitor_stksize)); - There are two opposite situations: + TCW_4(__kmp_global.g.g_time.dt.t_value, 0); - 1. Windows* OS keep thread alive after it resets ds_alive flag and exits from thread - function. (For example, see C70770/Q394281 "unloading of dll based on OMP is very - slow".) - 2. Windows* OS may kill thread before it resets ds_alive flag. + handle = + CreateThread(NULL, (SIZE_T)__kmp_monitor_stksize, + (LPTHREAD_START_ROUTINE)__kmp_launch_monitor, (LPVOID)th, + STACK_SIZE_PARAM_IS_A_RESERVATION, &idThread); + if (handle == 0) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantCreateThread), KMP_ERR(error), + __kmp_msg_null); + } else + th->th.th_info.ds.ds_thread = handle; - Right solution seems to be waiting for *either* thread termination *or* ds_alive resetting. + KMP_MB(); /* Flush all pending memory write invalidates. */ - */ + KA_TRACE(10, ("__kmp_create_monitor: monitor created %p\n", + (void *)th->th.th_info.ds.ds_thread)); +} +#endif - { - // TODO: This code is very similar to KMP_WAIT_YIELD. 
Need to generalize KMP_WAIT_YIELD to - // cover this usage also. - void * obj = NULL; - register kmp_uint32 spins; +/* Check to see if thread is still alive. + NOTE: The ExitProcess(code) system call causes all threads to Terminate + with a exit_val = code. Because of this we can not rely on exit_val having + any particular value. So this routine may return STILL_ALIVE in exit_val + even after the thread is dead. */ + +int __kmp_is_thread_alive(kmp_info_t *th, DWORD *exit_val) { + DWORD rc; + rc = GetExitCodeThread(th->th.th_info.ds.ds_thread, exit_val); + if (rc == 0) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "GetExitCodeThread()"), + KMP_ERR(error), __kmp_msg_null); + }; // if + return (*exit_val == STILL_ACTIVE); +} + +void __kmp_exit_thread(int exit_status) { + ExitThread(exit_status); +} // __kmp_exit_thread + +// This is a common part for both __kmp_reap_worker() and __kmp_reap_monitor(). +static void __kmp_reap_common(kmp_info_t *th) { + DWORD exit_val; + + KMP_MB(); /* Flush all pending memory write invalidates. */ + + KA_TRACE( + 10, ("__kmp_reap_common: try to reap (%d)\n", th->th.th_info.ds.ds_gtid)); + + /* 2006-10-19: + There are two opposite situations: + 1. Windows* OS keep thread alive after it resets ds_alive flag and + exits from thread function. (For example, see C70770/Q394281 "unloading of + dll based on OMP is very slow".) + 2. Windows* OS may kill thread before it resets ds_alive flag. + + Right solution seems to be waiting for *either* thread termination *or* + ds_alive resetting. */ + { + // TODO: This code is very similar to KMP_WAIT_YIELD. Need to generalize + // KMP_WAIT_YIELD to cover this usage also. + void *obj = NULL; + register kmp_uint32 spins; #if USE_ITT_BUILD - KMP_FSYNC_SPIN_INIT( obj, (void*) & th->th.th_info.ds.ds_alive ); + KMP_FSYNC_SPIN_INIT(obj, (void *)&th->th.th_info.ds.ds_alive); #endif /* USE_ITT_BUILD */ - KMP_INIT_YIELD( spins ); - do { + KMP_INIT_YIELD(spins); + do { #if USE_ITT_BUILD - KMP_FSYNC_SPIN_PREPARE( obj ); + KMP_FSYNC_SPIN_PREPARE(obj); #endif /* USE_ITT_BUILD */ - __kmp_is_thread_alive( th, &exit_val ); - KMP_YIELD( TCR_4(__kmp_nth) > __kmp_avail_proc ); - KMP_YIELD_SPIN( spins ); - } while ( exit_val == STILL_ACTIVE && TCR_4( th->th.th_info.ds.ds_alive ) ); + __kmp_is_thread_alive(th, &exit_val); + KMP_YIELD(TCR_4(__kmp_nth) > __kmp_avail_proc); + KMP_YIELD_SPIN(spins); + } while (exit_val == STILL_ACTIVE && TCR_4(th->th.th_info.ds.ds_alive)); #if USE_ITT_BUILD - if ( exit_val == STILL_ACTIVE ) { - KMP_FSYNC_CANCEL( obj ); - } else { - KMP_FSYNC_SPIN_ACQUIRED( obj ); - }; // if + if (exit_val == STILL_ACTIVE) { + KMP_FSYNC_CANCEL(obj); + } else { + KMP_FSYNC_SPIN_ACQUIRED(obj); + }; // if #endif /* USE_ITT_BUILD */ - } + } - __kmp_free_handle( th->th.th_info.ds.ds_thread ); - - /* - * NOTE: The ExitProcess(code) system call causes all threads to Terminate - * with a exit_val = code. Because of this we can not rely on - * exit_val having any particular value. 
- */ - if ( exit_val == STILL_ACTIVE ) { - KA_TRACE( 1, ( "__kmp_reap_common: thread still active.\n" ) ); - } else if ( (void *) exit_val != (void *) th) { - KA_TRACE( 1, ( "__kmp_reap_common: ExitProcess / TerminateThread used?\n" ) ); - }; // if + __kmp_free_handle(th->th.th_info.ds.ds_thread); - KA_TRACE( 10, - ( - "__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC "\n", - th->th.th_info.ds.ds_gtid, - th->th.th_info.ds.ds_thread - ) - ); + /* NOTE: The ExitProcess(code) system call causes all threads to Terminate + with a exit_val = code. Because of this we can not rely on exit_val having + any particular value. */ + if (exit_val == STILL_ACTIVE) { + KA_TRACE(1, ("__kmp_reap_common: thread still active.\n")); + } else if ((void *)exit_val != (void *)th) { + KA_TRACE(1, ("__kmp_reap_common: ExitProcess / TerminateThread used?\n")); + }; // if - th->th.th_info.ds.ds_thread = 0; - th->th.th_info.ds.ds_tid = KMP_GTID_DNE; - th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; - th->th.th_info.ds.ds_thread_id = 0; + KA_TRACE(10, + ("__kmp_reap_common: done reaping (%d), handle = %" KMP_UINTPTR_SPEC + "\n", + th->th.th_info.ds.ds_gtid, th->th.th_info.ds.ds_thread)); - KMP_MB(); /* Flush all pending memory write invalidates. */ + th->th.th_info.ds.ds_thread = 0; + th->th.th_info.ds.ds_tid = KMP_GTID_DNE; + th->th.th_info.ds.ds_gtid = KMP_GTID_DNE; + th->th.th_info.ds.ds_thread_id = 0; + + KMP_MB(); /* Flush all pending memory write invalidates. */ } #if KMP_USE_MONITOR -void -__kmp_reap_monitor( kmp_info_t *th ) -{ - int status; - - KA_TRACE( 10, ("__kmp_reap_monitor: try to reap %p\n", - (void *) th->th.th_info.ds.ds_thread ) ); - - // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. - // If both tid and gtid are 0, it means the monitor did not ever start. - // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. - KMP_DEBUG_ASSERT( th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid ); - if ( th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR ) { - KA_TRACE( 10, ("__kmp_reap_monitor: monitor did not start, returning\n") ); - return; - }; // if - - KMP_MB(); /* Flush all pending memory write invalidates. */ - - status = SetEvent( __kmp_monitor_ev ); - if ( status == FALSE ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantSetEvent ), - KMP_ERR( error ), - __kmp_msg_null - ); - } - KA_TRACE( 10, ( "__kmp_reap_monitor: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); - __kmp_reap_common( th ); +void __kmp_reap_monitor(kmp_info_t *th) { + int status; + + KA_TRACE(10, ("__kmp_reap_monitor: try to reap %p\n", + (void *)th->th.th_info.ds.ds_thread)); + + // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR. + // If both tid and gtid are 0, it means the monitor did not ever start. + // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down. + KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid); + if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) { + KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n")); + return; + }; // if + + KMP_MB(); /* Flush all pending memory write invalidates. 
*/ + + status = SetEvent(__kmp_monitor_ev); + if (status == FALSE) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetEvent), KMP_ERR(error), + __kmp_msg_null); + } + KA_TRACE(10, ("__kmp_reap_monitor: reaping thread (%d)\n", + th->th.th_info.ds.ds_gtid)); + __kmp_reap_common(th); - __kmp_free_handle( __kmp_monitor_ev ); + __kmp_free_handle(__kmp_monitor_ev); - KMP_MB(); /* Flush all pending memory write invalidates. */ + KMP_MB(); /* Flush all pending memory write invalidates. */ } #endif -void -__kmp_reap_worker( kmp_info_t * th ) -{ - KA_TRACE( 10, ( "__kmp_reap_worker: reaping thread (%d)\n", th->th.th_info.ds.ds_gtid ) ); - __kmp_reap_common( th ); +void __kmp_reap_worker(kmp_info_t *th) { + KA_TRACE(10, ("__kmp_reap_worker: reaping thread (%d)\n", + th->th.th_info.ds.ds_gtid)); + __kmp_reap_common(th); } -/* ------------------------------------------------------------------------ */ -/* ------------------------------------------------------------------------ */ - #if KMP_HANDLE_SIGNALS - -static void -__kmp_team_handler( int signo ) -{ - if ( __kmp_global.g.g_abort == 0 ) { - // Stage 1 signal handler, let's shut down all of the threads. - if ( __kmp_debug_buf ) { - __kmp_dump_debug_buffer(); - }; // if - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_abort, signo ); - KMP_MB(); // Flush all pending memory write invalidates. - TCW_4( __kmp_global.g.g_done, TRUE ); - KMP_MB(); // Flush all pending memory write invalidates. - } +static void __kmp_team_handler(int signo) { + if (__kmp_global.g.g_abort == 0) { + // Stage 1 signal handler, let's shut down all of the threads. + if (__kmp_debug_buf) { + __kmp_dump_debug_buffer(); + }; // if + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4(__kmp_global.g.g_abort, signo); + KMP_MB(); // Flush all pending memory write invalidates. + TCW_4(__kmp_global.g.g_done, TRUE); + KMP_MB(); // Flush all pending memory write invalidates. + } } // __kmp_team_handler - - -static -sig_func_t __kmp_signal( int signum, sig_func_t handler ) { - sig_func_t old = signal( signum, handler ); - if ( old == SIG_ERR ) { - int error = errno; - __kmp_msg( kmp_ms_fatal, KMP_MSG( FunctionError, "signal" ), KMP_ERR( error ), __kmp_msg_null ); - }; // if - return old; +static sig_func_t __kmp_signal(int signum, sig_func_t handler) { + sig_func_t old = signal(signum, handler); + if (old == SIG_ERR) { + int error = errno; + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "signal"), KMP_ERR(error), + __kmp_msg_null); + }; // if + return old; } -static void -__kmp_install_one_handler( - int sig, - sig_func_t handler, - int parallel_init -) { - sig_func_t old; - KMP_MB(); /* Flush all pending memory write invalidates. */ - KB_TRACE( 60, ("__kmp_install_one_handler: called: sig=%d\n", sig ) ); - if ( parallel_init ) { - old = __kmp_signal( sig, handler ); - // SIG_DFL on Windows* OS in NULL or 0. - if ( old == __kmp_sighldrs[ sig ] ) { - __kmp_siginstalled[ sig ] = 1; - } else { - // Restore/keep user's handler if one previously installed. - old = __kmp_signal( sig, old ); - }; // if - } else { - // Save initial/system signal handlers to see if user handlers installed. - // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals called once with - // parallel_init == TRUE. 
- old = __kmp_signal( sig, SIG_DFL ); - __kmp_sighldrs[ sig ] = old; - __kmp_signal( sig, old ); +static void __kmp_install_one_handler(int sig, sig_func_t handler, + int parallel_init) { + sig_func_t old; + KMP_MB(); /* Flush all pending memory write invalidates. */ + KB_TRACE(60, ("__kmp_install_one_handler: called: sig=%d\n", sig)); + if (parallel_init) { + old = __kmp_signal(sig, handler); + // SIG_DFL on Windows* OS in NULL or 0. + if (old == __kmp_sighldrs[sig]) { + __kmp_siginstalled[sig] = 1; + } else { // Restore/keep user's handler if one previously installed. + old = __kmp_signal(sig, old); }; // if - KMP_MB(); /* Flush all pending memory write invalidates. */ + } else { + // Save initial/system signal handlers to see if user handlers installed. + // 2009-09-23: It is a dead code. On Windows* OS __kmp_install_signals + // called once with parallel_init == TRUE. + old = __kmp_signal(sig, SIG_DFL); + __kmp_sighldrs[sig] = old; + __kmp_signal(sig, old); + }; // if + KMP_MB(); /* Flush all pending memory write invalidates. */ } // __kmp_install_one_handler -static void -__kmp_remove_one_handler( int sig ) { - if ( __kmp_siginstalled[ sig ] ) { - sig_func_t old; - KMP_MB(); // Flush all pending memory write invalidates. - KB_TRACE( 60, ( "__kmp_remove_one_handler: called: sig=%d\n", sig ) ); - old = __kmp_signal( sig, __kmp_sighldrs[ sig ] ); - if ( old != __kmp_team_handler ) { - KB_TRACE( 10, ( "__kmp_remove_one_handler: oops, not our handler, restoring: sig=%d\n", sig ) ); - old = __kmp_signal( sig, old ); - }; // if - __kmp_sighldrs[ sig ] = NULL; - __kmp_siginstalled[ sig ] = 0; - KMP_MB(); // Flush all pending memory write invalidates. +static void __kmp_remove_one_handler(int sig) { + if (__kmp_siginstalled[sig]) { + sig_func_t old; + KMP_MB(); // Flush all pending memory write invalidates. + KB_TRACE(60, ("__kmp_remove_one_handler: called: sig=%d\n", sig)); + old = __kmp_signal(sig, __kmp_sighldrs[sig]); + if (old != __kmp_team_handler) { + KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, " + "restoring: sig=%d\n", + sig)); + old = __kmp_signal(sig, old); }; // if + __kmp_sighldrs[sig] = NULL; + __kmp_siginstalled[sig] = 0; + KMP_MB(); // Flush all pending memory write invalidates. + }; // if } // __kmp_remove_one_handler - -void -__kmp_install_signals( int parallel_init ) -{ - KB_TRACE( 10, ( "__kmp_install_signals: called\n" ) ); - if ( ! 
__kmp_handle_signals ) { - KB_TRACE( 10, ( "__kmp_install_signals: KMP_HANDLE_SIGNALS is false - handlers not installed\n" ) ); - return; - }; // if - __kmp_install_one_handler( SIGINT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGILL, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGABRT, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGFPE, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGSEGV, __kmp_team_handler, parallel_init ); - __kmp_install_one_handler( SIGTERM, __kmp_team_handler, parallel_init ); +void __kmp_install_signals(int parallel_init) { + KB_TRACE(10, ("__kmp_install_signals: called\n")); + if (!__kmp_handle_signals) { + KB_TRACE(10, ("__kmp_install_signals: KMP_HANDLE_SIGNALS is false - " + "handlers not installed\n")); + return; + }; // if + __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init); + __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init); } // __kmp_install_signals - -void -__kmp_remove_signals( void ) -{ - int sig; - KB_TRACE( 10, ("__kmp_remove_signals: called\n" ) ); - for ( sig = 1; sig < NSIG; ++ sig ) { - __kmp_remove_one_handler( sig ); - }; // for sig +void __kmp_remove_signals(void) { + int sig; + KB_TRACE(10, ("__kmp_remove_signals: called\n")); + for (sig = 1; sig < NSIG; ++sig) { + __kmp_remove_one_handler(sig); + }; // for sig } // __kmp_remove_signals - #endif // KMP_HANDLE_SIGNALS /* Put the thread to sleep for a time period */ -void -__kmp_thread_sleep( int millis ) -{ - DWORD status; - - status = SleepEx( (DWORD) millis, FALSE ); - if ( status ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( FunctionError, "SleepEx()" ), - KMP_ERR( error ), - __kmp_msg_null - ); - } +void __kmp_thread_sleep(int millis) { + DWORD status; + + status = SleepEx((DWORD)millis, FALSE); + if (status) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(FunctionError, "SleepEx()"), KMP_ERR(error), + __kmp_msg_null); + } } -/* Determine whether the given address is mapped into the current address space. */ -int -__kmp_is_address_mapped( void * addr ) -{ - DWORD status; - MEMORY_BASIC_INFORMATION lpBuffer; - SIZE_T dwLength; +// Determine whether the given address is mapped into the current address space. 
+int __kmp_is_address_mapped(void *addr) { + DWORD status; + MEMORY_BASIC_INFORMATION lpBuffer; + SIZE_T dwLength; - dwLength = sizeof(MEMORY_BASIC_INFORMATION); + dwLength = sizeof(MEMORY_BASIC_INFORMATION); - status = VirtualQuery( addr, &lpBuffer, dwLength ); + status = VirtualQuery(addr, &lpBuffer, dwLength); - return !((( lpBuffer.State == MEM_RESERVE) || ( lpBuffer.State == MEM_FREE )) || - (( lpBuffer.Protect == PAGE_NOACCESS ) || ( lpBuffer.Protect == PAGE_EXECUTE ))); + return !(((lpBuffer.State == MEM_RESERVE) || (lpBuffer.State == MEM_FREE)) || + ((lpBuffer.Protect == PAGE_NOACCESS) || + (lpBuffer.Protect == PAGE_EXECUTE))); } -kmp_uint64 -__kmp_hardware_timestamp(void) -{ - kmp_uint64 r = 0; +kmp_uint64 __kmp_hardware_timestamp(void) { + kmp_uint64 r = 0; - QueryPerformanceCounter((LARGE_INTEGER*) &r); - return r; + QueryPerformanceCounter((LARGE_INTEGER *)&r); + return r; } /* Free handle and check the error code */ -void -__kmp_free_handle( kmp_thread_t tHandle ) -{ -/* called with parameter type HANDLE also, thus suppose kmp_thread_t defined as HANDLE */ - BOOL rc; - rc = CloseHandle( tHandle ); - if ( !rc ) { - DWORD error = GetLastError(); - __kmp_msg( - kmp_ms_fatal, - KMP_MSG( CantCloseHandle ), - KMP_ERR( error ), - __kmp_msg_null - ); - } +void __kmp_free_handle(kmp_thread_t tHandle) { + /* called with parameter type HANDLE also, thus suppose kmp_thread_t defined + * as HANDLE */ + BOOL rc; + rc = CloseHandle(tHandle); + if (!rc) { + DWORD error = GetLastError(); + __kmp_msg(kmp_ms_fatal, KMP_MSG(CantCloseHandle), KMP_ERR(error), + __kmp_msg_null); + } } -int -__kmp_get_load_balance( int max ) { +int __kmp_get_load_balance(int max) { + static ULONG glb_buff_size = 100 * 1024; - static ULONG glb_buff_size = 100 * 1024; + // Saved count of the running threads for the thread balance algortihm + static int glb_running_threads = 0; + static double glb_call_time = 0; /* Thread balance algorithm call time */ - static int glb_running_threads = 0; /* Saved count of the running threads for the thread balance algortihm */ - static double glb_call_time = 0; /* Thread balance algorithm call time */ + int running_threads = 0; // Number of running threads in the system. + NTSTATUS status = 0; + ULONG buff_size = 0; + ULONG info_size = 0; + void *buffer = NULL; + PSYSTEM_PROCESS_INFORMATION spi = NULL; + int first_time = 1; - int running_threads = 0; // Number of running threads in the system. - NTSTATUS status = 0; - ULONG buff_size = 0; - ULONG info_size = 0; - void * buffer = NULL; - PSYSTEM_PROCESS_INFORMATION spi = NULL; - int first_time = 1; + double call_time = 0.0; // start, finish; - double call_time = 0.0; //start, finish; + __kmp_elapsed(&call_time); - __kmp_elapsed( & call_time ); + if (glb_call_time && + (call_time - glb_call_time < __kmp_load_balance_interval)) { + running_threads = glb_running_threads; + goto finish; + } + glb_call_time = call_time; - if ( glb_call_time && - ( call_time - glb_call_time < __kmp_load_balance_interval ) ) { - running_threads = glb_running_threads; - goto finish; - } - glb_call_time = call_time; + // Do not spend time on running algorithm if we have a permanent error. + if (NtQuerySystemInformation == NULL) { + running_threads = -1; + goto finish; + }; // if - // Do not spend time on running algorithm if we have a permanent error. 
- if ( NtQuerySystemInformation == NULL ) { - running_threads = -1; - goto finish; - }; // if + if (max <= 0) { + max = INT_MAX; + }; // if - if ( max <= 0 ) { - max = INT_MAX; - }; // if + do { - do { + if (first_time) { + buff_size = glb_buff_size; + } else { + buff_size = 2 * buff_size; + } - if ( first_time ) { - buff_size = glb_buff_size; - } else { - buff_size = 2 * buff_size; - } + buffer = KMP_INTERNAL_REALLOC(buffer, buff_size); + if (buffer == NULL) { + running_threads = -1; + goto finish; + }; // if + status = NtQuerySystemInformation(SystemProcessInformation, buffer, + buff_size, &info_size); + first_time = 0; + + } while (status == STATUS_INFO_LENGTH_MISMATCH); + glb_buff_size = buff_size; + +#define CHECK(cond) \ + { \ + KMP_DEBUG_ASSERT(cond); \ + if (!(cond)) { \ + running_threads = -1; \ + goto finish; \ + } \ + } - buffer = KMP_INTERNAL_REALLOC( buffer, buff_size ); - if ( buffer == NULL ) { - running_threads = -1; + CHECK(buff_size >= info_size); + spi = PSYSTEM_PROCESS_INFORMATION(buffer); + for (;;) { + ptrdiff_t offset = uintptr_t(spi) - uintptr_t(buffer); + CHECK(0 <= offset && + offset + sizeof(SYSTEM_PROCESS_INFORMATION) < info_size); + HANDLE pid = spi->ProcessId; + ULONG num = spi->NumberOfThreads; + CHECK(num >= 1); + size_t spi_size = + sizeof(SYSTEM_PROCESS_INFORMATION) + sizeof(SYSTEM_THREAD) * (num - 1); + CHECK(offset + spi_size < + info_size); // Make sure process info record fits the buffer. + if (spi->NextEntryOffset != 0) { + CHECK(spi_size <= + spi->NextEntryOffset); // And do not overlap with the next record. + }; // if + // pid == 0 corresponds to the System Idle Process. It always has running + // threads on all cores. So, we don't consider the running threads of this + // process. + if (pid != 0) { + for (int i = 0; i < num; ++i) { + THREAD_STATE state = spi->Threads[i].State; + // Count threads that have Ready or Running state. + // !!! TODO: Why comment does not match the code??? + if (state == StateRunning) { + ++running_threads; + // Stop counting running threads if the number is already greater than + // the number of available cores + if (running_threads >= max) { goto finish; - }; // if - status = NtQuerySystemInformation( SystemProcessInformation, buffer, buff_size, & info_size ); - first_time = 0; - - } while ( status == STATUS_INFO_LENGTH_MISMATCH ); - glb_buff_size = buff_size; - - #define CHECK( cond ) \ - { \ - KMP_DEBUG_ASSERT( cond ); \ - if ( ! ( cond ) ) { \ - running_threads = -1; \ - goto finish; \ - } \ - } - - CHECK( buff_size >= info_size ); - spi = PSYSTEM_PROCESS_INFORMATION( buffer ); - for ( ; ; ) { - ptrdiff_t offset = uintptr_t( spi ) - uintptr_t( buffer ); - CHECK( 0 <= offset && offset + sizeof( SYSTEM_PROCESS_INFORMATION ) < info_size ); - HANDLE pid = spi->ProcessId; - ULONG num = spi->NumberOfThreads; - CHECK( num >= 1 ); - size_t spi_size = sizeof( SYSTEM_PROCESS_INFORMATION ) + sizeof( SYSTEM_THREAD ) * ( num - 1 ); - CHECK( offset + spi_size < info_size ); // Make sure process info record fits the buffer. - if ( spi->NextEntryOffset != 0 ) { - CHECK( spi_size <= spi->NextEntryOffset ); // And do not overlap with the next record. - }; // if - // pid == 0 corresponds to the System Idle Process. It always has running threads - // on all cores. So, we don't consider the running threads of this process. - if ( pid != 0 ) { - for ( int i = 0; i < num; ++ i ) { - THREAD_STATE state = spi->Threads[ i ].State; - // Count threads that have Ready or Running state. - // !!! TODO: Why comment does not match the code??? 
- if ( state == StateRunning ) { - ++ running_threads; - // Stop counting running threads if the number is already greater than - // the number of available cores - if ( running_threads >= max ) { - goto finish; - } - } // if - }; // for i + } } // if - if ( spi->NextEntryOffset == 0 ) { - break; - }; // if - spi = PSYSTEM_PROCESS_INFORMATION( uintptr_t( spi ) + spi->NextEntryOffset ); - }; // forever - - #undef CHECK + }; // for i + } // if + if (spi->NextEntryOffset == 0) { + break; + }; // if + spi = PSYSTEM_PROCESS_INFORMATION(uintptr_t(spi) + spi->NextEntryOffset); + }; // forever - finish: // Clean up and exit. +#undef CHECK - if ( buffer != NULL ) { - KMP_INTERNAL_FREE( buffer ); - }; // if +finish: // Clean up and exit. - glb_running_threads = running_threads; + if (buffer != NULL) { + KMP_INTERNAL_FREE(buffer); + }; // if - return running_threads; + glb_running_threads = running_threads; + return running_threads; } //__kmp_get_load_balance() -
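
A note on the timing code reformatted above: __kmp_win32_tick is simply the reciprocal of the QueryPerformanceFrequency result, and __kmp_elapsed() / __kmp_now_nsec() scale raw QueryPerformanceCounter readings by that tick. Below is a minimal standalone sketch of the same pattern, outside the KMP error-reporting machinery; the names win32_tick, timer_init and elapsed_seconds are illustrative and not from the runtime.

// qpc_timer_sketch.cpp -- illustrative only; mirrors the QueryPerformance*
// usage in __kmp_elapsed / __kmp_now_nsec without the __kmp_msg plumbing.
#include <windows.h>
#include <stdio.h>

static double win32_tick; // seconds per performance-counter tick

static void timer_init(void) {
  LARGE_INTEGER freq;
  if (!QueryPerformanceFrequency(&freq)) {
    fprintf(stderr, "QueryPerformanceFrequency() failed: %lu\n",
            GetLastError());
    ExitProcess(1);
  }
  win32_tick = 1.0 / (double)freq.QuadPart;
}

static double elapsed_seconds(void) {
  LARGE_INTEGER now;
  QueryPerformanceCounter(&now);
  return (double)now.QuadPart * win32_tick;
}

int main(void) {
  timer_init();
  double t0 = elapsed_seconds();
  Sleep(100); // sleep roughly 100 ms
  double t1 = elapsed_seconds();
  printf("elapsed: %.6f s (~%.0f ns)\n", t1 - t0, (t1 - t0) * 1e9);
  return 0;
}

Compiled as a plain Win32 program this prints roughly 0.1 s for the Sleep(100) interval; the runtime differs mainly in routing a QueryPerformanceFrequency failure through __kmp_msg.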
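
__kmp_launch_worker copies the parallel initialization thread's floating-point control state into each new worker through the runtime's internal helpers (__kmp_clear_x87_fpu_status_word, __kmp_load_x87_fpu_control_word, __kmp_load_mxcsr). The rough sketch below shows the MXCSR half of that idea using the standard SSE intrinsics; it is an illustration under that assumption, not the runtime's implementation, and the x87 control-word half handled by the runtime's own assembly helpers is omitted.

// fp_state_sketch.cpp -- illustrative only (x86/x86_64): propagate the MXCSR
// control/status register from the creating thread to a new worker, which is
// the effect of the __kmp_load_mxcsr(&__kmp_init_mxcsr) call in the patch.
#include <windows.h>
#include <xmmintrin.h>
#include <stdio.h>

static unsigned init_mxcsr; // captured by the creator, like __kmp_init_mxcsr

static DWORD WINAPI worker(LPVOID) {
  _mm_setcsr(init_mxcsr); // adopt the creator's rounding/exception-mask bits
  printf("worker MXCSR = 0x%x\n", _mm_getcsr());
  return 0;
}

int main(void) {
  init_mxcsr = _mm_getcsr();
  HANDLE h = CreateThread(NULL, 0, worker, NULL, 0, NULL);
  if (h != NULL) {
    WaitForSingleObject(h, INFINITE);
    CloseHandle(h);
  }
  return 0;
}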
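
The worker-thread lifecycle in this file reduces to a handful of Win32 calls: CreateThread with STACK_SIZE_PARAM_IS_A_RESERVATION (__kmp_create_worker), a zero-timeout WaitForSingleObject as the "still running" test (__kmp_still_running), GetExitCodeThread with the STILL_ACTIVE caveat spelled out in the comments above (__kmp_is_thread_alive, __kmp_reap_common), and CloseHandle (__kmp_free_handle). A self-contained sketch of that sequence, with illustrative names:

// thread_lifecycle_sketch.cpp -- illustrative only; the Win32 calls that the
// create/still-running/reap helpers in this file are built on.
#include <windows.h>
#include <stdio.h>

static DWORD WINAPI worker(LPVOID arg) {
  Sleep(200);                   // pretend to do work
  return (DWORD)(DWORD_PTR)arg; // exit code observable via GetExitCodeThread
}

int main(void) {
  DWORD tid;
  // Reserve (not commit) a 4 MiB stack, as the runtime does for workers.
  HANDLE h = CreateThread(NULL, 4 * 1024 * 1024, worker, (LPVOID)42,
                          STACK_SIZE_PARAM_IS_A_RESERVATION, &tid);
  if (h == NULL) {
    fprintf(stderr, "CreateThread failed: %lu\n", GetLastError());
    return 1;
  }

  // "Still running" check: a zero-timeout wait that times out means alive.
  int alive = (WaitForSingleObject(h, 0) == WAIT_TIMEOUT);
  printf("immediately after create, alive = %d\n", alive);

  // Reap: wait for termination, then inspect the exit code. As the source
  // notes, STILL_ACTIVE (259) is also a possible exit code, so
  // GetExitCodeThread alone cannot prove liveness.
  WaitForSingleObject(h, INFINITE);
  DWORD exit_val = 0;
  if (!GetExitCodeThread(h, &exit_val))
    fprintf(stderr, "GetExitCodeThread failed: %lu\n", GetLastError());
  printf("worker exit code = %lu\n", exit_val);

  CloseHandle(h); // the __kmp_free_handle() step
  return 0;
}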
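
__kmp_is_address_mapped expresses its VirtualQuery test as a double negation over MEM_RESERVE/MEM_FREE and PAGE_NOACCESS/PAGE_EXECUTE. The sketch below states the same condition positively (mapped means committed and neither no-access nor execute-only) and, as a small addition not in the original, also checks VirtualQuery's return value; names are illustrative.

// address_mapped_sketch.cpp -- illustrative only; the same VirtualQuery-based
// test as __kmp_is_address_mapped, with the condition written positively.
#include <windows.h>
#include <stdio.h>

static int is_address_mapped(const void *addr) {
  MEMORY_BASIC_INFORMATION mbi;
  if (VirtualQuery(addr, &mbi, sizeof(mbi)) == 0)
    return 0; // query failed; treat as unmapped
  // Mapped and usable: committed, and not a no-access or execute-only page.
  return mbi.State == MEM_COMMIT && mbi.Protect != PAGE_NOACCESS &&
         mbi.Protect != PAGE_EXECUTE;
}

int main(void) {
  int on_stack = 0;
  printf("stack variable mapped: %d\n", is_address_mapped(&on_stack));
  printf("NULL mapped:           %d\n", is_address_mapped(NULL));
  return 0;
}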
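
__kmp_get_load_balance relies on NtQuerySystemInformation(SystemProcessInformation): retry with a doubled buffer while the call returns STATUS_INFO_LENGTH_MISMATCH, then walk the variable-length records through NextEntryOffset. The sketch below shows only that buffer-and-walk skeleton against the public winternl.h declarations and totals NumberOfThreads per process; the per-thread State inspection the runtime performs needs its own structure definitions and is omitted. It assumes linking against ntdll (ntdll.lib or -lntdll), and STATUS_INFO_LENGTH_MISMATCH is defined locally since it normally comes from ntstatus.h.

// process_walk_sketch.cpp -- illustrative only; the grow-and-retry buffer loop
// and NextEntryOffset walk underlying __kmp_get_load_balance.
#include <windows.h>
#include <winternl.h>
#include <stdio.h>
#include <stdlib.h>

#ifndef STATUS_INFO_LENGTH_MISMATCH
#define STATUS_INFO_LENGTH_MISMATCH ((NTSTATUS)0xC0000004L) // from ntstatus.h
#endif

int main(void) {
  ULONG buff_size = 100 * 1024, info_size = 0;
  void *buffer = NULL;
  NTSTATUS status;

  do { // grow the buffer until the process snapshot fits
    void *nb = realloc(buffer, buff_size);
    if (nb == NULL) {
      free(buffer);
      return 1;
    }
    buffer = nb;
    status = NtQuerySystemInformation(SystemProcessInformation, buffer,
                                      buff_size, &info_size);
    if (status == STATUS_INFO_LENGTH_MISMATCH)
      buff_size *= 2;
  } while (status == STATUS_INFO_LENGTH_MISMATCH);

  if (status != 0) { // STATUS_SUCCESS is 0
    free(buffer);
    return 1;
  }

  ULONG total_threads = 0;
  SYSTEM_PROCESS_INFORMATION *spi = (SYSTEM_PROCESS_INFORMATION *)buffer;
  for (;;) { // walk the variable-length records via NextEntryOffset
    total_threads += spi->NumberOfThreads;
    if (spi->NextEntryOffset == 0)
      break;
    spi = (SYSTEM_PROCESS_INFORMATION *)((char *)spi + spi->NextEntryOffset);
  }
  printf("threads in system snapshot: %lu\n", (unsigned long)total_threads);
  free(buffer);
  return 0;
}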