#include "flang/Common/idioms.h"
#include "flang/Parser/char-buffer.h"
#include "llvm/Support/Errno.h"
+#include "llvm/Support/FileSystem.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <cstddef>
-#include <cstring>
-#include <fcntl.h>
#include <memory>
-#include <sys/mman.h>
-#include <sys/stat.h>
-#include <sys/types.h>
-#include <unistd.h>
#include <vector>
-// TODO: Port to Windows &c.
-
namespace Fortran::parser {
-static constexpr bool useMMap{true};
-static constexpr int minMapFileBytes{1}; // i.e., no minimum requirement
-static constexpr int maxMapOpenFileDescriptors{100};
-static int openFileDescriptors{0};
-
+// Destruction goes through Close(), which clears path_ and resets buf_.
SourceFile::~SourceFile() { Close(); }
-static std::vector<std::size_t> FindLineStarts(
- const char *source, std::size_t bytes) {
+// Returns the byte offset at which each line of `source` begins, in
+// ascending order starting with 0.  Non-empty input must end in '\n'
+// (enforced by the CHECK below); empty input yields an empty vector.
+static std::vector<std::size_t> FindLineStarts(llvm::StringRef source) {
std::vector<std::size_t> result;
- if (bytes > 0) {
- CHECK(source[bytes - 1] == '\n' && "missing ultimate newline");
+ if (source.size() > 0) {
+ CHECK(source.back() == '\n' && "missing ultimate newline");
std::size_t at{0};
do {
result.push_back(at);
- const void *vp{static_cast<const void *>(&source[at])};
- const void *vnl{std::memchr(vp, '\n', bytes - at)};
- const char *nl{static_cast<const char *>(vnl)};
- at = nl + 1 - source;
- } while (at < bytes);
+ // The final-newline CHECK above guarantees that find() locates a '\n'
+ // here, so `at` always advances to the byte after it.
+ at = source.find('\n', at) + 1;
+ } while (at < source.size());
result.shrink_to_fit();
}
return result;
}
void SourceFile::RecordLineStarts() {
- lineStart_ = FindLineStarts(content_, bytes_);
+ // Rebuild lineStart_ from the bytes() characters starting at
+ // content().data().
+ lineStart_ = FindLineStarts({content().data(), bytes()});
}
// Check for a Unicode byte order mark (BOM).
// Module files all have one; so can source files.
void SourceFile::IdentifyPayload() {
- content_ = address_;
- bytes_ = size_;
- if (content_) {
- static constexpr int BOMBytes{3};
- static const char UTF8_BOM[]{"\xef\xbb\xbf"};
- if (bytes_ >= BOMBytes && std::memcmp(content_, UTF8_BOM, BOMBytes) == 0) {
- content_ += BOMBytes;
- bytes_ -= BOMBytes;
- encoding_ = Encoding::UTF_8;
- }
+  // Start from "no BOM": Close() does not reset bom_end_, so without this a
+  // BOM offset recorded for a previously opened file would carry over to a
+  // file that has none.
+  // NOTE(review): encoding_ is left alone because its default value is not
+  // visible in this file -- confirm whether it needs the same treatment.
+  bom_end_ = 0;
+  llvm::StringRef content{buf_->getBufferStart(), buf_->getBufferSize()};
+  constexpr llvm::StringLiteral UTF8_BOM{"\xef\xbb\xbf"};
+  if (content.startswith(UTF8_BOM)) {
+    // Remember where the BOM ends and note that the file is UTF-8.
+    bom_end_ = UTF8_BOM.size();
+    encoding_ = Encoding::UTF_8;
}
}
}
for (const std::string &dir : searchPath) {
std::string path{dir + '/' + name};
- struct stat statbuf;
- if (stat(path.c_str(), &statbuf) == 0 && !S_ISDIR(statbuf.st_mode)) {
+ bool isDir{false};
+ auto er = llvm::sys::fs::is_directory(path, isDir);
+ if (!er && !isDir) {
return path;
}
}
return name;
}
-static std::size_t RemoveCarriageReturns(char *buffer, std::size_t bytes) {
+std::size_t RemoveCarriageReturns(llvm::MutableArrayRef<char> buf) {
std::size_t wrote{0};
- char *p{buffer};
+ char *buffer{buf.data()};
+ char *p{buf.data()};
+ std::size_t bytes = buf.size();
while (bytes > 0) {
void *vp{static_cast<void *>(p)};
void *crvp{std::memchr(vp, '\r', bytes)};
+// Closes whatever was previously loaded, reads the file at `path` into a
+// writable memory buffer, and hands it to ReadFile() for normalization.
+// On failure, writes "Could not open '<path>': <message>" to `error` and
+// returns false; otherwise returns true.
bool SourceFile::Open(std::string path, llvm::raw_ostream &error) {
Close();
path_ = path;
- std::string errorPath{"'"s + path + "'"};
- errno = 0;
- fileDescriptor_ = open(path.c_str(), O_RDONLY);
- if (fileDescriptor_ < 0) {
- error << "Could not open " << errorPath << ": "
- << llvm::sys::StrError(errno);
+ std::string errorPath{"'"s + path_ + "'"};
+ auto bufOr{llvm::WritableMemoryBuffer::getFile(path)};
+ if (!bufOr) {
+ auto err = bufOr.getError();
+ error << "Could not open " << errorPath << ": " << err.message();
return false;
}
- ++openFileDescriptors;
- return ReadFile(errorPath, error);
+ // Take ownership of the loaded buffer before normalizing it.
+ buf_ = std::move(bufOr.get());
+ ReadFile();
+ return true;
}
+// Closes whatever was previously loaded, reads all of standard input, and
+// hands the data to ReadFile() for normalization.  On failure, writes the
+// error's message to `error` and returns false; otherwise returns true.
bool SourceFile::ReadStandardInput(llvm::raw_ostream &error) {
Close();
path_ = "standard input";
- fileDescriptor_ = 0;
- return ReadFile(path_, error);
-}
-bool SourceFile::ReadFile(std::string errorPath, llvm::raw_ostream &error) {
- struct stat statbuf;
- if (fstat(fileDescriptor_, &statbuf) != 0) {
- error << "fstat failed on " << errorPath << ": "
- << llvm::sys::StrError(errno);
- Close();
+ auto buf_or = llvm::MemoryBuffer::getSTDIN();
+ if (!buf_or) {
+ auto err = buf_or.getError();
+ error << err.message();
return false;
}
- if (S_ISDIR(statbuf.st_mode)) {
- error << errorPath << " is a directory";
- Close();
- return false;
- }
-
- // Try to map a large source file into the process' address space.
- // Don't bother with small ones. This also helps keep the number
- // of open file descriptors from getting out of hand.
- if (useMMap && S_ISREG(statbuf.st_mode)) {
- size_ = static_cast<std::size_t>(statbuf.st_size);
- if (size_ >= minMapFileBytes &&
- openFileDescriptors <= maxMapOpenFileDescriptors) {
- void *vp = mmap(0, size_, PROT_READ, MAP_SHARED, fileDescriptor_, 0);
- if (vp != MAP_FAILED) {
- address_ = static_cast<const char *>(const_cast<const void *>(vp));
- IdentifyPayload();
- if (bytes_ > 0 && content_[bytes_ - 1] == '\n' &&
- std::memchr(static_cast<const void *>(content_), '\r', bytes_) ==
- nullptr) {
- isMemoryMapped_ = true;
- RecordLineStarts();
- return true;
- }
- // The file needs to have its line endings normalized to simple
- // newlines. Remap it for a private rewrite in place.
- vp = mmap(
- vp, size_, PROT_READ | PROT_WRITE, MAP_PRIVATE, fileDescriptor_, 0);
- if (vp != MAP_FAILED) {
- address_ = static_cast<const char *>(const_cast<const void *>(vp));
- IdentifyPayload();
- auto mutableContent{const_cast<char *>(content_)};
- bytes_ = RemoveCarriageReturns(mutableContent, bytes_);
- if (bytes_ > 0) {
- if (mutableContent[bytes_ - 1] == '\n' ||
- (bytes_ & 0xfff) != 0 /* don't cross into next page */) {
- if (mutableContent[bytes_ - 1] != '\n') {
- // Append a final newline.
- mutableContent[bytes_++] = '\n';
- }
- bool isNowReadOnly{mprotect(vp, bytes_, PROT_READ) == 0};
- CHECK(isNowReadOnly);
- content_ = mutableContent;
- isMemoryMapped_ = true;
- RecordLineStarts();
- return true;
- }
- }
- }
- munmap(vp, size_);
- address_ = content_ = nullptr;
- size_ = bytes_ = 0;
- }
- }
- }
+ // Copy the bytes read from standard input into a writable buffer of the
+ // same size; ReadFile() modifies buf_ in place.
+ auto inbuf = std::move(buf_or.get());
+ buf_ =
+ llvm::WritableMemoryBuffer::getNewUninitMemBuffer(inbuf->getBufferSize());
+ llvm::copy(inbuf->getBuffer(), buf_->getBufferStart());
+ ReadFile();
+ return true;
+}
- // Read it into an expandable buffer, then marshal its content into a single
- // contiguous block.
- CharBuffer buffer;
- while (true) {
- std::size_t count;
- char *to{buffer.FreeSpace(count)};
- ssize_t got{read(fileDescriptor_, to, count)};
- if (got < 0) {
- error << "could not read " << errorPath << ": "
- << llvm::sys::StrError(errno);
- Close();
- return false;
- }
- if (got == 0) {
- break;
- }
- buffer.Claim(got);
- }
- if (fileDescriptor_ > 0) {
- close(fileDescriptor_);
- --openFileDescriptors;
+// Normalizes the buffer just loaded into buf_: removes carriage returns,
+// guarantees a terminating newline, then identifies any byte order mark and
+// records the line starts.  path_ is left untouched.
+void SourceFile::ReadFile() {
+  // buf_end_ is the number of bytes that remain after carriage returns are
+  // removed in place.
+  buf_end_ = RemoveCarriageReturns(buf_->getBuffer());
+  // FindLineStarts() CHECKs that non-empty content ends in '\n', so supply
+  // the newline for empty input and for an unterminated last line.  This
+  // deliberately avoids Close(), which would also erase the path_ that the
+  // caller has just set.
+  if (buf_end_ == 0 || buf_->getBufferStart()[buf_end_ - 1] != '\n') {
+    if (buf_end_ >= buf_->getBufferSize()) {
+      // Carriage return removal freed no slack: move to a buffer that is
+      // one byte larger.
+      auto grown{
+          llvm::WritableMemoryBuffer::getNewUninitMemBuffer(buf_end_ + 1)};
+      std::copy_n(buf_->getBufferStart(), buf_end_, grown->getBufferStart());
+      buf_ = std::move(grown);
+    }
+    buf_->getBufferStart()[buf_end_++] = '\n';
}
- fileDescriptor_ = -1;
- normalized_ = buffer.MarshalNormalized();
- address_ = normalized_.c_str();
- size_ = normalized_.size();
IdentifyPayload();
RecordLineStarts();
- return true;
}
+// Forgets the current file: clears path_ and resets buf_.
void SourceFile::Close() {
- if (useMMap && isMemoryMapped_) {
- munmap(reinterpret_cast<void *>(const_cast<char *>(address_)), size_);
- isMemoryMapped_ = false;
- } else if (!normalized_.empty()) {
- normalized_.clear();
- } else if (address_) {
- delete[] address_;
- }
- address_ = content_ = nullptr;
- size_ = bytes_ = 0;
- if (fileDescriptor_ > 0) {
- close(fileDescriptor_);
- --openFileDescriptors;
- }
- fileDescriptor_ = -1;
path_.clear();
+ // Resetting buf_ lets go of the loaded file's buffer.
+ buf_.reset();
}
+// Maps the byte offset `at` (which must be less than bytes()) to a
+// SourcePosition with a 1-based line number and a 1-based column.
SourcePosition SourceFile::FindOffsetLineAndColumn(std::size_t at) const {
- CHECK(at < bytes_);
- if (lineStart_.empty()) {
- return {*this, 1, static_cast<int>(at + 1)};
- }
- std::size_t low{0}, count{lineStart_.size()};
- while (count > 1) {
- std::size_t mid{low + (count >> 1)};
- if (lineStart_[mid] > at) {
- count = mid - low;
- } else {
- count -= mid - low;
- low = mid;
- }
- }
+ CHECK(at < bytes());
+
+ // upper_bound yields the first line start greater than `at`; the entry
+ // just before it is the start of the line that contains `at`.
+ // NOTE(review): `it - 1` assumes lineStart_ is non-empty and begins with
+ // 0, as FindLineStarts() produces for non-empty content -- confirm that
+ // RecordLineStarts() has always run before this is called.
+ auto it = llvm::upper_bound(lineStart_, at);
+ auto low = std::distance(lineStart_.begin(), it - 1);
return {*this, static_cast<int>(low + 1),
static_cast<int>(at - lineStart_[low] + 1)};
}