238 lines
6.2 KiB
C++
238 lines
6.2 KiB
C++
|
// Copyright 2019 Roman Perepelitsa.
|
||
|
//
|
||
|
// This file is part of GitStatus.
|
||
|
//
|
||
|
// GitStatus is free software: you can redistribute it and/or modify
|
||
|
// it under the terms of the GNU General Public License as published by
|
||
|
// the Free Software Foundation, either version 3 of the License, or
|
||
|
// (at your option) any later version.
|
||
|
//
|
||
|
// GitStatus is distributed in the hope that it will be useful,
|
||
|
// but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
// GNU General Public License for more details.
|
||
|
//
|
||
|
// You should have received a copy of the GNU General Public License
|
||
|
// along with GitStatus. If not, see <https://www.gnu.org/licenses/>.
|
||
|
|
||
|
#include "dir.h"
|
||
|
|
||
|
#include <algorithm>
|
||
|
#include <atomic>
|
||
|
#include <cerrno>
|
||
|
#include <cstring>
|
||
|
|
||
|
#include <dirent.h>
|
||
|
#include <fcntl.h>
|
||
|
#include <stdio.h>
|
||
|
#include <stdlib.h>
|
||
|
#include <sys/stat.h>
|
||
|
#include <unistd.h>
|
||
|
|
||
|
#ifdef __linux__
|
||
|
#include <endian.h>
|
||
|
#include <sys/syscall.h>
|
||
|
#endif
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
#include <iconv.h>
|
||
|
#endif
|
||
|
|
||
|
#include "bits.h"
|
||
|
#include "check.h"
|
||
|
#include "scope_guard.h"
|
||
|
#include "string_cmp.h"
|
||
|
#include "tribool.h"
|
||
|
|
||
|
namespace gitstatus {
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// Returns true iff `name` is exactly "." or "..", i.e. one of the two special
// directory entries that directory listings skip.
bool Dots(const char* name) {
  if (name[0] != '.') return false;
  if (name[1] == '\0') return true;
  return name[1] == '.' && name[2] == '\0';
}
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
// The linux-specific implementation is about 20% faster than the generic (posix) implementation.
|
||
|
#ifdef __linux__
|
||
|
|
||
|
// Loads 8 bytes starting at `p` into a uint64_t in native byte order. Goes
// through memcpy, so `p` doesn't have to be aligned.
uint64_t Read64(const void* p) {
  uint64_t value;
  std::memcpy(&value, p, sizeof(value));
  return value;
}
|
||
|
|
||
|
// Stores the 8 bytes of `x` (native byte order) at `p`. Goes through memcpy,
// so `p` doesn't have to be aligned.
void Write64(uint64_t x, void* p) {
  std::memcpy(p, &x, sizeof(x));
}
|
||
|
|
||
|
void SwapBytes(char** begin, char** end) {
|
||
|
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
|
||
|
for (; begin != end; ++begin) Write64(__builtin_bswap64(Read64(*begin)), *begin);
|
||
|
#elif __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__
|
||
|
#error "sorry, not implemented"
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
template <bool kCaseSensitive>
|
||
|
void SortEntries(char** begin, char** end) {
|
||
|
static_assert(kCaseSensitive, "");
|
||
|
SwapBytes(begin, end);
|
||
|
std::sort(begin, end, [](const char* a, const char* b) {
|
||
|
uint64_t x = Read64(a);
|
||
|
uint64_t y = Read64(b);
|
||
|
// Add 5 for good luck.
|
||
|
return x < y || (x == y && std::memcmp(a + 5, b + 5, 256) < 0);
|
||
|
});
|
||
|
SwapBytes(begin, end);
|
||
|
}
|
||
|
|
||
|
template <>
|
||
|
void SortEntries<false>(char** begin, char** end) {
|
||
|
std::sort(begin, end, StrLt<false>());
|
||
|
}
|
||
|
|
||
|
bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode,
|
||
|
bool case_sensitive) {
|
||
|
struct linux_dirent64 {
|
||
|
ino64_t d_ino;
|
||
|
off64_t d_off;
|
||
|
unsigned short d_reclen;
|
||
|
unsigned char d_type;
|
||
|
char d_name[];
|
||
|
};
|
||
|
|
||
|
constexpr size_t kBufSize = 8 << 10;
|
||
|
entries.clear();
|
||
|
|
||
|
while (true) {
|
||
|
char* buf = static_cast<char*>(arena.Allocate(kBufSize, alignof(linux_dirent64)));
|
||
|
// Save 256 bytes for the rainy day.
|
||
|
int n = syscall(SYS_getdents64, dir_fd, buf, kBufSize - 256);
|
||
|
if (n < 0) {
|
||
|
entries.clear();
|
||
|
return false;
|
||
|
}
|
||
|
for (int pos = 0; pos < n;) {
|
||
|
auto* ent = reinterpret_cast<linux_dirent64*>(buf + pos);
|
||
|
if (!Dots(ent->d_name)) entries.push_back(ent->d_name);
|
||
|
pos += ent->d_reclen;
|
||
|
}
|
||
|
if (n == 0) break;
|
||
|
// The following optimization relies on SYS_getdents64 always returning as many
|
||
|
// entries as would fit. This is not guaranteed by the specification and I don't
|
||
|
// know if this is true in practice. The optimization has no measurable effect on
|
||
|
// gitstatus performance, so it's turned off.
|
||
|
//
|
||
|
// if (n + sizeof(linux_dirent64) + 512 <= kBufSize) break;
|
||
|
}
|
||
|
|
||
|
if (case_sensitive) {
|
||
|
SortEntries<true>(entries.data(), entries.data() + entries.size());
|
||
|
} else {
|
||
|
SortEntries<false>(entries.data(), entries.data() + entries.size());
|
||
|
}
|
||
|
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#else // __linux__
|
||
|
|
||
|
namespace {
|
||
|
|
||
|
// Copies the name of `ent` into memory allocated from `arena` and returns a
// pointer to the copy. `len` is the length of the name without the terminating
// NUL, which is copied as well. The byte immediately before the returned
// pointer holds the entry's d_type.
char* DirentDup(Arena& arena, const struct dirent& ent, size_t len) {
  // One extra byte in front for d_type, one at the end for the NUL.
  char* const block = arena.Allocate<char>(len + 2);
  block[0] = ent.d_type;
  char* const name = block + 1;
  std::memcpy(name, ent.d_name, len + 1);
  return name;
}
|
||
|
|
||
|
#ifdef __APPLE__
|
||
|
|
||
|
// Stays true until the first iconv_open() failure has been logged; IConvTry()
// then flips it to false so that the error is reported at most once.
std::atomic<bool> g_iconv_error(true);
|
||
|
|
||
|
// Attempts to convert the `ins` bytes at `inp` from "UTF-8-MAC" to "UTF-8",
// writing the NUL-terminated result to the `outs`-byte buffer at `outp`.
//
// Returns:
//   Tribool::kTrue    - conversion succeeded; `outp` holds the result.
//   Tribool::kUnknown - the output buffer is too small (or empty); the caller
//                       may retry with a bigger one.
//   Tribool::kFalse   - conversion is impossible (iconv_open() failed or
//                       iconv() reported an error other than E2BIG).
//
// A failure of iconv_open() is logged only the first time it happens.
Tribool IConvTry(char* inp, size_t ins, char* outp, size_t outs) {
  if (outs == 0) return Tribool::kUnknown;
  iconv_t ic = iconv_open("UTF-8", "UTF-8-MAC");
  if (ic == (iconv_t)-1) {
    // The cheap load keeps the common already-logged path free of writes;
    // the exchange makes sure only one caller actually logs.
    if (g_iconv_error.load(std::memory_order_relaxed) &&
        g_iconv_error.exchange(false, std::memory_order_relaxed)) {
      LOG(ERROR) << "iconv_open(\"UTF-8\", \"UTF-8-MAC\") failed";
    }
    return Tribool::kFalse;
  }
  ON_SCOPE_EXIT(&) { CHECK(iconv_close(ic) == 0) << Errno(); };
  // Reserve one byte of the output for the terminating NUL.
  --outs;
  // iconv() returns size_t and signals failure with (size_t)-1. Comparing the
  // result with `>= 0` is always true for an unsigned value and would treat
  // every error, including E2BIG, as success.
  if (iconv(ic, &inp, &ins, &outp, &outs) != static_cast<size_t>(-1)) {
    *outp = 0;
    return Tribool::kTrue;
  }
  return errno == E2BIG ? Tribool::kUnknown : Tribool::kFalse;
}
|
||
|
|
||
|
// Copies the name of `ent` into `arena` and returns a pointer to the copy; the
// byte right before the returned pointer holds the entry's d_type.
//
// When `do_convert` is true and the name contains at least one byte with the
// high bit set, the name is first passed through IConvTry() (UTF-8-MAC to
// UTF-8). If the conversion fails, the name is copied verbatim. Names made
// only of 7-bit bytes are always copied verbatim.
char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) {
  if (!do_convert) return DirentDup(arena, ent, std::strlen(ent.d_name));

  // One pass computes the length and detects bytes with the high bit set.
  size_t len = 0;
  bool has_high_bit = false;
  while (const unsigned char c = ent.d_name[len]) {
    has_high_bit |= (c & 0x80) != 0;
    ++len;
  }
  if (!has_high_bit) return DirentDup(arena, ent, len);

  // Start with room for the d_type byte, the name and the NUL, rounded by
  // NextPow2; double the buffer for as long as IConvTry() says it's too small.
  for (size_t cap = NextPow2(len + 2);; cap *= 2) {
    char* p = arena.Allocate<char>(cap);
    switch (IConvTry(ent.d_name, len, p + 1, cap - 1)) {
      case Tribool::kTrue:
        *p = ent.d_type;
        return p + 1;
      case Tribool::kFalse:
        return DirentDup(arena, ent, len);
      case Tribool::kUnknown:
        // Output didn't fit: retry with a bigger buffer.
        break;
    }
  }
}
|
||
|
|
||
|
#else // __APPLE__
|
||
|
|
||
|
// Copies the name of `ent` into `arena` via DirentDup() and returns a pointer
// to the copy. `do_convert` is ignored by this variant.
char* DirenvConvert(Arena& arena, struct dirent& ent, bool do_convert) {
  const size_t name_len = std::strlen(ent.d_name);
  return DirentDup(arena, ent, name_len);
}
|
||
|
|
||
|
#endif // __APPLE__
|
||
|
|
||
|
} // namespace
|
||
|
|
||
|
bool ListDir(int dir_fd, Arena& arena, std::vector<char*>& entries, bool precompose_unicode,
|
||
|
bool case_sensitive) {
|
||
|
entries.clear();
|
||
|
dir_fd = dup(dir_fd);
|
||
|
if (dir_fd < 0) return false;
|
||
|
DIR* dir = fdopendir(dir_fd);
|
||
|
if (!dir) {
|
||
|
CHECK(!close(dir_fd)) << Errno();
|
||
|
return false;
|
||
|
}
|
||
|
ON_SCOPE_EXIT(&) { CHECK(!closedir(dir)) << Errno(); };
|
||
|
while (struct dirent* ent = (errno = 0, readdir(dir))) {
|
||
|
if (Dots(ent->d_name)) continue;
|
||
|
entries.push_back(DirenvConvert(arena, *ent, precompose_unicode));
|
||
|
}
|
||
|
if (errno) {
|
||
|
entries.clear();
|
||
|
return false;
|
||
|
}
|
||
|
StrSort(entries.data(), entries.data() + entries.size(), case_sensitive);
|
||
|
return true;
|
||
|
}
|
||
|
|
||
|
#endif // __linux__
|
||
|
|
||
|
} // namespace gitstatus
|