1 // Copyright 2022 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "pdf/file_extension.h"
11 #include "base/check_op.h"
12 #include "base/files/file_path.h"
13 #include "base/ranges/algorithm.h"
14 #include "base/strings/string_util.h"
17 #include "base/strings/utf_string_conversions.h"
20 namespace chrome_pdf {
22 // The order of the entries should always match `ViewFileType` in
23 // tools/metrics/histograms/enums.xml and the indexes defined in
// `ExtensionIndex`: an entry's position in this table is the value that
// FileNameToExtensionIndex() casts to `ExtensionIndex`, and the
// `static_assert` following the table requires the table size to equal
// `ExtensionIndex::kMaxValue + 1`.
//
// Entries that start with '.' are lower-case file extensions. "other",
// "directory", "no extension" and "unknown extension" contain no '.';
// presumably they are bucket labels rather than extensions that a lookup can
// match -- TODO confirm against `ViewFileType`.
25 constexpr std::array<const char*, 76> kFileExtensions = {
26 "other", ".3ga", ".3gp",
27 ".aac", ".alac", ".asf",
28 ".avi", ".bmp", ".csv",
29 ".doc", ".docx", ".flac",
30 ".gif", ".jpeg", ".jpg",
31 ".log", ".m3u", ".m3u8",
32 ".m4a", ".m4v", ".mid",
33 ".mkv", ".mov", ".mp3",
34 ".mp4", ".mpg", ".odf",
35 ".odp", ".ods", ".odt",
36 ".oga", ".ogg", ".ogv",
37 ".pdf", ".png", ".ppt",
38 ".pptx", ".ra", ".ram",
39 ".rar", ".rm", ".rtf",
40 ".wav", ".webm", ".webp",
41 ".wma", ".wmv", ".xls",
42 ".xlsx", ".crdownload", ".crx",
43 ".dmg", ".exe", ".html",
44 ".htm", ".jar", ".ps",
45 ".torrent", ".txt", ".zip",
46 "directory", "no extension", "unknown extension",
47 ".mhtml", ".gdoc", ".gsheet",
48 ".gslides", ".arw", ".cr2",
49 ".dng", ".nef", ".nrw",
50 ".orf", ".raf", ".rw2",
// NOTE(review): 75 initializers are visible for an array declared with 76
// elements, and no closing `};` appears before the `static_assert`. Confirm
// the final entry and the terminator against the complete file; a missing
// initializer would leave a null `const char*` in the last slot.
54 static_assert(kFileExtensions.size() ==
55 static_cast<size_t>(ExtensionIndex::kMaxValue) + 1);
57 enum ExtensionIndex FileNameToExtensionIndex(const std::u16string& file_name) {
58 const base::FilePath::StringType extension_str =
59 base::FilePath::FromUTF16Unsafe(file_name).Extension();
60 if (extension_str.empty())
61 return ExtensionIndex::kEmptyExt;
63 // All known extensions are ASCII characters. So when an extension contains
64 // non-ASCII characters, this extension is not recognizable.
65 if (!base::IsStringASCII(extension_str))
66 return ExtensionIndex::kOtherExt;
68 const base::FilePath::StringType extension_str_lower =
69 base::ToLowerASCII(extension_str);
72 const std::string extension = base::WideToUTF8(extension_str_lower);
74 const std::string& extension = extension_str_lower;
77 auto* const* it = base::ranges::find(kFileExtensions, extension);
78 if (it == kFileExtensions.end())
79 return ExtensionIndex::kOtherExt;
81 const int distance = std::distance(kFileExtensions.begin(), it);
82 DCHECK_GT(distance, 0);
83 DCHECK_LT(static_cast<size_t>(distance), kFileExtensions.size());
84 return static_cast<enum ExtensionIndex>(distance);
87 } // namespace chrome_pdf