1 // Copyright 2019 The Chromium Authors
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "pdf/accessibility.h"
11 #include "base/numerics/safe_math.h"
12 #include "pdf/accessibility_helper.h"
13 #include "pdf/accessibility_structs.h"
14 #include "pdf/pdf_engine.h"
15 #include "ui/gfx/geometry/rect_f.h"
17 namespace chrome_pdf {
// Builds the form-field accessibility data for a single page.
//
// Only the `text_fields` member is filled in here, with the result of
// `engine->GetTextFieldInfo(page_index, text_run_count)`. The struct is
// returned by value.
//
// NOTE(review): `engine` and `page_index` are used below but their
// declarations are not visible in this excerpt; presumably they are the
// leading parameters of this function - confirm against the full file.
21 AccessibilityFormFieldInfo GetAccessibilityFormFieldInfo(
24 uint32_t text_run_count) {
25 AccessibilityFormFieldInfo form_field_info;
// Text fields are the only kind of form field collected by this helper.
26 form_field_info.text_fields =
27 engine->GetTextFieldInfo(page_index, text_run_count);
28 return form_field_info;
// Gathers the accessibility data for one page of `engine` into the four
// output parameters:
//   - `page_info`:    page index, page bounds, character count and text run
//                     count.
//   - `chars`:        resized to the page's character count; every entry gets
//                     its `unicode_character` and a `char_width`.
//   - `text_runs`:    one entry is appended per run reported by
//                     `engine->GetTextRunInfo()`. No clear() is visible in
//                     this excerpt, and `page_info.text_run_count` is taken
//                     from `text_runs.size()`, so callers presumably pass an
//                     empty vector - confirm.
//   - `page_objects`: links, images, highlights and form fields.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt (the `page_index` parameter, the body of the range check, the
// `char_index` declaration, `break`s, and the return statements). Presumably
// it returns false for an out-of-range page and true otherwise - confirm
// against the full file.
33 bool GetAccessibilityInfo(PDFEngine* engine,
35 AccessibilityPageInfo& page_info,
36 std::vector<AccessibilityTextRunInfo>& text_runs,
37 std::vector<AccessibilityCharInfo>& chars,
38 AccessibilityPageObjects& page_objects) {
// Reject page indices outside [0, page_count).
39 int page_count = engine->GetNumberOfPages();
40 if (page_index < 0 || page_index >= page_count)
43 int char_count = engine->GetCharCount(page_index);
45 // Treat a char count of -1 (error) as 0 (an empty page), since
46 // other pages might have valid content.
// NOTE(review): the statement that performs this clamp is not visible in
// this excerpt.
50 page_info.page_index = page_index;
51 page_info.bounds = engine->GetPageBoundsRect(page_index);
52 page_info.char_count = char_count;
// One AccessibilityCharInfo per character. The character values are recorded
// first; the widths are filled in run by run in the loop further down.
54 chars.resize(page_info.char_count);
55 for (uint32_t i = 0; i < page_info.char_count; ++i) {
56 chars[i].unicode_character = engine->GetCharUnicode(page_index, i);
// Walk the page one text run at a time. `char_index` is the index of the
// first character of the run being processed.
// NOTE(review): its declaration is not visible in this excerpt.
60 while (char_index < char_count) {
// Ask the engine for the run that begins at `char_index`.
61 absl::optional<AccessibilityTextRunInfo> text_run_info_result =
62 engine->GetTextRunInfo(page_index, char_index);
// NOTE(review): has_value() is only DCHECKed, so the value() call below is
// unguarded in builds where DCHECK is compiled out - confirm that
// GetTextRunInfo() cannot return nullopt for an in-range `char_index`.
63 DCHECK(text_run_info_result.has_value());
64 const auto& text_run_info = text_run_info_result.value();
// Index one past the last character of this run. The run is expected to stay
// within the page's characters; that expectation is also DCHECK-only, while
// `chars` is indexed up to `text_run_end - 1` below.
65 uint32_t text_run_end = char_index + text_run_info.len;
66 DCHECK_LE(text_run_end, static_cast<uint32_t>(char_count));
67 text_runs.push_back(text_run_info);
69 // We need to provide enough information to draw a bounding box
70 // around any arbitrary text range, but the bounding boxes of characters
71 // we get from PDFium don't necessarily "line up".
72 // Example for LTR text direction: walk through the
73 // characters in each text run and let the width of each character be
74 // the difference between the x coordinate of one character and the
75 // x coordinate of the next. The rest of the bounds of each character
76 // can be computed from the bounds of the text run.
77 // The same idea is used for RTL, TTB and BTT text direction.
// `char_bounds` holds the bounds of character `i` at the top of each
// iteration: it is seeded with the run's first character here and replaced
// by `next_char_bounds` at the end of the loop body.
78 gfx::RectF char_bounds = engine->GetCharBounds(page_index, char_index);
// Every character of the run except the last one.
// NOTE(review): `text_run_end - 1` is unsigned arithmetic, and `char_index`
// only advances by `text_run_info.len`; a zero-length run would therefore
// wrap here (when char_index == 0) and never advance the outer loop.
// Presumably runs always have len >= 1 - confirm.
79 for (uint32_t i = char_index; i < text_run_end - 1; i++) {
80 DCHECK_LT(i + 1, static_cast<uint32_t>(char_count));
81 gfx::RectF next_char_bounds = engine->GetCharBounds(page_index, i + 1);
82 double& char_width = chars[i].char_width;
// The "width" is the distance from this character to the next one, measured
// along the run's direction:
//   kNone / kLeftToRight: difference of the x coordinates (kNone shares the
//                         kLeftToRight case).
//   kTopToBottom:         difference of the y coordinates.
//   kRightToLeft:         difference of the right edges.
//   kBottomToTop:         difference of the bottom edges.
// NOTE(review): the `break` statements between the cases are not visible in
// this excerpt; presumably each case ends with one - confirm.
83 switch (text_run_info.direction) {
84 case AccessibilityTextDirection::kNone:
85 case AccessibilityTextDirection::kLeftToRight:
86 char_width = next_char_bounds.x() - char_bounds.x();
88 case AccessibilityTextDirection::kTopToBottom:
89 char_width = next_char_bounds.y() - char_bounds.y();
91 case AccessibilityTextDirection::kRightToLeft:
92 char_width = char_bounds.right() - next_char_bounds.right();
94 case AccessibilityTextDirection::kBottomToTop:
95 char_width = char_bounds.bottom() - next_char_bounds.bottom();
// The next character becomes the current one for the following iteration.
98 char_bounds = next_char_bounds;
// The last character of the run has no successor to measure against, so its
// own box is used instead: the height for vertical runs (bottom-to-top or
// top-to-bottom), the width for all other directions.
100 double& char_width = chars[text_run_end - 1].char_width;
101 if (text_run_info.direction == AccessibilityTextDirection::kBottomToTop ||
102 text_run_info.direction == AccessibilityTextDirection::kTopToBottom) {
103 char_width = char_bounds.height();
105 char_width = char_bounds.width();
// Continue with the first character after this run.
108 char_index += text_run_info.len;
// All runs are collected: record how many there are, then query the engine
// for the page's links, images, highlights and form fields. Links and
// highlights are given the runs themselves; images and form fields are given
// only the run count.
111 page_info.text_run_count = text_runs.size();
112 page_objects.links = engine->GetLinkInfo(page_index, text_runs);
113 page_objects.images =
114 engine->GetImageInfo(page_index, page_info.text_run_count);
115 page_objects.highlights = engine->GetHighlightInfo(page_index, text_runs);
116 page_objects.form_fields = GetAccessibilityFormFieldInfo(
117 engine, page_index, page_info.text_run_count);
121 } // namespace chrome_pdf