"""
Copyright (c) 2019 Intel Corporation

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

      http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
"""
import numpy as np

from ..representation import TextDetectionAnnotation, CharacterRecognitionAnnotation
from ..utils import read_txt
from .format_converter import FileBasedAnnotationConverter, DirectoryBasedAnnotationConverter
class ICDAR15DetectionDatasetConverter(DirectoryBasedAnnotationConverter):
    """Convert ICDAR 2015 text-detection ground truth into annotations.

    Every entry of ``self.data_dir`` is read as one ground-truth text file.
    Each line of such a file is parsed as eight comma-separated coordinates
    (four ``x, y`` corner points) followed by the transcription of the region.
    """

    __provider__ = 'icdar15_detection'

    # NOTE(review): the method header is not visible in the reviewed excerpt;
    # ``convert`` is assumed to be the entry point expected by the base
    # converter -- confirm against ``format_converter``.
    def convert(self):
        """Build one ``TextDetectionAnnotation`` per ground-truth file.

        The image identifier is derived from the ground-truth file name by
        dropping everything up to ``gt_`` and the ``.txt`` suffix, then
        appending ``.jpg``. Regions transcribed as ``###`` are recorded by
        index in ``annotation.metadata['difficult_boxes']``.

        Returns:
            A ``(annotations, None)`` tuple; no dataset meta is produced.
        """
        annotations = []

        # ``Path.iterdir`` yields entries in arbitrary order; sorting keeps
        # the produced annotation list reproducible between runs/platforms.
        for gt_file in sorted(self.data_dir.iterdir()):
            gt_file_name = str(gt_file.parts[-1])
            identifier = '{}.jpg'.format(gt_file_name.split('gt_')[-1].split('.txt')[0])
            all_points, transcriptions, difficult = [], [], []

            for text_area in read_txt(gt_file):
                # Split on the first eight commas only: the transcription is
                # the remainder of the line and may itself contain commas,
                # which an unbounded split would silently truncate.
                text_annotation = text_area.split(',', 8)
                transcription = text_annotation[-1]
                # Eight flat coordinates -> array of four (x, y) points.
                points = np.reshape(list(map(float, text_annotation[:8])), (-1, 2))
                if transcription == '###':
                    # Index of this region within the current file.
                    difficult.append(len(transcriptions))
                all_points.append(points)
                transcriptions.append(transcription)

            annotation = TextDetectionAnnotation(identifier, all_points, transcriptions)
            annotation.metadata['difficult_boxes'] = difficult
            annotations.append(annotation)

        return annotations, None
class ICDAR13RecognitionDatasetConverter(FileBasedAnnotationConverter):
    """Convert an ICDAR 2013 word-recognition annotation file.

    Each line of ``self.annotation_file`` is expected to hold an image
    identifier followed by whitespace and the text shown in that image.
    """

    __provider__ = 'icdar13_recognition'

    # Alphabet used to build the label map; the blank label is placed right
    # after the last symbol.
    supported_symbols = '0123456789abcdefghijklmnopqrstuvwxyz'

    # NOTE(review): the method header is not visible in the reviewed excerpt;
    # ``convert`` is assumed to be the entry point expected by the base
    # converter -- confirm against ``format_converter``.
    def convert(self):
        """Build one ``CharacterRecognitionAnnotation`` per annotation line.

        Returns:
            A ``(annotations, meta)`` tuple where ``meta`` contains
            ``label_map`` (index -> symbol) and ``blank_label`` (the index
            following the last symbol).

        Raises:
            ValueError: if a line does not contain both an identifier and
                a text part.
        """
        annotations = []

        for line in read_txt(self.annotation_file):
            # Split once on any run of whitespace so that tabs, repeated
            # spaces, or spaces inside the text do not break the unpacking.
            identifier, text = line.strip().split(None, 1)
            annotations.append(CharacterRecognitionAnnotation(identifier, text))

        label_map = dict(enumerate(self.supported_symbols))

        return annotations, {'label_map': label_map, 'blank_label': len(label_map)}