2 Copyright (c) 2019 Intel Corporation
4 Licensed under the Apache License, Version 2.0 (the "License");
5 you may not use this file except in compliance with the License.
6 You may obtain a copy of the License at
8 http://www.apache.org/licenses/LICENSE-2.0
10 Unless required by applicable law or agreed to in writing, software
11 distributed under the License is distributed on an "AS IS" BASIS,
12 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 See the License for the specific language governing permissions and
14 limitations under the License.
17 from pathlib import Path
20 from .annotation_converters import BaseFormatConverter, save_annotation, make_subset
21 from .data_readers import BaseReader, DataReaderField
22 from .config import ConfigValidator, StringField, PathField, ListField, DictField, BaseField, NumberField, ConfigError
23 from .utils import JSONDecoderWithAutoConversion, read_json, get_path, contains_all
24 from .representation import BaseRepresentation
class DataRepresentation:
    """Bundle one piece of input data with its identifier and metadata.

    On construction ``metadata['image_size']`` is derived from ``data``:

    * scalar (per ``np.isscalar``)            -> ``1``
    * list whose first element is a scalar    -> ``len(data)``
    * list of objects exposing ``.shape``     -> ``data[0].shape``
    * any other object                        -> ``data.shape``

    Args:
        data: the payload; a scalar, a list, or an object with ``.shape``.
        meta: optional dict of metadata. A non-empty dict is used as-is
            (not copied), so the ``'image_size'`` key is written into the
            caller's dict.
        identifier: identifier of the data item.

    Raises:
        IndexError: if ``data`` is an empty list (its first element is
            inspected to decide how to size it).
    """

    def __init__(self, data, meta=None, identifier=''):
        self.identifier = identifier
        # The payload is read back through ``.data`` by the dataset when it
        # fills in annotation metadata.
        self.data = data
        self.metadata = meta or {}
        self.metadata['image_size'] = self._infer_size(data)

    @staticmethod
    def _infer_size(data):
        """Return the 'image_size' value for ``data`` (see class docstring)."""
        if np.isscalar(data):
            return 1
        if isinstance(data, list):
            first = data[0]
            return len(data) if np.isscalar(first) else first.shape
        return data.shape
class DatasetConfig(ConfigValidator):
    """
    Specifies configuration structure for dataset
    """

    # NOTE(review): the dataset loader reads a 'name' key from the same
    # config and StringField is imported by this module, but no 'name' field
    # is visible here - confirm whether a declaration is missing.

    # Location of an already converted annotation file.
    annotation = BaseField(optional=True)
    # Location of the input data; the dataset loader overrides this field's
    # ``is_directory`` and ``optional`` flags from the selected data reader
    # before validating.
    data_source = PathField()
    # Location of the JSON file with dataset meta information.
    dataset_meta = BaseField(optional=True)
    metrics = ListField(allow_empty=False)
    postprocessing = ListField(allow_empty=False, optional=True)
    preprocessing = ListField(allow_empty=False, optional=True)
    # Reader name (string) or a dict with a 'type' key plus reader options.
    reader = DataReaderField(optional=True)
    # Converter parameters; must contain a 'converter' key.
    annotation_conversion = DictField(optional=True)
    # Either an absolute count or a percentage string ending in '%'.
    subsample_size = BaseField(optional=True)
    subsample_seed = NumberField(floats=False, min_value=0, optional=True)
def __init__(self, config_entry, preprocessor):
    """Validate the dataset config and load (or convert) its annotation.

    Steps, in order:

    1. pick the data reader named by the ``'reader'`` entry
       (default ``'opencv_imread'``);
    2. validate ``config_entry`` against ``DatasetConfig`` after copying the
       reader's ``data_source_is_dir`` / ``data_source_optional`` flags
       onto the ``data_source`` field;
    3. obtain the annotation, either by running the converter described in
       ``'annotation_conversion'`` or by reading the stored ``'annotation'``
       file together with ``'dataset_meta'``;
    4. optionally subsample the annotation (``'subsample_size'`` may be a
       count or a percentage string such as ``'10%'``);
    5. when both ``'annotation'`` and ``'annotation_conversion'`` are given,
       save the converted annotation to the ``'annotation'`` path.

    NOTE(review): several guard/``else`` lines were not visible in the
    reviewed text; the conditions below are reconstructed from the
    surrounding statements - confirm against version control.

    Args:
        config_entry: mapping with the dataset configuration.
        preprocessor: object whose ``process(images, batch_annotation)`` is
            called for every batch.

    Raises:
        ConfigError: if ``'reader'`` is neither a string nor a dict, or if
            no annotation could be obtained.
    """
    self._config = config_entry
    self._preprocessor = preprocessor

    # NOTE(review): batch size is read when indexing the dataset; the
    # initial value of 1 is assumed - confirm.
    self.batch = 1

    dataset_config = DatasetConfig('Dataset')
    data_reader_config = self._config.get('reader', 'opencv_imread')
    if isinstance(data_reader_config, str):
        self.read_image_fn = BaseReader.provide(data_reader_config)
    elif isinstance(data_reader_config, dict):
        self.read_image_fn = BaseReader.provide(data_reader_config['type'], data_reader_config)
    else:
        raise ConfigError('reader should be dict or string')

    # Whether data_source must be a directory, or may be omitted, depends on
    # the reader, so adjust the validator before running it.
    data_source_field = dataset_config.fields['data_source']
    data_source_field.is_directory = self.read_image_fn.data_source_is_dir
    data_source_field.optional = self.read_image_fn.data_source_optional
    dataset_config.validate(self._config)

    annotation, meta = None, None
    self._images_dir = Path(self._config.get('data_source', ''))
    if 'annotation_conversion' in self._config:
        annotation, meta = self._convert_annotation()
    else:
        stored_annotation = self._config.get('annotation')
        if stored_annotation:
            annotation = read_annotation(get_path(stored_annotation))
            meta = self._load_meta()

    if not annotation:
        raise ConfigError('path to converted annotation or data for conversion should be specified')

    subsample_size = self._config.get('subsample_size')
    if subsample_size:
        subsample_seed = self._config.get('subsample_seed', 666)
        if isinstance(subsample_size, str) and subsample_size.endswith('%'):
            # Percentage of the full annotation, e.g. '10%'.
            subsample_size = float(subsample_size[:-1]) / 100 * len(annotation)
        subsample_size = int(subsample_size)
        annotation = make_subset(annotation, subsample_size, subsample_seed)

    if contains_all(self._config, ['annotation', 'annotation_conversion']):
        annotation_name = self._config['annotation']
        meta_name = self._config.get('dataset_meta')
        if meta_name:
            meta_name = Path(meta_name)
        save_annotation(annotation, meta, Path(annotation_name), meta_name)

    self._annotation = annotation
    # Meta may legitimately be absent; keep a dict so lookups stay safe.
    self._meta = meta or {}
    self.size = len(self._annotation)
    self.name = self._config.get('name')
def annotation(self):
    """Return the loaded annotation stored in ``self._annotation``."""
    return self._annotation
123 return self._meta.get('label_map', {})
def __getitem__(self, item):
    """Return batch number ``item`` as ``(batch_annotation, preprocessed)``.

    A batch covers annotations ``[item * batch, item * batch + batch)``,
    clipped to the dataset size, so the last batch may be shorter.

    Args:
        item: zero-based batch index.

    Returns:
        Tuple of the annotation slice for the batch and the result of
        ``self._preprocessor.process`` for the images read for it.

    Raises:
        IndexError: if the batch would start at or past ``self.size``.
    """
    batch_start = item * self.batch
    if self.size <= batch_start:
        # NOTE(review): the body of this guard was not visible in the
        # reviewed text; IndexError is what the sequence protocol expects
        # for an out-of-range index - confirm.
        raise IndexError

    batch_end = min(self.size, batch_start + self.batch)
    batch_annotation = self._annotation[batch_start:batch_end]

    identifiers = [annotation.identifier for annotation in batch_annotation]
    images = self._read_images(identifiers)

    # Record image sizes and the data source on every annotation before
    # preprocessing runs.
    for image, annotation in zip(images, batch_annotation):
        self.set_annotation_metadata(annotation, image)

    preprocessed = self._preprocessor.process(images, batch_annotation)

    return batch_annotation, preprocessed
def set_image_metadata(annotation, images):
    """Store the size of every image of one data item on ``annotation``.

    NOTE(review): this takes no ``self`` yet is invoked through the instance
    elsewhere in the file, so it is presumably bound as a static method; the
    decorator was not visible in the reviewed text - confirm.

    Args:
        annotation: object exposing ``set_image_size(sizes)``.
        images: a single image or a list of images. Each image is either a
            scalar (per ``np.isscalar``) or an object with ``.shape``.
    """
    if not isinstance(images, list):
        images = [images]
    # A scalar input has no shape; it is reported as a one-element tuple.
    image_sizes = [(1,) if np.isscalar(image) else image.shape for image in images]
    annotation.set_image_size(image_sizes)
def set_annotation_metadata(self, annotation, image):
    """Attach image-size and data-source information to ``annotation``.

    Args:
        annotation: annotation object to update; must expose
            ``set_data_source``.
        image: data representation whose ``.data`` payload is measured.
    """
    payload = image.data
    self.set_image_metadata(annotation, payload)
    source_dir = self._images_dir
    annotation.set_data_source(source_dir)
def _read_images(self, identifiers):
    """Read the data for every identifier and wrap it for preprocessing.

    Args:
        identifiers: iterable of data identifiers.

    Returns:
        List of ``DataRepresentation`` objects, one per identifier and in
        the same order, each built from
        ``self.read_image_fn(identifier, self._images_dir)``.
    """
    return [
        DataRepresentation(self.read_image_fn(identifier, self._images_dir), identifier=identifier)
        for identifier in identifiers
    ]
def _load_meta(self):
    """Load dataset meta from the JSON file named by ``'dataset_meta'``.

    Returns:
        The decoded JSON content (decoded with
        ``JSONDecoderWithAutoConversion``), or ``None`` when the config has
        no ``'dataset_meta'`` entry or the entry is empty.
    """
    meta_data_file = self._config.get('dataset_meta')
    if not meta_data_file:
        return None
    return read_json(meta_data_file, cls=JSONDecoderWithAutoConversion)
def _convert_annotation(self):
    """Run the configured annotation converter.

    The converter is looked up by the ``'converter'`` key of the
    ``'annotation_conversion'`` config entry and receives the whole entry
    as its parameters.

    Returns:
        Tuple ``(annotation, meta)`` produced by the converter's
        ``convert()``.
    """
    params = self._config.get('annotation_conversion')
    converter_name = params['converter']
    converter_instance = BaseFormatConverter.provide(converter_name, params)
    # Unpack explicitly so a converter returning anything but a pair fails here.
    converted, converted_meta = converter_instance.convert()
    return converted, converted_meta
179 def read_annotation(annotation_file: Path):
180 annotation_file = get_path(annotation_file)
183 with annotation_file.open('rb') as file:
186 result.append(BaseRepresentation.load(file))