From b85c5be7f5555de40e8c3a3181082c2378119cc2 Mon Sep 17 00:00:00 2001
From: Kai Li <kaili_kloud@163.com>
Date: Tue, 11 Feb 2014 19:39:02 +0800
Subject: [PATCH] Add script to resize and crop images in parallel using
 mincepie

---
 scripts/launch_resize_and_crop_images.sh |  24 +++++++
 scripts/resize_and_crop_images.py        | 108 +++++++++++++++++++++++++++++++
 2 files changed, 132 insertions(+)
 create mode 100755 scripts/launch_resize_and_crop_images.sh
 create mode 100755 scripts/resize_and_crop_images.py

diff --git a/scripts/launch_resize_and_crop_images.sh b/scripts/launch_resize_and_crop_images.sh
new file mode 100755
index 0000000..84ca858
--- /dev/null
+++ b/scripts/launch_resize_and_crop_images.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+#### https://github.com/Yangqing/mincepie/wiki/Launch-Your-Mapreducer
+
+# If you encounter an "address already in use" error, kill the stale server process.
+# 11235 is the port of the server process:
+# https://github.com/Yangqing/mincepie/blob/master/mincepie/mince.py
+#     sudo netstat -ap | grep 11235
+# The last column of the output is PID/Program name; kill that PID:
+#     kill -9 PID
+# Second solution:
+#     nmap localhost
+#     fuser -k 11235/tcp
+# Or just wait a few seconds.
+
+## Launch your Mapreduce locally
+# num_clients: number of processes
+# image_lib: OpenCV or PIL, case insensitive. The default value is the faster OpenCV.
+# input: a file listing one image path per line, each relative to input_folder
+# input_folder: the folder that contains the original images
+# output_folder: where to save the resized and cropped images
+./resize_and_crop_images.py --num_clients=8 --image_lib=opencv --input=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images.txt --input_folder=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images_train/ --output_folder=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images_train_resized/
+
+## Launch your Mapreduce with MPI (--launch=mpi is a flag of the script, not of mpirun)
+# mpirun -n 8 resize_and_crop_images.py --launch=mpi --image_lib=opencv --input=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images.txt --input_folder=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images_train/ --output_folder=/home/user/Datasets/ImageNet/ILSVRC2010/ILSVRC2010_images_train_resized/
diff --git a/scripts/resize_and_crop_images.py b/scripts/resize_and_crop_images.py
new file mode 100755
index 0000000..0ab75dc
--- /dev/null
+++ b/scripts/resize_and_crop_images.py
@@ -0,0 +1,108 @@
+#!/usr/bin/env python
+from mincepie import mapreducer, launcher
+import gflags
+import os
+import cv2
+import PIL
+
+# Command-line flags; the parsed values are read through FLAGS.
+FLAGS = gflags.FLAGS
+gflags.DEFINE_string('image_lib', 'opencv',
+    'OpenCV or PIL, case insensitive. The default value is the faster OpenCV.')
+gflags.DEFINE_string('input_folder', '',
+    'The folder that contains all input images, organized in synsets.')
+gflags.DEFINE_integer('output_side_length', 256,
+    'Expected side length of the output image.')
+gflags.DEFINE_string('output_folder', '',
+    'The folder that we write output resized and cropped images to')
+
+class OpenCVResizeCrop:
+    def resize_and_crop_image(self, input_file, output_file, output_side_length = 256):
+        '''Scale input_file so its shorter side is output_side_length pixels and save
+        the centered square crop to output_file. Raises IOError if reading or writing fails.'''
+        img = cv2.imread(input_file)
+        if img is None:  # cv2.imread signals an unreadable file by returning None
+            raise IOError('cannot read image: %s' % input_file)
+        height, width = img.shape[:2]
+        new_height = new_width = output_side_length
+        if height > width:  # '//' keeps pixel sizes integral on both Python 2 and 3
+            new_height = output_side_length * height // width
+        else:
+            new_width = output_side_length * width // height
+        resized_img = cv2.resize(img, (new_width, new_height))
+        top, left = (new_height - output_side_length) // 2, (new_width - output_side_length) // 2
+        cropped_img = resized_img[top:top + output_side_length, left:left + output_side_length]
+        if not cv2.imwrite(output_file, cropped_img):  # failure is reported via the return value
+            raise IOError('cannot write image: %s' % output_file)
+
+class PILResizeCrop:
+## http://united-coders.com/christian-harms/image-resizing-tips-every-coder-should-know/
+    def resize_and_crop_image(self, input_file, output_file, output_side_length = 256):
+        '''Center-crop input_file to a square, scale it to output_side_length pixels
+        per side and save it to output_file as a JPEG.'''
+        from PIL import Image  # the file-level 'import PIL' does not bind the name Image
+        img = Image.open(input_file).convert('RGB')  # JPEG cannot store palette/alpha modes
+        box = (output_side_length, output_side_length)
+        #preresize image with factor 2, 4, 8 and fast algorithm, keeping both sides above the box
+        factor = 1
+        while img.size[0]//factor > 2*box[0] and img.size[1]//factor > 2*box[1]:
+            factor *=2
+        if factor > 1:
+            img.thumbnail((img.size[0]//factor, img.size[1]//factor), Image.NEAREST)
+
+        #calculate the cropping box (largest centered area with the box's aspect ratio)
+        x1 = y1 = 0
+        x2, y2 = img.size
+        wRatio = 1.0 * x2/box[0]
+        hRatio = 1.0 * y2/box[1]
+        if hRatio > wRatio:
+            y1 = int(y2/2-box[1]*wRatio/2)
+            y2 = int(y2/2+box[1]*wRatio/2)
+        else:
+            x1 = int(x2/2-box[0]*hRatio/2)
+            x2 = int(x2/2+box[0]*hRatio/2)
+        img = img.crop((x1,y1,x2,y2))
+
+        #Resize with best quality algorithm ANTI-ALIAS; resize (unlike thumbnail) also upscales
+        img = img.resize(box, Image.ANTIALIAS)
+        #save it into a file-like object
+        with open(output_file, 'wb') as out:
+            img.save(out, 'JPEG', quality=75)
+
+class ResizeCropImagesMapper(mapreducer.BasicMapper):
+    '''Mapper that resizes and center-crops one image per input value.
+    The first whitespace-separated token of the value is the image path relative to input_folder.
+    '''
+    def map(self, key, value):
+        '''Process the image named by value; yields (value, FLAGS.output_folder).'''
+        if not isinstance(value, str):
+            value = str(value)
+        if FLAGS.image_lib.lower() == 'pil':
+            resize_crop = PILResizeCrop()
+        else:
+            resize_crop = OpenCVResizeCrop()
+        try:
+            image_file_name = value.replace(FLAGS.input_folder, '').split()[0]
+            input_file = os.path.join(FLAGS.input_folder, image_file_name)
+            output_file = os.path.join(FLAGS.output_folder, image_file_name)
+            output_dir = os.path.dirname(output_file)
+            if output_dir and not os.path.isdir(output_dir):
+                try:
+                    os.makedirs(output_dir)
+                except OSError:  # a parallel worker may have created it in the meantime
+                    if not os.path.isdir(output_dir):
+                        raise
+            resize_crop.resize_and_crop_image(input_file, output_file,
+                                              FLAGS.output_side_length)
+        except Exception as e:
+            # best effort: report and skip this image (maybe the image is corrupted?)
+            print('%s: %r' % (value, e))
+        yield value, FLAGS.output_folder
+
+mapreducer.REGISTER_DEFAULT_MAPPER(ResizeCropImagesMapper)
+# Use mincepie's FileReader and FileWriter as the default reader and writer.
+mapreducer.REGISTER_DEFAULT_READER(mapreducer.FileReader)
+mapreducer.REGISTER_DEFAULT_WRITER(mapreducer.FileWriter)
+
+if __name__ == '__main__':
+    launcher.launch()
-- 
2.7.4