From f94253b82bf5728a06e2643524325f0e4b7a5946 Mon Sep 17 00:00:00 2001
From: Lubov Batanina
Date: Thu, 30 May 2019 17:36:00 +0300
Subject: [PATCH] Merge pull request #14627 from l-bat:demo_kinetics

* Support 3D ResNet-34-kinetics

* Update sample

* Remove preprocess

* Change test

* Fix sample
---
 modules/dnn/test/test_onnx_importer.cpp           |  61 +++-
 samples/data/dnn/action_recongnition_kinetics.txt | 400 ++++++++++++++++++++++
 samples/dnn/action_recognition.py                 |  82 +++++
 3 files changed, 540 insertions(+), 3 deletions(-)
 create mode 100644 samples/data/dnn/action_recongnition_kinetics.txt
 create mode 100644 samples/dnn/action_recognition.py

diff --git a/modules/dnn/test/test_onnx_importer.cpp b/modules/dnn/test/test_onnx_importer.cpp
index f926a43..9de4603 100644
--- a/modules/dnn/test/test_onnx_importer.cpp
+++ b/modules/dnn/test/test_onnx_importer.cpp
@@ -352,7 +352,7 @@ TEST_P(Test_ONNX_nets, ResNet18v1)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
     // output range: [-16; 22], after Softmax [0, 0.51]
-    testONNXModels("resnet18v1", pb, default_l1, default_lInf, true);
+    testONNXModels("resnet18v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }
 
 TEST_P(Test_ONNX_nets, ResNet50v1)
@@ -360,7 +360,7 @@ TEST_P(Test_ONNX_nets, ResNet50v1)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
     // output range: [-67; 75], after Softmax [0, 0.98]
-    testONNXModels("resnet50v1", pb, default_l1, default_lInf, true);
+    testONNXModels("resnet50v1", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }
 
 TEST_P(Test_ONNX_nets, ResNet101_DUC_HDC)
@@ -477,7 +477,7 @@ TEST_P(Test_ONNX_nets, DenseNet121)
     applyTestTag(CV_TEST_TAG_MEMORY_512MB);
 
     // output range: [-87; 138], after Softmax [0; 1]
-    testONNXModels("densenet121", pb, default_l1, default_lInf, true);
+    testONNXModels("densenet121", pb, default_l1, default_lInf, true, target != DNN_TARGET_MYRIAD);
 }
 
 TEST_P(Test_ONNX_nets, Inception_v1)
@@ -497,6 +497,61 @@ TEST_P(Test_ONNX_nets, Shufflenet)
     testONNXModels("shufflenet", pb);
 }
 
+TEST_P(Test_ONNX_nets, Resnet34_kinetics)  // runs resnet-34_kinetics.onnx on two 16-frame clips and compares with stored reference outputs
+{
+    if (backend != DNN_BACKEND_INFERENCE_ENGINE || target != DNN_TARGET_CPU)
+        throw SkipTestException("Only DLIE backend on CPU is supported");
+
+    String onnxmodel = findDataFile("dnn/resnet-34_kinetics.onnx");
+    Mat image0 = imread(findDataFile("dnn/dog416.png"));
+    Mat image1 = imread(findDataFile("dnn/street.png"));
+
+    Mat ref0 = blobFromNPY(_tf("data/output_kinetics0.npy"));
+    Mat ref1 = blobFromNPY(_tf("data/output_kinetics1.npy"));
+
+    std::vector<Mat> images_0(16, image0);  // a "clip" is the same still image repeated 16 times
+    std::vector<Mat> images_1(16, image1);
+    Mat blob0 = blobFromImages(images_0, 1.0, Size(112, 112), Scalar(114.7748, 107.7354, 99.4750), true, true);
+    Mat blob1 = blobFromImages(images_1, 1.0, Size(112, 112), Scalar(114.7748, 107.7354, 99.4750), true, true);
+
+    Net permute;  // one-layer helper net, used only to reorder the blob axes
+    LayerParams lp;
+    int order[] = {1, 0, 2, 3};  // swap the first two axes of the 4-D blob
+    lp.set("order", DictValue::arrayInt(&order[0], 4));
+    permute.addLayerToPrev("perm", "Permute", lp);
+
+    permute.setInput(blob0);
+    Mat input0 = permute.forward().clone();
+
+    permute.setInput(blob1);
+    Mat input1 = permute.forward().clone();
+
+    int dims[] = {1, 3, 16, 112, 112};  // 5-D shape of the tensor fed to the network
+    input0 = input0.reshape(0, 5, &dims[0]);
+    input1 = input1.reshape(0, 5, &dims[0]);
+
+    Net net = readNetFromONNX(onnxmodel);
+    ASSERT_FALSE(net.empty());
+    net.setPreferableBackend(backend);
+    net.setPreferableTarget(target);
+
+    // output range [-5, 11]
+    float l1 = 0.0013;
+    float lInf = 0.009;
+
+    checkBackend(&input0, &ref0);
+    net.setInput(input0);
+    Mat out = net.forward().clone();
+    normAssert(ref0, out, "", l1, lInf);
+
+    checkBackend(&input1, &ref1);
+    net.setInput(input1);
+    out = net.forward().clone();
+    normAssert(ref1, out, "", l1, lInf);
+
+    expectNoFallbacksFromIE(net);
+}
+
 INSTANTIATE_TEST_CASE_P(/**/, Test_ONNX_nets, dnnBackendsAndTargets());
 
 }} // namespace
diff --git a/samples/data/dnn/action_recongnition_kinetics.txt b/samples/data/dnn/action_recongnition_kinetics.txt
new file mode 100644
index 
0000000..cdaafcb --- /dev/null +++ b/samples/data/dnn/action_recongnition_kinetics.txt @@ -0,0 +1,400 @@ +abseiling +air drumming +answering questions +applauding +applying cream +archery +arm wrestling +arranging flowers +assembling computer +auctioning +baby waking up +baking cookies +balloon blowing +bandaging +barbequing +bartending +beatboxing +bee keeping +belly dancing +bench pressing +bending back +bending metal +biking through snow +blasting sand +blowing glass +blowing leaves +blowing nose +blowing out candles +bobsledding +bookbinding +bouncing on trampoline +bowling +braiding hair +breading or breadcrumbing +breakdancing +brush painting +brushing hair +brushing teeth +building cabinet +building shed +bungee jumping +busking +canoeing or kayaking +capoeira +carrying baby +cartwheeling +carving pumpkin +catching fish +catching or throwing baseball +catching or throwing frisbee +catching or throwing softball +celebrating +changing oil +changing wheel +checking tires +cheerleading +chopping wood +clapping +clay pottery making +clean and jerk +cleaning floor +cleaning gutters +cleaning pool +cleaning shoes +cleaning toilet +cleaning windows +climbing a rope +climbing ladder +climbing tree +contact juggling +cooking chicken +cooking egg +cooking on campfire +cooking sausages +counting money +country line dancing +cracking neck +crawling baby +crossing river +crying +curling hair +cutting nails +cutting pineapple +cutting watermelon +dancing ballet +dancing charleston +dancing gangnam style +dancing macarena +deadlifting +decorating the christmas tree +digging +dining +disc golfing +diving cliff +dodgeball +doing aerobics +doing laundry +doing nails +drawing +dribbling basketball +drinking +drinking beer +drinking shots +driving car +driving tractor +drop kicking +drumming fingers +dunking basketball +dying hair +eating burger +eating cake +eating carrots +eating chips +eating doughnuts +eating hotdog +eating ice cream +eating spaghetti +eating watermelon +egg 
hunting +exercising arm +exercising with an exercise ball +extinguishing fire +faceplanting +feeding birds +feeding fish +feeding goats +filling eyebrows +finger snapping +fixing hair +flipping pancake +flying kite +folding clothes +folding napkins +folding paper +front raises +frying vegetables +garbage collecting +gargling +getting a haircut +getting a tattoo +giving or receiving award +golf chipping +golf driving +golf putting +grinding meat +grooming dog +grooming horse +gymnastics tumbling +hammer throw +headbanging +headbutting +high jump +high kick +hitting baseball +hockey stop +holding snake +hopscotch +hoverboarding +hugging +hula hooping +hurdling +hurling (sport) +ice climbing +ice fishing +ice skating +ironing +javelin throw +jetskiing +jogging +juggling balls +juggling fire +juggling soccer ball +jumping into pool +jumpstyle dancing +kicking field goal +kicking soccer ball +kissing +kitesurfing +knitting +krumping +laughing +laying bricks +long jump +lunge +making a cake +making a sandwich +making bed +making jewelry +making pizza +making snowman +making sushi +making tea +marching +massaging back +massaging feet +massaging legs +massaging person's head +milking cow +mopping floor +motorcycling +moving furniture +mowing lawn +news anchoring +opening bottle +opening present +paragliding +parasailing +parkour +passing American football (in game) +passing American football (not in game) +peeling apples +peeling potatoes +petting animal (not cat) +petting cat +picking fruit +planting trees +plastering +playing accordion +playing badminton +playing bagpipes +playing basketball +playing bass guitar +playing cards +playing cello +playing chess +playing clarinet +playing controller +playing cricket +playing cymbals +playing didgeridoo +playing drums +playing flute +playing guitar +playing harmonica +playing harp +playing ice hockey +playing keyboard +playing kickball +playing monopoly +playing organ +playing paintball +playing piano +playing poker +playing 
recorder +playing saxophone +playing squash or racquetball +playing tennis +playing trombone +playing trumpet +playing ukulele +playing violin +playing volleyball +playing xylophone +pole vault +presenting weather forecast +pull ups +pumping fist +pumping gas +punching bag +punching person (boxing) +push up +pushing car +pushing cart +pushing wheelchair +reading book +reading newspaper +recording music +riding a bike +riding camel +riding elephant +riding mechanical bull +riding mountain bike +riding mule +riding or walking with horse +riding scooter +riding unicycle +ripping paper +robot dancing +rock climbing +rock scissors paper +roller skating +running on treadmill +sailing +salsa dancing +sanding floor +scrambling eggs +scuba diving +setting table +shaking hands +shaking head +sharpening knives +sharpening pencil +shaving head +shaving legs +shearing sheep +shining shoes +shooting basketball +shooting goal (soccer) +shot put +shoveling snow +shredding paper +shuffling cards +side kick +sign language interpreting +singing +situp +skateboarding +ski jumping +skiing (not slalom or crosscountry) +skiing crosscountry +skiing slalom +skipping rope +skydiving +slacklining +slapping +sled dog racing +smoking +smoking hookah +snatch weight lifting +sneezing +sniffing +snorkeling +snowboarding +snowkiting +snowmobiling +somersaulting +spinning poi +spray painting +spraying +springboard diving +squat +sticking tongue out +stomping grapes +stretching arm +stretching leg +strumming guitar +surfing crowd +surfing water +sweeping floor +swimming backstroke +swimming breast stroke +swimming butterfly stroke +swing dancing +swinging legs +swinging on something +sword fighting +tai chi +taking a shower +tango dancing +tap dancing +tapping guitar +tapping pen +tasting beer +tasting food +testifying +texting +throwing axe +throwing ball +throwing discus +tickling +tobogganing +tossing coin +tossing salad +training dog +trapezing +trimming or shaving beard +trimming trees +triple 
jump
+tying bow tie
+tying knot (not on a tie)
+tying tie
+unboxing
+unloading truck
+using computer
+using remote controller (not gaming)
+using segway
+vault
+waiting in line
+walking the dog
+washing dishes
+washing feet
+washing hair
+washing hands
+water skiing
+water sliding
+watering plants
+waxing back
+waxing chest
+waxing eyebrows
+waxing legs
+weaving basket
+welding
+whistling
+windsurfing
+wrapping present
+wrestling
+writing
+yawning
+yoga
+zumba
diff --git a/samples/dnn/action_recognition.py b/samples/dnn/action_recognition.py
new file mode 100644
index 0000000..f2f3730
--- /dev/null
+++ b/samples/dnn/action_recognition.py
@@ -0,0 +1,82 @@
+import os
+import numpy as np
+import cv2 as cv
+import argparse
+from common import findFile
+
+parser = argparse.ArgumentParser(description='Use this script to run action recognition using 3D ResNet34',
+                                 formatter_class=argparse.ArgumentDefaultsHelpFormatter)
+parser.add_argument('--input', '-i', help='Path to input video file. Skip this argument to capture frames from a camera.')
+parser.add_argument('--model', required=True, help='Path to model.')
+parser.add_argument('--classes', default=findFile('action_recongnition_kinetics.txt'), help='Path to classes list.')
+
+# To get the network, download the original repository https://github.com/kenshohara/video-classification-3d-cnn-pytorch
+# For a correct ONNX export, modify the file video-classification-3d-cnn-pytorch/models/resnet.py:
+# change
+# - def downsample_basic_block(x, planes, stride):
+# -     out = F.avg_pool3d(x, kernel_size=1, stride=stride)
+# -     zero_pads = torch.Tensor(out.size(0), planes - out.size(1),
+# -                              out.size(2), out.size(3),
+# -                              out.size(4)).zero_()
+# -     if isinstance(out.data, torch.cuda.FloatTensor):
+# -         zero_pads = zero_pads.cuda()
+# -
+# -     out = Variable(torch.cat([out.data, zero_pads], dim=1))
+# -     return out
+
+# to
+# + def downsample_basic_block(x, planes, stride):
+# +     out = F.avg_pool3d(x, kernel_size=1, stride=stride)
+# +     out = F.pad(out, (0, 0, 0, 0, 0, 0, 0, int(planes - out.size(1)), 0, 0), "constant", 0)
+# +     return out
+
+# To export the model to ONNX, use torch.onnx.export(model, inputs, model_name)
+
+def get_class_names(path):  # returns the labels stored one per line in the text file `path`
+    class_names = []
+    with open(path) as f:
+        for row in f:
+            class_names.append(row.rstrip('\n'))  # strip only the newline: row[:-1] would eat a letter of an unterminated last line
+    return class_names
+
+def classify_video(video_path, net_path):  # classifies consecutive 16-frame clips from `video_path` and shows the label on each frame
+    SAMPLE_DURATION = 16  # number of frames collected per clip
+    SAMPLE_SIZE = 112  # width and height passed to blobFromImages
+    mean = (114.7748, 107.7354, 99.4750)  # passed to blobFromImages as the mean value
+    class_names = get_class_names(args.classes)  # NOTE: reads the module-level `args` set in the __main__ block below
+
+    net = cv.dnn.readNet(net_path)
+    net.setPreferableBackend(cv.dnn.DNN_BACKEND_INFERENCE_ENGINE)
+    net.setPreferableTarget(cv.dnn.DNN_TARGET_CPU)
+
+    winName = 'Deep learning image classification in OpenCV'
+    cv.namedWindow(winName, cv.WINDOW_AUTOSIZE)
+    cap = cv.VideoCapture(video_path)
+    while cv.waitKey(1) < 0:  # keep going until a key is pressed between clips
+        frames = []
+        for _ in range(SAMPLE_DURATION):
+            hasFrame, frame = cap.read()
+            if not hasFrame:
+                return  # end of stream; `exit` is a site helper that is not meant for use in programs
+            frames.append(frame)
+
+        inputs = cv.dnn.blobFromImages(frames, 1, (SAMPLE_SIZE, SAMPLE_SIZE), mean, True, crop=True)
+        inputs = np.transpose(inputs, (1, 0, 2, 3))  # swap the first two axes of the blob
+        inputs = np.expand_dims(inputs, axis=0)  # add a leading axis so the input is 5-D
+        net.setInput(inputs)
+        outputs = net.forward()
+        class_pred = np.argmax(outputs)
+        label = class_names[class_pred]
+
+        for frame in frames:
+            labelSize, baseLine = cv.getTextSize(label, cv.FONT_HERSHEY_SIMPLEX, 0.5, 1)
+            cv.rectangle(frame, (0, 10 - labelSize[1]),
+                         (labelSize[0], 10 + baseLine), (255, 255, 255), cv.FILLED)
+            cv.putText(frame, label, (0, 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))
+            cv.imshow(winName, frame)
+            if cv.waitKey(1) & 0xFF == ord('q'):
+                return  # 'q' quits; a bare break would leave only this display loop and processing would continue
+
+if __name__ == "__main__":
+    args, _ = parser.parse_known_args()
+    classify_video(args.input if args.input else 0, args.model)
-- 
2.7.4