Merge remote-tracking branch 'origin/master' into tizen
[platform/core/csapi/tizenfx.git] / src / Tizen.AIAvatar / src / Lipsync / VowelConverter / VowelConverter.cs
1 /*
2  * Copyright(c) 2024 Samsung Electronics Co., Ltd.
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  *
16  */
17
18 using System.Collections.Generic;
19
20 using static Tizen.AIAvatar.AIAvatar;
21
namespace Tizen.AIAvatar
{
    /// <summary>
    /// Turns vowel sequences, or raw audio that is first classified into vowels,
    /// into <see cref="AnimationKeyFrame"/> data by delegating to an
    /// <see cref="AnimationConverter"/>.
    /// </summary>
    /// <remarks>
    /// One <see cref="VowelClassifier"/> is created per sample rate and cached for reuse.
    /// The classifier selected by the most recent call is kept in a field, so
    /// <see cref="PredictVowels(byte[])"/> always runs with the last requested sample rate.
    /// </remarks>
    internal class VowelConverter
    {
        // Model handed to VowelClassifier.Inference on every prediction.
        private readonly TFVowel6 vowelClassifierModel;

        // Cache of classifiers keyed by sample rate.
        private readonly Dictionary<int, VowelClassifier> vowelClassifiers = new Dictionary<int, VowelClassifier>();

        private readonly AnimationConverter animationConverter = new AnimationConverter();

        // Classifier for the sample rate of the most recent request.
        private VowelClassifier vowelClassifier;

        /// <summary>
        /// Creates a converter whose initial classifier matches
        /// <c>CurrentAudioOptions.SampleRate</c> and registers <c>VisemeInfo</c>
        /// with the animation converter.
        /// </summary>
        internal VowelConverter()
        {
            vowelClassifier = GetVowelClassifier(CurrentAudioOptions.SampleRate);
            vowelClassifierModel = new TFVowel6(new int[4] { 12, 1, 1, 1 }, new int[4] { 7, 1, 1, 1 });
            animationConverter.InitializeVisemeInfo(VisemeInfo);
        }

        /// <summary>
        /// Builds key frames from an already recognized vowel sequence.
        /// </summary>
        /// <param name="vowels">Vowel labels to convert.</param>
        /// <param name="sampleRate">Sample rate used to select the classifier that supplies the step time.</param>
        /// <param name="isMic">When true, the microphone variant of the conversion is used.</param>
        /// <returns>The key frames produced by the animation converter.</returns>
        internal AnimationKeyFrame CreateKeyFrames(string[] vowels, int sampleRate, bool isMic = false)
        {
            vowelClassifier = GetVowelClassifier(sampleRate);

            return ConvertToAnimation(vowels, isMic);
        }

        /// <summary>
        /// Classifies raw audio into vowels and builds key frames from the result.
        /// </summary>
        /// <param name="audioData">Audio bytes to classify.</param>
        /// <param name="sampleRate">Sample rate used to select the classifier.</param>
        /// <param name="isMic">When true, the microphone variant of the conversion is used.</param>
        /// <returns>
        /// The key frames produced by the animation converter, or <c>null</c> when no
        /// classifier is available or the prediction yields no result.
        /// </returns>
        internal AnimationKeyFrame CreateKeyFrames(byte[] audioData, int sampleRate, bool isMic = false)
        {
            vowelClassifier = GetVowelClassifier(sampleRate);

            if (vowelClassifier == null)
            {
                Log.Error(LogTag, "Failed to play Buffer");
                return null;
            }

            var vowels = PredictVowels(audioData);

            // string.Join throws ArgumentNullException for a null array, so a missing
            // prediction is reported through the same "return null" failure path.
            if (vowels == null)
            {
                Log.Error(LogTag, "Failed to predict vowels");
                return null;
            }

            Log.Info(LogTag, $"vowelRecognition: {string.Join(", ", vowels)}");

            return ConvertToAnimation(vowels, isMic);
        }

        /// <summary>
        /// Runs inference on the audio with the currently selected classifier.
        /// </summary>
        /// <param name="audioData">Audio bytes to classify.</param>
        /// <returns>The vowel labels returned by the classifier.</returns>
        internal string[] PredictVowels(byte[] audioData)
        {
            return vowelClassifier.Inference(audioData, vowelClassifierModel);
        }

        /// <summary>
        /// Returns the cached classifier for the sample rate, creating and caching it on first use.
        /// </summary>
        /// <param name="sampleRate">Sample rate that identifies the classifier.</param>
        /// <returns>The classifier associated with <paramref name="sampleRate"/>.</returns>
        internal VowelClassifier GetVowelClassifier(int sampleRate)
        {
            VowelClassifier classifier;

            // A single TryGetValue replaces the ContainsKey + indexer double lookup.
            if (!vowelClassifiers.TryGetValue(sampleRate, out classifier))
            {
                classifier = new VowelClassifier(sampleRate);
                vowelClassifiers[sampleRate] = classifier;
            }

            return classifier;
        }

        // Shared mic / non-mic dispatch using the step time of the current classifier.
        private AnimationKeyFrame ConvertToAnimation(string[] vowels, bool isMic)
        {
            var stepTime = vowelClassifier.GetStepTime();

            if (isMic)
            {
                return animationConverter.ConvertVowelsToAnimationMic(vowels, stepTime);
            }

            return animationConverter.ConvertVowelsToAnimation(vowels, stepTime);
        }
    }
}