2 * Copyright(c) 2023 Samsung Electronics Co., Ltd.
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
19 using System.Collections.Generic;
21 using System.Runtime.InteropServices;
23 using static Tizen.AIAvatar.AIAvatar;
27 namespace Tizen.AIAvatar
29 internal class TTSLipSyncer
// Private state of TTSLipSyncer: the avatar being driven, the TTS client and
// its voice, the PCM buffers, the buffer sizing factors and the lip-sync module.

// Avatar whose AvatarAnimator is used to play and stop lip sync.
32 private Avatar currentAvatar;
// Utterance records; entries are removed in TtsUtteranceCompletedCallback.
34 private List<UtteranceText> textList;
// TTS client; created in InitTts and null-checked by the methods of this class.
35 private TtsClient ttsHandle;
// Voice taken from ttsHandle.DefaultVoice in InitTts.
36 private VoiceInfo voiceInfo;
// PCM bytes appended while handling the Continue event; played by PlayPreparedText.
37 private List<Byte> byteList;
// Rolling PCM window; handed to UpdateBuffer once it reaches desiredBufferLength.
39 private byte[] recordedBuffer;
// Receives the last audioTailLength bytes of a flushed window to seed the next one.
40 private byte[] audioTailBuffer;
// Sample rate reported by the most recent SynthesizedPcm event.
42 private int sampleRate;
// Factors multiplied with the sample rate to size the buffers below.
// presumably seconds of audio and bytes per sample — TODO confirm.
43 private float desiredBufferDuration = 0.175f;
44 private float audioTailLengthFactor = 0.015f;
45 private float audioBufferMultiflier = 2f;
// Computed on the Start event: sampleRate * desiredBufferDuration * audioBufferMultiflier.
47 private int desiredBufferLength;
// Computed on the Start event: sampleRate * audioTailLengthFactor * audioBufferMultiflier.
48 private int audioTailLength;
// Set from the argument of Play(bool).
50 private bool isPrepared = false;
// NOTE(review): no assignment or read of this flag is visible in this view — confirm usage.
51 private bool isAsync = false;
// NOTE(review): not referenced on any visible line — confirm it is still needed.
53 private Action<byte[], int> bufferChangedAction;
// Computed in InitAsyncBuffer and passed to lipSyncer.EnqueueAnimation.
55 private int audioLength;
// True once StartAsyncLipPlayTimer has been called; reset in InitAsyncBuffer.
56 private bool isAsyncLipStarting;
// LipSyncer animation module obtained from the avatar's animator in the constructor.
58 private AsyncLipSyncer lipSyncer;
// Binds this lip syncer to the given avatar and looks up the avatar's
// LipSyncer animation module. Further constructor statements may exist on
// lines that are not visible in this view.
61 internal TTSLipSyncer(Avatar avatar)
63 currentAvatar = avatar;
// The `as` cast yields null when the module is not an AsyncLipSyncer.
// UpdateBuffer checks for null; no null check is visible at the other uses.
64 lipSyncer = (currentAvatar.AvatarAnimator.GetAnimationModule(AnimationModuleType.LipSyncer) as AsyncLipSyncer);
// Raised from TtsSyntheiszedPCM on the Start event and again in the Finish
// handling; handlers are invoked with a null sender and EventArgs.Empty.
73 internal event EventHandler PlayReadyCallback;
// Exposes the underlying TTS client (the field is assigned in InitTts).
75 internal TtsClient TtsHandle
77 get { return ttsHandle; }
// Exposes the voice stored in the voiceInfo field.
80 internal VoiceInfo VoiceInfo
82 get { return voiceInfo; }
// Builds a list of VoiceInfo entries from ttsHandle.GetSupportedVoices(),
// logging each language / voice-type pair along the way.
// Returns: the list populated from the client's supported voices.
89 internal List<VoiceInfo> GetSupportedVoices()
91 var voiceInfoList = new List<VoiceInfo>();
// An error is logged when the TTS client has not been created; what is
// returned in that case is on a line not visible in this view.
93 if (ttsHandle == null)
95 Log.Error(LogTag, $"ttsHandle is null");
99 var supportedVoices = ttsHandle.GetSupportedVoices();
100 foreach (var supportedVoice in supportedVoices)
102 Log.Info(LogTag, $"{supportedVoice.Language} & {supportedVoice.VoiceType} is supported");
// Copy the client's language and voice type into this namespace's VoiceInfo.
103 voiceInfoList.Add(new VoiceInfo() { Lang = supportedVoice.Language, Type = supportedVoice.VoiceType });
105 return voiceInfoList;
// Searches the client's supported voices for one whose Language equals lang
// and logs when a match is found. The return statements are on lines not
// visible in this view.
// lang: language string compared with string.Equals against each voice.
108 internal bool IsSupportedVoice(string lang)
// An error is logged when the TTS client has not been created.
110 if (ttsHandle == null)
112 Log.Error(LogTag, $"ttsHandle is null");
115 var supportedVoices = ttsHandle.GetSupportedVoices();
117 foreach (var supportedVoice in supportedVoices)
119 if (supportedVoice.Language.Equals(lang))
121 Log.Info(LogTag, $"{lang} is supported");
// Searches the client's supported voices for one matching both the language
// and the voice type of voiceInfo, and logs when a match is found. The return
// statements are on lines not visible in this view.
// voiceInfo: the language (Lang) and voice type (Type) to look for.
128 internal bool IsSupportedVoice(VoiceInfo voiceInfo)
// An error is logged when the TTS client has not been created.
130 if (ttsHandle == null)
132 Log.Error(LogTag, $"ttsHandle is null");
135 var supportedVoices = ttsHandle.GetSupportedVoices();
136 foreach (var supportedVoice in supportedVoices)
// Language is compared with Equals, the voice type with ==.
138 if (supportedVoice.Language.Equals(voiceInfo.Lang) && (supportedVoice.VoiceType == voiceInfo.Type))
140 Log.Info(LogTag, $"{voiceInfo.Lang} & {voiceInfo.Type} is supported");
// Submits txt to the TTS client using the language and voice type from
// voiceInfo, and stores the utterance id returned by ttsHandle.AddText in a
// new UtteranceText.
// txt: text to synthesize. voiceInfo: language and voice type to use.
148 internal void AddText(string txt, VoiceInfo voiceInfo)
// NOTE(review): if VoiceInfo.Type is a non-nullable enum, `Type == null` is
// always false — confirm the declared type.
150 if (voiceInfo.Lang == null || voiceInfo.Type == null)
152 Log.Error(LogTag, "VoiceInfo's value is null");
// An error is logged when the TTS client has not been created.
154 if (ttsHandle == null)
156 Log.Error(LogTag, $"ttsHandle is null");
159 var temp = new UtteranceText();
// The last argument (0) is presumably the speed parameter of
// TtsClient.AddText — TODO confirm what 0 selects.
161 temp.UttID = ttsHandle.AddText(txt, voiceInfo.Lang, (int)voiceInfo.Type, 0);
// `e` is presumably the exception of an enclosing catch that is not visible
// in this view; its message is appended to the log text.
168 Log.Error(LogTag, $"Error AddText" + e.Message);
// Submits txt to the TTS client for the given language, and stores the
// utterance id returned by ttsHandle.AddText in a new UtteranceText.
// txt: text to synthesize. lang: language string passed to the client.
172 internal void AddText(string txt, string lang)
// An error is logged when the TTS client has not been created.
174 if (ttsHandle == null)
176 Log.Error(LogTag, $"ttsHandle is null");
179 var temp = new UtteranceText();
// The voice type comes from the voiceInfo field (there is no such parameter
// in this overload); the last argument (0) is presumably the speed — TODO confirm.
181 temp.UttID = ttsHandle.AddText(txt, lang, (int)voiceInfo.Type, 0);
// `e` is presumably the exception of an enclosing catch that is not visible
// in this view; its message is appended to the log text.
188 Log.Error(LogTag, $"Error AddText" + e.Message);
// Logs "Prepare TTS" and installs playReadyCallback as the PlayReadyCallback
// handler. Any call into the TTS client made here is on lines not visible in
// this view.
// playReadyCallback: handler invoked when TtsSyntheiszedPCM raises PlayReadyCallback.
192 internal void Prepare(EventHandler playReadyCallback)
// An error is logged when the TTS client has not been created.
194 if (ttsHandle == null)
196 Log.Error(LogTag, $"ttsHandle is null");
199 Log.Info(LogTag, "Prepare TTS");
// Plain assignment (not +=): any previously installed handlers are replaced.
202 PlayReadyCallback = playReadyCallback;
// Plays the PCM collected in byteList through the avatar's animator.
// The bool return values are on lines not visible in this view.
206 internal bool PlayPreparedText()
// Only acts when byteList exists and holds at least one byte.
208 if (byteList != null && byteList.Count != 0)
210 Log.Info(LogTag, "PlayPreparedText TTS");
// Null-conditional calls: nothing happens if the avatar or its animator is null.
211 currentAvatar?.AvatarAnimator?.PlayLipSync(byteList.ToArray(), sampleRate);
// Records the isPrepared argument in the field of the same name. Any call
// into the TTS client made here is on lines not visible in this view.
// isPrepared: value stored in this.isPrepared (defaults to false).
217 internal void Play(bool isPrepared = false)
// An error is logged when the TTS client has not been created.
219 if (ttsHandle == null)
221 Log.Error(LogTag, $"ttsHandle is null");
225 this.isPrepared = isPrepared;
// Installs playReadyCallback as the PlayReadyCallback handler. Any other
// work done here is on lines not visible in this view.
// playReadyCallback: handler invoked when TtsSyntheiszedPCM raises PlayReadyCallback.
230 internal void PlayAsync(EventHandler playReadyCallback)
// An error is logged when the TTS client has not been created.
232 if (ttsHandle == null)
234 Log.Error(LogTag, $"ttsHandle is null");
// Plain assignment (not +=): any previously installed handlers are replaced.
240 PlayReadyCallback = playReadyCallback;
246 if (ttsHandle == null)
248 Log.Error(LogTag, $"ttsHandle is null");
256 if (ttsHandle == null)
258 Log.Error(LogTag, $"ttsHandle is null");
262 currentAvatar?.AvatarAnimator?.StopLipSync();
// Creates the TtsClient, subscribes this class's callbacks, sets the playing
// mode to ByClient, caches the client's default voice in voiceInfo and creates
// an empty textList.
265 private void InitTts()
269 ttsHandle = new TtsClient();
271 // Register Callbacks
272 ttsHandle.DefaultVoiceChanged += TtsDefaultVoiceChangedCallback;
273 ttsHandle.EngineChanged += TtsEngineChangedCallback;
274 ttsHandle.ErrorOccurred += TtsErrorOccuredCallback;
275 ttsHandle.StateChanged += TtsStateChangedCallback;
276 ttsHandle.UtteranceCompleted += TtsUtteranceCompletedCallback;
277 ttsHandle.UtteranceStarted += TtsUtteranceStartedCallback;
// Synthesized PCM is delivered to TtsSyntheiszedPCM, which buffers it for lip sync.
279 ttsHandle.SynthesizedPcm += TtsSyntheiszedPCM;
280 ttsHandle.PlayingMode = PlayingMode.ByClient;
// Seed voiceInfo from the client's default voice.
284 voiceInfo = new VoiceInfo
286 Lang = ttsHandle.DefaultVoice.Language,
287 Type = ttsHandle.DefaultVoice.VoiceType
290 textList = new List<UtteranceText>();
291 Log.Info(LogTag, voiceInfo.Lang + ", " + voiceInfo.Type.ToString());
// `e` is presumably the exception of an enclosing catch that is not visible
// in this view; the failure is logged.
296 Log.Error(LogTag, "[ERROR] Fail to prepare Tts");
297 Log.Error(LogTag, e.Message);
// Tears down the TTS client: unprepares it, unsubscribes the callbacks and
// drops the avatar reference.
301 internal void DeinitTts()
305 if (ttsHandle != null)
307 ttsHandle.Unprepare();
309 // Unregister Callbacks
310 ttsHandle.DefaultVoiceChanged -= TtsDefaultVoiceChangedCallback;
311 ttsHandle.EngineChanged -= TtsEngineChangedCallback;
312 ttsHandle.ErrorOccurred -= TtsErrorOccuredCallback;
313 ttsHandle.StateChanged -= TtsStateChangedCallback;
314 ttsHandle.UtteranceCompleted -= TtsUtteranceCompletedCallback;
315 ttsHandle.UtteranceStarted -= TtsUtteranceStartedCallback;
// NOTE(review): InitTts also subscribes SynthesizedPcm, but no matching
// `-= TtsSyntheiszedPCM` is visible in this view — confirm it is removed.
// What is done with textList and byteList when they are non-null is on lines
// not visible in this view.
321 if (textList != null)
327 if (byteList != null)
332 currentAvatar = null;
// `e` is presumably the exception of an enclosing catch that is not visible
// in this view; the failure is logged.
336 Log.Error(LogTag, "[ERROR] Fail to unprepare Tts");
337 Log.Error(LogTag, e.Message);
// Handler for the TTS client's SynthesizedPcm event.
//   Start    - prepares the buffers, computes the buffer sizes from the sample
//              rate and raises PlayReadyCallback.
//   Continue - appends the PCM chunk to the rolling window and flushes it to
//              UpdateBuffer when large enough; also appends to byteList.
//   Finish   - raises PlayReadyCallback and marks the async lip sync finished.
// Conditions and branches between the visible statements are not all visible
// in this view, so which statements run on which path must be confirmed
// against the full file.
341 private void TtsSyntheiszedPCM(object sender, SynthesizedPcmEventArgs e)
344 var dataSize = e.Data.Length;
// NOTE(review): `audio` is allocated but not used on any visible line (the
// Marshal.Copy that would fill it is commented out) — confirm it is needed.
345 var audio = new byte[dataSize];
346 sampleRate = e.SampleRate;
348 //Marshal.Copy(e.Data, audio, 0, dataSize);
349 switch (e.EventType) //START
351 case SynthesizedPcmEvent.Start://start
352 Tizen.Log.Info(LogTag, "------------------Start : " + e.UtteranceId);
353 Tizen.Log.Info(LogTag, "Output audio Size : " + dataSize);
354 Tizen.Log.Info(LogTag, "SampleRate" + e.SampleRate);
// Create the byte list and the rolling window on first use.
355 if (byteList == null)
357 byteList = new List<byte>();
359 if (recordedBuffer == null)
361 recordedBuffer = new byte[0];
// Start the rolling window empty.
367 recordedBuffer = Array.Empty<byte>();
// Flush threshold and tail size, both derived from the sample rate
// (sampleRate was set from e.SampleRate above, so both use the same rate).
369 desiredBufferLength = (int)(e.SampleRate * desiredBufferDuration * audioBufferMultiflier);
370 audioTailLength = (int)(sampleRate * audioTailLengthFactor * audioBufferMultiflier);
371 audioTailBuffer = new byte[audioTailLength];
// Notify the handler on Start; the sender is null.
372 PlayReadyCallback?.Invoke(null, EventArgs.Empty);
374 lipSyncer.SampleRate = sampleRate;
377 case SynthesizedPcmEvent.Continue://continue
// Append this chunk to the rolling window (allocates a new array each time).
380 recordedBuffer = recordedBuffer.Concat(e.Data).ToArray();
// Flush once the window has reached the threshold.
382 if (recordedBuffer.Length >= desiredBufferLength)
// NOTE(review): logged with Log.Error although no failure is signalled on
// the visible lines — confirm the intended log level.
384 Tizen.Log.Error(LogTag, "Current recordbuffer length :" + recordedBuffer.Length);
385 UpdateBuffer(recordedBuffer, sampleRate);
// Save the last audioTailLength bytes of the flushed window.
387 Buffer.BlockCopy(recordedBuffer, recordedBuffer.Length - audioTailLength, audioTailBuffer, 0, audioTailLength);
// Restart the window containing only the saved tail, then zero the tail buffer.
389 recordedBuffer = Array.Empty<byte>();
390 recordedBuffer = recordedBuffer.Concat(audioTailBuffer).ToArray();
391 Array.Clear(audioTailBuffer, 0, audioTailLength);
// Keep the chunk in byteList, which PlayPreparedText plays later.
396 byteList.AddRange(e.Data);
399 case SynthesizedPcmEvent.Finish://finish
400 Tizen.Log.Info(LogTag, "------------------Finish : " + e.UtteranceId);
405 //Play voice immediately
406 //PlayPreparedText();
410 //Notify finished state
411 Log.Info(LogTag, "Notify finished state");
412 PlayReadyCallback?.Invoke(null, EventArgs.Empty);
// Tell the lip-sync module that no more audio will be enqueued.
417 lipSyncer.SetFinishAsyncLip(true);
// No handling statements for Fail are visible in this view.
420 case SynthesizedPcmEvent.Fail: //fail
// Handler for the TTS client's UtteranceStarted event; logs the utterance id
// at debug level.
426 private void TtsUtteranceStartedCallback(object sender, UtteranceEventArgs e)
428 Log.Debug(LogTag, "Utterance start now (" + e.UtteranceId + ")");
// Handler for the TTS client's UtteranceCompleted event; logs the utterance
// id and removes the textList entry whose UttID matches it.
431 private void TtsUtteranceCompletedCallback(object sender, UtteranceEventArgs e)
433 Log.Debug(LogTag, "Utterance complete (" + e.UtteranceId + ")");
435 foreach (UtteranceText item in textList)
437 if (item.UttID == e.UtteranceId)
// NOTE(review): this removes from textList while it is being enumerated,
// which is only safe if the loop exits right after the removal; no `break`
// is visible in this view — confirm.
439 textList.Remove(item);
440 Log.Debug(LogTag, "TextList Count (" + textList.Count.ToString() + ")");
// Handler for the TTS client's StateChanged event; logs the previous and
// current state at debug level.
446 private void TtsStateChangedCallback(object sender, StateChangedEventArgs e)
448 Log.Debug(LogTag, "Current state is changed from (" + e.Previous + ") to (" + e.Current + ")");
// Handler for the TTS client's ErrorOccurred event; logs the error message
// at error level.
451 private void TtsErrorOccuredCallback(object sender, ErrorOccurredEventArgs e)
453 Log.Error(LogTag, "Error is occured (" + e.ErrorMessage + ")");
// Handler for the TTS client's EngineChanged event; logs the new engine id
// and the language of its voice at debug level.
456 private void TtsEngineChangedCallback(object sender, EngineChangedEventArgs e)
458 Log.Debug(LogTag, "Prefered engine is changed (" + e.EngineId + ") (" + e.VoiceType.Language + ")");
// Handler for the TTS client's DefaultVoiceChanged event; logs the previous
// and current default voice at debug level.
461 private void TtsDefaultVoiceChangedCallback(object sender, DefaultVoiceChangedEventArgs e)
463 Log.Debug(LogTag, "Default voice is changed from (" + e.Previous + ") to (" + e.Current + ")");
// Prepares the lip-sync module for streamed (async) playback: computes
// audioLength, initialises the module if IsAsyncInit is not yet set, and
// resets the finished / started flags. Exactly which statements sit inside
// the IsAsyncInit check is not visible in this view.
466 internal void InitAsyncBuffer()
468 if (!lipSyncer.IsAsyncInit)
// NOTE(review): the literals 0.16f and 2f are presumably a duration in
// seconds and bytes per sample — TODO confirm.
470 audioLength = (int)(sampleRate * 0.16f * 2f);
472 lipSyncer.InitAsyncLipsync();
473 lipSyncer.IsAsyncInit = true;
475 lipSyncer.SetFinishAsyncLip(false);
// Lets the next UpdateBuffer call start the play timer again.
476 isAsyncLipStarting = false;
// Hands a PCM buffer to the lip-sync module and starts the module's play
// timer the first time this is called after InitAsyncBuffer.
// recordBuffer: PCM bytes to enqueue.
// sampleRate: sample rate passed to EnqueueAnimation (the parameter hides the
// field of the same name inside this method).
480 internal void UpdateBuffer(byte[] recordBuffer, int sampleRate)
482 if (lipSyncer != null)
// NOTE(review): logged with Log.Error although this is the non-null path —
// confirm the intended log level.
484 Log.Error(LogTag, "OnTTSBufferChanged");
485 lipSyncer.EnqueueAnimation(recordBuffer, sampleRate, audioLength);
// Start the timer only once per session; the flag is reset in InitAsyncBuffer.
486 if (!isAsyncLipStarting)
488 lipSyncer.StartAsyncLipPlayTimer();
489 isAsyncLipStarting = true;
// Presumably the else branch of the null check above (the `else` line is not
// visible in this view).
494 Log.Error(LogTag, "avatarLipSyncer is null");