Files
wwdpublic/Content.Server/_White/TTS/TTSManagerExtension.cs
Spatison 3b0dadc0d4 [Fix] TTS (#139)
* Revert "[Fix] TTS (#137)"

This reverts commit c5bd6b70a2.

* Revert "[Fix] Исправление ТТСа (#136)"

This reverts commit 3759acb84e.

* Revert "[Port] TTS (#121)"

This reverts commit 0db8f3aaa4.

* new TTS

* new TTS

* new TTS

* new TTS

* fix
2024-12-06 08:49:32 +02:00

254 lines
9.1 KiB
C#

using System.Linq;
using System.Net.Http;
using System.Net.Http.Json;
using System.Text;
using System.Text.Json.Serialization;
using System.Threading;
using System.Threading.Tasks;
using Content.Shared._White;
using Prometheus;
using Robust.Shared.Configuration;
namespace Content.Server._White.TTS;
// ReSharper disable once InconsistentNaming
/// <summary>
/// Extension methods for <see cref="TTSManager"/> that request speech with an audio effect
/// ("Radio" or "Echo") from the configured TTS HTTP API and reuse the manager's cache
/// for repeated requests.
/// </summary>
public static class TTSManagerExtension
{
    private static readonly Histogram AnnounceRequestTimings = Metrics.CreateHistogram(
        "tts_announce_req_timings",
        "Timings announce of TTS API requests",
        new HistogramConfiguration()
        {
            LabelNames = new[] {"type"},
            Buckets = Histogram.ExponentialBuckets(.1, 1.5, 10),
        });

    private static readonly Counter AnnounceWantedCount = Metrics.CreateCounter(
        "tts_announce_wanted_count",
        "Amount announce of wanted TTS audio.");

    private static readonly Counter AnnounceReusedCount = Metrics.CreateCounter(
        "tts_announce_reused_count",
        "Amount announce of reused TTS audio from cache.");

    private static readonly Histogram RadioRequestTimings = Metrics.CreateHistogram(
        "tts_radio_req_timings",
        "Timings radio of TTS API requests",
        new HistogramConfiguration()
        {
            LabelNames = new[] {"type"},
            Buckets = Histogram.ExponentialBuckets(.1, 1.5, 10),
        });

    private static readonly Counter RadioWantedCount = Metrics.CreateCounter(
        "tts_radio_wanted_count",
        "Amount radio of wanted TTS audio.");

    private static readonly Counter RadioReusedCount = Metrics.CreateCounter(
        "tts_radio_reused_count",
        "Amount radio of reused TTS audio from cache.");

    // A single shared client: creating an HttpClient per request exhausts sockets.
    private static readonly HttpClient _httpClient = new();

    // Per-effect settings. These must be declared after the metric fields above because
    // static field initializers run in textual order.
    private static readonly EffectProfile RadioProfile = new(
        ApiEffect: "Radio",
        CacheEffect: "radio",
        LogName: "radio",
        Timeout: TimeSpan.FromSeconds(4),
        WantedCount: RadioWantedCount,
        ReusedCount: RadioReusedCount,
        RequestTimings: RadioRequestTimings);

    private static readonly EffectProfile AnnounceProfile = new(
        ApiEffect: "Echo",
        CacheEffect: "echo",
        LogName: "announce",
        Timeout: TimeSpan.FromSeconds(15),
        WantedCount: AnnounceWantedCount,
        ReusedCount: AnnounceReusedCount,
        RequestTimings: AnnounceRequestTimings);

    /// <summary>
    /// Generates (or fetches from the cache) speech for <paramref name="text"/> with the "Radio" effect.
    /// The HTTP request is cancelled after 4 seconds.
    /// </summary>
    /// <param name="_cfTtsManager">Manager whose cache is consulted and updated.</param>
    /// <param name="speaker">Speaker identifier sent to the TTS API.</param>
    /// <param name="text">Text to synthesize.</param>
    /// <returns>The decoded audio bytes returned by the API (requested format is "ogg").</returns>
    /// <exception cref="Exception">
    /// The API url or token is not configured, the request timed out, or the request failed.
    /// </exception>
    public static async Task<byte[]> RadioConvertTextToSpeech(this TTSManager _cfTtsManager, string speaker, string text)
    {
        return await ConvertWithEffect(_cfTtsManager, speaker, text, RadioProfile);
    }

    /// <summary>
    /// Generates (or fetches from the cache) speech for <paramref name="text"/> with the "Echo" effect.
    /// The HTTP request is cancelled after 15 seconds.
    /// </summary>
    /// <param name="_cfTtsManager">Manager whose cache is consulted and updated.</param>
    /// <param name="speaker">Speaker identifier sent to the TTS API.</param>
    /// <param name="text">Text to synthesize.</param>
    /// <returns>The decoded audio bytes returned by the API (requested format is "ogg").</returns>
    /// <exception cref="Exception">
    /// The API url or token is not configured, the request timed out, or the request failed.
    /// </exception>
    public static async Task<byte[]> AnnounceConvertTextToSpeech(this TTSManager _cfTtsManager, string speaker, string text)
    {
        return await ConvertWithEffect(_cfTtsManager, speaker, text, AnnounceProfile);
    }

    /// <summary>
    /// Shared implementation for the effect-specific entry points: validates configuration,
    /// serves from the cache when possible, otherwise calls the TTS API, caches the result
    /// and records metrics.
    /// </summary>
    private static async Task<byte[]> ConvertWithEffect(TTSManager manager, string speaker, string text, EffectProfile profile)
    {
        var sawmill = Logger.GetSawmill("tts");
        var cfg = IoCManager.Resolve<IConfigurationManager>();

        var url = cfg.GetCVar(WhiteCVars.TTSApiUrl);
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new Exception("TTS Api url not specified");
        }

        var token = cfg.GetCVar(WhiteCVars.TTSApiToken);
        if (string.IsNullOrWhiteSpace(token))
        {
            throw new Exception("TTS Api token not specified");
        }

        profile.WantedCount.Inc();

        // The effect is part of the key so radio and announce audio for the same
        // speaker/text never overwrite or shadow each other.
        var cacheKey = GenerateCacheKey(speaker, text, profile.CacheEffect);
        if (manager._cache.TryGetValue(cacheKey, out var data))
        {
            profile.ReusedCount.Inc();
            sawmill.Debug($"Use cached {profile.LogName} sound for '{text}' speech by '{speaker}' speaker");
            return data;
        }

        var body = new GenerateVoiceRequest
        {
            ApiToken = token,
            Text = text,
            Speaker = speaker,
            Effect = profile.ApiEffect
        };

        // Stopwatch is monotonic; wall-clock deltas can jump when the system clock is adjusted.
        var stopwatch = System.Diagnostics.Stopwatch.StartNew();
        try
        {
            using var cts = new CancellationTokenSource(profile.Timeout);
            using var response = await _httpClient.PostAsJsonAsync(url, body, cts.Token);
            if (!response.IsSuccessStatusCode)
            {
                throw new Exception($"TTS request returned bad status code: {response.StatusCode}");
            }

            // Pass the token so the timeout also covers reading the response body.
            var json = await response.Content.ReadFromJsonAsync<GenerateVoiceResponse>(cancellationToken: cts.Token);
            var audio = json.Results?.FirstOrDefault().Audio;
            if (audio is null)
            {
                throw new Exception("TTS response did not contain any audio");
            }

            var soundData = Convert.FromBase64String(audio);

            // An overlapping request for the same key may have filled the cache while this
            // one was awaiting; Add would throw on the duplicate key and fail a good result.
            // NOTE(review): no eviction happens here — confirm TTSManager bounds _cache elsewhere.
            if (!manager._cache.TryGetValue(cacheKey, out _))
            {
                manager._cache.Add(cacheKey, soundData);
                manager._cacheKeysSeq.Add(cacheKey);
            }

            sawmill.Debug($"Generated new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker ({soundData.Length} bytes)");
            profile.RequestTimings.WithLabels("Success").Observe(stopwatch.Elapsed.TotalSeconds);
            return soundData;
        }
        catch (OperationCanceledException e)
        {
            // TaskCanceledException (thrown by HttpClient) derives from OperationCanceledException.
            profile.RequestTimings.WithLabels("Timeout").Observe(stopwatch.Elapsed.TotalSeconds);
            sawmill.Error($"Timeout of request generation new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker");
            throw new Exception("TTS request timeout", e);
        }
        catch (Exception e)
        {
            profile.RequestTimings.WithLabels("Error").Observe(stopwatch.Elapsed.TotalSeconds);
            sawmill.Error($"Failed of request generation new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker\n{e}");
            throw new Exception("TTS request failed", e);
        }
    }

    /// <summary>
    /// Builds a cache key: the upper-case hex SHA-1 of "speaker/text/effect" encoded as UTF-8.
    /// SHA-1 is used only to get a compact key, not for security.
    /// </summary>
    private static string GenerateCacheKey(string speaker, string text, string effect = "")
    {
        var key = $"{speaker}/{text}/{effect}";
        byte[] keyData = Encoding.UTF8.GetBytes(key);
        var bytes = System.Security.Cryptography.SHA1.HashData(keyData);
        return Convert.ToHexString(bytes);
    }

    /// <summary>
    /// Settings that differ between the effect-specific entry points.
    /// </summary>
    /// <param name="ApiEffect">Value sent in the request's "effect" field.</param>
    /// <param name="CacheEffect">Effect component of the cache key.</param>
    /// <param name="LogName">Word used for this effect in log messages.</param>
    /// <param name="Timeout">Time after which the HTTP request is cancelled.</param>
    /// <param name="WantedCount">Counter incremented for every request.</param>
    /// <param name="ReusedCount">Counter incremented for every cache hit.</param>
    /// <param name="RequestTimings">Histogram of API request durations, labelled by outcome.</param>
    private sealed record EffectProfile(
        string ApiEffect,
        string CacheEffect,
        string LogName,
        TimeSpan Timeout,
        Counter WantedCount,
        Counter ReusedCount,
        Histogram RequestTimings);

    /// <summary>
    /// JSON body posted to the TTS API. Only the token, text, speaker and effect are
    /// set by callers; the remaining fields keep their fixed defaults.
    /// </summary>
    private struct GenerateVoiceRequest
    {
        // An explicit parameterless constructor is required for the property initializers below to run.
        public GenerateVoiceRequest()
        {
        }

        [JsonPropertyName("api_token")]
        public string ApiToken { get; set; } = "";

        [JsonPropertyName("text")]
        public string Text { get; set; } = "";

        [JsonPropertyName("speaker")]
        public string Speaker { get; set; } = "";

        [JsonPropertyName("ssml")]
        public bool SSML { get; private set; } = true;

        [JsonPropertyName("word_ts")]
        public bool WordTS { get; private set; } = false;

        [JsonPropertyName("put_accent")]
        public bool PutAccent { get; private set; } = true;

        [JsonPropertyName("put_yo")]
        public bool PutYo { get; private set; } = false;

        [JsonPropertyName("sample_rate")]
        public int SampleRate { get; private set; } = 24000;

        [JsonPropertyName("format")]
        public string Format { get; private set; } = "ogg";

        [JsonPropertyName("effect")]
        public string Effect { get; set; } = "none";
    }

    /// <summary>
    /// JSON body returned by the TTS API. <see cref="Results"/> is null when the
    /// "results" field is absent from the response.
    /// </summary>
    private struct GenerateVoiceResponse
    {
        [JsonPropertyName("results")]
        public List<VoiceResult> Results { get; set; }

        [JsonPropertyName("original_sha1")]
        public string Hash { get; set; }
    }

    /// <summary>
    /// One entry of <see cref="GenerateVoiceResponse.Results"/>; <see cref="Audio"/> is base64-encoded.
    /// </summary>
    private struct VoiceResult
    {
        [JsonPropertyName("audio")]
        public string Audio { get; set; }
    }
}