mirror of
https://github.com/WWhiteDreamProject/wwdpublic.git
synced 2026-04-24 09:08:04 +03:00
* Revert "[Fix] TTS (#137)" This reverts commit c5bd6b70a2. * Revert "[Fix] Исправление ТТСа (#136)" This reverts commit 3759acb84e. * Revert "[Port] TTS (#121)" This reverts commit 0db8f3aaa4. * new TTS * new TTS * new TTS * new TTS * fix
254 lines
9.1 KiB
C#
254 lines
9.1 KiB
C#
using System.Linq;
|
|
using System.Net.Http;
|
|
using System.Net.Http.Json;
|
|
using System.Text;
|
|
using System.Text.Json.Serialization;
|
|
using System.Threading;
|
|
using System.Threading.Tasks;
|
|
using Content.Shared._White;
|
|
using Prometheus;
|
|
using Robust.Shared.Configuration;
|
|
|
|
namespace Content.Server._White.TTS;
|
|
|
|
// ReSharper disable once InconsistentNaming
/// <summary>
/// Extension methods for <see cref="TTSManager"/> that request effect-processed speech
/// (the "Radio" and "Echo" effects) from the HTTP endpoint configured through
/// <c>WhiteCVars.TTSApiUrl</c> / <c>WhiteCVars.TTSApiToken</c>, store the decoded audio in the
/// manager's cache and report Prometheus metrics for every request.
/// </summary>
public static class TTSManagerExtension
{
    private static readonly Histogram AnnounceRequestTimings = Metrics.CreateHistogram(
        "tts_announce_req_timings",
        "Timings announce of TTS API requests",
        new HistogramConfiguration()
        {
            LabelNames = new[] {"type"},
            Buckets = Histogram.ExponentialBuckets(.1, 1.5, 10),
        });

    private static readonly Counter AnnounceWantedCount = Metrics.CreateCounter(
        "tts_announce_wanted_count",
        "Amount announce of wanted TTS audio.");

    private static readonly Counter AnnounceReusedCount = Metrics.CreateCounter(
        "tts_announce_reused_count",
        "Amount announce of reused TTS audio from cache.");

    private static readonly Histogram RadioRequestTimings = Metrics.CreateHistogram(
        "tts_radio_req_timings",
        "Timings radio of TTS API requests",
        new HistogramConfiguration()
        {
            LabelNames = new[] {"type"},
            Buckets = Histogram.ExponentialBuckets(.1, 1.5, 10),
        });

    private static readonly Counter RadioWantedCount = Metrics.CreateCounter(
        "tts_radio_wanted_count",
        "Amount radio of wanted TTS audio.");

    private static readonly Counter RadioReusedCount = Metrics.CreateCounter(
        "tts_radio_reused_count",
        "Amount radio of reused TTS audio from cache.");

    // Single shared client for all requests made by this class.
    private static readonly HttpClient _httpClient = new();

    // NOTE: the profiles must stay BELOW the metric fields. Static field initializers run in
    // textual order, so moving them up would capture null metric references.

    // The radio profile uses its own cache discriminator ("radio"). It previously shared "echo"
    // with the announce profile, so radio and announce audio for the same speaker/text
    // overwrote/returned each other's cache entry.
    private static readonly EffectProfile RadioProfile = new(
        "radio",
        "Radio",
        "radio",
        TimeSpan.FromSeconds(4),
        RadioRequestTimings,
        RadioWantedCount,
        RadioReusedCount);

    private static readonly EffectProfile AnnounceProfile = new(
        "announce",
        "Echo",
        "echo",
        TimeSpan.FromSeconds(15),
        AnnounceRequestTimings,
        AnnounceWantedCount,
        AnnounceReusedCount);

    /// <summary>
    /// Returns speech audio for <paramref name="text"/> with the "Radio" effect applied,
    /// using the manager's cache when the same speaker/text was already generated.
    /// The HTTP request is cancelled after 4 seconds.
    /// </summary>
    /// <param name="_cfTtsManager">Manager whose cache is read and updated.</param>
    /// <param name="speaker">Speaker identifier sent to the API as <c>speaker</c>.</param>
    /// <param name="text">Text sent to the API as <c>text</c>.</param>
    /// <returns>The decoded audio bytes returned by the API (or the cached copy).</returns>
    /// <exception cref="Exception">
    /// The API url or token is not configured, the request timed out, or the request failed.
    /// </exception>
    public static async Task<byte[]> RadioConvertTextToSpeech(this TTSManager _cfTtsManager, string speaker, string text)
    {
        return await ConvertWithEffect(_cfTtsManager, speaker, text, RadioProfile);
    }

    /// <summary>
    /// Returns speech audio for <paramref name="text"/> with the "Echo" effect applied,
    /// using the manager's cache when the same speaker/text was already generated.
    /// The HTTP request is cancelled after 15 seconds.
    /// </summary>
    /// <param name="_cfTtsManager">Manager whose cache is read and updated.</param>
    /// <param name="speaker">Speaker identifier sent to the API as <c>speaker</c>.</param>
    /// <param name="text">Text sent to the API as <c>text</c>.</param>
    /// <returns>The decoded audio bytes returned by the API (or the cached copy).</returns>
    /// <exception cref="Exception">
    /// The API url or token is not configured, the request timed out, or the request failed.
    /// </exception>
    public static async Task<byte[]> AnnounceConvertTextToSpeech(this TTSManager _cfTtsManager, string speaker, string text)
    {
        return await ConvertWithEffect(_cfTtsManager, speaker, text, AnnounceProfile);
    }

    /// <summary>
    /// Shared implementation of the public entry points: validates configuration, consults the
    /// cache, performs the HTTP request described by <paramref name="profile"/>, caches the
    /// decoded audio and records metrics.
    /// </summary>
    private static async Task<byte[]> ConvertWithEffect(TTSManager manager, string speaker, string text, EffectProfile profile)
    {
        var sawmill = Logger.GetSawmill("tts");
        var cfg = IoCManager.Resolve<IConfigurationManager>();

        var url = cfg.GetCVar(WhiteCVars.TTSApiUrl);
        if (string.IsNullOrWhiteSpace(url))
        {
            throw new Exception("TTS Api url not specified");
        }

        var token = cfg.GetCVar(WhiteCVars.TTSApiToken);
        if (string.IsNullOrWhiteSpace(token))
        {
            throw new Exception("TTS Api token not specified");
        }

        profile.WantedCount.Inc();
        var cacheKey = GenerateCacheKey(speaker, text, profile.CacheEffect);
        if (manager._cache.TryGetValue(cacheKey, out var cached))
        {
            profile.ReusedCount.Inc();
            sawmill.Debug($"Use cached {profile.LogName} sound for '{text}' speech by '{speaker}' speaker");
            return cached;
        }

        var body = new GenerateVoiceRequest
        {
            ApiToken = token,
            Text = text,
            Speaker = speaker,
            Effect = profile.ApiEffect
        };

        var reqTime = DateTime.UtcNow;
        try
        {
            // Both the token source and the response own resources; dispose them when done.
            using var cts = new CancellationTokenSource(profile.Timeout);
            using var response = await _httpClient.PostAsJsonAsync(url, body, cts.Token);
            if (!response.IsSuccessStatusCode)
            {
                throw new Exception($"TTS request returned bad status code: {response.StatusCode}");
            }

            // Pass the token here as well so a stalled response body is bounded by the same timeout.
            var payload = await response.Content.ReadFromJsonAsync<GenerateVoiceResponse>(cancellationToken: cts.Token);
            if (payload.Results is not { Count: > 0 } results)
            {
                throw new Exception("TTS response contained no results");
            }

            var soundData = Convert.FromBase64String(results[0].Audio);

            // An identical request may have completed while this one was awaiting the API.
            // NOTE(review): assumes _cache behaves like a standard dictionary, whose Add throws on
            // a duplicate key; skipping the insert keeps _cache and _cacheKeysSeq free of duplicates.
            if (!manager._cache.ContainsKey(cacheKey))
            {
                manager._cache.Add(cacheKey, soundData);
                manager._cacheKeysSeq.Add(cacheKey);
            }

            sawmill.Debug($"Generated new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker ({soundData.Length} bytes)");
            profile.RequestTimings.WithLabels("Success").Observe((DateTime.UtcNow - reqTime).TotalSeconds);

            return soundData;
        }
        catch (OperationCanceledException e)
        {
            // TaskCanceledException derives from OperationCanceledException, so this covers
            // cancellation raised by either the POST or the body read.
            profile.RequestTimings.WithLabels("Timeout").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
            sawmill.Error($"Timeout of request generation new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker");
            throw new Exception("TTS request timeout", e);
        }
        catch (Exception e)
        {
            profile.RequestTimings.WithLabels("Error").Observe((DateTime.UtcNow - reqTime).TotalSeconds);
            sawmill.Error($"Failed of request generation new {profile.LogName} sound for '{text}' speech by '{speaker}' speaker\n{e}");
            throw new Exception("TTS request failed", e);
        }
    }

    /// <summary>
    /// Builds a cache key: the upper-case hex SHA-1 of the UTF-8 bytes of
    /// <c>"{speaker}/{text}/{effect}"</c>.
    /// </summary>
    private static string GenerateCacheKey(string speaker, string text, string effect = "")
    {
        var key = $"{speaker}/{text}/{effect}";
        byte[] keyData = Encoding.UTF8.GetBytes(key);
        var bytes = System.Security.Cryptography.SHA1.HashData(keyData);
        return Convert.ToHexString(bytes);
    }

    /// <summary>
    /// Per-effect settings consumed by <see cref="ConvertWithEffect"/>.
    /// </summary>
    /// <param name="LogName">Word inserted into log messages ("radio" / "announce").</param>
    /// <param name="ApiEffect">Value sent to the API in the <c>effect</c> field.</param>
    /// <param name="CacheEffect">Effect discriminator mixed into the cache key.</param>
    /// <param name="Timeout">Time after which the HTTP request is cancelled.</param>
    /// <param name="RequestTimings">Histogram that receives the request duration.</param>
    /// <param name="WantedCount">Counter incremented for every conversion request.</param>
    /// <param name="ReusedCount">Counter incremented when the cache already holds the audio.</param>
    private sealed record EffectProfile(
        string LogName,
        string ApiEffect,
        string CacheEffect,
        TimeSpan Timeout,
        Histogram RequestTimings,
        Counter WantedCount,
        Counter ReusedCount);

    /// <summary>
    /// JSON body posted to the TTS API. Only the token, text, speaker and effect are set by
    /// callers in this class; the remaining options keep their initializer values.
    /// </summary>
    private struct GenerateVoiceRequest
    {
        public GenerateVoiceRequest()
        {
        }

        [JsonPropertyName("api_token")]
        public string ApiToken { get; set; } = "";

        [JsonPropertyName("text")]
        public string Text { get; set; } = "";

        [JsonPropertyName("speaker")]
        public string Speaker { get; set; } = "";

        [JsonPropertyName("ssml")]
        public bool SSML { get; private set; } = true;

        [JsonPropertyName("word_ts")]
        public bool WordTS { get; private set; } = false;

        [JsonPropertyName("put_accent")]
        public bool PutAccent { get; private set; } = true;

        [JsonPropertyName("put_yo")]
        public bool PutYo { get; private set; } = false;

        [JsonPropertyName("sample_rate")]
        public int SampleRate { get; private set; } = 24000;

        [JsonPropertyName("format")]
        public string Format { get; private set; } = "ogg";

        [JsonPropertyName("effect")]
        public string Effect { get; set; } = "none";
    }

    /// <summary>
    /// JSON body returned by the TTS API. <see cref="Results"/> is null when the field is
    /// absent from the response, because this is a struct without initializers.
    /// </summary>
    private struct GenerateVoiceResponse
    {
        [JsonPropertyName("results")]
        public List<VoiceResult> Results { get; set; }

        [JsonPropertyName("original_sha1")]
        public string Hash { get; set; }
    }

    /// <summary>
    /// One entry of <see cref="GenerateVoiceResponse.Results"/>; <see cref="Audio"/> is decoded
    /// from Base64 by the caller.
    /// </summary>
    private struct VoiceResult
    {
        [JsonPropertyName("audio")]
        public string Audio { get; set; }
    }
}
|