From 6e447710280d5d04a446a9be7209ba35f4fc7517 Mon Sep 17 00:00:00 2001 From: 康鲁杰 <60095866+KangLujie@users.noreply.github.com> Date: 星期一, 09 六月 2025 16:08:42 +0800 Subject: [PATCH] 声音模型替换 --- yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java | 87 ++++++++++++++++++--- yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java | 3 yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java | 1 yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java | 1 yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java | 142 +++++++++++++++++++++++++++++----- 5 files changed, 195 insertions(+), 39 deletions(-) diff --git a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java index 6bc6dae..1bb345c 100644 --- a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java +++ b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java @@ -36,4 +36,5 @@ @Schema(description = "鐘舵��", requiredMode = Schema.RequiredMode.REQUIRED, example = "2") private Integer status; -} \ No newline at end of file + private String language; +} diff --git a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java index 1a03ea0..ab27f9a 100644 --- a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java +++ b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java @@ -15,4 +15,5 @@ //澹伴煶妯″瀷ID private String voiceId; + private String language; } diff --git a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java index cdeb917..c2b1e80 100644 --- a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java +++ b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java @@ -119,6 +119,7 @@ for (AppCourseScenesMegerReqVO scene : scenes) { //TODO 鍏堝垽鏂槸鍚︽湁澶囨敞鍐呭 auditionVO.setText(scene.getBackground().getPptRemark()); + auditionVO.setLanguage(scene.getVoice().getLanguage().toLowerCase()); if (scene.getVoice().getVoiceId() == null){ auditionVO.setHumanId(String.valueOf(digitalHumansDO.getId())); }else{ diff --git a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java index 4f677b7..80ccd3d 100644 --- a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java +++ b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java @@ -1,5 +1,6 @@ package cn.iocoder.yudao.module.digitalcourse.service.voices; +import cn.hutool.core.io.FileUtil; import cn.hutool.core.lang.UUID; import cn.hutool.core.util.StrUtil; import cn.hutool.http.HttpRequest; @@ -16,11 +17,17 @@ import cn.iocoder.yudao.module.digitalcourse.dal.mysql.voices.VoicesMapper; import cn.iocoder.yudao.module.infra.api.config.ConfigApi; import cn.iocoder.yudao.module.infra.api.file.FileApi; +import com.alibaba.fastjson.JSONObject; +import com.alibaba.fastjson.JSON; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Resource; import org.springframework.stereotype.Service; import org.springframework.validation.annotation.Validated; + +import java.util.HashMap; +import java.util.Map; +import java.util.Set; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; import static cn.iocoder.yudao.module.digitalcourse.enums.ErrorCodeConstants.VOICES_NOT_EXISTS; @@ -135,47 +142,136 @@ private DigitalHumansMapper digitalHumansMapper; private static final String EASEGEN_URL = "easegen.url"; private static final String HEYGEM_CORE_URL = "heygem.core.url"; + private static final String HEYGEM_VOICE_DATA = "heygem.voice.data"; + + public static final Set<String> SUPPORTED_LANGUAGES = Set.of( + "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", + "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "hi" + ); + // 涓嫳鏂囦笓鐢ㄦā鍨嬫敮鎸佺殑璇█ + private static final Set<String> CN_EN_LANGUAGES = Set.of("zh-cn", "en"); + @Override public String audition(AuditionVO auditionVO) { + String language = auditionVO.getLanguage().toLowerCase(); + // 鍒ゆ柇鏄惁鏄敮鎸佺殑璇█ + if (!SUPPORTED_LANGUAGES.contains(language)) { + throw new IllegalArgumentException("涓嶆敮鎸佺殑璇█绫诲瀷: " + language); + } + + // 鏋勫缓鍙傛暟 InvokeVO invokeVO = new InvokeVO(); invokeVO.setSpeaker(InvokeVO.generateUUID()); invokeVO.setText(auditionVO.getText()); + if (auditionVO.getVoiceId() == null) { DigitalHumansDO digitalHumansDO = digitalHumansMapper.selectById(auditionVO.getHumanId()); invokeVO.setReferenceText(digitalHumansDO.getReferenceAudioText()); invokeVO.setReferenceAudio(digitalHumansDO.getAsrFormatAudioUrl()); - }else if (auditionVO.getHumanId() == null){ + } else if (auditionVO.getHumanId() == null) { VoicesDO voicesDO = voicesMapper.selectById(auditionVO.getVoiceId()); invokeVO.setReferenceText(voicesDO.getReferenceAudioText()); invokeVO.setReferenceAudio(voicesDO.getAsrFormatAudioUrl()); } - ObjectMapper objectMapper = new ObjectMapper(); - String jsonString = null; - try { - jsonString = objectMapper.writeValueAsString(invokeVO); - } catch (JsonProcessingException e) { - throw new RuntimeException(e); - } - String configValueByKey = configApi.getConfigValueByKey(HEYGEM_CORE_URL); - String url = configValueByKey + "/v1/invoke"; - HttpResponse execute = HttpRequest.post(url) - .body(jsonString) - .execute(); - - if (execute.getStatus() != 200) { - return null; - } String fileName = UUID.randomUUID().toString() + ".wav"; + byte[] content; - // 鑾峰彇闊抽鏂囦欢鐨勪簩杩涘埗鏁版嵁 - byte[] content = execute.bodyBytes(); + try { + if (CN_EN_LANGUAGES.contains(language)) { + // 浣跨敤涓嫳鏂囨ā鍨� + String jsonString = new ObjectMapper().writeValueAsString(invokeVO); + String coreUrl = configApi.getConfigValueByKey(HEYGEM_CORE_URL) + "/v1/invoke"; + HttpResponse response = HttpRequest.post(coreUrl) + .body(jsonString) + .execute(); - // 浣跨敤 `createFile` 鏂规硶瀛樺偍鏂囦欢锛屽苟鑾峰彇 URL - String fileUrl = fileApi.createFile(fileName, null, content); + if (response.getStatus() != 200) { + return null; + } + content = response.bodyBytes(); - return fileUrl; // 杩斿洖瀛樺偍鐨勬枃浠� URL - // 杩斿洖闊抽鏂囦欢璺緞 + } else { + // 浣跨敤鍏朵粬璇█妯″瀷锛屽 http://127.0.0.1:5002/synthesize + String referenceAudio = invokeVO.getReferenceAudio(); + String resultName = ""; + if (referenceAudio != null) { + if (referenceAudio.startsWith("/code/sessions/") || referenceAudio.startsWith("/code/data/")) { + System.out.println("璺緞灞炰簬 /code/sessions/ 鎴� /code/data/"); + + // 鍙彇绗竴涓矾寰勶紙浠||鍒嗗壊锛� + String firstPath = referenceAudio.split("\\|\\|\\|")[0]; + + // 鍙栨渶鍚庝竴绾ф枃浠跺悕 + String fileName1 = firstPath.substring(firstPath.lastIndexOf('/') + 1); + + String coreName; + if (referenceAudio.startsWith("/code/sessions/")) { + // sessions璺緞鍙兘鏈� _partN锛屽幓闄� _partN 鍙婂悗闈㈤儴鍒� + int partIndex = fileName1.indexOf("_part"); + if (partIndex != -1) { + coreName = fileName1.substring(0, partIndex); + } else { + // 娌℃湁_part锛屽幓鎺夋墿灞曞悕 + int dotIndex = fileName1.lastIndexOf('.'); + coreName = (dotIndex != -1) ? fileName1.substring(0, dotIndex) : fileName1; + } + } else { + // data璺緞鐩存帴鍙栧畬鏁存枃浠跺悕锛堝嵆鏍煎紡鍚�+鍚庣紑锛� + coreName = fileName1.substring(0, fileName1.lastIndexOf('.')); + } + + // 鑾峰彇鍚庣紑 + int dotIndex = fileName1.lastIndexOf('.'); + String suffix = (dotIndex != -1) ? fileName1.substring(dotIndex) : ""; + + // 鏈�缁堢粨鏋� + resultName = coreName + suffix; + + System.out.println("鎻愬彇鐨勬牸寮忓悕锛�" + resultName); + } else { + // 鍏朵粬璺緞 + System.out.println("鏈煡璺緞绫诲瀷"); + throw new IllegalArgumentException("澹伴煶妯″瀷寮傚父锛岃鑱旂郴绠$悊鍛�"); + } + } + //resultName + String resultVoiceUrl = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA)+"/origin_audio/" + resultName; + Map<String, Object> params = new HashMap<>(); + params.put("text", auditionVO.getText()); + params.put("speaker_wav", resultVoiceUrl); + params.put("language", language); + + HttpResponse response = HttpRequest.post("http://127.0.0.1:5002/synthesize") + .contentType("application/json") + .body(new ObjectMapper().writeValueAsString(params)) + .execute(); + + if (response.getStatus() != 200) { + return null; + } + String body = response.body(); + JSONObject json = JSON.parseObject(body); + + Integer code = json.getInteger("code"); + String message = json.getString("message"); + + if (code == null || code != 200) { + throw new RuntimeException("璇煶鍚堟垚澶辫触锛�" + message); + } + + JSONObject outputPath = json.getJSONObject("output_path"); + String diskPath = outputPath.getString("disk_path"); + String url = outputPath.getString("url"); + + // 浣跨敤 diskPath 鍜� url + content = FileUtil.readBytes(diskPath); + } + // 淇濆瓨闊抽鏂囦欢骞惰繑鍥炲湴鍧� + return fileApi.createFile(fileName, null, content); + } catch (Exception e) { + throw new RuntimeException("璇煶鍚堟垚澶辫触", e); + } } diff --git a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java index 0ababbb..f9211a8 100644 --- a/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java +++ b/yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java @@ -26,10 +26,7 @@ import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.text.SimpleDateFormat; -import java.util.Calendar; -import java.util.Date; -import java.util.List; -import java.util.Map; +import java.util.*; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -60,28 +57,65 @@ private static final String HEYGEM_CORE_URL = "heygem.core.url"; @Async public void remoteTrain(VoicesTrailVO trailVO){ + // 鍒涘缓鐩爣鐩綍 String origin_audio = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA) + "/origin_audio"; - //璁粌鍓嶆牎楠� try { Files.createDirectories(Path.of(origin_audio)); } catch (IOException e) { - throw new RuntimeException(e); + throw new RuntimeException("鍒涘缓鐩綍澶辫触: " + origin_audio, e); } - String extname = trailVO.getFixAuditionUrl().substring(trailVO.getFixAuditionUrl().lastIndexOf(".")); + + // 鑾峰彇婧愭枃浠剁殑鎵╁睍鍚嶏紙濡� .mp3 鎴� .wav锛� + String fixAuditionUrl = trailVO.getFixAuditionUrl(); + String extname = fixAuditionUrl.substring(fixAuditionUrl.lastIndexOf(".")); + + // 鐢熸垚鐩爣鏂囦欢鍚嶏紙鍒濆涓哄師濮嬫墿灞曞悕锛� String modelFileName = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()) + extname; - String modelFilePath = Paths.get(origin_audio, modelFileName).toString(); + Path modelFilePath = Paths.get(origin_audio, modelFileName); - String substring = configApi.getConfigValueByKey(EASEGEN_URL)+trailVO.getFixAuditionUrl().substring(trailVO.getFixAuditionUrl().lastIndexOf("/")); + // 鑾峰彇鏈湴婧愭枃浠惰矾寰勶紙鎷兼帴 EASEGEN_URL 鍜屾枃浠跺悕锛� + String substring = configApi.getConfigValueByKey(EASEGEN_URL) + + fixAuditionUrl.substring(fixAuditionUrl.lastIndexOf("/")); + Path sourcePath = Path.of(substring); - try { - Files.copy(Path.of(substring), Path.of(modelFilePath), StandardCopyOption.REPLACE_EXISTING); - } catch (IOException e) { - throw new RuntimeException(e); + // 濡傛灉涓嶆槸 .wav锛屽氨杞爜 + if (!substring.toLowerCase(Locale.ROOT).endsWith(".wav")) { + // 鏋勯�� wav 璺緞锛堜笌鍘熻矾寰勫悓鐩綍锛� + String filename = sourcePath.getFileName().toString(); + String nameWithoutExt = filename.substring(0, filename.lastIndexOf(".")); + Path wavPath = sourcePath.resolveSibling(nameWithoutExt + ".wav"); + + // 鎵ц FFmpeg 鍛戒护 + String ffmpegCmd = String.format("ffmpeg -y -i \"%s\" \"%s\"", sourcePath, wavPath); + try { + Process process = Runtime.getRuntime().exec(ffmpegCmd); + int exitCode = process.waitFor(); + if (exitCode != 0) { + throw new RuntimeException("FFmpeg 杞崲澶辫触锛岃繑鍥炵爜锛�" + exitCode); + } + + // 杞崲鎴愬姛鍚庝娇鐢ㄦ柊鐨� .wav 璺緞浣滀负鏈�缁堟枃浠惰矾寰勶紙鐩爣鍚嶄篃鏀逛负 .wav锛� + modelFileName = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()) + ".wav"; + modelFilePath = Paths.get(origin_audio, modelFileName); + Files.copy(wavPath, modelFilePath, StandardCopyOption.REPLACE_EXISTING); + + } catch (IOException | InterruptedException e) { + throw new RuntimeException("鎵ц FFmpeg 杞崲澶辫触", e); + } + + } else { + // 宸叉槸 .wav锛岀洿鎺ュ鍒� + try { + Files.copy(sourcePath, modelFilePath, StandardCopyOption.REPLACE_EXISTING); + } catch (IOException e) { + throw new RuntimeException("澶嶅埗 .wav 鏂囦欢澶辫触", e); + } } + String configValueByKey = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA); // 璁$畻鐩稿璺緞 - Path relativeAudioPath = Path.of(configValueByKey).relativize(Path.of(modelFilePath)); + Path relativeAudioPath = Path.of(configValueByKey).relativize(modelFilePath); Map<String, Object> map = Map.of( "format", "wav", "reference_audio", relativeAudioPath.toString().replace("\\", "/"), @@ -100,6 +134,7 @@ .body(JSON.toJSONString(map)) .execute(); String body = execute.body(); + // 妫�鏌ュ搷搴旂姸鎬佺爜鏄惁鎴愬姛 if (execute.getStatus() != 200) { @@ -123,13 +158,35 @@ // 澶勭悊涓氬姟閫昏緫閿欒锛屾洿鏂扮姸鎬佸拰閿欒淇℃伅 String referenceAudioText = responseJson.getString("reference_audio_text"); String asrFormatAudioUrl = responseJson.getString("asr_format_audio_url"); + if (referenceAudioText == null || asrFormatAudioUrl == null) { + if (body.equals("{\"code\":-1,\"msg\":\"asr failed\"}")) { + String a = origin_audio+"/format_denoise_"+modelFileName; + String b = origin_audio+"/format_"+modelFileName; + Path pathA = Path.of(a); + Path pathB = Path.of(b); + ///code/data/origin_audio/format_denoise_20250609090124273.wav + if (Files.exists(pathA)) { + asrFormatAudioUrl = "/code/data/origin_audio/format_denoise_" + modelFileName; + referenceAudioText = "123"; + }else if (Files.exists(pathB)) { + asrFormatAudioUrl = "/code/data/origin_audio/format_" + modelFileName; + referenceAudioText = "123"; + } + }else{ + // 濡傛灉娌℃湁杩斿洖棰勬湡鐨勫瓧娈碉紝璁や负鏄敊璇� + voicesMapper.update(new UpdateWrapper<VoicesDO>().lambda().eq(VoicesDO::getCode, trailVO.getCode()).set(VoicesDO::getStatus, ERROR_STATUS)); + log.error("璁粌澶辫触锛�->>>>>>>>> 鏈繑鍥為鏈熺殑瀛楁"); + return; + } + + } voicesMapper.update( new UpdateWrapper<VoicesDO>() .lambda() .eq(VoicesDO::getCode, trailVO.getCode()) // 鏉′欢锛歝ode 绛変簬浼犲叆鐨勫�� - .set(VoicesDO::getStatus, 0) // 鏇存柊瀛楁 status 涓� 0 + .set(VoicesDO::getStatus, COMPLETE_STATUS) // 鏇存柊瀛楁 status 涓� 0 .set(VoicesDO::getAsrFormatAudioUrl,asrFormatAudioUrl) .set(VoicesDO::getReferenceAudioText,referenceAudioText) ); -- Gitblit v1.9.3