yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/controller/admin/coursescenevoices/vo/AppCourseSceneVoicesMegerReqVO.java
@@ -36,4 +36,5 @@ @Schema(description = "状态", requiredMode = Schema.RequiredMode.REQUIRED, example = "2") private Integer status; private String language; } yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/dal/dataobject/voices/AuditionVO.java
@@ -15,4 +15,5 @@ //声音模型ID private String voiceId; private String language; } yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/coursemedia/CourseMediaServiceUtil.java
@@ -119,6 +119,7 @@ for (AppCourseScenesMegerReqVO scene : scenes) { //TODO 先判断是否有备注内容 auditionVO.setText(scene.getBackground().getPptRemark()); auditionVO.setLanguage(scene.getVoice().getLanguage().toLowerCase()); if (scene.getVoice().getVoiceId() == null){ auditionVO.setHumanId(String.valueOf(digitalHumansDO.getId())); }else{ yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceImpl.java
@@ -1,5 +1,6 @@ package cn.iocoder.yudao.module.digitalcourse.service.voices; import cn.hutool.core.io.FileUtil; import cn.hutool.core.lang.UUID; import cn.hutool.core.util.StrUtil; import cn.hutool.http.HttpRequest; @@ -16,11 +17,17 @@ import cn.iocoder.yudao.module.digitalcourse.dal.mysql.voices.VoicesMapper; import cn.iocoder.yudao.module.infra.api.config.ConfigApi; import cn.iocoder.yudao.module.infra.api.file.FileApi; import com.alibaba.fastjson.JSONObject; import com.alibaba.fastjson.JSON; import com.fasterxml.jackson.core.JsonProcessingException; import com.fasterxml.jackson.databind.ObjectMapper; import jakarta.annotation.Resource; import org.springframework.stereotype.Service; import org.springframework.validation.annotation.Validated; import java.util.HashMap; import java.util.Map; import java.util.Set; import static cn.iocoder.yudao.framework.common.exception.util.ServiceExceptionUtil.exception; import static cn.iocoder.yudao.module.digitalcourse.enums.ErrorCodeConstants.VOICES_NOT_EXISTS; @@ -135,11 +142,28 @@ private DigitalHumansMapper digitalHumansMapper; private static final String EASEGEN_URL = "easegen.url"; private static final String HEYGEM_CORE_URL = "heygem.core.url"; private static final String HEYGEM_VOICE_DATA = "heygem.voice.data"; public static final Set<String> SUPPORTED_LANGUAGES = Set.of( "en", "es", "fr", "de", "it", "pt", "pl", "tr", "ru", "nl", "cs", "ar", "zh-cn", "hu", "ko", "ja", "hi" ); // 中英文专用模型支持的语言 private static final Set<String> CN_EN_LANGUAGES = Set.of("zh-cn", "en"); @Override public String audition(AuditionVO auditionVO) { String language = auditionVO.getLanguage().toLowerCase(); // 判断是否是支持的语言 if (!SUPPORTED_LANGUAGES.contains(language)) { throw new IllegalArgumentException("不支持的语言类型: " + language); } // 构建参数 InvokeVO invokeVO = new InvokeVO(); invokeVO.setSpeaker(InvokeVO.generateUUID()); invokeVO.setText(auditionVO.getText()); if (auditionVO.getVoiceId() == null) { DigitalHumansDO digitalHumansDO = digitalHumansMapper.selectById(auditionVO.getHumanId()); invokeVO.setReferenceText(digitalHumansDO.getReferenceAudioText()); @@ -149,33 +173,105 @@ invokeVO.setReferenceText(voicesDO.getReferenceAudioText()); invokeVO.setReferenceAudio(voicesDO.getAsrFormatAudioUrl()); } ObjectMapper objectMapper = new ObjectMapper(); String jsonString = null; String fileName = UUID.randomUUID().toString() + ".wav"; byte[] content; try { jsonString = objectMapper.writeValueAsString(invokeVO); } catch (JsonProcessingException e) { throw new RuntimeException(e); } String configValueByKey = configApi.getConfigValueByKey(HEYGEM_CORE_URL); String url = configValueByKey + "/v1/invoke"; HttpResponse execute = HttpRequest.post(url) if (CN_EN_LANGUAGES.contains(language)) { // 使用中英文模型 String jsonString = new ObjectMapper().writeValueAsString(invokeVO); String coreUrl = configApi.getConfigValueByKey(HEYGEM_CORE_URL) + "/v1/invoke"; HttpResponse response = HttpRequest.post(coreUrl) .body(jsonString) .execute(); if (execute.getStatus() != 200) { if (response.getStatus() != 200) { return null; } content = response.bodyBytes(); String fileName = UUID.randomUUID().toString() + ".wav"; } else { // 使用其他语言模型,如 http://127.0.0.1:5002/synthesize String referenceAudio = invokeVO.getReferenceAudio(); String resultName = ""; if (referenceAudio != null) { if (referenceAudio.startsWith("/code/sessions/") || referenceAudio.startsWith("/code/data/")) { System.out.println("路径属于 /code/sessions/ 或 /code/data/"); // 获取音频文件的二进制数据 byte[] content = execute.bodyBytes(); // 只取第一个路径(以|||分割) String firstPath = referenceAudio.split("\\|\\|\\|")[0]; // 使用 `createFile` 方法存储文件,并获取 URL String fileUrl = fileApi.createFile(fileName, null, content); // 取最后一级文件名 String fileName1 = firstPath.substring(firstPath.lastIndexOf('/') + 1); return fileUrl; // 返回存储的文件 URL // 返回音频文件路径 String coreName; if (referenceAudio.startsWith("/code/sessions/")) { // sessions路径可能有 _partN,去除 _partN 及后面部分 int partIndex = fileName1.indexOf("_part"); if (partIndex != -1) { coreName = fileName1.substring(0, partIndex); } else { // 没有_part,去掉扩展名 int dotIndex = fileName1.lastIndexOf('.'); coreName = (dotIndex != -1) ? fileName1.substring(0, dotIndex) : fileName1; } } else { // data路径直接取完整文件名(即格式名+后缀) coreName = fileName1.substring(0, fileName1.lastIndexOf('.')); } // 获取后缀 int dotIndex = fileName1.lastIndexOf('.'); String suffix = (dotIndex != -1) ? fileName1.substring(dotIndex) : ""; // 最终结果 resultName = coreName + suffix; System.out.println("提取的格式名:" + resultName); } else { // 其他路径 System.out.println("未知路径类型"); throw new IllegalArgumentException("声音模型异常,请联系管理员"); } } //resultName String resultVoiceUrl = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA)+"/origin_audio/" + resultName; Map<String, Object> params = new HashMap<>(); params.put("text", auditionVO.getText()); params.put("speaker_wav", resultVoiceUrl); params.put("language", language); HttpResponse response = HttpRequest.post("http://127.0.0.1:5002/synthesize") .contentType("application/json") .body(new ObjectMapper().writeValueAsString(params)) .execute(); if (response.getStatus() != 200) { return null; } String body = response.body(); JSONObject json = JSON.parseObject(body); Integer code = json.getInteger("code"); String message = json.getString("message"); if (code == null || code != 200) { throw new RuntimeException("语音合成失败:" + message); } JSONObject outputPath = json.getJSONObject("output_path"); String diskPath = outputPath.getString("disk_path"); String url = outputPath.getString("url"); // 使用 diskPath 和 url content = FileUtil.readBytes(diskPath); } // 保存音频文件并返回地址 return fileApi.createFile(fileName, null, content); } catch (Exception e) { throw new RuntimeException("语音合成失败", e); } } yudao-module-digitalcourse/yudao-module-digitalcourse-biz/src/main/java/cn/iocoder/yudao/module/digitalcourse/service/voices/VoicesServiceUtil.java
@@ -26,10 +26,7 @@ import java.nio.file.Paths; import java.nio.file.StandardCopyOption; import java.text.SimpleDateFormat; import java.util.Calendar; import java.util.Date; import java.util.List; import java.util.Map; import java.util.*; import java.util.concurrent.TimeUnit; import java.util.stream.Collectors; @@ -60,28 +57,65 @@ private static final String HEYGEM_CORE_URL = "heygem.core.url"; @Async public void remoteTrain(VoicesTrailVO trailVO){ // 创建目标目录 String origin_audio = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA) + "/origin_audio"; //训练前校验 try { Files.createDirectories(Path.of(origin_audio)); } catch (IOException e) { throw new RuntimeException(e); throw new RuntimeException("创建目录失败: " + origin_audio, e); } String extname = trailVO.getFixAuditionUrl().substring(trailVO.getFixAuditionUrl().lastIndexOf(".")); // 获取源文件的扩展名(如 .mp3 或 .wav) String fixAuditionUrl = trailVO.getFixAuditionUrl(); String extname = fixAuditionUrl.substring(fixAuditionUrl.lastIndexOf(".")); // 生成目标文件名(初始为原始扩展名) String modelFileName = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()) + extname; String modelFilePath = Paths.get(origin_audio, modelFileName).toString(); Path modelFilePath = Paths.get(origin_audio, modelFileName); String substring = configApi.getConfigValueByKey(EASEGEN_URL)+trailVO.getFixAuditionUrl().substring(trailVO.getFixAuditionUrl().lastIndexOf("/")); // 获取本地源文件路径(拼接 EASEGEN_URL 和文件名) String substring = configApi.getConfigValueByKey(EASEGEN_URL) + fixAuditionUrl.substring(fixAuditionUrl.lastIndexOf("/")); Path sourcePath = Path.of(substring); // 如果不是 .wav,就转码 if (!substring.toLowerCase(Locale.ROOT).endsWith(".wav")) { // 构造 wav 路径(与原路径同目录) String filename = sourcePath.getFileName().toString(); String nameWithoutExt = filename.substring(0, filename.lastIndexOf(".")); Path wavPath = sourcePath.resolveSibling(nameWithoutExt + ".wav"); // 执行 FFmpeg 命令 String ffmpegCmd = String.format("ffmpeg -y -i \"%s\" \"%s\"", sourcePath, wavPath); try { Files.copy(Path.of(substring), Path.of(modelFilePath), StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { throw new RuntimeException(e); Process process = Runtime.getRuntime().exec(ffmpegCmd); int exitCode = process.waitFor(); if (exitCode != 0) { throw new RuntimeException("FFmpeg 转换失败,返回码:" + exitCode); } // 转换成功后使用新的 .wav 路径作为最终文件路径(目标名也改为 .wav) modelFileName = new SimpleDateFormat("yyyyMMddHHmmssSSS").format(new Date()) + ".wav"; modelFilePath = Paths.get(origin_audio, modelFileName); Files.copy(wavPath, modelFilePath, StandardCopyOption.REPLACE_EXISTING); } catch (IOException | InterruptedException e) { throw new RuntimeException("执行 FFmpeg 转换失败", e); } } else { // 已是 .wav,直接复制 try { Files.copy(sourcePath, modelFilePath, StandardCopyOption.REPLACE_EXISTING); } catch (IOException e) { throw new RuntimeException("复制 .wav 文件失败", e); } } String configValueByKey = configApi.getConfigValueByKey(HEYGEM_VOICE_DATA); // 计算相对路径 Path relativeAudioPath = Path.of(configValueByKey).relativize(Path.of(modelFilePath)); Path relativeAudioPath = Path.of(configValueByKey).relativize(modelFilePath); Map<String, Object> map = Map.of( "format", "wav", "reference_audio", relativeAudioPath.toString().replace("\\", "/"), @@ -100,6 +134,7 @@ .body(JSON.toJSONString(map)) .execute(); String body = execute.body(); // 检查响应状态码是否成功 if (execute.getStatus() != 200) { @@ -123,13 +158,35 @@ // 处理业务逻辑错误,更新状态和错误信息 String referenceAudioText = responseJson.getString("reference_audio_text"); String asrFormatAudioUrl = responseJson.getString("asr_format_audio_url"); if (referenceAudioText == null || asrFormatAudioUrl == null) { if (body.equals("{\"code\":-1,\"msg\":\"asr failed\"}")) { String a = origin_audio+"/format_denoise_"+modelFileName; String b = origin_audio+"/format_"+modelFileName; Path pathA = Path.of(a); Path pathB = Path.of(b); ///code/data/origin_audio/format_denoise_20250609090124273.wav if (Files.exists(pathA)) { asrFormatAudioUrl = "/code/data/origin_audio/format_denoise_" + modelFileName; referenceAudioText = "123"; }else if (Files.exists(pathB)) { asrFormatAudioUrl = "/code/data/origin_audio/format_" + modelFileName; referenceAudioText = "123"; } }else{ // 如果没有返回预期的字段,认为是错误 voicesMapper.update(new UpdateWrapper<VoicesDO>().lambda().eq(VoicesDO::getCode, trailVO.getCode()).set(VoicesDO::getStatus, ERROR_STATUS)); log.error("训练失败:->>>>>>>>> 未返回预期的字段"); return; } } voicesMapper.update( new UpdateWrapper<VoicesDO>() .lambda() .eq(VoicesDO::getCode, trailVO.getCode()) // 条件:code 等于传入的值 .set(VoicesDO::getStatus, 0) // 更新字段 status 为 0 .set(VoicesDO::getStatus, COMPLETE_STATUS) // 更新字段 status 为 0 .set(VoicesDO::getAsrFormatAudioUrl,asrFormatAudioUrl) .set(VoicesDO::getReferenceAudioText,referenceAudioText) );