From 3ceef41ab05a1fa177ffdf0afb51fbba5829615b Mon Sep 17 00:00:00 2001 From: ageerle <ageerle@163.com> Date: 星期一, 19 五月 2025 16:11:37 +0800 Subject: [PATCH] feat: 知识库上传逻辑调整 --- /dev/null | 385 ----------------------------------- ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java | 191 ++-------------- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java | 29 -- 3 files changed, 30 insertions(+), 575 deletions(-) diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java index 917652c..4b6c01d 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java @@ -1,13 +1,11 @@ package org.ruoyi.service.impl; -import cn.hutool.core.util.RandomUtil; import com.google.protobuf.ServiceException; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.ollama.OllamaEmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; -import dev.langchain4j.model.output.Response; import dev.langchain4j.store.embedding.EmbeddingMatch; import dev.langchain4j.store.embedding.EmbeddingSearchRequest; import dev.langchain4j.store.embedding.EmbeddingStore; @@ -31,6 +29,7 @@ import java.util.HashMap; import java.util.List; import java.util.Map; +import java.util.stream.Collectors; /** * 鍚戦噺搴撶鐞� @@ -131,31 +130,7 @@ createSchema(kid,modelName); // 鏍规嵁鏉′欢鍒犻櫎鍚戦噺鏁版嵁 Filter simpleFilter = new IsEqualTo("kid", kid); - removeByFilter(simpleFilter); - } - - public void removeByFilter(Filter filter) { - List<Float> dummyVector = new ArrayList<>(); - // TODO 妯″瀷缁村害 - int dimension = 1024; - for (int i = 0; i < dimension; i++) { - dummyVector.add(0.0f); - } - Embedding dummyEmbedding = Embedding.from(dummyVector); - EmbeddingSearchRequest request = EmbeddingSearchRequest.builder() - .queryEmbedding(dummyEmbedding) - .filter(filter) - .maxResults(10000) - .build(); - // 鎼滅储 - List<String> idsToDelete = embeddingStore.search(request) - .matches().stream() - .map(EmbeddingMatch::embeddingId) - .collect(Collectors.toList()); - // 鍒犻櫎 - if (!idsToDelete.isEmpty()) { - embeddingStore.removeAll(idsToDelete); - } + embeddingStore.removeAll(simpleFilter); } @Override diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java deleted file mode 100644 index 2951c20..0000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java +++ /dev/null @@ -1,385 +0,0 @@ -package org.ruoyi.chat.service.knowledge; - -import cn.hutool.core.util.ObjectUtil; -import cn.hutool.core.util.RandomUtil; -import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; -import com.baomidou.mybatisplus.core.toolkit.Wrappers; -import java.util.Collection; -import java.util.Date; -import java.util.List; -import java.util.stream.Collectors; -import lombok.RequiredArgsConstructor; -import org.ruoyi.chain.loader.ResourceLoaderFactory; -import org.ruoyi.constant.DealStatus; -import org.ruoyi.domain.KnowledgeAttach; -import org.ruoyi.domain.KnowledgeAttachPic; -import org.ruoyi.domain.KnowledgeFragment; -import org.ruoyi.domain.KnowledgeInfo; -import org.ruoyi.domain.PdfFileContentResult; -import org.ruoyi.domain.bo.StoreEmbeddingBo; -import org.ruoyi.domain.vo.ChatModelVo; -import org.ruoyi.domain.vo.KnowledgeAttachVo; -import org.ruoyi.domain.vo.KnowledgeInfoVo; -import org.ruoyi.mapper.KnowledgeAttachMapper; -import org.ruoyi.mapper.KnowledgeAttachPicMapper; -import org.ruoyi.mapper.KnowledgeFragmentMapper; -import org.ruoyi.mapper.KnowledgeInfoMapper; -import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.VectorStoreService; -import org.ruoyi.service.impl.PdfImageExtractServiceImpl; -import org.ruoyi.system.domain.vo.SysOssVo; -import org.ruoyi.system.service.ISysOssService; -import org.ruoyi.utils.ZipUtils; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.scheduling.annotation.Async; -import org.springframework.stereotype.Service; -import org.springframework.web.multipart.MultipartFile; - -import com.fasterxml.jackson.databind.JsonNode; -import com.fasterxml.jackson.databind.ObjectMapper; - -/** - * @Description: - * @Date: 2025/5/15 涓嬪崍4:29 - */ -@Service -@RequiredArgsConstructor -public class DealFileService { - private static final Logger log = LoggerFactory.getLogger(DealFileService.class); - - private final KnowledgeInfoMapper baseMapper; - - private final VectorStoreService vectorStoreService; - - private final ResourceLoaderFactory resourceLoaderFactory; - - private final KnowledgeFragmentMapper fragmentMapper; - - private final KnowledgeAttachMapper attachMapper; - - private final IChatModelService chatModelService; - - private final ISysOssService ossService; - -// private final PdfImageExtractService pdfImageExtractService; - - private final KnowledgeAttachPicMapper picMapper; - - @Value("${pdf.extract.service.url}") - private String serviceUrl; - @Value("${pdf.extract.ai-api.url}") - private String aiApiUrl; - @Value("${pdf.extract.ai-api.key}") - private String aiApiKey; - - - @Async - public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception { - try { - //閿佸畾鏁版嵁 鏇存敼VectorStatus 鍒拌繘琛屼腑 - if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId()) - ) == 0) { - return; - } - List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList( - new LambdaQueryWrapper<KnowledgeFragment>() - .eq(KnowledgeFragment::getKid, attachItem.getKid()) - .eq(KnowledgeFragment::getDocId, attachItem.getDocId()) - ); - if (ObjectUtil.isEmpty(knowledgeFragments)) { - throw new Exception("鏂囦欢娈佃惤涓虹┖"); - } - List<String> fids = knowledgeFragments.stream() - .map(KnowledgeFragment::getFid) - .collect(Collectors.toList()); - if (ObjectUtil.isEmpty(fids)) { - throw new Exception("fids 涓虹┖"); - } - List<String> chunkList = knowledgeFragments.stream() - .map(KnowledgeFragment::getContent) - .collect(Collectors.toList()); - - if (ObjectUtil.isEmpty(chunkList)) { - throw new Exception("chunkList 涓虹┖"); - } - // 閫氳繃kid鏌ヨ鐭ヨ瘑搴撲俊鎭� - KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery() - .eq(KnowledgeInfo::getId, attachItem.getKid())); - // 閫氳繃鍚戦噺妯″瀷鏌ヨ妯″瀷淇℃伅 - ChatModelVo chatModelVo = chatModelService.selectModelByName( - knowledgeInfoVo.getEmbeddingModelName()); - - StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); - storeEmbeddingBo.setKid(attachItem.getKid()); - storeEmbeddingBo.setDocId(attachItem.getDocId()); - storeEmbeddingBo.setFids(fids); - storeEmbeddingBo.setChunkList(chunkList); - storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName()); - storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName()); - storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); - storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); - vectorStoreService.storeEmbeddings(storeEmbeddingBo); - - //璁剧疆澶勭悊瀹屾垚 - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getId, attachItem.getId())); - } catch (Exception e) { - //璁剧疆澶勭悊澶辫触 - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40) - .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getId, attachItem.getId())); - throw new RuntimeException(e); - } - } - - @Async - public void dealPicStatus(KnowledgeAttach attachItem) throws Exception { - try { - //閿佸畾鏁版嵁 鏇存敼picStatus 鍒拌繘琛屼腑 - if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId()) - ) == 0) { - return; - } - //鑾峰彇闄勪欢 - if (ObjectUtil.isEmpty(attachItem.getOssId())) { - log.error("==========OssId 涓虹┖锛宎ttachItem={}", attachItem); - throw new Exception("OssId 涓虹┖"); - } - //鑾峰彇oss鏂囦欢 - MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId()); - //鎷嗚В鍑哄浘鐗嘮IP - PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl, - aiApiUrl, aiApiKey); - byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true); - //瑙e帇zip锛屽緱鍒板浘鐗囨枃浠� - MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs); - //涓婁紶鏂囦欢鍒癘SS锛屽啓鍏ヨ〃 - for (MultipartFile file : multipartFiles) { - //鍏堟煡鎵炬槸鍚︽湁鐩稿悓鍥剧墖鍚嶇О锛屽厛鍋氬垹闄� - List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper<KnowledgeAttachPic>() - .eq(KnowledgeAttachPic::getKid, attachItem.getKid()) - .eq(KnowledgeAttachPic::getAid, attachItem.getId()) - .eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename()) - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - Collection<Long> ossIds = knowledgeAttachPics.stream() - .map(KnowledgeAttachPic::getOssId) - .collect(Collectors.toList()); - ossService.deleteWithValidByIds(ossIds, false); - List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId) - .collect(Collectors.toList()); - picMapper.deleteByIds(collect); - } - - SysOssVo upload = ossService.upload(file); - KnowledgeAttachPic entity = new KnowledgeAttachPic(); - entity.setKid(attachItem.getKid()); - entity.setAid(String.valueOf(attachItem.getId())); - entity.setDocName(file.getOriginalFilename()); - entity.setDocType( - file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1)); - entity.setOssId(upload.getOssId()); - int[] ints = extractPageNumbers(file.getOriginalFilename()); - if (ObjectUtil.isNotEmpty(ints)) { - assert ints != null; - if (ints.length == 2) { - entity.setPageNum(ints[0]); - entity.setIndexNum(ints[1]); - } - } - picMapper.insert(entity); - } - - //璁剧疆澶勭悊瀹屾垚 - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId())); - } catch (Exception e) { - //璁剧疆澶勭悊澶辫触 - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40) - .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, attachItem.getId())); - throw new RuntimeException(e); - } - - } - - - @Async - public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception { - try { - //閿佸畾鏁版嵁 鏇存敼 getPicAnysStatus 鍒拌繘琛屼腑 - if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttachPic::getId, picItem.getId()) - ) == 0) { - return; - } - SysOssVo ossVo = ossService.getById(picItem.getOssId()); - if (ObjectUtil.isNotEmpty(ossVo)) { - String fileStr = ossService.downloadByByte(picItem.getOssId()); - //璋冪敤绗笁鏂� 鍒嗘瀽鍥剧墖鍐呭 - PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl( - serviceUrl, - aiApiUrl, aiApiKey); - List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent( - new String[]{fileStr}); - if (ObjectUtil.isNotEmpty(pdfFileContentResults)) { - for (PdfFileContentResult resultItem : pdfFileContentResults) { - //鍥剧墖瑙f瀽鍐呭鍥炲啓鍒皃ic琛� - picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>() - .set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent())) - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttachPic::getId, picItem.getId())); - //灏嗗浘鐗囪В鏋愬唴瀹� 鍐欏叆娈佃惤琛� fragment - KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid()); - if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) { - String fid = RandomUtil.randomString(10); - KnowledgeFragment knowledgeFragment = new KnowledgeFragment(); - knowledgeFragment.setKid(knowledgeAttachVo.getKid()); - knowledgeFragment.setDocId(knowledgeAttachVo.getDocId()); - knowledgeFragment.setFid(fid); - knowledgeFragment.setIdx(0); - knowledgeFragment.setContent(parseContent(resultItem.getContent())); - knowledgeFragment.setCreateTime(new Date()); - fragmentMapper.insert(knowledgeFragment); - - //鏇存柊attach琛紝闇�瑕佹墍鏈夊浘鐗囬兘澶勭悊瀹屾瘯 - // 鏌ヨ闈�30鐘舵�侊紙瀹屾垚鐘舵�侊級鐨勮褰曟暟閲� - long nonStatus30Count = picMapper.selectCount( - new LambdaQueryWrapper<KnowledgeAttachPic>() - .ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttachPic::getAid, picItem.getAid()) - ); - if (nonStatus30Count == 0) { - // 鎵ц琛ㄦ洿鏂版搷浣� - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getId, picItem.getAid())); - } - } - } - } - } - } catch (Exception e) { - //澶辫触 - picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40) - .set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage()) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20) - .eq(KnowledgeAttachPic::getId, picItem.getId())); - throw new RuntimeException(e); - } - } - - - /** - * 浠庢枃浠跺悕涓彁鍙杙age鍚庨潰鐨勪袱涓暟瀛� - * - * @param fileName 鏂囦欢鍚� - * @return 鍖呭惈涓や釜鏁板瓧鐨勬暟缁勶紝濡傛灉鏈壘鍒板垯杩斿洖null - */ - public static int[] extractPageNumbers(String fileName) { - // 鏌ユ壘"page_"鐨勪綅缃� - int pageIndex = fileName.indexOf("page_"); - - if (pageIndex == -1) { - return null; - } - - // 浠�"page_"鍚庡紑濮嬫埅鍙� - String afterPage = fileName.substring(pageIndex + 5); - - // 鎸変笅鍒掔嚎鍒嗗壊 - String[] parts = afterPage.split("_"); - - if (parts.length >= 2) { - try { - // 鎻愬彇涓や釜鏁板瓧 - int firstNumber = Integer.parseInt(parts[0]); - - // 瀵逛簬绗簩涓暟瀛楋紝闇�瑕佸幓鎺夊彲鑳界殑鏂囦欢鎵╁睍鍚� - String secondPart = parts[1]; - int dotIndex = secondPart.indexOf("."); - if (dotIndex != -1) { - secondPart = secondPart.substring(0, dotIndex); - } - - int secondNumber = Integer.parseInt(secondPart); - - return new int[]{firstNumber, secondNumber}; - } catch (NumberFormatException e) { - return null; - } - } - - return null; - } - - public static String parseContent(String jsonString) { - try { - // 鍒涘缓ObjectMapper瀹炰緥 - ObjectMapper objectMapper = new ObjectMapper(); - - // 瑙f瀽JSON瀛楃涓� - JsonNode rootNode = objectMapper.readTree(jsonString); - - // 鑾峰彇choices鏁扮粍鐨勭涓�涓厓绱� - JsonNode choicesNode = rootNode.get("choices"); - if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) { - // 鑾峰彇绗竴涓猚hoice - JsonNode firstChoice = choicesNode.get(0); - - // 鑾峰彇message鑺傜偣 - JsonNode messageNode = firstChoice.get("message"); - if (messageNode != null) { - // 鑾峰彇content瀛楁鐨勫�� - JsonNode contentNode = messageNode.get("content"); - if (contentNode != null) { - return contentNode.asText(); - } - } - } - - return "鏃犳硶鎵惧埌content鍐呭"; - } catch (Exception e) { - e.printStackTrace(); - return "瑙f瀽JSON鏃跺彂鐢熼敊璇�: " + e.getMessage(); - } - } - - -} diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java index 06ad06b..914256e 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java @@ -4,29 +4,21 @@ import cn.hutool.core.util.ObjectUtil; import cn.hutool.core.util.RandomUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; -import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; -import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper; import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; -import java.util.stream.Collectors; import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; import org.ruoyi.chain.loader.ResourceLoader; import org.ruoyi.chain.loader.ResourceLoaderFactory; import org.ruoyi.common.core.domain.model.LoginUser; import org.ruoyi.common.core.utils.MapstructUtils; import org.ruoyi.common.core.utils.StringUtils; import org.ruoyi.common.satoken.utils.LoginHelper; -import org.ruoyi.constant.DealStatus; -import org.ruoyi.constant.FileType; import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.TableDataInfo; -import org.ruoyi.domain.ChatModel; import org.ruoyi.domain.KnowledgeAttach; import org.ruoyi.domain.KnowledgeAttachPic; import org.ruoyi.domain.KnowledgeFragment; import org.ruoyi.domain.KnowledgeInfo; -import org.ruoyi.domain.PdfFileContentResult; import org.ruoyi.domain.bo.KnowledgeInfoBo; import org.ruoyi.domain.bo.KnowledgeInfoUploadBo; import org.ruoyi.domain.bo.StoreEmbeddingBo; @@ -34,28 +26,21 @@ import org.ruoyi.domain.vo.KnowledgeAttachVo; import org.ruoyi.domain.vo.KnowledgeInfoVo; import org.ruoyi.mapper.KnowledgeAttachMapper; -import org.ruoyi.mapper.KnowledgeAttachPicMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeInfoMapper; import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.PdfImageExtractService; -import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.IKnowledgeInfoService; -import org.ruoyi.service.impl.PdfImageExtractServiceImpl; -import org.ruoyi.system.domain.vo.SysOssVo; -import org.ruoyi.utils.ZipUtils; +import org.ruoyi.service.VectorStoreService; +import org.ruoyi.system.service.ISysOssService; import org.slf4j.Logger; import org.slf4j.LoggerFactory; -import org.springframework.beans.factory.annotation.Value; -import org.springframework.scheduling.annotation.Async; -import org.springframework.scheduling.annotation.Scheduled; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; -import org.ruoyi.system.service.ISysOssService; import java.io.IOException; import java.util.*; +import java.util.stream.Collectors; /** @@ -69,6 +54,7 @@ public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class); + private final KnowledgeInfoMapper baseMapper; private final VectorStoreService vectorStoreService; @@ -82,19 +68,6 @@ private final IChatModelService chatModelService; private final ISysOssService ossService; - -// private final PdfImageExtractService pdfImageExtractService; - - private final KnowledgeAttachPicMapper picMapper; - - private final DealFileService dealFileService; - - @Value("${pdf.extract.service.url}") - private String serviceUrl; - @Value("${pdf.extract.ai-api.url}") - private String aiApiUrl; - @Value("${pdf.extract.ai-api.key}") - private String aiApiKey; /** * 鏌ヨ鐭ヨ瘑搴� @@ -207,45 +180,16 @@ @Override @Transactional(rollbackFor = Exception.class) public void removeKnowledge(String id) { - Map<String, Object> map = new HashMap<>(); - map.put("kid", id); + Map<String,Object> map = new HashMap<>(); + map.put("kid",id); List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map); check(knowledgeInfoList); // 鍒犻櫎鍚戦噺搴撲俊鎭� - knowledgeInfoList.forEach(knowledgeInfoVo -> { - vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()), - knowledgeInfoVo.getVectorModelName()); - }); +// knowledgeInfoList.forEach(knowledgeInfoVo -> { +// vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName()); +// }); // 鍒犻櫎闄勪欢鍜岀煡璇嗙墖娈� fragmentMapper.deleteByMap(map); - List<KnowledgeAttachVo> knowledgeAttachVos = attachMapper.selectVoByMap(map); - if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) { - Collection<Long> ossIds = knowledgeAttachVos.stream() - .map(KnowledgeAttachVo::getOssId) - .collect(Collectors.toList()); - //鍒犻櫎oss - ossService.deleteWithValidByIds(ossIds, false); - - //鍒犻櫎鍥剧墖oss - List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper<KnowledgeAttachPic>() - .in(KnowledgeAttachPic::getKid, - knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid) - .collect(Collectors.toList())) - .in(KnowledgeAttachPic::getAid, - knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId) - .collect(Collectors.toList())) - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - Collection<Long> tossIds = knowledgeAttachPics.stream() - .map(KnowledgeAttachPic::getOssId) - .collect(Collectors.toList()); - ossService.deleteWithValidByIds(tossIds, false); - List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId) - .collect(Collectors.toList()); - picMapper.deleteByIds(collect); - } - } attachMapper.deleteByMap(map); // 鍒犻櫎鐭ヨ瘑搴� baseMapper.deleteByMap(map); @@ -257,11 +201,6 @@ } public void storeContent(MultipartFile file, String kid) { - if (file == null || file.isEmpty()) { - throw new IllegalArgumentException("File cannot be null or empty"); - } - - SysOssVo uploadDto = null; String fileName = file.getOriginalFilename(); List<String> chunkList = new ArrayList<>(); KnowledgeAttach knowledgeAttach = new KnowledgeAttach(); @@ -269,18 +208,15 @@ String docId = RandomUtil.randomString(10); knowledgeAttach.setDocId(docId); knowledgeAttach.setDocName(fileName); - knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1)); + knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1)); String content = ""; - ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType( - knowledgeAttach.getDocType()); + ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType()); List<String> fids = new ArrayList<>(); try { content = resourceLoader.getContent(file.getInputStream()); chunkList = resourceLoader.getChunkList(content, kid); List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>(); if (CollUtil.isNotEmpty(chunkList)) { - // Upload file to OSS - uploadDto = ossService.upload(file); for (int i = 0; i < chunkList.size(); i++) { String fid = RandomUtil.randomString(10); fids.add(fid); @@ -300,21 +236,25 @@ } knowledgeAttach.setContent(content); knowledgeAttach.setCreateTime(new Date()); - if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) { - knowledgeAttach.setOssId(uploadDto.getOssId()); - //鍙湁pdf鏂囦欢 鎵嶉渶瑕佹媶瑙e浘鐗囧拰鍒嗘瀽鍥剧墖鍐呭 - if (FileType.PDF.equals(knowledgeAttach.getDocType())) { - knowledgeAttach.setPicStatus(DealStatus.STATUS_10); - knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10); - } else { - knowledgeAttach.setPicStatus(DealStatus.STATUS_30); - knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30); - } - //鎵�鏈夋枃浠朵笂浼犲悗锛岄兘闇�瑕佸悓姝ュ埌鍚戦噺鏁版嵁搴� - knowledgeAttach.setVectorStatus(DealStatus.STATUS_10); - } attachMapper.insert(knowledgeAttach); + // 閫氳繃kid鏌ヨ鐭ヨ瘑搴撲俊鎭� + KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery() + .eq(KnowledgeInfo::getId, kid)); + + // 閫氳繃鍚戦噺妯″瀷鏌ヨ妯″瀷淇℃伅 + ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName()); + + StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); + storeEmbeddingBo.setKid(kid); + storeEmbeddingBo.setDocId(docId); + storeEmbeddingBo.setFids(fids); + storeEmbeddingBo.setChunkList(chunkList); + storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName()); + storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName()); + storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); + storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); + vectorStoreService.storeEmbeddings(storeEmbeddingBo); } /** @@ -331,79 +271,4 @@ } } - /** - * 绗竴姝� 瀹氭椂 鎷嗚ВPDF鏂囦欢涓殑鍥剧墖 - */ - //@Scheduled(fixedDelay = 15000) // 姣�3绉掓墽琛屼竴娆� - public void dealKnowledgeAttachPic() throws Exception { - //澶勭悊 鎷嗚ВPDF鏂囦欢涓殑鍥剧墖鐨勮褰� - List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList( - new LambdaQueryWrapper<KnowledgeAttach>() - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - ); - log.info("===============鎷嗚ВPDF鏂囦欢涓殑鍥剧墖 size = {}", knowledgeAttaches.size()); - if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { - for (KnowledgeAttach attachItem : knowledgeAttaches) { - dealFileService.dealPicStatus(attachItem); - } - } - } - - /** - * 绗簩姝� 瀹氭椂 瑙f瀽鍥剧墖鍐呭 - */ - //@Scheduled(fixedDelay = 15000) - public void dealKnowledgeAttachPicAnys() throws Exception { - //鑾峰彇鏈鐞嗙殑鍥剧墖璁板綍 - List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList( - new LambdaQueryWrapper<KnowledgeAttachPic>() - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .last("LIMIT 20") - ); - if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) { - for (KnowledgeAttachPic picItem : knowledgeAttachPics) { - dealFileService.dealPicAnysStatus(picItem); - } - } - } - /** - * 绗笁姝� 瀹氭椂 澶勭悊 闄勪欢涓婁紶鍚庝笂浼犲悜閲忔暟鎹簱 - */ - //@Scheduled(fixedDelay = 30000) // 姣�3绉掓墽琛屼竴娆� - public void dealKnowledgeAttachVector() throws Exception { - //澶勭悊 闇�瑕佷笂浼犲悜閲忔暟鎹簱鐨勮褰� - List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList( - new LambdaQueryWrapper<KnowledgeAttach>() - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - ); - log.info("===============涓婁紶鍚戦噺鏁版嵁搴� size = {}", knowledgeAttaches.size()); - if (ObjectUtil.isNotEmpty(knowledgeAttaches)) { - for (KnowledgeAttach attachItem : knowledgeAttaches) { - dealFileService.dealVectorStatus(attachItem); - } - } - } - /** - * 绗洓姝� 瀹氭椂 澶勭悊 澶辫触鏁版嵁 - */ - //@Scheduled(fixedDelay = 30 * 60 * 1000) - public void dealKnowledge40Status() throws Exception { - //鎷嗚ВPDF澶辫触 閲嶆柊璁剧疆鐘舵�� - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40)); - //灏嗗浘鐗囧垎鏋愬け璐ョ殑鏁版嵁 閲嶆柊璁剧疆鐘舵�� - picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>() - .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)); - //涓婁紶鍚戦噺搴撳け璐� 閲嶆柊璁剧疆鐘舵�� - attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>() - .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10) - .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)); - } - @Scheduled(fixedDelay = 180000) // 3鍒嗛挓鎵ц涓�娆� } -- Gitblit v1.9.3