From aa92d232bb49a275838a74ba9d28d9448ad188c6 Mon Sep 17 00:00:00 2001 From: ageerle <ageerle@163.com> Date: 星期四, 08 五月 2025 10:41:01 +0800 Subject: [PATCH] feat: Weaviate操作向量库功能优化 --- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/QueryVectorBo.java | 43 ++++++++ ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java | 106 ++++++++++++--------- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java | 9 + ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java | 30 +++++ ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java | 15 ++ ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/StoreEmbeddingBo.java | 49 +++++++++ ruoyi-modules-api/ruoyi-knowledge-api/pom.xml | 27 +++-- 7 files changed, 217 insertions(+), 62 deletions(-) diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml index cb35d34..8d7d396 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml +++ b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml @@ -48,17 +48,17 @@ </dependency> <!-- milvus java sdk --> - <dependency> - <groupId>io.milvus</groupId> - <artifactId>milvus-sdk-java</artifactId> - <version>2.3.2</version> - </dependency> +<!-- <dependency>--> +<!-- <groupId>io.milvus</groupId>--> +<!-- <artifactId>milvus-sdk-java</artifactId>--> +<!-- <version>2.3.2</version>--> +<!-- </dependency>--> - <dependency> - <groupId>io.weaviate</groupId> - <artifactId>client</artifactId> - <version>4.0.0</version> - </dependency> +<!-- <dependency>--> +<!-- <groupId>io.weaviate</groupId>--> +<!-- <artifactId>client</artifactId>--> +<!-- <version>4.0.0</version>--> +<!-- </dependency>--> <dependency> @@ -86,7 +86,12 @@ <dependency> <groupId>dev.langchain4j</groupId> - <artifactId>langchain4j-open-ai-spring-boot-starter</artifactId> + <artifactId>langchain4j-open-ai</artifactId> + </dependency> + + <dependency> + <groupId>dev.langchain4j</groupId> + <artifactId>langchain4j-ollama</artifactId> </dependency> </dependencies> diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/QueryVectorBo.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/QueryVectorBo.java new file mode 100644 index 0000000..33e8204 --- /dev/null +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/QueryVectorBo.java @@ -0,0 +1,43 @@ +package org.ruoyi.domain.bo; + + +import lombok.Data; + +/** + * 鏌ヨ鍚戦噺鎵�闇�鍙傛暟 + * @author ageer + */ +@Data +public class QueryVectorBo { + + /** + * 鏌ヨ鍐呭 + */ + private String query; + + /** + * 鐭ヨ瘑搴搆id + */ + private String kid; + + /** + * 鏌ヨ鍚戦噺杩斿洖鏉℃暟 + */ + private Integer maxResults; + + /** + * 妯″瀷鍚嶇О + */ + private String modelName; + + /** + * 璇锋眰key + */ + private String apiKey; + + /** + * 璇锋眰鍦板潃 + */ + private String baseUrl; + +} diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/StoreEmbeddingBo.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/StoreEmbeddingBo.java new file mode 100644 index 0000000..9510403 --- /dev/null +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/bo/StoreEmbeddingBo.java @@ -0,0 +1,49 @@ +package org.ruoyi.domain.bo; + +import lombok.Data; + +import java.util.List; + +/** + * 淇濆瓨鍚戦噺鎵�闇�鍙傛暟 + * @author ageer + */ +@Data +public class StoreEmbeddingBo { + + /** + * 鍒囧垎鏂囨湰鍧楀垪琛� + */ + private List<String> chunkList; + + /** + * 鐭ヨ瘑搴搆id + */ + private String kid; + + /** + * 鏂囨。id + */ + private String docId; + + /** + * 鐭ヨ瘑鍧梚d鍒楄〃 + */ + private List<String> fids; + + /** + * 妯″瀷鍚嶇О + */ + private String modelName; + + /** + * 璇锋眰key + */ + private String apiKey; + + /** + * 璇锋眰鍦板潃 + */ + private String baseUrl; + +} diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java index 6edaa5d..277d0b1 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java @@ -1,20 +1,23 @@ package org.ruoyi.service; +import org.ruoyi.domain.bo.QueryVectorBo; +import org.ruoyi.domain.bo.StoreEmbeddingBo; + import java.util.List; /** - * @author ageer * 鍚戦噺搴撶鐞� + * @author ageer */ public interface VectorStoreService { - void storeEmbeddings(List<String> chunkList, String kid,String docId,List<String> fids); + void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo); void removeByDocId(String kid,String docId); void removeByKid(String kid); - List<String> getQueryVector(String query, String kid); + List<String> getQueryVector(QueryVectorBo queryVectorBo); void createSchema(String kid); diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java index 9d5b929..680a1bb 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java @@ -3,6 +3,7 @@ import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; +import dev.langchain4j.model.ollama.OllamaEmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; import dev.langchain4j.model.output.Response; import dev.langchain4j.store.embedding.EmbeddingMatch; @@ -11,9 +12,12 @@ import dev.langchain4j.store.embedding.filter.Filter; import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo; import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; +import jakarta.annotation.PostConstruct; import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.ruoyi.common.core.service.ConfigService; +import org.ruoyi.domain.bo.QueryVectorBo; +import org.ruoyi.domain.bo.StoreEmbeddingBo; import org.ruoyi.service.VectorStoreService; import org.springframework.stereotype.Service; @@ -23,9 +27,11 @@ import java.util.List; import java.util.Map; + + /** + * Weaviate鍚戦噺搴撶鐞� * @author ageer - * Weaviate 鍚戦噺搴撶鐞� */ @Service @Slf4j @@ -37,38 +43,7 @@ private final ConfigService configService; @Override - public List<String> getQueryVector(String query, String kid) { - EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() - .apiKey("sk-xxx") - .baseUrl("https://api.pandarobot.chat/v1/") - .modelName(TEXT_EMBEDDING_3_SMALL) - .build(); - - // Filter simpleFilter = new IsEqualTo("kid", kid); - - // createSchema(kid); - - Embedding queryEmbedding = embeddingModel.embed("鑱婂ぉ琛ュ叏妯″瀷").content(); - EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder() - .queryEmbedding(queryEmbedding) - .maxResults(2) - // 娣诲姞杩囨护鏉′欢 - // .filter(simpleFilter) - .build(); - List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches(); - - - - List<String> results = new ArrayList<>(); - - matches.forEach(embeddingMatch -> { - results.add(embeddingMatch.embedded().text()); - }); - - return results; - } - - @Override + @PostConstruct public void createSchema(String kid) { String protocol = configService.getConfigValue("weaviate", "protocol"); String host = configService.getConfigValue("weaviate", "host"); @@ -84,24 +59,42 @@ } @Override - public void storeEmbeddings(List<String> chunkList,String kid,String docId,List<String> fids) { - EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() - .apiKey("sk-xxxx") - .baseUrl("https://api.pandarobot.chat/v1/") - .modelName(TEXT_EMBEDDING_3_SMALL) - .build(); - - chunkList.forEach(chunk -> { + public void storeEmbeddings(StoreEmbeddingBo storeEmbeddingBo) { + EmbeddingModel embeddingModel = getEmbeddingModel(storeEmbeddingBo.getModelName(), + storeEmbeddingBo.getApiKey(), storeEmbeddingBo.getBaseUrl()); + for (int i = 0; i < storeEmbeddingBo.getChunkList().size(); i++) { Map<String, Object> dataSchema = new HashMap<>(); - dataSchema.put("kid", kid); - dataSchema.put("docId", docId); - dataSchema.put("fid", fids.get(0)); - Response<Embedding> response = embeddingModel.embed(chunk); + dataSchema.put("kid", storeEmbeddingBo.getKid()); + dataSchema.put("docId", storeEmbeddingBo.getKid()); + dataSchema.put("fid", storeEmbeddingBo.getFids().get(i)); + Response<Embedding> response = embeddingModel.embed(storeEmbeddingBo.getChunkList().get(i)); Embedding embedding = response.content(); - TextSegment segment = TextSegment.from(chunk); + TextSegment segment = TextSegment.from(storeEmbeddingBo.getChunkList().get(i)); segment.metadata().putAll(dataSchema); embeddingStore.add(embedding,segment); + } + } + + @Override + public List<String> getQueryVector(QueryVectorBo queryVectorBo) { + EmbeddingModel embeddingModel = getEmbeddingModel(queryVectorBo.getModelName(), + queryVectorBo.getApiKey(), queryVectorBo.getBaseUrl()); + Filter simpleFilter = new IsEqualTo("kid", queryVectorBo.getKid()); + Embedding queryEmbedding = embeddingModel.embed(queryVectorBo.getQuery()).content(); + EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder() + .queryEmbedding(queryEmbedding) + .maxResults(queryVectorBo.getMaxResults()) + // 娣诲姞杩囨护鏉′欢 + .filter(simpleFilter) + .build(); + List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches(); + + List<String> results = new ArrayList<>(); + + matches.forEach(embeddingMatch -> { + results.add(embeddingMatch.embedded().text()); }); + return results; } @@ -128,4 +121,25 @@ embeddingStore.removeAll(simpleFilterByAnd); } + /** + * 鑾峰彇鍚戦噺妯″瀷 + */ + public EmbeddingModel getEmbeddingModel(String modelName,String apiKey,String baseUrl) { + EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder().build(); + if(TEXT_EMBEDDING_3_SMALL.toString().equals(modelName)) { + embeddingModel = OpenAiEmbeddingModel.builder() + .apiKey(apiKey) + .baseUrl(baseUrl) + .modelName(TEXT_EMBEDDING_3_SMALL) + .build(); + // TODO 娣诲姞鏋氫妇 + }else if("quentinz/bge-large-zh-v1.5".equals(modelName)) { + embeddingModel = OllamaEmbeddingModel.builder() + .baseUrl(baseUrl) + .modelName(modelName) + .build(); + } + return embeddingModel; + } + } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java index 5a86f95..c12ed44 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java @@ -25,6 +25,7 @@ import org.ruoyi.common.core.utils.file.MimeTypeUtils; import org.ruoyi.common.redis.utils.RedisUtils; import org.ruoyi.domain.bo.ChatSessionBo; +import org.ruoyi.domain.bo.QueryVectorBo; import org.ruoyi.domain.vo.ChatModelVo; import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.IChatModelService; @@ -166,7 +167,10 @@ // 鑾峰彇瀵硅瘽娑堟伅鍒楄〃 List<Message> messages = chatRequest.getMessages(); String sysPrompt = chatModelVo.getSystemPrompt(); + + if(StringUtils.isEmpty(sysPrompt)){ + // TODO 绯荤粺榛樿鎻愮ず璇�,鍚庣画浼氬鍔犳彁绀鸿瘝绠$悊 sysPrompt ="浣犳槸涓�涓敱RuoYI-AI寮�鍙戠殑浜哄伐鏅鸿兘鍔╂墜锛屽悕瀛楀彨鐔婄尗鍔╂墜銆備綘鎿呴暱涓嫳鏂囧璇濓紝鑳藉鐞嗚В骞跺鐞嗗悇绉嶉棶棰橈紝鎻愪緵瀹夊叏銆佹湁甯姪銆佸噯纭殑鍥炵瓟銆�" + "褰撳墠鏃堕棿锛�"+ DateUtils.getDate()+ "#娉ㄦ剰锛氬洖澶嶄箣鍓嶆敞鎰忕粨鍚堜笂涓嬫枃鍜屽伐鍏疯繑鍥炲唴瀹硅繘琛屽洖澶嶃��"; @@ -180,11 +184,20 @@ if(StringUtils.isNotEmpty(chatRequest.getKid())){ List<Message> knMessages = new ArrayList<>(); String content = messages.get(messages.size() - 1).getContent().toString(); - List<String> nearestList = vectorStoreService.getQueryVector(content, chatRequest.getKid()); + QueryVectorBo queryVectorBo = new QueryVectorBo(); + queryVectorBo.setQuery(content); + queryVectorBo.setKid(chatRequest.getKid()); + queryVectorBo.setApiKey(chatModelVo.getApiKey()); + queryVectorBo.setBaseUrl(chatModelVo.getApiHost()); + queryVectorBo.setModelName(chatModelVo.getModelName()); + // TODO 鏌ヨ鍚戦噺杩斿洖鏉℃暟,杩欓噷搴旇鏌ヨ鐭ヨ瘑搴撻厤缃� + queryVectorBo.setMaxResults(3); + List<String> nearestList = vectorStoreService.getQueryVector(queryVectorBo); for (String prompt : nearestList) { Message userMessage = Message.builder().content(prompt).role(Message.Role.USER).build(); knMessages.add(userMessage); } + // TODO 鎻愮ず璇�,杩欓噷搴旇鏌ヨ鐭ヨ瘑搴撻厤缃� Message userMessage = Message.builder().content(content + (!nearestList.isEmpty() ? "\n\n娉ㄦ剰锛氬洖绛旈棶棰樻椂锛岄』涓ユ牸鏍规嵁鎴戠粰浣犵殑绯荤粺涓婁笅鏂囧唴瀹瑰師鏂囪繘琛屽洖绛旓紝璇蜂笉瑕佽嚜宸卞彂鎸�,鍥炵瓟鏃朵繚鎸佸師鏉ユ枃鏈殑娈佃惤灞傜骇" : "")).role(Message.Role.USER).build(); knMessages.add(userMessage); messages.addAll(knMessages); diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java index 259e8a3..6cf6251 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java @@ -3,6 +3,7 @@ import cn.hutool.core.collection.CollUtil; import cn.hutool.core.util.RandomUtil; import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper; +import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper; import com.baomidou.mybatisplus.core.toolkit.Wrappers; import com.baomidou.mybatisplus.extension.plugins.pagination.Page; import lombok.RequiredArgsConstructor; @@ -14,15 +15,19 @@ import org.ruoyi.common.satoken.utils.LoginHelper; import org.ruoyi.core.page.PageQuery; import org.ruoyi.core.page.TableDataInfo; +import org.ruoyi.domain.ChatModel; import org.ruoyi.domain.KnowledgeAttach; import org.ruoyi.domain.KnowledgeFragment; import org.ruoyi.domain.KnowledgeInfo; import org.ruoyi.domain.bo.KnowledgeInfoBo; import org.ruoyi.domain.bo.KnowledgeInfoUploadBo; +import org.ruoyi.domain.bo.StoreEmbeddingBo; +import org.ruoyi.domain.vo.ChatModelVo; import org.ruoyi.domain.vo.KnowledgeInfoVo; import org.ruoyi.mapper.KnowledgeAttachMapper; import org.ruoyi.mapper.KnowledgeFragmentMapper; import org.ruoyi.mapper.KnowledgeInfoMapper; +import org.ruoyi.service.IChatModelService; import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.IKnowledgeInfoService; import org.slf4j.Logger; @@ -54,6 +59,8 @@ private final KnowledgeFragmentMapper fragmentMapper; private final KnowledgeAttachMapper attachMapper; + + private final IChatModelService chatModelService; /** * 鏌ヨ鐭ヨ瘑搴� @@ -219,10 +226,31 @@ knowledgeAttach.setContent(content); knowledgeAttach.setCreateTime(new Date()); attachMapper.insert(knowledgeAttach); - vectorStoreService.storeEmbeddings(chunkList,kid,docId,fids); + + // 閫氳繃kid鏌ヨ鐭ヨ瘑搴撲俊鎭� + KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery() + .eq(KnowledgeInfo::getKid, kid)); + + // 閫氳繃鍚戦噺妯″瀷鏌ヨ妯″瀷淇℃伅 + ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel()); + + StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo(); + storeEmbeddingBo.setKid(kid); + storeEmbeddingBo.setDocId(docId); + storeEmbeddingBo.setFids(fids); + storeEmbeddingBo.setChunkList(chunkList); + storeEmbeddingBo.setModelName(knowledgeInfoVo.getVectorModel()); + storeEmbeddingBo.setApiKey(chatModelVo.getApiKey()); + storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost()); + vectorStoreService.storeEmbeddings(storeEmbeddingBo); } + /** + * 妫�鏌ョ敤鎴锋槸鍚︽湁鍒犻櫎鐭ヨ瘑搴撴潈闄� + * + * @param knowledgeInfoList 鐭ヨ瘑搴撳垪琛� + */ public void check(List<KnowledgeInfoVo> knowledgeInfoList){ LoginUser loginUser = LoginHelper.getLoginUser(); for (KnowledgeInfoVo knowledgeInfoVo : knowledgeInfoList) { -- Gitblit v1.9.3