From 81c0bb5738d17e846faccb18169e7e53381d0884 Mon Sep 17 00:00:00 2001 From: ageer <ageerle@163.com> Date: 星期三, 07 五月 2025 22:53:21 +0800 Subject: [PATCH] feat: Weaviate改为langchain4j方式调用 --- /dev/null | 30 ---------- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java | 81 +++++++++++--------------- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java | 6 + ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java | 7 + ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java | 2 5 files changed, 44 insertions(+), 82 deletions(-) diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java index dbc1a9a..6edaa5d 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java @@ -2,9 +2,13 @@ import java.util.List; +/** + * @author ageer + * 向量库管理 + */ public interface VectorStoreService { - void storeEmbeddings(List<String> chunkList, String kid); + void storeEmbeddings(List<String> chunkList, String kid,String docId,List<String> fids); void removeByDocId(String kid,String docId); diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java deleted file mode 100644 index 8188881..0000000 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java +++ /dev/null @@ -1,13 +0,0 @@ -package org.ruoyi.service; - -import java.util.List; - -/** - * 文本向量化 - */ -public interface VectorizationService { - - List<List<Double>> 
batchVectorization(List<String> chunkList, String kid); - - List<Double> singleVectorization(String chunk, String kid); -} diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java index ca3d6e7..9d5b929 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java @@ -1,75 +1,63 @@ package org.ruoyi.service.impl; -import cn.hutool.core.util.RandomUtil; import dev.langchain4j.data.embedding.Embedding; import dev.langchain4j.data.segment.TextSegment; import dev.langchain4j.model.embedding.EmbeddingModel; import dev.langchain4j.model.openai.OpenAiEmbeddingModel; +import dev.langchain4j.model.output.Response; import dev.langchain4j.store.embedding.EmbeddingMatch; import dev.langchain4j.store.embedding.EmbeddingSearchRequest; import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.filter.Filter; import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo; import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; -import jakarta.annotation.PostConstruct; -import jakarta.annotation.Resource; +import lombok.RequiredArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.ruoyi.common.core.service.ConfigService; import org.ruoyi.service.VectorStoreService; -import org.ruoyi.service.IKnowledgeInfoService; -import org.springframework.context.annotation.Lazy; import org.springframework.stereotype.Service; -import org.testcontainers.weaviate.WeaviateContainer; +import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; +/** + * @author ageer + * Weaviate 
向量库管理 + */ @Service @Slf4j +@RequiredArgsConstructor public class WeaviateVectorStoreImpl implements VectorStoreService { - private volatile String protocol; - private volatile String host; - private volatile String className; + private EmbeddingStore<TextSegment> embeddingStore; - @Lazy - @Resource - private IKnowledgeInfoService knowledgeInfoService; - - @Lazy - @Resource - private ConfigService configService; - - private EmbeddingStore<TextSegment> embeddingStore; - - @PostConstruct - public void loadConfig() { - this.protocol = configService.getConfigValue("weaviate", "protocol"); - this.host = configService.getConfigValue("weaviate", "host"); - this.className = configService.getConfigValue("weaviate", "classname"); - } - + private final ConfigService configService; @Override public List<String> getQueryVector(String query, String kid) { EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() - .apiKey(System.getenv("OPENAI_API_KEY")) - .baseUrl(System.getenv("OPENAI_BASE_URL")) - .modelName("text-embedding-3-small") + .apiKey("sk-xxx") + .baseUrl("https://api.pandarobot.chat/v1/") + .modelName(TEXT_EMBEDDING_3_SMALL) .build(); - Filter simpleFilter = new IsEqualTo("kid", kid); + // Filter simpleFilter = new IsEqualTo("kid", kid); - Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content(); + // createSchema(kid); + + Embedding queryEmbedding = embeddingModel.embed("聊天补全模型").content(); EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder() .queryEmbedding(queryEmbedding) - .maxResults(3) + .maxResults(2) // 添加过滤条件 - .filter(simpleFilter) + // .filter(simpleFilter) .build(); List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches(); + + List<String> results = new ArrayList<>(); @@ -82,10 +70,11 @@ @Override public void createSchema(String kid) { - WeaviateContainer weaviate = new WeaviateContainer(protocol); - weaviate.start(); + String protocol = 
configService.getConfigValue("weaviate", "protocol"); + String host = configService.getConfigValue("weaviate", "host"); + String className = configService.getConfigValue("weaviate", "classname"); this.embeddingStore = WeaviateEmbeddingStore.builder() - .scheme("http") + .scheme(protocol) .host(host) .objectClass(className+kid) .scheme(protocol) @@ -95,25 +84,23 @@ } @Override - public void storeEmbeddings(List<String> chunkList,String kid) { + public void storeEmbeddings(List<String> chunkList,String kid,String docId,List<String> fids) { EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder() - .apiKey(System.getenv("OPENAI_API_KEY")) - .baseUrl(System.getenv("OPENAI_BASE_URL")) - .modelName("text-embedding-3-small") + .apiKey("sk-xxxx") + .baseUrl("https://api.pandarobot.chat/v1/") + .modelName(TEXT_EMBEDDING_3_SMALL) .build(); - // 生成文档id - String docId = RandomUtil.randomString(10); + chunkList.forEach(chunk -> { - // 生成知识块id - String fid = RandomUtil.randomString(10); Map<String, Object> dataSchema = new HashMap<>(); dataSchema.put("kid", kid); dataSchema.put("docId", docId); - dataSchema.put("fid", fid); + dataSchema.put("fid", fids.get(0)); + Response<Embedding> response = embeddingModel.embed(chunk); + Embedding embedding = response.content(); TextSegment segment = TextSegment.from(chunk); segment.metadata().putAll(dataSchema); - Embedding content = embeddingModel.embed(segment).content(); - embeddingStore.add(content); + embeddingStore.add(embedding,segment); }); } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java deleted file mode 100644 index 13537a3..0000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java +++ /dev/null @@ -1,49 +0,0 @@ -package org.ruoyi.chat.factory; - -import cn.hutool.core.util.StrUtil; -import jakarta.annotation.Resource; -import 
lombok.extern.slf4j.Slf4j; - -import org.ruoyi.chat.service.knowledge.BgeLargeVectorizationImpl; -import org.ruoyi.chat.service.knowledge.OpenAiVectorizationImpl; -import org.ruoyi.domain.vo.KnowledgeInfoVo; -import org.ruoyi.service.IKnowledgeInfoService; -import org.ruoyi.service.VectorizationService; -import org.springframework.context.annotation.Lazy; -import org.springframework.stereotype.Component; - -/** - * 文本向量化 - * @author huangkh - */ -@Component -@Slf4j -public class VectorizationFactory { - - private final OpenAiVectorizationImpl openAiVectorization; - - private final BgeLargeVectorizationImpl bgeLargeVectorization; - - @Lazy - @Resource - private IKnowledgeInfoService knowledgeInfoService; - - public VectorizationFactory(OpenAiVectorizationImpl openAiVectorization, BgeLargeVectorizationImpl bgeLargeVectorization) { - this.openAiVectorization = openAiVectorization; - this.bgeLargeVectorization = bgeLargeVectorization; - } - - public VectorizationService getEmbedding(String kid){ - String vectorModel = "text-embedding-3-small"; - if (StrUtil.isNotEmpty(kid)) { - KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid)); - if (knowledgeInfoVo != null && StrUtil.isNotEmpty(knowledgeInfoVo.getVectorModel())) { - vectorModel = knowledgeInfoVo.getVectorModel(); - } - } - return switch (vectorModel) { - case "quentinz/bge-large-zh-v1.5" -> bgeLargeVectorization; - default -> openAiVectorization; - }; - } -} diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java index 8dabcc2..5a86f95 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java @@ -56,8 +56,6 @@ private final VectorStoreService vectorStoreService; - private final VectorStoreService 
vectorStore; - private final IChatCostService chatCostService; private final IChatModelService chatModelService; diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java deleted file mode 100644 index 530614e..0000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java +++ /dev/null @@ -1,66 +0,0 @@ -package org.ruoyi.chat.service.knowledge; - -import io.github.ollama4j.OllamaAPI; -import io.github.ollama4j.models.embeddings.OllamaEmbeddingsRequestModel; -import jakarta.annotation.Resource; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.ruoyi.common.core.exception.ServiceException; -import org.ruoyi.domain.vo.ChatModelVo; -import org.ruoyi.domain.vo.KnowledgeInfoVo; -import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.IKnowledgeInfoService; -import org.ruoyi.service.VectorizationService; -import org.springframework.context.annotation.Lazy; -import org.springframework.stereotype.Component; - -import java.util.ArrayList; -import java.util.List; - -/** - * @author ageer - */ -@Component -@Slf4j -@RequiredArgsConstructor -public class BgeLargeVectorizationImpl implements VectorizationService { - - @Lazy - @Resource - private IKnowledgeInfoService knowledgeInfoService; - - @Lazy - @Resource - private final IChatModelService chatModelService; - - @Override - public List<List<Double>> batchVectorization(List<String> chunkList, String kid) { - - KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid)); - - ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel()); - - OllamaAPI api = new OllamaAPI(chatModelVo.getApiHost()); - - List<Double> doubleVector; - List<List<Double>> vectorList = new ArrayList<>(); - try { - for (String chunk : 
chunkList) { - doubleVector = api.generateEmbeddings(new OllamaEmbeddingsRequestModel(knowledgeInfoVo.getVectorModel(), chunk)); - vectorList.add(doubleVector); - } - } catch (Exception e) { - throw new ServiceException("文本向量化异常："+e.getMessage()); - } - return vectorList; - } - - @Override - public List<Double> singleVectorization(String chunk, String kid) { - List<String> chunkList = new ArrayList<>(); - chunkList.add(chunk); - List<List<Double>> vectorList = batchVectorization(chunkList, kid); - return vectorList.get(0); - } - -} diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java index 33d9c11..259e8a3 100644 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java +++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java @@ -25,6 +25,8 @@ import org.ruoyi.mapper.KnowledgeInfoMapper; import org.ruoyi.service.VectorStoreService; import org.ruoyi.service.IKnowledgeInfoService; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; import org.springframework.stereotype.Service; import org.springframework.transaction.annotation.Transactional; import org.springframework.web.multipart.MultipartFile; @@ -42,6 +44,7 @@ @Service public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService { + private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class); private final KnowledgeInfoMapper baseMapper; private final VectorStoreService vectorStoreService; @@ -211,12 +214,12 @@ } fragmentMapper.insertBatch(knowledgeFragmentList); } catch (IOException e) { - e.printStackTrace(); + log.error("保存知识库信息失败！{}", e.getMessage()); } knowledgeAttach.setContent(content); knowledgeAttach.setCreateTime(new Date()); attachMapper.insert(knowledgeAttach); - 
vectorStoreService.storeEmbeddings(chunkList,kid); + vectorStoreService.storeEmbeddings(chunkList,kid,docId,fids); } diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java deleted file mode 100644 index 8b1e36e..0000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java +++ /dev/null @@ -1,107 +0,0 @@ -package org.ruoyi.chat.service.knowledge; - -import jakarta.annotation.Resource; -import lombok.Getter; -import lombok.RequiredArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.ruoyi.chat.config.ChatConfig; -import org.ruoyi.common.chat.entity.embeddings.Embedding; -import org.ruoyi.common.chat.entity.embeddings.EmbeddingResponse; -import org.ruoyi.common.chat.openai.OpenAiStreamClient; -import org.ruoyi.domain.vo.ChatModelVo; -import org.ruoyi.domain.vo.KnowledgeInfoVo; -import org.ruoyi.service.IChatModelService; -import org.ruoyi.service.IKnowledgeInfoService; -import org.ruoyi.service.VectorizationService; -import org.springframework.context.annotation.Lazy; -import org.springframework.stereotype.Component; - -import java.math.BigDecimal; -import java.util.ArrayList; -import java.util.List; -import java.util.stream.Collectors; - -@Component -@Slf4j -@RequiredArgsConstructor -public class OpenAiVectorizationImpl implements VectorizationService { - - @Lazy - @Resource - private IKnowledgeInfoService knowledgeInfoService; - - @Lazy - @Resource - private IChatModelService chatModelService; - - @Getter - private OpenAiStreamClient openAiStreamClient; - - private final ChatConfig chatConfig; - - @Override - public List<List<Double>> batchVectorization(List<String> chunkList, String kid) { - List<List<Double>> vectorList; - // 获取知识库信息 - KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid)); - 
if(knowledgeInfoVo == null){ - log.warn("知识库不存在:请查检ID {}",kid); - vectorList=new ArrayList<>(); - vectorList.add(new ArrayList<>()); - return vectorList; - } - ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel()); - String apiHost= chatModelVo.getApiHost(); - String apiKey= chatModelVo.getApiKey(); - openAiStreamClient = ChatConfig.createOpenAiStreamClient(apiHost,apiKey); - Embedding embedding = buildEmbedding(chunkList, knowledgeInfoVo); - EmbeddingResponse embeddings = openAiStreamClient.embeddings(embedding); - // 处理 OpenAI 返回的嵌入数据 - vectorList = processOpenAiEmbeddings(embeddings); - return vectorList; - } - - /** - * 构建 Embedding 对象 - */ - private Embedding buildEmbedding(List<String> chunkList, KnowledgeInfoVo knowledgeInfoVo) { - return Embedding.builder() - .input(chunkList) - .model(knowledgeInfoVo.getVectorModel()) - .build(); - } - - /** - * 处理 OpenAI 返回的嵌入数据 - */ - private List<List<Double>> processOpenAiEmbeddings(EmbeddingResponse embeddings) { - List<List<Double>> vectorList = new ArrayList<>(); - - embeddings.getData().forEach(data -> { - List<BigDecimal> vector = data.getEmbedding(); - List<Double> doubleVector = convertToDoubleList(vector); - vectorList.add(doubleVector); - }); - - return vectorList; - } - - /** - * 将 BigDecimal 转换为 Double 列表 - */ - private List<Double> convertToDoubleList(List<BigDecimal> vector) { - return vector.stream() - .map(BigDecimal::doubleValue) - .collect(Collectors.toList()); - } - - - @Override - public List<Double> singleVectorization(String chunk, String kid) { - List<String> chunkList = new ArrayList<>(); - chunkList.add(chunk); - List<List<Double>> vectorList = batchVectorization(chunkList, kid); - return vectorList.get(0); - } - -} diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java deleted file 
mode 100644 index b38a634..0000000 --- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java +++ /dev/null @@ -1,30 +0,0 @@ -package org.ruoyi.chat.service.knowledge; - -import lombok.AllArgsConstructor; -import lombok.extern.slf4j.Slf4j; -import org.ruoyi.chat.factory.VectorizationFactory; -import org.ruoyi.service.VectorizationService; -import org.springframework.context.annotation.Primary; -import org.springframework.stereotype.Component; - -import java.util.List; - -@Component -@Slf4j -@Primary -@AllArgsConstructor -public class VectorizationWrapper implements VectorizationService { - - private final VectorizationFactory vectorizationFactory; - @Override - public List<List<Double>> batchVectorization(List<String> chunkList, String kid) { - VectorizationService embedding = vectorizationFactory.getEmbedding(kid); - return embedding.batchVectorization(chunkList, kid); - } - - @Override - public List<Double> singleVectorization(String chunk, String kid) { - VectorizationService embedding = vectorizationFactory.getEmbedding(kid); - return embedding.singleVectorization(chunk, kid); - } -} -- Gitblit v1.9.3