From 81c0bb5738d17e846faccb18169e7e53381d0884 Mon Sep 17 00:00:00 2001
From: ageer <ageerle@163.com>
Date: 星期三, 07 五月 2025 22:53:21 +0800
Subject: [PATCH] feat: Weaviate改为langchain4j方式调用

---
 /dev/null                                                                                               |   30 ----------
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java |   81 +++++++++++---------------
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java           |    6 +
 ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java   |    7 +
 ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java             |    2 
 5 files changed, 44 insertions(+), 82 deletions(-)

diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java
index dbc1a9a..6edaa5d 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorStoreService.java
@@ -2,9 +2,13 @@
 
 import java.util.List;
 
+/**
+ * @author ageer
+ * 向量库管理
+ */
 public interface VectorStoreService {
 
-    void storeEmbeddings(List<String> chunkList, String kid);
+    void storeEmbeddings(List<String> chunkList, String kid,String docId,List<String> fids);
 
     void removeByDocId(String kid,String docId);
 
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java
deleted file mode 100644
index 8188881..0000000
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/VectorizationService.java
+++ /dev/null
@@ -1,13 +0,0 @@
-package org.ruoyi.service;
-
-import java.util.List;
-
-/**
- * 文本向量化
- */
-public interface VectorizationService {
-
-    List<List<Double>> batchVectorization(List<String> chunkList, String kid);
-
-    List<Double> singleVectorization(String chunk, String kid);
-}
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java
index ca3d6e7..9d5b929 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/WeaviateVectorStoreImpl.java
@@ -1,75 +1,63 @@
 package org.ruoyi.service.impl;
 
-import cn.hutool.core.util.RandomUtil;
 import dev.langchain4j.data.embedding.Embedding;
 import dev.langchain4j.data.segment.TextSegment;
 import dev.langchain4j.model.embedding.EmbeddingModel;
 import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
+import dev.langchain4j.model.output.Response;
 import dev.langchain4j.store.embedding.EmbeddingMatch;
 import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
 import dev.langchain4j.store.embedding.EmbeddingStore;
 import dev.langchain4j.store.embedding.filter.Filter;
 import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
 import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
-import jakarta.annotation.PostConstruct;
-import jakarta.annotation.Resource;
+import lombok.RequiredArgsConstructor;
 import lombok.extern.slf4j.Slf4j;
 import org.ruoyi.common.core.service.ConfigService;
 import org.ruoyi.service.VectorStoreService;
-import org.ruoyi.service.IKnowledgeInfoService;
-import org.springframework.context.annotation.Lazy;
 import org.springframework.stereotype.Service;
-import org.testcontainers.weaviate.WeaviateContainer;
 
+import static dev.langchain4j.model.openai.OpenAiEmbeddingModelName.TEXT_EMBEDDING_3_SMALL;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 
+/**
+ * @author ageer
+ * Weaviate 向量库管理
+ */
 @Service
 @Slf4j
+@RequiredArgsConstructor
 public class WeaviateVectorStoreImpl implements VectorStoreService {
 
-    private volatile String protocol;
-    private volatile String host;
-    private volatile String className;
+    private EmbeddingStore<TextSegment> embeddingStore;
 
-    @Lazy
-    @Resource
-    private IKnowledgeInfoService knowledgeInfoService;
-
-    @Lazy
-    @Resource
-    private ConfigService configService;
-
-    private  EmbeddingStore<TextSegment> embeddingStore;
-
-    @PostConstruct
-    public void loadConfig() {
-        this.protocol = configService.getConfigValue("weaviate", "protocol");
-        this.host = configService.getConfigValue("weaviate", "host");
-        this.className = configService.getConfigValue("weaviate", "classname");
-    }
-
+    private final ConfigService configService;
 
     @Override
     public List<String> getQueryVector(String query, String kid) {
         EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
-                .apiKey(System.getenv("OPENAI_API_KEY"))
-                .baseUrl(System.getenv("OPENAI_BASE_URL"))
-                .modelName("text-embedding-3-small")
+                .apiKey("sk-xxx")
+                .baseUrl("https://api.pandarobot.chat/v1/")
+                .modelName(TEXT_EMBEDDING_3_SMALL)
                 .build();
 
-        Filter simpleFilter = new IsEqualTo("kid", kid);
+      //  Filter simpleFilter = new IsEqualTo("kid", kid);
 
-        Embedding queryEmbedding = embeddingModel.embed("What is your favourite sport?").content();
+     //   createSchema(kid);
+
+        Embedding queryEmbedding = embeddingModel.embed("聊天补全模型").content();
         EmbeddingSearchRequest embeddingSearchRequest = EmbeddingSearchRequest.builder()
                 .queryEmbedding(queryEmbedding)
-                .maxResults(3)
+                .maxResults(2)
                 // 添加过滤条件
-                .filter(simpleFilter)
+             //   .filter(simpleFilter)
                 .build();
         List<EmbeddingMatch<TextSegment>> matches = embeddingStore.search(embeddingSearchRequest).matches();
+
+
 
         List<String> results = new ArrayList<>();
 
@@ -82,10 +70,11 @@
 
     @Override
     public void createSchema(String kid) {
-        WeaviateContainer weaviate = new WeaviateContainer(protocol);
-        weaviate.start();
+        String protocol = configService.getConfigValue("weaviate", "protocol");
+        String host = configService.getConfigValue("weaviate", "host");
+        String className = configService.getConfigValue("weaviate", "classname");
         this.embeddingStore = WeaviateEmbeddingStore.builder()
-                .scheme("http")
+                .scheme(protocol)
                 .host(host)
                 .objectClass(className+kid)
                 .scheme(protocol)
@@ -95,25 +84,23 @@
     }
 
     @Override
-    public void storeEmbeddings(List<String> chunkList,String kid) {
+    public void storeEmbeddings(List<String> chunkList,String kid,String docId,List<String> fids) {
         EmbeddingModel embeddingModel = OpenAiEmbeddingModel.builder()
-                .apiKey(System.getenv("OPENAI_API_KEY"))
-                .baseUrl(System.getenv("OPENAI_BASE_URL"))
-                .modelName("text-embedding-3-small")
+                .apiKey("sk-xxxx")
+                .baseUrl("https://api.pandarobot.chat/v1/")
+                .modelName(TEXT_EMBEDDING_3_SMALL)
                 .build();
-        // 生成文档id
-        String docId = RandomUtil.randomString(10);
+
         chunkList.forEach(chunk -> {
-            // 生成知识块id
-            String fid = RandomUtil.randomString(10);
             Map<String, Object> dataSchema = new HashMap<>();
             dataSchema.put("kid", kid);
             dataSchema.put("docId", docId);
-            dataSchema.put("fid", fid);
+            dataSchema.put("fid", fids.get(0));
+            Response<Embedding> response = embeddingModel.embed(chunk);
+            Embedding embedding = response.content();
             TextSegment segment = TextSegment.from(chunk);
             segment.metadata().putAll(dataSchema);
-            Embedding content = embeddingModel.embed(segment).content();
-            embeddingStore.add(content);
+            embeddingStore.add(embedding,segment);
         });
     }
 
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java
deleted file mode 100644
index 13537a3..0000000
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/factory/VectorizationFactory.java
+++ /dev/null
@@ -1,49 +0,0 @@
-package org.ruoyi.chat.factory;
-
-import cn.hutool.core.util.StrUtil;
-import jakarta.annotation.Resource;
-import lombok.extern.slf4j.Slf4j;
-
-import org.ruoyi.chat.service.knowledge.BgeLargeVectorizationImpl;
-import org.ruoyi.chat.service.knowledge.OpenAiVectorizationImpl;
-import org.ruoyi.domain.vo.KnowledgeInfoVo;
-import org.ruoyi.service.IKnowledgeInfoService;
-import org.ruoyi.service.VectorizationService;
-import org.springframework.context.annotation.Lazy;
-import org.springframework.stereotype.Component;
-
-/**
- * 文本向量化
- * @author huangkh
- */
-@Component
-@Slf4j
-public class VectorizationFactory {
-
-    private final OpenAiVectorizationImpl openAiVectorization;
-
-    private final BgeLargeVectorizationImpl bgeLargeVectorization;
-
-    @Lazy
-    @Resource
-    private IKnowledgeInfoService knowledgeInfoService;
-
-    public VectorizationFactory(OpenAiVectorizationImpl openAiVectorization, BgeLargeVectorizationImpl bgeLargeVectorization) {
-        this.openAiVectorization = openAiVectorization;
-        this.bgeLargeVectorization = bgeLargeVectorization;
-    }
-
-    public VectorizationService getEmbedding(String kid){
-        String vectorModel = "text-embedding-3-small";
-        if (StrUtil.isNotEmpty(kid)) {
-            KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
-            if (knowledgeInfoVo != null && StrUtil.isNotEmpty(knowledgeInfoVo.getVectorModel())) {
-                vectorModel = knowledgeInfoVo.getVectorModel();
-            }
-        }
-        return switch (vectorModel) {
-            case "quentinz/bge-large-zh-v1.5" -> bgeLargeVectorization;
-            default -> openAiVectorization;
-        };
-    }
-}
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java
index 8dabcc2..5a86f95 100644
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java
+++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/chat/impl/SseServiceImpl.java
@@ -56,8 +56,6 @@
 
     private final VectorStoreService vectorStoreService;
 
-    private final VectorStoreService vectorStore;
-
     private final IChatCostService chatCostService;
 
     private final IChatModelService chatModelService;
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java
deleted file mode 100644
index 530614e..0000000
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/BgeLargeVectorizationImpl.java
+++ /dev/null
@@ -1,66 +0,0 @@
-package org.ruoyi.chat.service.knowledge;
-
-import io.github.ollama4j.OllamaAPI;
-import io.github.ollama4j.models.embeddings.OllamaEmbeddingsRequestModel;
-import jakarta.annotation.Resource;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.ruoyi.common.core.exception.ServiceException;
-import org.ruoyi.domain.vo.ChatModelVo;
-import org.ruoyi.domain.vo.KnowledgeInfoVo;
-import org.ruoyi.service.IChatModelService;
-import org.ruoyi.service.IKnowledgeInfoService;
-import org.ruoyi.service.VectorizationService;
-import org.springframework.context.annotation.Lazy;
-import org.springframework.stereotype.Component;
-
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * @author ageer
- */
-@Component
-@Slf4j
-@RequiredArgsConstructor
-public class BgeLargeVectorizationImpl implements VectorizationService {
-
-    @Lazy
-    @Resource
-    private IKnowledgeInfoService knowledgeInfoService;
-
-    @Lazy
-    @Resource
-    private final IChatModelService chatModelService;
-
-    @Override
-    public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
-
-        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
-
-        ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
-
-        OllamaAPI api = new OllamaAPI(chatModelVo.getApiHost());
-
-        List<Double> doubleVector;
-        List<List<Double>> vectorList = new ArrayList<>();
-        try {
-            for (String chunk : chunkList) {
-                doubleVector = api.generateEmbeddings(new OllamaEmbeddingsRequestModel(knowledgeInfoVo.getVectorModel(), chunk));
-                vectorList.add(doubleVector);
-            }
-        } catch (Exception e) {
-            throw new ServiceException("文本向量化异常:"+e.getMessage());
-        }
-        return vectorList;
-    }
-
-    @Override
-    public List<Double> singleVectorization(String chunk, String kid) {
-        List<String> chunkList = new ArrayList<>();
-        chunkList.add(chunk);
-        List<List<Double>> vectorList = batchVectorization(chunkList, kid);
-        return vectorList.get(0);
-    }
-
-}
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java
index 33d9c11..259e8a3 100644
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java
+++ b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java
@@ -25,6 +25,8 @@
 import org.ruoyi.mapper.KnowledgeInfoMapper;
 import org.ruoyi.service.VectorStoreService;
 import org.ruoyi.service.IKnowledgeInfoService;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 import org.springframework.stereotype.Service;
 import org.springframework.transaction.annotation.Transactional;
 import org.springframework.web.multipart.MultipartFile;
@@ -42,6 +44,7 @@
 @Service
 public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
 
+    private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
     private final KnowledgeInfoMapper baseMapper;
 
     private final VectorStoreService vectorStoreService;
@@ -211,12 +214,12 @@
             }
             fragmentMapper.insertBatch(knowledgeFragmentList);
         } catch (IOException e) {
-            e.printStackTrace();
+            log.error("保存知识库信息失败!{}", e.getMessage());
         }
         knowledgeAttach.setContent(content);
         knowledgeAttach.setCreateTime(new Date());
         attachMapper.insert(knowledgeAttach);
-        vectorStoreService.storeEmbeddings(chunkList,kid);
+        vectorStoreService.storeEmbeddings(chunkList,kid,docId,fids);
     }
 
 
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java
deleted file mode 100644
index 8b1e36e..0000000
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/OpenAiVectorizationImpl.java
+++ /dev/null
@@ -1,107 +0,0 @@
-package org.ruoyi.chat.service.knowledge;
-
-import jakarta.annotation.Resource;
-import lombok.Getter;
-import lombok.RequiredArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.ruoyi.chat.config.ChatConfig;
-import org.ruoyi.common.chat.entity.embeddings.Embedding;
-import org.ruoyi.common.chat.entity.embeddings.EmbeddingResponse;
-import org.ruoyi.common.chat.openai.OpenAiStreamClient;
-import org.ruoyi.domain.vo.ChatModelVo;
-import org.ruoyi.domain.vo.KnowledgeInfoVo;
-import org.ruoyi.service.IChatModelService;
-import org.ruoyi.service.IKnowledgeInfoService;
-import org.ruoyi.service.VectorizationService;
-import org.springframework.context.annotation.Lazy;
-import org.springframework.stereotype.Component;
-
-import java.math.BigDecimal;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.stream.Collectors;
-
-@Component
-@Slf4j
-@RequiredArgsConstructor
-public class OpenAiVectorizationImpl implements VectorizationService {
-
-    @Lazy
-    @Resource
-    private IKnowledgeInfoService knowledgeInfoService;
-
-    @Lazy
-    @Resource
-    private IChatModelService chatModelService;
-
-    @Getter
-    private OpenAiStreamClient openAiStreamClient;
-
-    private final ChatConfig chatConfig;
-
-    @Override
-    public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
-        List<List<Double>> vectorList;
-        // 获取知识库信息
-        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
-        if(knowledgeInfoVo == null){
-            log.warn("知识库不存在:请查检ID {}",kid);
-            vectorList=new ArrayList<>();
-            vectorList.add(new ArrayList<>());
-            return vectorList;
-        }
-        ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getVectorModel());
-        String apiHost= chatModelVo.getApiHost();
-        String apiKey= chatModelVo.getApiKey();
-        openAiStreamClient = ChatConfig.createOpenAiStreamClient(apiHost,apiKey);
-        Embedding embedding = buildEmbedding(chunkList, knowledgeInfoVo);
-        EmbeddingResponse embeddings = openAiStreamClient.embeddings(embedding);
-        // 处理 OpenAI 返回的嵌入数据
-        vectorList = processOpenAiEmbeddings(embeddings);
-        return vectorList;
-    }
-
-    /**
-     * 构建 Embedding 对象
-     */
-    private Embedding buildEmbedding(List<String> chunkList, KnowledgeInfoVo knowledgeInfoVo) {
-        return Embedding.builder()
-                .input(chunkList)
-                .model(knowledgeInfoVo.getVectorModel())
-                .build();
-    }
-
-    /**
-     * 处理 OpenAI 返回的嵌入数据
-     */
-    private List<List<Double>> processOpenAiEmbeddings(EmbeddingResponse embeddings) {
-        List<List<Double>> vectorList = new ArrayList<>();
-
-        embeddings.getData().forEach(data -> {
-            List<BigDecimal> vector = data.getEmbedding();
-            List<Double> doubleVector = convertToDoubleList(vector);
-            vectorList.add(doubleVector);
-        });
-
-        return vectorList;
-    }
-
-    /**
-     * 将 BigDecimal 转换为 Double 列表
-     */
-    private List<Double> convertToDoubleList(List<BigDecimal> vector) {
-        return vector.stream()
-                .map(BigDecimal::doubleValue)
-                .collect(Collectors.toList());
-    }
-
-
-    @Override
-    public List<Double> singleVectorization(String chunk, String kid) {
-        List<String> chunkList = new ArrayList<>();
-        chunkList.add(chunk);
-        List<List<Double>> vectorList = batchVectorization(chunkList, kid);
-        return vectorList.get(0);
-    }
-
-}
diff --git a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java b/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java
deleted file mode 100644
index b38a634..0000000
--- a/ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/VectorizationWrapper.java
+++ /dev/null
@@ -1,30 +0,0 @@
-package org.ruoyi.chat.service.knowledge;
-
-import lombok.AllArgsConstructor;
-import lombok.extern.slf4j.Slf4j;
-import org.ruoyi.chat.factory.VectorizationFactory;
-import org.ruoyi.service.VectorizationService;
-import org.springframework.context.annotation.Primary;
-import org.springframework.stereotype.Component;
-
-import java.util.List;
-
-@Component
-@Slf4j
-@Primary
-@AllArgsConstructor
-public class VectorizationWrapper implements VectorizationService {
-
-    private final VectorizationFactory vectorizationFactory;
-    @Override
-    public List<List<Double>> batchVectorization(List<String> chunkList, String kid) {
-        VectorizationService embedding = vectorizationFactory.getEmbedding(kid);
-        return embedding.batchVectorization(chunkList, kid);
-    }
-
-    @Override
-    public List<Double> singleVectorization(String chunk, String kid) {
-        VectorizationService embedding = vectorizationFactory.getEmbedding(kid);
-        return embedding.singleVectorization(chunk, kid);
-    }
-}

--
Gitblit v1.9.3