From e1dea1d9e226933d8273148d53c6e2d754129ba4 Mon Sep 17 00:00:00 2001
From: ageer <ageerle@163.com>
Date: 星期日, 11 五月 2025 17:25:08 +0800
Subject: [PATCH] Merge remote-tracking branch 'origin/main'

---
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java  |    9 ++--
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java       |   17 ++++++++
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java        |   41 ++++++++++++++++++++
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java                   |   11 +++++
 ruoyi-modules-api/ruoyi-knowledge-api/pom.xml                                                          |   12 ++++++
 ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java |    9 +++-
 6 files changed, 93 insertions(+), 6 deletions(-)

diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml
index f6412e1..83eac39 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml
@@ -103,6 +103,18 @@
             <version>1.19.6</version>
         </dependency>
 
+        <dependency>
+            <groupId>dev.langchain4j</groupId>
+            <artifactId>langchain4j-document-parser-apache-tika</artifactId>
+        </dependency>
+
+        <!-- commons-io pinned explicitly; presumably to match the version Apache Tika needs (TODO confirm) -->
+        <dependency>
+            <groupId>commons-io</groupId>
+            <artifactId>commons-io</artifactId>
+            <version>2.17.0</version>
+        </dependency>
+
     </dependencies>
 
 </project>
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java
new file mode 100644
index 0000000..b47ce11
--- /dev/null
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java
@@ -0,0 +1,41 @@
+package org.ruoyi.chain.loader;
+
+import dev.langchain4j.data.document.Document;
+import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.chain.split.TextSplitter;
+import org.ruoyi.common.core.exception.UtilException;
+import org.springframework.stereotype.Component;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+/** Loads Excel workbooks: extracts text with Apache Tika and delegates chunking to a TextSplitter. */
+@Component
+@AllArgsConstructor
+@Slf4j
+public class ExcelFileLoader implements ResourceLoader {
+    private final TextSplitter textSplitter;
+    private static final int DEFAULT_BUFFER_SIZE = 8192;
+
+    /** Returns the text Tika parses from the stream; the stream is closed on exit, failures become UtilException. */
+    @Override
+    public String getContent(InputStream inputStream) {
+        try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
+            Document document = new ApacheTikaDocumentParser().parse(bufferedStream);
+            return document.text();
+        } catch (IOException e) {
+            throw new UtilException("Excel文件流读取失败", e);
+        } catch (RuntimeException e) {
+            throw new UtilException("Excel内容解析异常", e);
+        }
+    }
+
+    /** Splits already extracted content into chunks by delegating to the injected splitter. */
+    @Override
+    public List<String> getChunkList(String content, String kid) {
+        return textSplitter.split(content, kid);
+    }
+}
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
index aa72d76..ec33c66 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
@@ -1,10 +1,7 @@
 package org.ruoyi.chain.loader;
 
 import lombok.AllArgsConstructor;
-import org.ruoyi.chain.split.CharacterTextSplitter;
-import org.ruoyi.chain.split.CodeTextSplitter;
-import org.ruoyi.chain.split.MarkdownTextSplitter;
-import org.ruoyi.chain.split.TokenTextSplitter;
+import org.ruoyi.chain.split.*;
 
 import org.ruoyi.constant.FileType;
 import org.springframework.stereotype.Component;
@@ -16,6 +13,8 @@
     private final CodeTextSplitter codeTextSplitter;
     private final MarkdownTextSplitter markdownTextSplitter;
     private final TokenTextSplitter tokenTextSplitter;
+    private final ExcelTextSplitter excelTextSplitter;
+
     public ResourceLoader getLoaderByFileType(String fileType){
         if (FileType.isTextFile(fileType)){
             return new TextFileLoader(characterTextSplitter);
@@ -25,6 +24,8 @@
             return new PdfFileLoader(characterTextSplitter);
         } else if (FileType.isMdFile(fileType)) {
             return new MarkDownFileLoader(markdownTextSplitter);
+        }else if (FileType.isExcel(fileType)) {
+            return new ExcelFileLoader(excelTextSplitter);
         }else if (FileType.isCodeFile(fileType)) {
             return new CodeFileLoader(codeTextSplitter);
         }else {
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java
new file mode 100644
index 0000000..cc2b5f0
--- /dev/null
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java
@@ -0,0 +1,56 @@
+package org.ruoyi.chain.split;
+
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits text extracted from an Excel workbook into chunks made of whole lines.
+ *
+ * <p>Non-blank lines are packed into a chunk, joined by a newline, until adding the next line
+ * would exceed {@link #MAX_CHUNK_CHARS}; a single line longer than the limit becomes its own chunk.
+ */
+@Component
+@AllArgsConstructor
+@Slf4j
+public class ExcelTextSplitter implements TextSplitter{
+    /** Upper bound, in characters, for a chunk built from more than one line. */
+    private static final int MAX_CHUNK_CHARS = 1000;
+
+    /**
+     * Splits {@code content} into line-aligned chunks.
+     *
+     * @param content text to split; {@code null} or blank yields an empty list
+     * @param kid identifier passed by the caller (presumably the knowledge base id); not used here
+     * @return the chunks in source order, never {@code null}
+     */
+    @Override
+    public List<String> split(String content, String kid) {
+        List<String> chunks = new ArrayList<>();
+        if (content == null || content.isBlank()) {
+            return chunks;
+        }
+        StringBuilder current = new StringBuilder();
+        for (String line : content.split("\\R")) {
+            if (line.isBlank()) {
+                continue;
+            }
+            // Flush before the pending chunk would grow past the limit.
+            if (current.length() > 0 && current.length() + 1 + line.length() > MAX_CHUNK_CHARS) {
+                chunks.add(current.toString());
+                current.setLength(0);
+            }
+            if (current.length() > 0) {
+                current.append('\n');
+            }
+            current.append(line);
+        }
+        if (current.length() > 0) {
+            chunks.add(current.toString());
+        }
+        return chunks;
+    }
+}
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
index aa14167..e939508 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
@@ -7,6 +7,8 @@
     public static final String DOC = "doc";
     public static final String DOCX = "docx";
     public static final String PDF = "pdf";
+    public static final String XLS = "xls";
+    public static final String XLSX = "xlsx";
 
     public static final String LOG = "log";
     public static final String XML = "xml";
@@ -88,4 +90,13 @@
         }
     }
 
+    /**
+     * Tells whether a file extension denotes an Excel workbook.
+     * @param type extension to test, compared case-insensitively; may be null
+     * @return true for "xls" or "xlsx"; false for anything else, including null
+     */
+    public static boolean isExcel(String type){
+        return XLS.equalsIgnoreCase(type) || XLSX.equalsIgnoreCase(type);
+    }
+
 }
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
index 294342a..d74176a 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
@@ -11,6 +11,7 @@
 import dev.langchain4j.store.embedding.EmbeddingStore;
 import dev.langchain4j.store.embedding.filter.Filter;
 import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
 import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
 import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
 import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
@@ -39,11 +40,11 @@
 
     private final ConfigService configService;
 
-    Map<String,EmbeddingStore<TextSegment>> storeMap;
+    Map<String,EmbeddingStore<TextSegment>> storeMap = new HashMap<>();
 
     @Override
     public void createSchema(String kid,String modelName) {
-        EmbeddingStore<TextSegment> embeddingStore = WeaviateEmbeddingStore.builder().build();
+        EmbeddingStore<TextSegment> embeddingStore;
         switch (modelName) {
             case "weaviate" -> {
                 String protocol = configService.getConfigValue("weaviate", "protocol");
@@ -78,6 +79,10 @@
                         .collectionName(collectionName)
                         .build();
             }
+            default -> {
+                // Any other value: fall back to an in-memory embedding store
+                embeddingStore = new InMemoryEmbeddingStore<>();
+            }
         }
         storeMap.put(kid,embeddingStore);
     }

--
Gitblit v1.9.3