From e1dea1d9e226933d8273148d53c6e2d754129ba4 Mon Sep 17 00:00:00 2001 From: ageer <ageerle@163.com> Date: 星期日, 11 五月 2025 17:25:08 +0800 Subject: [PATCH] Merge remote-tracking branch 'origin/main' --- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java | 9 ++-- ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java | 17 ++++++++ ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java | 41 ++++++++++++++++++++ ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java | 11 +++++ ruoyi-modules-api/ruoyi-knowledge-api/pom.xml | 12 ++++++ ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java | 9 +++- 6 files changed, 93 insertions(+), 6 deletions(-) diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml index f6412e1..83eac39 100644 --- a/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml +++ b/ruoyi-modules-api/ruoyi-knowledge-api/pom.xml @@ -103,6 +103,18 @@ <version>1.19.6</version> </dependency> + <dependency> + <groupId>dev.langchain4j</groupId> + <artifactId>langchain4j-document-parser-apache-tika</artifactId> + </dependency> + + <!-- ruoyi-knowledge-api/pom.xml --> + <dependency> + <groupId>commons-io</groupId> + <artifactId>commons-io</artifactId> + <version>2.17.0</version> + </dependency> + </dependencies> </project> diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java new file mode 100644 index 0000000..b47ce11 --- /dev/null +++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java @@ -0,0 +1,41 @@ +package org.ruoyi.chain.loader; + +import dev.langchain4j.data.document.Document; +import 
dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.ruoyi.chain.split.TextSplitter;
+import org.ruoyi.common.core.exception.UtilException;
+import org.springframework.stereotype.Component;
+
+import java.io.BufferedInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.List;
+/** Loads Excel workbooks: Apache Tika extracts the text and the injected splitter chunks it. */
+@Component
+@AllArgsConstructor
+@Slf4j
+public class ExcelFileLoader implements ResourceLoader {
+    private final TextSplitter textSplitter;
+    private static final int DEFAULT_BUFFER_SIZE = 8192;
+    /**
+     * Returns the Tika-extracted text; try-with-resources closes the wrapper and thus {@code inputStream}.
+     */
+    @Override
+    public String getContent(InputStream inputStream) {
+        try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
+            Document document = new ApacheTikaDocumentParser().parse(bufferedStream);
+            return document.text();
+        } catch (IOException e) {
+            throw new UtilException("Excel文件流读取失败", e); // "failed to read the Excel file stream"
+        } catch (RuntimeException e) {
+            throw new UtilException("Excel内容解析异常", e); // "failed to parse the Excel content"
+        }
+    }
+
+    @Override
+    public List<String> getChunkList(String content, String kid) {
+        return textSplitter.split(content, kid);
+    }
+}
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
index aa72d76..ec33c66 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
@@ -1,10 +1,7 @@
 package org.ruoyi.chain.loader;
 
 import lombok.AllArgsConstructor;
-import org.ruoyi.chain.split.CharacterTextSplitter;
-import org.ruoyi.chain.split.CodeTextSplitter;
-import org.ruoyi.chain.split.MarkdownTextSplitter;
-import org.ruoyi.chain.split.TokenTextSplitter;
+import org.ruoyi.chain.split.*;
 import org.ruoyi.constant.FileType;
 import org.springframework.stereotype.Component;
 
@@ -16,6 +13,8 @@
     private final CodeTextSplitter codeTextSplitter;
     private final MarkdownTextSplitter markdownTextSplitter;
     private final TokenTextSplitter tokenTextSplitter;
+    private final ExcelTextSplitter excelTextSplitter;
+
     public ResourceLoader getLoaderByFileType(String fileType){
         if (FileType.isTextFile(fileType)){
             return new TextFileLoader(characterTextSplitter);
@@ -25,6 +24,8 @@
             return new PdfFileLoader(characterTextSplitter);
         } else if (FileType.isMdFile(fileType)) {
             return new MarkDownFileLoader(markdownTextSplitter);
+        }else if (FileType.isExcel(fileType)) {
+            return new ExcelFileLoader(excelTextSplitter);
         }else if (FileType.isCodeFile(fileType)) {
             return new CodeFileLoader(codeTextSplitter);
         }else {
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java
new file mode 100644
index 0000000..cc2b5f0
--- /dev/null
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java
@@ -0,0 +1,38 @@
+package org.ruoyi.chain.split;
+
+import lombok.AllArgsConstructor;
+import lombok.extern.slf4j.Slf4j;
+import org.springframework.stereotype.Component;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Splits the plain text extracted from an Excel workbook into chunks, one per non-blank line.
+ */
+@Component
+@AllArgsConstructor
+@Slf4j
+public class ExcelTextSplitter implements TextSplitter {
+    /**
+     * Splits {@code content} on line breaks and returns the stripped, non-blank lines.
+     *
+     * @param content extracted workbook text; {@code null} or blank yields an empty list
+     * @param kid     not used by this splitter (presumably the knowledge-base id; verify against callers)
+     * @return a mutable list of chunks, never {@code null}
+     */
+    @Override
+    public List<String> split(String content, String kid) {
+        List<String> chunks = new ArrayList<>();
+        if (content == null) {
+            return chunks;
+        }
+        // NOTE(review): one chunk per line assumes the extractor emits one row per line; confirm.
+        for (String line : content.split("\\R")) {
+            if (!line.isBlank()) {
+                chunks.add(line.strip());
+            }
+        }
+        return chunks;
+    }
+}
diff --git 
a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
index aa14167..e939508 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
@@ -7,6 +7,8 @@
     public static final String DOC = "doc";
     public static final String DOCX = "docx";
     public static final String PDF = "pdf";
+    public static final String XLS = "xls";
+    public static final String XLSX = "xlsx";
 
     public static final String LOG = "log";
     public static final String XML = "xml";
@@ -88,4 +90,13 @@
         }
     }
 
+    /**
+     * Checks whether {@code type} equals {@link #XLS} or {@link #XLSX}, ignoring case.
+     * @param type file type to test; {@code null} is treated as "not Excel" instead of throwing
+     * @return {@code true} for "xls"/"xlsx" in any letter case, {@code false} otherwise
+     */
+    public static boolean isExcel(String type) {
+        return XLS.equalsIgnoreCase(type) || XLSX.equalsIgnoreCase(type);
+    }
+
 }
diff --git a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
index 294342a..d74176a 100644
--- a/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
+++ b/ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
@@ -11,6 +11,7 @@
 import dev.langchain4j.store.embedding.EmbeddingStore;
 import dev.langchain4j.store.embedding.filter.Filter;
 import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo;
+import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore;
 import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore;
 import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore;
 import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore;
@@ -39,11 +40,11 @@
 
     private final ConfigService configService;
 
-    Map<String,EmbeddingStore<TextSegment>> storeMap;
+    Map<String,EmbeddingStore<TextSegment>> storeMap = new HashMap<>();
 
     @Override
     public void createSchema(String kid,String modelName) {
-        EmbeddingStore<TextSegment> embeddingStore = WeaviateEmbeddingStore.builder().build();
+        EmbeddingStore<TextSegment> 
embeddingStore; switch (modelName) { case "weaviate" -> { String protocol = configService.getConfigValue("weaviate", "protocol"); @@ -78,6 +79,10 @@ .collectionName(collectionName) .build(); } + default -> { + // Any other modelName: use an in-memory embedding store + embeddingStore = new InMemoryEmbeddingStore<>(); + } } storeMap.put(kid,embeddingStore); } -- Gitblit v1.9.3