ruoyi-modules-api/ruoyi-knowledge-api/pom.xml
@@ -103,6 +103,18 @@ <version>1.19.6</version> </dependency> <dependency> <groupId>dev.langchain4j</groupId> <artifactId>langchain4j-document-parser-apache-tika</artifactId> </dependency> <!-- ruoyi-knowledge-api/pom.xml --> <dependency> <groupId>commons-io</groupId> <artifactId>commons-io</artifactId> <version>2.17.0</version> </dependency> </dependencies> </project> ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ExcelFileLoader.java
¶Ô±ÈÐÂÎļþ @@ -0,0 +1,41 @@
package org.ruoyi.chain.loader;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser;
import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.split.TextSplitter;
import org.ruoyi.common.core.exception.UtilException;
import org.springframework.stereotype.Component;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.List;

/**
 * {@link ResourceLoader} that extracts the text of an Excel workbook with Apache Tika and
 * delegates chunking to the injected {@link TextSplitter}.
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelFileLoader implements ResourceLoader {

    /** Size, in bytes, of the buffer placed in front of the caller's stream. */
    private static final int DEFAULT_BUFFER_SIZE = 8192;

    /** Splitter used by {@link #getChunkList(String, String)}. */
    private final TextSplitter textSplitter;

    /**
     * Parses the supplied stream with {@link ApacheTikaDocumentParser} and returns the extracted text.
     *
     * @param inputStream stream positioned at the start of the workbook
     * @return the text of the parsed document
     * @throws UtilException if closing the stream fails with an {@link IOException}, or if parsing
     *                       fails with any {@link RuntimeException}; the cause is preserved
     */
    @Override
    public String getContent(InputStream inputStream) {
        // Wrap the stream in a buffer. Closing the wrapper at the end of the try block also closes
        // the supplied stream.
        // NOTE(review): the previous comment stated the original stream is NOT closed automatically,
        // which contradicts try-with-resources; confirm which behaviour callers expect.
        try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) {
            ApacheTikaDocumentParser parser = new ApacheTikaDocumentParser();
            Document document = parser.parse(bufferedStream);
            return document.text();
        } catch (IOException e) {
            // Raised at least by the implicit close() of the buffered wrapper.
            throw new UtilException("Excel文件流读取失败", e);
        } catch (RuntimeException e) {
            // Any unchecked failure from the parser is reported as a content-parsing error.
            throw new UtilException("Excel内容解析异常", e);
        }
    }

    /**
     * Splits previously extracted content into chunks using the configured splitter.
     *
     * @param content text returned by {@link #getContent(InputStream)}
     * @param kid     identifier passed through unchanged to the splitter
     * @return whatever the splitter returns for the given content
     */
    @Override
    public List<String> getChunkList(String content, String kid) {
        return textSplitter.split(content, kid);
    }
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/loader/ResourceLoaderFactory.java
@@ -1,10 +1,7 @@ package org.ruoyi.chain.loader; import lombok.AllArgsConstructor; import org.ruoyi.chain.split.CharacterTextSplitter; import org.ruoyi.chain.split.CodeTextSplitter; import org.ruoyi.chain.split.MarkdownTextSplitter; import org.ruoyi.chain.split.TokenTextSplitter; import org.ruoyi.chain.split.*; import org.ruoyi.constant.FileType; import org.springframework.stereotype.Component; @@ -16,6 +13,8 @@ private final CodeTextSplitter codeTextSplitter; private final MarkdownTextSplitter markdownTextSplitter; private final TokenTextSplitter tokenTextSplitter; private final ExcelTextSplitter excelTextSplitter; public ResourceLoader getLoaderByFileType(String fileType){ if (FileType.isTextFile(fileType)){ return new TextFileLoader(characterTextSplitter); @@ -25,6 +24,8 @@ return new PdfFileLoader(characterTextSplitter); } else if (FileType.isMdFile(fileType)) { return new MarkDownFileLoader(markdownTextSplitter); }else if (FileType.isExcel(fileType)) { return new ExcelFileLoader(excelTextSplitter); }else if (FileType.isCodeFile(fileType)) { return new CodeFileLoader(codeTextSplitter); }else { ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/chain/split/ExcelTextSplitter.java
¶Ô±ÈÐÂÎļþ @@ -0,0 +1,17 @@
package org.ruoyi.chain.split;

import lombok.AllArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.springframework.stereotype.Component;

import java.util.ArrayList;
import java.util.List;

/**
 * {@link TextSplitter} for text extracted from spreadsheets.
 *
 * <p>The content is treated as one row per line. Consecutive non-blank rows are packed into a
 * chunk until adding the next row would exceed {@link #MAX_CHUNK_CHARS}; a row is never cut in
 * the middle, so a single row longer than the budget becomes a chunk of its own.
 */
@Component
@AllArgsConstructor
@Slf4j
public class ExcelTextSplitter implements TextSplitter {

    /**
     * Soft upper bound on the number of characters per chunk.
     * NOTE(review): chosen as a conservative default; align with the chunk size used by the other
     * splitters if they read it from configuration.
     */
    private static final int MAX_CHUNK_CHARS = 1000;

    /**
     * Splits spreadsheet text into row-aligned chunks.
     *
     * @param content extracted text; {@code null} or blank yields an empty list
     * @param kid     knowledge-base identifier; currently not used by this splitter
     * @return a mutable list of chunks, never {@code null}
     */
    @Override
    public List<String> split(String content, String kid) {
        List<String> chunks = new ArrayList<>();
        if (content == null || content.isBlank()) {
            return chunks;
        }

        StringBuilder current = new StringBuilder();
        // "\\R" matches any line-break sequence, so Windows and Unix line endings both work.
        for (String row : content.split("\\R")) {
            if (row.isBlank()) {
                continue;
            }
            // +1 accounts for the newline that joins this row to the rows already buffered.
            boolean overflows = current.length() > 0
                    && current.length() + 1 + row.length() > MAX_CHUNK_CHARS;
            if (overflows) {
                chunks.add(current.toString());
                current.setLength(0);
            }
            if (current.length() > 0) {
                current.append('\n');
            }
            current.append(row);
        }
        if (current.length() > 0) {
            chunks.add(current.toString());
        }

        log.debug("Split spreadsheet content for kid={} into {} chunk(s)", kid, chunks.size());
        return chunks;
    }
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/constant/FileType.java
@@ -7,6 +7,8 @@
    public static final String DOC = "doc";
    public static final String DOCX = "docx";
    public static final String PDF = "pdf";
    public static final String XLS = "xls";
    public static final String XLSX = "xlsx";
    public static final String LOG = "log";
    public static final String XML = "xml";
@@ -88,4 +90,13 @@
        }
    }

    /**
     * Tells whether the given file type denotes an Excel workbook.
     *
     * @param type file extension without the dot, compared case-insensitively; may be {@code null}
     * @return {@code true} if {@code type} equals {@link #XLS} or {@link #XLSX} ignoring case,
     *         {@code false} otherwise (including for {@code null})
     */
    public static boolean isExcel(String type) {
        // Calling equalsIgnoreCase on the constants keeps a null type from raising an NPE.
        return XLS.equalsIgnoreCase(type) || XLSX.equalsIgnoreCase(type);
    }
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
@@ -11,6 +11,7 @@ import dev.langchain4j.store.embedding.EmbeddingStore; import dev.langchain4j.store.embedding.filter.Filter; import dev.langchain4j.store.embedding.filter.comparison.IsEqualTo; import dev.langchain4j.store.embedding.inmemory.InMemoryEmbeddingStore; import dev.langchain4j.store.embedding.milvus.MilvusEmbeddingStore; import dev.langchain4j.store.embedding.qdrant.QdrantEmbeddingStore; import dev.langchain4j.store.embedding.weaviate.WeaviateEmbeddingStore; @@ -39,11 +40,11 @@ private final ConfigService configService; Map<String,EmbeddingStore<TextSegment>> storeMap; Map<String,EmbeddingStore<TextSegment>> storeMap = new HashMap<>(); @Override public void createSchema(String kid,String modelName) { EmbeddingStore<TextSegment> embeddingStore = WeaviateEmbeddingStore.builder().build(); EmbeddingStore<TextSegment> embeddingStore; switch (modelName) { case "weaviate" -> { String protocol = configService.getConfigValue("weaviate", "protocol"); @@ -78,6 +79,10 @@ .collectionName(collectionName) .build(); } default -> { //使ç¨å å embeddingStore = new InMemoryEmbeddingStore<>(); } } storeMap.put(kid,embeddingStore); }