package org.ruoyi.chain.loader; import dev.langchain4j.data.document.Document; import dev.langchain4j.data.document.parser.apache.tika.ApacheTikaDocumentParser; import lombok.AllArgsConstructor; import lombok.extern.slf4j.Slf4j; import org.ruoyi.chain.split.TextSplitter; import org.ruoyi.common.core.exception.UtilException; import org.springframework.stereotype.Component; import java.io.BufferedInputStream; import java.io.IOException; import java.io.InputStream; import java.util.List; @Component @AllArgsConstructor @Slf4j public class ExcelFileLoader implements ResourceLoader { private final TextSplitter textSplitter; private static final int DEFAULT_BUFFER_SIZE = 8192; @Override public String getContent(InputStream inputStream) { // 使用带缓冲的输入流包装(保持原流不自动关闭) try (InputStream bufferedStream = new BufferedInputStream(inputStream, DEFAULT_BUFFER_SIZE)) { ApacheTikaDocumentParser apacheTikaDocumentParser = new ApacheTikaDocumentParser(); Document document = apacheTikaDocumentParser.parse(bufferedStream); return document.text(); } catch (IOException e) { String errorMsg = "Excel文件流读取失败"; throw new UtilException(errorMsg, e); } catch (RuntimeException e) { String errorMsg = "Excel内容解析异常"; throw new UtilException(errorMsg, e); } } @Override public List getChunkList(String content, String kid) { return textSplitter.split(content, kid); } }