办学质量监测教学评价系统
ageerle
2025-05-19 3ceef41ab05a1fa177ffdf0afb51fbba5829615b
feat: 知识库上传逻辑调整
已修改2个文件
已删除1个文件
605 ■■■■■ 文件已修改
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java 29 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java 385 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java 191 ●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/VectorStoreServiceImpl.java
@@ -1,13 +1,11 @@
package org.ruoyi.service.impl;
import cn.hutool.core.util.RandomUtil;
import com.google.protobuf.ServiceException;
import dev.langchain4j.data.embedding.Embedding;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.model.embedding.EmbeddingModel;
import dev.langchain4j.model.ollama.OllamaEmbeddingModel;
import dev.langchain4j.model.openai.OpenAiEmbeddingModel;
import dev.langchain4j.model.output.Response;
import dev.langchain4j.store.embedding.EmbeddingMatch;
import dev.langchain4j.store.embedding.EmbeddingSearchRequest;
import dev.langchain4j.store.embedding.EmbeddingStore;
@@ -31,6 +29,7 @@
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
/**
 * 向量库管理
@@ -131,31 +130,7 @@
        createSchema(kid,modelName);
        // 根据条件删除向量数据
        Filter simpleFilter = new IsEqualTo("kid", kid);
        removeByFilter(simpleFilter);
    }
    public void removeByFilter(Filter filter) {
        List<Float> dummyVector = new ArrayList<>();
        // TODO 模型维度
        int dimension = 1024;
        for (int i = 0; i < dimension; i++) {
            dummyVector.add(0.0f);
        }
        Embedding dummyEmbedding = Embedding.from(dummyVector);
        EmbeddingSearchRequest request = EmbeddingSearchRequest.builder()
                .queryEmbedding(dummyEmbedding)
                .filter(filter)
                .maxResults(10000)
                .build();
        // 搜索
        List<String> idsToDelete = embeddingStore.search(request)
                .matches().stream()
                .map(EmbeddingMatch::embeddingId)
                .collect(Collectors.toList());
        // 删除
        if (!idsToDelete.isEmpty()) {
            embeddingStore.removeAll(idsToDelete);
        }
        embeddingStore.removeAll(simpleFilter);
    }
    @Override
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/DealFileService.java
文件已删除
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/service/knowledge/KnowledgeInfoServiceImpl.java
@@ -4,29 +4,21 @@
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.query.QueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import com.baomidou.mybatisplus.extension.plugins.pagination.Page;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import lombok.extern.slf4j.Slf4j;
import org.ruoyi.chain.loader.ResourceLoader;
import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.common.core.domain.model.LoginUser;
import org.ruoyi.common.core.utils.MapstructUtils;
import org.ruoyi.common.core.utils.StringUtils;
import org.ruoyi.common.satoken.utils.LoginHelper;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.constant.FileType;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.ChatModel;
import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeInfoBo;
import org.ruoyi.domain.bo.KnowledgeInfoUploadBo;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
@@ -34,28 +26,21 @@
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.utils.ZipUtils;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.system.service.ISysOssService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.scheduling.annotation.Scheduled;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
import org.springframework.web.multipart.MultipartFile;
import org.ruoyi.system.service.ISysOssService;
import java.io.IOException;
import java.util.*;
import java.util.stream.Collectors;
/**
@@ -69,6 +54,7 @@
public class KnowledgeInfoServiceImpl implements IKnowledgeInfoService {
  private static final Logger log = LoggerFactory.getLogger(KnowledgeInfoServiceImpl.class);
  private final KnowledgeInfoMapper baseMapper;
  private final VectorStoreService vectorStoreService;
@@ -82,19 +68,6 @@
  private final IChatModelService chatModelService;
  private final ISysOssService ossService;
//  private final PdfImageExtractService pdfImageExtractService;
  private final KnowledgeAttachPicMapper picMapper;
  private final DealFileService dealFileService;
  @Value("${pdf.extract.service.url}")
  private String serviceUrl;
  @Value("${pdf.extract.ai-api.url}")
  private String aiApiUrl;
  @Value("${pdf.extract.ai-api.key}")
  private String aiApiKey;
  /**
   * 查询知识库
@@ -207,45 +180,16 @@
  @Override
  @Transactional(rollbackFor = Exception.class)
  public void removeKnowledge(String id) {
    Map<String, Object> map = new HashMap<>();
    map.put("kid", id);
    Map<String,Object> map = new HashMap<>();
    map.put("kid",id);
    List<KnowledgeInfoVo> knowledgeInfoList = baseMapper.selectVoByMap(map);
    check(knowledgeInfoList);
    // 删除向量库信息
    knowledgeInfoList.forEach(knowledgeInfoVo -> {
      vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),
          knowledgeInfoVo.getVectorModelName());
    });
//    knowledgeInfoList.forEach(knowledgeInfoVo -> {
//      vectorStoreService.removeByKid(String.valueOf(knowledgeInfoVo.getId()),knowledgeInfoVo.getVectorModelName());
//    });
    // 删除附件和知识片段
    fragmentMapper.deleteByMap(map);
    List<KnowledgeAttachVo> knowledgeAttachVos = attachMapper.selectVoByMap(map);
    if (ObjectUtil.isNotEmpty(knowledgeAttachVos)) {
      Collection<Long> ossIds = knowledgeAttachVos.stream()
          .map(KnowledgeAttachVo::getOssId)
          .collect(Collectors.toList());
      //删除oss
      ossService.deleteWithValidByIds(ossIds, false);
      //删除图片oss
      List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
          new LambdaQueryWrapper<KnowledgeAttachPic>()
              .in(KnowledgeAttachPic::getKid,
                  knowledgeAttachVos.stream().map(KnowledgeAttachVo::getKid)
                      .collect(Collectors.toList()))
              .in(KnowledgeAttachPic::getAid,
                  knowledgeAttachVos.stream().map(KnowledgeAttachVo::getId)
                      .collect(Collectors.toList()))
      );
      if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
        Collection<Long> tossIds = knowledgeAttachPics.stream()
            .map(KnowledgeAttachPic::getOssId)
            .collect(Collectors.toList());
        ossService.deleteWithValidByIds(tossIds, false);
        List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
            .collect(Collectors.toList());
        picMapper.deleteByIds(collect);
      }
    }
    attachMapper.deleteByMap(map);
    // 删除知识库
    baseMapper.deleteByMap(map);
@@ -257,11 +201,6 @@
  }
  public void storeContent(MultipartFile file, String kid) {
    if (file == null || file.isEmpty()) {
      throw new IllegalArgumentException("File cannot be null or empty");
    }
    SysOssVo uploadDto = null;
    String fileName = file.getOriginalFilename();
    List<String> chunkList = new ArrayList<>();
    KnowledgeAttach knowledgeAttach = new KnowledgeAttach();
@@ -269,18 +208,15 @@
    String docId = RandomUtil.randomString(10);
    knowledgeAttach.setDocId(docId);
    knowledgeAttach.setDocName(fileName);
    knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".") + 1));
    knowledgeAttach.setDocType(fileName.substring(fileName.lastIndexOf(".")+1));
    String content = "";
    ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(
        knowledgeAttach.getDocType());
    ResourceLoader resourceLoader = resourceLoaderFactory.getLoaderByFileType(knowledgeAttach.getDocType());
    List<String> fids = new ArrayList<>();
    try {
      content = resourceLoader.getContent(file.getInputStream());
      chunkList = resourceLoader.getChunkList(content, kid);
      List<KnowledgeFragment> knowledgeFragmentList = new ArrayList<>();
      if (CollUtil.isNotEmpty(chunkList)) {
        // Upload file to OSS
        uploadDto = ossService.upload(file);
        for (int i = 0; i < chunkList.size(); i++) {
          String fid = RandomUtil.randomString(10);
          fids.add(fid);
@@ -300,21 +236,25 @@
    }
    knowledgeAttach.setContent(content);
    knowledgeAttach.setCreateTime(new Date());
    if (ObjectUtil.isNotEmpty(uploadDto) && ObjectUtil.isNotEmpty(uploadDto.getOssId())) {
      knowledgeAttach.setOssId(uploadDto.getOssId());
      //只有pdf文件 才需要拆解图片和分析图片内容
      if (FileType.PDF.equals(knowledgeAttach.getDocType())) {
        knowledgeAttach.setPicStatus(DealStatus.STATUS_10);
        knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_10);
      } else {
        knowledgeAttach.setPicStatus(DealStatus.STATUS_30);
        knowledgeAttach.setPicAnysStatus(DealStatus.STATUS_30);
      }
      //所有文件上传后,都需要同步到向量数据库
      knowledgeAttach.setVectorStatus(DealStatus.STATUS_10);
    }
    attachMapper.insert(knowledgeAttach);
    // 通过kid查询知识库信息
    KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
            .eq(KnowledgeInfo::getId, kid));
    // 通过向量模型查询模型信息
    ChatModelVo chatModelVo = chatModelService.selectModelByName(knowledgeInfoVo.getEmbeddingModelName());
    StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
    storeEmbeddingBo.setKid(kid);
    storeEmbeddingBo.setDocId(docId);
    storeEmbeddingBo.setFids(fids);
    storeEmbeddingBo.setChunkList(chunkList);
    storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
    storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
    storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
    storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
    vectorStoreService.storeEmbeddings(storeEmbeddingBo);
  }
  /**
@@ -331,79 +271,4 @@
    }
  }
  /**
   * ç¬¬ä¸€æ­¥ å®šæ—¶ æ‹†è§£PDF文件中的图片
   */
  //@Scheduled(fixedDelay = 15000) // æ¯3秒执行一次
  public void dealKnowledgeAttachPic() throws Exception {
    //处理 æ‹†è§£PDF文件中的图片的记录
    List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
        new LambdaQueryWrapper<KnowledgeAttach>()
            .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
            .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
            .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
    );
    log.info("===============拆解PDF文件中的图片 size = {}", knowledgeAttaches.size());
    if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
      for (KnowledgeAttach attachItem : knowledgeAttaches) {
        dealFileService.dealPicStatus(attachItem);
      }
    }
  }
  /**
   * ç¬¬äºŒæ­¥ å®šæ—¶ è§£æžå›¾ç‰‡å†…容
   */
  //@Scheduled(fixedDelay = 15000)
  public void dealKnowledgeAttachPicAnys() throws Exception {
    //获取未处理的图片记录
    List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
        new LambdaQueryWrapper<KnowledgeAttachPic>()
            .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
            .last("LIMIT 20")
    );
    if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
      for (KnowledgeAttachPic picItem : knowledgeAttachPics) {
        dealFileService.dealPicAnysStatus(picItem);
      }
    }
  }
  /**
   * ç¬¬ä¸‰æ­¥ å®šæ—¶ å¤„理 é™„件上传后上传向量数据库
   */
  //@Scheduled(fixedDelay = 30000) // æ¯3秒执行一次
  public void dealKnowledgeAttachVector() throws Exception {
    //处理 éœ€è¦ä¸Šä¼ å‘量数据库的记录
    List<KnowledgeAttach> knowledgeAttaches = attachMapper.selectList(
        new LambdaQueryWrapper<KnowledgeAttach>()
            .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
            .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
    );
    log.info("===============上传向量数据库 size = {}", knowledgeAttaches.size());
    if (ObjectUtil.isNotEmpty(knowledgeAttaches)) {
      for (KnowledgeAttach attachItem : knowledgeAttaches) {
        dealFileService.dealVectorStatus(attachItem);
      }
    }
  }
  /**
   * ç¬¬å››æ­¥ å®šæ—¶ å¤„理 å¤±è´¥æ•°æ®
   */
  //@Scheduled(fixedDelay = 30 * 60 * 1000)
  public void dealKnowledge40Status() throws Exception {
      //拆解PDF失败 é‡æ–°è®¾ç½®çŠ¶æ€
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_40));
      //将图片分析失败的数据 é‡æ–°è®¾ç½®çŠ¶æ€
      picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
          .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40));
      //上传向量库失败 é‡æ–°è®¾ç½®çŠ¶æ€
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40));
  }
    @Scheduled(fixedDelay = 180000) // 3分钟执行一次
}