办学质量监测教学评价系统
ageerle
2025-05-16 031b7da19894c8539ff9fd6a7d4b5246f8a66b7d
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
package org.ruoyi.chat.service.knowledge;
 
import cn.hutool.core.util.ObjectUtil;
import cn.hutool.core.util.RandomUtil;
import com.baomidou.mybatisplus.core.conditions.query.LambdaQueryWrapper;
import com.baomidou.mybatisplus.core.conditions.update.LambdaUpdateWrapper;
import com.baomidou.mybatisplus.core.toolkit.Wrappers;
import java.util.Collection;
import java.util.Date;
import java.util.List;
import java.util.stream.Collectors;
import lombok.RequiredArgsConstructor;
import org.ruoyi.chain.loader.ResourceLoaderFactory;
import org.ruoyi.constant.DealStatus;
import org.ruoyi.domain.KnowledgeAttach;
import org.ruoyi.domain.KnowledgeAttachPic;
import org.ruoyi.domain.KnowledgeFragment;
import org.ruoyi.domain.KnowledgeInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.StoreEmbeddingBo;
import org.ruoyi.domain.vo.ChatModelVo;
import org.ruoyi.domain.vo.KnowledgeAttachVo;
import org.ruoyi.domain.vo.KnowledgeInfoVo;
import org.ruoyi.mapper.KnowledgeAttachMapper;
import org.ruoyi.mapper.KnowledgeAttachPicMapper;
import org.ruoyi.mapper.KnowledgeFragmentMapper;
import org.ruoyi.mapper.KnowledgeInfoMapper;
import org.ruoyi.service.IChatModelService;
import org.ruoyi.service.VectorStoreService;
import org.ruoyi.service.impl.PdfImageExtractServiceImpl;
import org.ruoyi.system.domain.vo.SysOssVo;
import org.ruoyi.system.service.ISysOssService;
import org.ruoyi.utils.ZipUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.scheduling.annotation.Async;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
 
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
 
/**
 * @Description:
 * @Date: 2025/5/15 下午4:29
 */
@Service
@RequiredArgsConstructor
public class DealFileService {
  private static final Logger log = LoggerFactory.getLogger(DealFileService.class);
 
  private final KnowledgeInfoMapper baseMapper;
 
  private final VectorStoreService vectorStoreService;
 
  private final ResourceLoaderFactory resourceLoaderFactory;
 
  private final KnowledgeFragmentMapper fragmentMapper;
 
  private final KnowledgeAttachMapper attachMapper;
 
  private final IChatModelService chatModelService;
 
  private final ISysOssService ossService;
 
//  private final PdfImageExtractService pdfImageExtractService;
 
  private final KnowledgeAttachPicMapper picMapper;
 
  @Value("${pdf.extract.service.url}")
  private String serviceUrl;
  @Value("${pdf.extract.ai-api.url}")
  private String aiApiUrl;
  @Value("${pdf.extract.ai-api.key}")
  private String aiApiKey;
 
 
  @Async
  public void dealVectorStatus(KnowledgeAttach attachItem) throws Exception {
    try {
      //锁定数据 更改VectorStatus 到进行中
      if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getId, attachItem.getId())
      ) == 0) {
        return;
      }
      List<KnowledgeFragment> knowledgeFragments = fragmentMapper.selectList(
          new LambdaQueryWrapper<KnowledgeFragment>()
              .eq(KnowledgeFragment::getKid, attachItem.getKid())
              .eq(KnowledgeFragment::getDocId, attachItem.getDocId())
      );
      if (ObjectUtil.isEmpty(knowledgeFragments)) {
        throw new Exception("文件段落为空");
      }
      List<String> fids = knowledgeFragments.stream()
          .map(KnowledgeFragment::getFid)
          .collect(Collectors.toList());
      if (ObjectUtil.isEmpty(fids)) {
        throw new Exception("fids 为空");
      }
      List<String> chunkList = knowledgeFragments.stream()
          .map(KnowledgeFragment::getContent)
          .collect(Collectors.toList());
 
      if (ObjectUtil.isEmpty(chunkList)) {
        throw new Exception("chunkList 为空");
      }
      // 通过kid查询知识库信息
      KnowledgeInfoVo knowledgeInfoVo = baseMapper.selectVoOne(Wrappers.<KnowledgeInfo>lambdaQuery()
          .eq(KnowledgeInfo::getId, attachItem.getKid()));
      // 通过向量模型查询模型信息
      ChatModelVo chatModelVo = chatModelService.selectModelByName(
          knowledgeInfoVo.getEmbeddingModelName());
 
      StoreEmbeddingBo storeEmbeddingBo = new StoreEmbeddingBo();
      storeEmbeddingBo.setKid(attachItem.getKid());
      storeEmbeddingBo.setDocId(attachItem.getDocId());
      storeEmbeddingBo.setFids(fids);
      storeEmbeddingBo.setChunkList(chunkList);
      storeEmbeddingBo.setVectorModelName(knowledgeInfoVo.getVectorModelName());
      storeEmbeddingBo.setEmbeddingModelName(knowledgeInfoVo.getEmbeddingModelName());
      storeEmbeddingBo.setApiKey(chatModelVo.getApiKey());
      storeEmbeddingBo.setBaseUrl(chatModelVo.getApiHost());
      vectorStoreService.storeEmbeddings(storeEmbeddingBo);
 
      //设置处理完成
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getId, attachItem.getId()));
    } catch (Exception e) {
      //设置处理失败
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
          .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getId, attachItem.getId()));
      throw new RuntimeException(e);
    }
  }
 
  @Async
  public void dealPicStatus(KnowledgeAttach attachItem) throws Exception {
    try {
      //锁定数据 更改picStatus 到进行中
      if (attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getId, attachItem.getId())
      ) == 0) {
        return;
      }
      //获取附件
      if (ObjectUtil.isEmpty(attachItem.getOssId())) {
        log.error("==========OssId 为空,attachItem={}", attachItem);
        throw new Exception("OssId 为空");
      }
      //获取oss文件
      MultipartFile multipartFile = ossService.downloadByFile(attachItem.getOssId());
      //拆解出图片ZIP
      PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(serviceUrl,
          aiApiUrl, aiApiKey);
      byte[] pngs = pdfImageExtractService.extractImages(multipartFile, "png", true);
      //解压zip,得到图片文件
      MultipartFile[] multipartFiles = ZipUtils.unzipToMultipartFiles(pngs);
      //上传文件到OSS,写入表
      for (MultipartFile file : multipartFiles) {
        //先查找是否有相同图片名称,先做删除
        List<KnowledgeAttachPic> knowledgeAttachPics = picMapper.selectList(
            new LambdaQueryWrapper<KnowledgeAttachPic>()
                .eq(KnowledgeAttachPic::getKid, attachItem.getKid())
                .eq(KnowledgeAttachPic::getAid, attachItem.getId())
                .eq(KnowledgeAttachPic::getDocName, file.getOriginalFilename())
        );
        if (ObjectUtil.isNotEmpty(knowledgeAttachPics)) {
          Collection<Long> ossIds = knowledgeAttachPics.stream()
              .map(KnowledgeAttachPic::getOssId)
              .collect(Collectors.toList());
          ossService.deleteWithValidByIds(ossIds, false);
          List<Long> collect = knowledgeAttachPics.stream().map(KnowledgeAttachPic::getId)
              .collect(Collectors.toList());
          picMapper.deleteByIds(collect);
        }
 
        SysOssVo upload = ossService.upload(file);
        KnowledgeAttachPic entity = new KnowledgeAttachPic();
        entity.setKid(attachItem.getKid());
        entity.setAid(String.valueOf(attachItem.getId()));
        entity.setDocName(file.getOriginalFilename());
        entity.setDocType(
            file.getOriginalFilename().substring(file.getOriginalFilename().lastIndexOf(".") + 1));
        entity.setOssId(upload.getOssId());
        int[] ints = extractPageNumbers(file.getOriginalFilename());
        if (ObjectUtil.isNotEmpty(ints)) {
          assert ints != null;
          if (ints.length == 2) {
            entity.setPageNum(ints[0]);
            entity.setIndexNum(ints[1]);
          }
        }
        picMapper.insert(entity);
      }
 
      //设置处理完成
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getId, attachItem.getId()));
    } catch (Exception e) {
      //设置处理失败
      attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
          .set(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_40)
          .set(KnowledgeAttach::getRemark, attachItem.getRemark() + e.getMessage())
          .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttach::getId, attachItem.getId()));
      throw new RuntimeException(e);
    }
 
  }
 
 
  @Async
  public void dealPicAnysStatus(KnowledgeAttachPic picItem) throws Exception {
    try {
      //锁定数据 更改 getPicAnysStatus 到进行中
      if (picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
          .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_10)
          .eq(KnowledgeAttachPic::getId, picItem.getId())
      ) == 0) {
        return;
      }
      SysOssVo ossVo = ossService.getById(picItem.getOssId());
      if (ObjectUtil.isNotEmpty(ossVo)) {
        String fileStr = ossService.downloadByByte(picItem.getOssId());
        //调用第三方 分析图片内容
        PdfImageExtractServiceImpl pdfImageExtractService = new PdfImageExtractServiceImpl(
            serviceUrl,
            aiApiUrl, aiApiKey);
        List<PdfFileContentResult> pdfFileContentResults = pdfImageExtractService.dealFileContent(
            new String[]{fileStr});
        if (ObjectUtil.isNotEmpty(pdfFileContentResults)) {
          for (PdfFileContentResult resultItem : pdfFileContentResults) {
            //图片解析内容回写到pic表
            picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
                .set(KnowledgeAttachPic::getContent, parseContent(resultItem.getContent()))
                .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
                .eq(KnowledgeAttachPic::getId, picItem.getId()));
            //将图片解析内容 写入段落表 fragment
            KnowledgeAttachVo knowledgeAttachVo = attachMapper.selectVoById(picItem.getAid());
            if (ObjectUtil.isNotEmpty(knowledgeAttachVo)) {
              String fid = RandomUtil.randomString(10);
              KnowledgeFragment knowledgeFragment = new KnowledgeFragment();
              knowledgeFragment.setKid(knowledgeAttachVo.getKid());
              knowledgeFragment.setDocId(knowledgeAttachVo.getDocId());
              knowledgeFragment.setFid(fid);
              knowledgeFragment.setIdx(0);
              knowledgeFragment.setContent(parseContent(resultItem.getContent()));
              knowledgeFragment.setCreateTime(new Date());
              fragmentMapper.insert(knowledgeFragment);
 
              //更新attach表,需要所有图片都处理完毕
              // 查询非30状态(完成状态)的记录数量
              long nonStatus30Count = picMapper.selectCount(
                  new LambdaQueryWrapper<KnowledgeAttachPic>()
                      .ne(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_30)
                      .eq(KnowledgeAttachPic::getAid, picItem.getAid())
              );
              if (nonStatus30Count == 0) {
                // 执行表更新操作
                attachMapper.update(new LambdaUpdateWrapper<KnowledgeAttach>()
                    .set(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_30)
                    .eq(KnowledgeAttach::getPicStatus, DealStatus.STATUS_30)
                    .eq(KnowledgeAttach::getPicAnysStatus, DealStatus.STATUS_10)
                    .eq(KnowledgeAttach::getVectorStatus, DealStatus.STATUS_10)
                    .eq(KnowledgeAttach::getId, picItem.getAid()));
              }
            }
          }
        }
      }
    } catch (Exception e) {
      //失败
      picMapper.update(new LambdaUpdateWrapper<KnowledgeAttachPic>()
          .set(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_40)
          .set(KnowledgeAttachPic::getRemark, picItem.getRemark() + e.getMessage())
          .eq(KnowledgeAttachPic::getPicAnysStatus, DealStatus.STATUS_20)
          .eq(KnowledgeAttachPic::getId, picItem.getId()));
      throw new RuntimeException(e);
    }
  }
 
 
  /**
   * 从文件名中提取page后面的两个数字
   *
   * @param fileName 文件名
   * @return 包含两个数字的数组,如果未找到则返回null
   */
  public static int[] extractPageNumbers(String fileName) {
    // 查找"page_"的位置
    int pageIndex = fileName.indexOf("page_");
 
    if (pageIndex == -1) {
      return null;
    }
 
    // 从"page_"后开始截取
    String afterPage = fileName.substring(pageIndex + 5);
 
    // 按下划线分割
    String[] parts = afterPage.split("_");
 
    if (parts.length >= 2) {
      try {
        // 提取两个数字
        int firstNumber = Integer.parseInt(parts[0]);
 
        // 对于第二个数字,需要去掉可能的文件扩展名
        String secondPart = parts[1];
        int dotIndex = secondPart.indexOf(".");
        if (dotIndex != -1) {
          secondPart = secondPart.substring(0, dotIndex);
        }
 
        int secondNumber = Integer.parseInt(secondPart);
 
        return new int[]{firstNumber, secondNumber};
      } catch (NumberFormatException e) {
        return null;
      }
    }
 
    return null;
  }
 
  public static String parseContent(String jsonString) {
    try {
      // 创建ObjectMapper实例
      ObjectMapper objectMapper = new ObjectMapper();
 
      // 解析JSON字符串
      JsonNode rootNode = objectMapper.readTree(jsonString);
 
      // 获取choices数组的第一个元素
      JsonNode choicesNode = rootNode.get("choices");
      if (choicesNode != null && choicesNode.isArray() && choicesNode.size() > 0) {
        // 获取第一个choice
        JsonNode firstChoice = choicesNode.get(0);
 
        // 获取message节点
        JsonNode messageNode = firstChoice.get("message");
        if (messageNode != null) {
          // 获取content字段的值
          JsonNode contentNode = messageNode.get("content");
          if (contentNode != null) {
            return contentNode.asText();
          }
        }
      }
 
      return "无法找到content内容";
    } catch (Exception e) {
      e.printStackTrace();
      return "解析JSON时发生错误: " + e.getMessage();
    }
  }
 
 
}