办学质量监测教学评价系统
zhouweiyi
2025-05-13 32da85daabbb0e574f12bc7f1e0af6ff5b0cee6a
提取PDF中的图片并调用大模型,识别图片内容并返回
已修改3个文件
已添加4个文件
539 ■■■■ 文件已修改
ruoyi-admin/src/main/resources/application-dev.yml 5 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-admin/src/main/resources/application-prod.yml 5 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/PdfFileContentResult.java 30 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/PdfImageExtractService.java 41 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/PdfImageExtractServiceImpl.java 144 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/utils/ZipUtils.java 95 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/controller/knowledge/KnowledgeController.java 219 ●●●●● 补丁 | 查看 | 原始文档 | blame | 历史
ruoyi-admin/src/main/resources/application-dev.yml
@@ -94,3 +94,8 @@
  # 腾讯专用
  sdkAppId:
pdf:
  extract:
    service:
      url: http://localhost:8080
ruoyi-admin/src/main/resources/application-prod.yml
@@ -172,3 +172,8 @@
  signName: 测试
  # 腾讯专用
  sdkAppId:
pdf:
  extract:
    service:
      url: http://localhost:8080
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/domain/PdfFileContentResult.java
对比新文件
@@ -0,0 +1,30 @@
package org.ruoyi.domain;
/**
 * Simple mutable holder that pairs a file label with the content produced for that file.
 */
public class PdfFileContentResult {

    /** Label identifying the file this result belongs to. */
    private String filename;

    /** Content associated with the file. */
    private String content;

    /**
     * Creates a result with both values set.
     *
     * @param filename label identifying the file
     * @param content  content associated with the file
     */
    public PdfFileContentResult(String filename, String content) {
        this.content = content;
        this.filename = filename;
    }

    /**
     * @return the label identifying the file
     */
    public String getFilename() {
        return this.filename;
    }

    /**
     * @return the content associated with the file
     */
    public String getContent() {
        return this.content;
    }

    /**
     * @param filename new label identifying the file
     */
    public void setFilename(String filename) {
        this.filename = filename;
    }

    /**
     * @param content new content associated with the file
     */
    public void setContent(String content) {
        this.content = content;
    }
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/PdfImageExtractService.java
对比新文件
@@ -0,0 +1,41 @@
package org.ruoyi.service;
import java.io.IOException;
import java.util.List;
import org.ruoyi.domain.PdfFileContentResult;
import org.springframework.web.multipart.MultipartFile;
/**
 * Service interface for extracting images from PDF files and having their content recognized.
 */
public interface PdfImageExtractService {
  /**
   * Extracts the images contained in a PDF file.
   *
   * @param pdfFile the PDF file
   * @param imageFormat output image format (png, jpeg, gif)
   * @param allowDuplicates whether duplicate images are allowed
   * @return byte array of a ZIP file containing the extracted images
   * @throws IOException if an error occurs while processing the file
   */
  byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
      throws IOException;
  /**
   * Processes file content.
   *
   * @param unzip array of Base64-encoded images
   * @return list of file content results
   * @throws IOException if an error occurs during the API call
   */
  List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException;
  /**
   * Extracts the images from a PDF, calls an AI model to recognize the content of each image,
   * and returns the recognition results.
   *
   * @param file the uploaded PDF file
   * @return list of file content results
   * @throws IOException if an error occurs while processing the file
   */
  List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException;
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/service/impl/PdfImageExtractServiceImpl.java
对比新文件
@@ -0,0 +1,144 @@
package org.ruoyi.service.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
import lombok.extern.slf4j.Slf4j;
import okhttp3.MediaType;
import okhttp3.MultipartBody;
import okhttp3.OkHttpClient;
import okhttp3.OkHttpClient.Builder;
import okhttp3.Request;
import okhttp3.RequestBody;
import okhttp3.Response;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.service.PdfImageExtractService;
import org.ruoyi.utils.ZipUtils;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Service;
import org.springframework.web.multipart.MultipartFile;
/**
 * PDF图片提取服务实现类
 */
@Service
@Slf4j
public class PdfImageExtractServiceImpl implements PdfImageExtractService {
  @Value("${pdf.extract.service.url}")
  private String serviceUrl;
  @Value("${pdf.extract.ai-api.url}")
  private String aiApiUrl;
  @Value("${pdf.extract.ai-api.key}")
  private String aiApiKey ;
  private final OkHttpClient client = new Builder()
      .connectTimeout(100, TimeUnit.SECONDS)
      .readTimeout(150, TimeUnit.SECONDS)
      .writeTimeout(150, TimeUnit.SECONDS)
      .callTimeout(300, TimeUnit.SECONDS)
      .build();
  private static final MediaType JSON = MediaType.parse("application/json; charset=utf-8");
  @Override
  public byte[] extractImages(MultipartFile pdfFile, String imageFormat, boolean allowDuplicates)
      throws IOException {
    // æž„建multipart请求
    RequestBody requestBody = new MultipartBody.Builder()
        .setType(MultipartBody.FORM)
        .addFormDataPart("fileInput", pdfFile.getOriginalFilename(),
            RequestBody.create(MediaType.parse("application/pdf"), pdfFile.getBytes()))
        .addFormDataPart("format", imageFormat)
        .addFormDataPart("allowDuplicates", String.valueOf(allowDuplicates))
        .build();
    // åˆ›å»ºè¯·æ±‚
    Request request = new Request.Builder()
        .url(serviceUrl + "/api/v1/misc/extract-images")
        .post(requestBody)
        .build();
    // æ‰§è¡Œè¯·æ±‚
    try (Response response = client.newCall(request).execute()) {
      if (!response.isSuccessful()) {
        throw new IOException("请求失败: " + response.code());
      }
      return response.body().bytes();
    }
  }
  /**
   * å¤„理文件内容
   *
   * @param unzip Base64编码的图片数组
   * @return æ–‡ä»¶å†…容结果列表
   * @throws IOException å¦‚æžœAPI调用过程中发生错误
   */
  @Override
  public List<PdfFileContentResult> dealFileContent(String[] unzip) throws IOException {
    List<PdfFileContentResult> results = new ArrayList<>();
    int i = 0;
    for (String base64Image : unzip) {
      // æž„建请求JSON
      String requestJson = String.format("{"
          + "\"model\": \"gpt-4o\","
          + "\"stream\": false,"
          + "\"messages\": [{"
          + "\"role\": \"user\","
          + "\"content\": [{"
          + "\"type\": \"text\","
          + "\"text\": \"这张图片有什么\""
          + "}, {"
          + "\"type\": \"image_url\","
          + "\"image_url\": {"
          + "\"url\": \"%s\""
          + "}}"
          + "]}],"
          + "\"max_tokens\": 400"
          + "}", base64Image);
      // åˆ›å»ºè¯·æ±‚
      Request request = new Request.Builder()
          .url(aiApiUrl)
          .addHeader("Authorization", "Bearer " + aiApiKey)
          .post(RequestBody.create(JSON, requestJson))
          .build();
      // æ‰§è¡Œè¯·æ±‚
      try {
        log.info("=============call=" + ++i);
        Response response = client.newCall(request).execute();
        log.info("=============response=" + response);
        if (!response.isSuccessful()) {
          throw new IOException("API请求失败: " + response.code() + response.toString());
        }
        String responseBody = response.body().string();
        log.info("=============responseBody=" + responseBody);
        // ä½¿ç”¨æ–‡ä»¶åï¼ˆè¿™é‡Œä½¿ç”¨base64的前10个字符作为标识)和API返回内容创建结果对象
        String filename = base64Image.substring(0, Math.min(base64Image.length(), 10));
        results.add(new PdfFileContentResult(filename, responseBody));
      } catch (Exception e) {
        log.error(e.getMessage());
        throw new RuntimeException(e);
      }
    }
    return results;
  }
  @Override
  public List<PdfFileContentResult> extractImages(MultipartFile file) throws IOException {
    String format = "png";
    boolean allowDuplicates = true;
    // èŽ·å–ZIP数据
    byte[] zipData = this.extractImages(file, format, allowDuplicates);
    // è§£åŽ‹æ–‡ä»¶å¹¶è¯†åˆ«å›¾ç‰‡å†…å®¹å¹¶è¿”å›ž
    String[] unzip = ZipUtils.unzipForBase64(zipData);
    //解析图片内容
    return this.dealFileContent(unzip);
  }
}
ruoyi-modules-api/ruoyi-knowledge-api/src/main/java/org/ruoyi/utils/ZipUtils.java
对比新文件
@@ -0,0 +1,95 @@
package org.ruoyi.utils;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Base64;
import java.util.List;
import java.util.zip.ZipEntry;
import java.util.zip.ZipInputStream;
/**
 * Utilities for unpacking ZIP archives that are held in memory.
 */
public class ZipUtils {
    /** Size of the buffer used when copying entry data. */
    private static final int BUFFER_SIZE = 4096;

    /**
     * Extracts a ZIP archive into a directory on disk.
     *
     * <p>Every entry is resolved against {@code destDir} and rejected when its canonical location
     * lies outside that directory (for example names containing {@code ../}), so a crafted archive
     * cannot write elsewhere on the file system. Missing parent directories are created on demand,
     * because archives do not have to contain explicit directory entries.
     *
     * @param zipData bytes of the ZIP archive
     * @param destDir target directory; created if it does not exist
     * @return paths of the extracted files in archive order, each built as
     *     {@code destDir + File.separator + entryName}
     * @throws IOException if the archive cannot be read, a directory cannot be created, a file
     *     cannot be written, or an entry would be extracted outside {@code destDir}
     */
    public static String[] unzip(byte[] zipData, String destDir) throws IOException {
        File destRoot = new File(destDir).getCanonicalFile();
        ensureDirectory(destRoot);
        List<String> extractedPaths = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                String filePath = destDir + File.separator + zipEntry.getName();
                // Canonicalising collapses ".." segments so the containment check cannot be fooled.
                File target = new File(filePath).getCanonicalFile();
                if (!target.toPath().startsWith(destRoot.toPath())) {
                    throw new IOException(
                        "ZIP entry is outside of the target directory: " + zipEntry.getName());
                }
                if (zipEntry.isDirectory()) {
                    ensureDirectory(target);
                } else {
                    File parent = target.getParentFile();
                    if (parent != null) {
                        ensureDirectory(parent);
                    }
                    extractFile(zis, filePath);
                    extractedPaths.add(filePath);
                }
                zis.closeEntry();
            }
        }
        return extractedPaths.toArray(new String[0]);
    }

    /** Creates {@code dir} (including parents) unless it already exists as a directory. */
    private static void ensureDirectory(File dir) throws IOException {
        // Re-check after mkdirs(): it also returns false when the directory appeared meanwhile.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Unable to create directory: " + dir);
        }
    }

    /** Writes the current entry of {@code zis} to the file at {@code filePath}. */
    private static void extractFile(ZipInputStream zis, String filePath) throws IOException {
        try (BufferedOutputStream bos = new BufferedOutputStream(new FileOutputStream(filePath))) {
            byte[] buffer = new byte[BUFFER_SIZE];
            int read;
            while ((read = zis.read(buffer)) != -1) {
                bos.write(buffer, 0, read);
            }
        }
    }

    /**
     * Unpacks a ZIP archive in memory and returns each file's content as a Base64 string.
     *
     * <p>Directory entries are skipped; nothing is written to disk.
     *
     * @param zipData bytes of the ZIP archive
     * @return Base64-encoded content of every file entry, in archive order
     * @throws IOException if the archive cannot be read
     */
    public static String[] unzipForBase64(byte[] zipData) throws IOException {
        List<String> base64Contents = new ArrayList<>();
        try (ByteArrayInputStream bis = new ByteArrayInputStream(zipData);
             ZipInputStream zis = new ZipInputStream(bis)) {
            ZipEntry zipEntry;
            while ((zipEntry = zis.getNextEntry()) != null) {
                if (!zipEntry.isDirectory()) {
                    // Read the whole entry into memory before encoding it.
                    ByteArrayOutputStream baos = new ByteArrayOutputStream();
                    byte[] buffer = new byte[BUFFER_SIZE];
                    int read;
                    while ((read = zis.read(buffer)) != -1) {
                        baos.write(buffer, 0, read);
                    }
                    base64Contents.add(Base64.getEncoder().encodeToString(baos.toByteArray()));
                }
                zis.closeEntry();
            }
        }
        return base64Contents.toArray(new String[0]);
    }
}
ruoyi-modules/ruoyi-chat/src/main/java/org/ruoyi/chat/controller/knowledge/KnowledgeController.java
@@ -1,9 +1,12 @@
package org.ruoyi.chat.controller.knowledge;
import cn.dev33.satoken.stp.StpUtil;
import io.swagger.v3.oas.annotations.Operation;
import io.swagger.v3.oas.annotations.Parameter;
import jakarta.servlet.http.HttpServletResponse;
import jakarta.validation.constraints.NotEmpty;
import jakarta.validation.constraints.NotNull;
import java.io.IOException;
import lombok.RequiredArgsConstructor;
import org.ruoyi.common.core.domain.R;
import org.ruoyi.common.core.validate.AddGroup;
@@ -14,6 +17,7 @@
import org.ruoyi.common.web.core.BaseController;
import org.ruoyi.core.page.PageQuery;
import org.ruoyi.core.page.TableDataInfo;
import org.ruoyi.domain.PdfFileContentResult;
import org.ruoyi.domain.bo.KnowledgeAttachBo;
import org.ruoyi.domain.bo.KnowledgeFragmentBo;
import org.ruoyi.domain.bo.KnowledgeInfoBo;
@@ -24,6 +28,7 @@
import org.ruoyi.service.IKnowledgeAttachService;
import org.ruoyi.service.IKnowledgeFragmentService;
import org.ruoyi.service.IKnowledgeInfoService;
import org.ruoyi.service.PdfImageExtractService;
import org.springframework.validation.annotation.Validated;
import org.springframework.web.bind.annotation.*;
import org.springframework.web.multipart.MultipartFile;
@@ -41,117 +46,135 @@
@RequestMapping("/knowledge")
public class KnowledgeController extends BaseController {
    private final IKnowledgeInfoService knowledgeInfoService;
  private final IKnowledgeInfoService knowledgeInfoService;
    private final IKnowledgeAttachService attachService;
  private final IKnowledgeAttachService attachService;
    private final IKnowledgeFragmentService fragmentService;
  private final IKnowledgeFragmentService fragmentService;
    /**
     * æ ¹æ®ç”¨æˆ·ä¿¡æ¯æŸ¥è¯¢æœ¬åœ°çŸ¥è¯†åº“
     */
    @GetMapping("/list")
    public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
        if (!StpUtil.isLogin()) {
            throw new SecurityException("请先去登录!");
        }
        bo.setUid(LoginHelper.getUserId());
        return knowledgeInfoService.queryPageList(bo, pageQuery);
  private final PdfImageExtractService pdfImageExtractService;
  /**
   * Queries the local knowledge bases belonging to the current user, paged.
   *
   * <p>Callers that are not logged in are rejected; the query is then restricted to the
   * logged-in user's id.
   *
   * @param bo query criteria; its uid is overwritten with the logged-in user's id
   * @param pageQuery paging parameters
   * @return the page returned by the knowledge info service
   * @throws SecurityException if no user is logged in
   */
  @GetMapping("/list")
  public TableDataInfo<KnowledgeInfoVo> list(KnowledgeInfoBo bo, PageQuery pageQuery) {
    if (!StpUtil.isLogin()) {
      throw new SecurityException("请先去登录!");
    }
    bo.setUid(LoginHelper.getUserId());
    return knowledgeInfoService.queryPageList(bo, pageQuery);
  }
    /**
     * æ–°å¢žçŸ¥è¯†åº“
     */
    @Log(title = "知识库", businessType = BusinessType.INSERT)
    @PostMapping("/save")
    public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
        knowledgeInfoService.saveOne(bo);
        return R.ok();
    }
  /**
   * Creates a knowledge base.
   *
   * @param bo knowledge base data from the request body, validated against {@code AddGroup}
   * @return an empty success response
   */
  @Log(title = "知识库", businessType = BusinessType.INSERT)
  @PostMapping("/save")
  public R<Void> save(@Validated(AddGroup.class) @RequestBody KnowledgeInfoBo bo) {
    knowledgeInfoService.saveOne(bo);
    return R.ok();
  }
    /**
     * åˆ é™¤çŸ¥è¯†åº“
     */
    @PostMapping("/remove/{id}")
    public R<String> remove(@PathVariable String id) {
        knowledgeInfoService.removeKnowledge(id);
        return R.ok("删除知识库成功!");
    }
  /**
   * Deletes a knowledge base.
   *
   * @param id identifier taken from the request path and passed to the knowledge info service
   * @return a success response carrying a confirmation message
   */
  @PostMapping("/remove/{id}")
  public R<String> remove(@PathVariable String id) {
    knowledgeInfoService.removeKnowledge(id);
    return R.ok("删除知识库成功!");
  }
    /**
     * ä¿®æ”¹çŸ¥è¯†åº“
     */
    @Log(title = "知识库", businessType = BusinessType.UPDATE)
    @PostMapping("/edit")
    public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
        return toAjax(knowledgeInfoService.updateByBo(bo));
    }
  /**
   * Updates a knowledge base.
   *
   * @param bo knowledge base data from the request body
   * @return the result of the service update converted by {@code toAjax}
   */
  @Log(title = "知识库", businessType = BusinessType.UPDATE)
  @PostMapping("/edit")
  public R<Void> edit(@RequestBody KnowledgeInfoBo bo) {
    return toAjax(knowledgeInfoService.updateByBo(bo));
  }
    /**
     * å¯¼å‡ºçŸ¥è¯†åº“列表
     */
    @Log(title = "知识库", businessType = BusinessType.EXPORT)
    @PostMapping("/export")
    public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
        List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
        ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
    }
  /**
   * Exports the knowledge base list as an Excel document written to the HTTP response.
   *
   * @param bo query criteria selecting the knowledge bases to export
   * @param response HTTP response handed to {@code ExcelUtil.exportExcel}
   */
  @Log(title = "知识库", businessType = BusinessType.EXPORT)
  @PostMapping("/export")
  public void export(KnowledgeInfoBo bo, HttpServletResponse response) {
    List<KnowledgeInfoVo> list = knowledgeInfoService.queryList(bo);
    ExcelUtil.exportExcel(list, "知识库", KnowledgeInfoVo.class, response);
  }
    /**
     * æŸ¥è¯¢çŸ¥è¯†é™„件信息
     */
    @GetMapping("/detail/{kid}")
    public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery, @PathVariable String kid) {
        bo.setKid(kid);
        return attachService.queryPageList(bo, pageQuery);
    }
  /**
   * Queries knowledge attachment information, paged.
   *
   * @param bo query criteria; its kid is overwritten with the path value
   * @param pageQuery paging parameters
   * @param kid identifier taken from the request path
   * @return the page returned by the attachment service
   */
  @GetMapping("/detail/{kid}")
  public TableDataInfo<KnowledgeAttachVo> attach(KnowledgeAttachBo bo, PageQuery pageQuery,
      @PathVariable String kid) {
    bo.setKid(kid);
    return attachService.queryPageList(bo, pageQuery);
  }
    /**
     * ä¸Šä¼ çŸ¥è¯†åº“附件
     */
    @PostMapping(value = "/attach/upload")
    public R<String> upload(KnowledgeInfoUploadBo bo) {
        knowledgeInfoService.upload(bo);
        return R.ok("上传知识库附件成功!");
    }
  /**
   * Uploads a knowledge base attachment.
   *
   * @param bo upload request data passed to the knowledge info service
   * @return a success response carrying a confirmation message
   */
  @PostMapping(value = "/attach/upload")
  public R<String> upload(KnowledgeInfoUploadBo bo) {
    knowledgeInfoService.upload(bo);
    return R.ok("上传知识库附件成功!");
  }
    /**
     * èŽ·å–çŸ¥è¯†åº“é™„ä»¶è¯¦ç»†ä¿¡æ¯
     *
     * @param id ä¸»é”®
     */
    @GetMapping("attach/info/{id}")
    public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
                                              @PathVariable Long id) {
        return R.ok(attachService.queryById(id));
    }
  /**
   * Gets the detailed information of a knowledge base attachment.
   *
   * @param id primary key of the attachment; must not be null
   * @return a success response wrapping the attachment returned by the attachment service
   */
  @GetMapping("attach/info/{id}")
  public R<KnowledgeAttachVo> getAttachInfo(@NotNull(message = "主键不能为空")
  @PathVariable Long id) {
    return R.ok(attachService.queryById(id));
  }
    /**
     * åˆ é™¤çŸ¥è¯†åº“附件
     */
    @PostMapping("attach/remove/{kid}")
    public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
                                @PathVariable String kid) {
        attachService.removeKnowledgeAttach(kid);
        return R.ok();
    }
  /**
   * Deletes a knowledge base attachment.
   *
   * @param kid identifier taken from the request path; must not be empty
   * @return an empty success response
   */
  @PostMapping("attach/remove/{kid}")
  public R<Void> removeAttach(@NotEmpty(message = "主键不能为空")
  @PathVariable String kid) {
    attachService.removeKnowledgeAttach(kid);
    return R.ok();
  }
    /**
     * æŸ¥è¯¢çŸ¥è¯†ç‰‡æ®µ
     */
    @GetMapping("/fragment/list/{docId}")
    public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo, PageQuery pageQuery, @PathVariable String docId) {
        bo.setDocId(docId);
        return fragmentService.queryPageList(bo, pageQuery);
    }
  /**
   * Queries knowledge fragments, paged.
   *
   * @param bo query criteria; its docId is overwritten with the path value
   * @param pageQuery paging parameters
   * @param docId document identifier taken from the request path
   * @return the page returned by the fragment service
   */
  @GetMapping("/fragment/list/{docId}")
  public TableDataInfo<KnowledgeFragmentVo> fragmentList(KnowledgeFragmentBo bo,
      PageQuery pageQuery, @PathVariable String docId) {
    bo.setDocId(docId);
    return fragmentService.queryPageList(bo, pageQuery);
  }
    /**
     * ä¸Šä¼ æ–‡ä»¶ç¿»è¯‘
     */
    @PostMapping("/translationByFile")
    @ResponseBody
    public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
        return attachService.translationByFile(file, targetLanguage);
    }
  /**
   * Translates an uploaded file.
   *
   * @param file the uploaded file (request parameter {@code file})
   * @param targetLanguage target language passed through to the attachment service
   * @return the string returned by {@code attachService.translationByFile}
   */
  @PostMapping("/translationByFile")
  @ResponseBody
  public String translationByFile(@RequestParam("file") MultipartFile file, String targetLanguage) {
    return attachService.translationByFile(file, targetLanguage);
  }
  /**
   * Extracts the images from a PDF, has an AI model recognize the content of each image, and
   * returns the recognition results.
   *
   * @param file the uploaded PDF file (request part {@code file})
   * @return a success response wrapping the results returned by the PDF image extract service
   * @throws IOException if the PDF image extract service fails with an I/O error
   */
  @PostMapping("/extract-images")
  @Operation(summary = "提取PDF中的图片并调用大模型,识别图片内容并返回", description = "提取PDF中的图片并调用gpt-4o-mini,识别图片内容并返回")
  public R<List<PdfFileContentResult>> extractImages(
      @RequestPart("file") MultipartFile file
  ) throws IOException {
    return R.ok(pdfImageExtractService.extractImages(file));
  }
}