package org.ruoyi.knowledge.chain.vectorstore;

import cn.hutool.core.lang.UUID;
import com.alibaba.fastjson2.JSONObject;
import com.google.gson.internal.LinkedTreeMap;
import io.weaviate.client.Config;
import io.weaviate.client.WeaviateClient;
import io.weaviate.client.base.Result;
import io.weaviate.client.v1.data.model.WeaviateObject;
import io.weaviate.client.v1.data.replication.model.ConsistencyLevel;
import io.weaviate.client.v1.filters.Operator;
import io.weaviate.client.v1.filters.WhereFilter;
import io.weaviate.client.v1.graphql.model.GraphQLResponse;
import io.weaviate.client.v1.graphql.query.argument.NearTextArgument;
import io.weaviate.client.v1.graphql.query.argument.NearVectorArgument;
import io.weaviate.client.v1.graphql.query.fields.Field;
import io.weaviate.client.v1.misc.model.Meta;
import io.weaviate.client.v1.misc.model.ReplicationConfig;
import io.weaviate.client.v1.misc.model.ShardingConfig;
import io.weaviate.client.v1.misc.model.VectorIndexConfig;
import io.weaviate.client.v1.schema.model.DataType;
import io.weaviate.client.v1.schema.model.Property;
import io.weaviate.client.v1.schema.model.Schema;
import io.weaviate.client.v1.schema.model.WeaviateClass;
import jakarta.annotation.PostConstruct;
import jakarta.annotation.Resource;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;
import org.ruoyi.common.core.service.ConfigService;
import org.ruoyi.knowledge.domain.vo.KnowledgeInfoVo;
import org.ruoyi.knowledge.service.IKnowledgeInfoService;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.context.annotation.Lazy;
import org.springframework.stereotype.Service;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * {@link VectorStore} implementation that keeps knowledge fragments in Weaviate.
 *
 * <p>Every knowledge base gets its own Weaviate class whose name is the configured
 * {@code classname} value followed by the knowledge id ({@code kid}). Each stored object
 * carries the properties {@code content}, {@code kid}, {@code docId}, {@code fid} and
 * {@code uuid}.
 *
 * <p>Connection settings are read once from {@link ConfigService} in {@link #loadConfig()}.
 * A new {@link WeaviateClient} is built for every operation.
 */
@Service
@Slf4j
public class WeaviateVectorStore implements VectorStore {

    /** Name of the vectorizer module configured on the class and on each property. */
    private static final String VECTORIZER_MODULE = "text2vec-transformers";

    /** Maximum distance accepted by similarity searches; certainty = 1f - distance / 2f. */
    private static final float MAX_DISTANCE = 1.6f;

    private volatile String protocol;
    private volatile String host;
    private volatile String className;

    @Lazy
    @Resource
    private IKnowledgeInfoService knowledgeInfoService;

    @Lazy
    @Resource
    private ConfigService configService;

    /**
     * Reads the {@code weaviate} configuration category ({@code protocol}, {@code host},
     * {@code classname}) after dependency injection has completed.
     */
    @PostConstruct
    public void loadConfig() {
        this.protocol = configService.getConfigValue("weaviate", "protocol");
        this.host = configService.getConfigValue("weaviate", "host");
        this.className = configService.getConfigValue("weaviate", "classname");
    }

    /**
     * Builds a client from the currently loaded protocol and host.
     *
     * @return a newly created client; nothing is cached between calls
     */
    public WeaviateClient getClient() {
        return new WeaviateClient(new Config(protocol, host));
    }

    /**
     * Fetches the server meta information and logs hostname, version and modules.
     *
     * @return the raw client result, including any error
     */
    public Result<Meta> getMeta() {
        Result<Meta> meta = getClient().misc().metaGetter().run();
        if (meta.getError() != null) {
            log.error("Error: {}", meta.getError().getMessages());
        } else if (meta.getResult() != null) {
            Meta info = meta.getResult();
            log.info("meta.hostname: {}", info.getHostname());
            log.info("meta.version: {}", info.getVersion());
            log.info("meta.modules: {}", info.getModules());
        }
        return meta;
    }

    /**
     * Fetches the complete schema and logs either the error or the schema.
     *
     * @return the raw client result, including any error
     */
    public Result<Schema> getSchemas() {
        Result<Schema> result = getClient().schema().getter().run();
        if (result.hasErrors()) {
            log.error("{}", result.getError());
        } else {
            log.info("{}", result.getResult());
        }
        return result;
    }

    /**
     * Creates the Weaviate class for one knowledge base.
     *
     * <p>The class uses an HNSW index with cosine distance. Only the {@code content}
     * property is left enabled for the vectorizer module; {@code kid}, {@code docId},
     * {@code fid} and {@code uuid} are marked {@code skip}.
     *
     * @param kid knowledge id appended to the configured class name
     * @return the raw client result, including any error
     */
    public Result<Boolean> createSchema(String kid) {
        WeaviateClient client = getClient();

        VectorIndexConfig vectorIndexConfig = VectorIndexConfig.builder()
                .distance("cosine")
                .cleanupIntervalSeconds(300)
                .efConstruction(128)
                .maxConnections(64)
                .vectorCacheMaxObjects(500000L)
                .ef(-1)
                .skip(false)
                .dynamicEfFactor(8)
                .dynamicEfMax(500)
                .dynamicEfMin(100)
                .flatSearchCutoff(40000)
                .build();

        ShardingConfig shardingConfig = ShardingConfig.builder()
                .desiredCount(3)
                .desiredVirtualCount(128)
                .function("murmur3")
                .key("_id")
                .strategy("hash")
                .virtualPerPhysical(128)
                .build();

        ReplicationConfig replicationConfig = ReplicationConfig.builder()
                .factor(1)
                .build();

        JSONObject classModuleSettings = new JSONObject();
        classModuleSettings.put("vectorizeClassName", false);
        JSONObject classModuleConfig = new JSONObject();
        classModuleConfig.put(VECTORIZER_MODULE, classModuleSettings);

        JSONObject vectorized = propertyModuleConfig(false);
        JSONObject skipped = propertyModuleConfig(true);

        // Plain lists instead of double-brace initialization, which would create an
        // anonymous ArrayList subclass per list.
        List<Property> properties = new ArrayList<>();
        properties.add(textProperty("content",
                "The content of the local knowledge,for search", vectorized));
        properties.add(textProperty("kid",
                "The knowledge id of the local knowledge,for search", skipped));
        properties.add(textProperty("docId",
                "The doc id of the local knowledge,for search", skipped));
        properties.add(textProperty("fid",
                "The fragment id of the local knowledge,for search", skipped));
        properties.add(textProperty("uuid",
                "The uuid id of the local knowledge fragment(same with id properties),for search",
                skipped));

        WeaviateClass clazz = WeaviateClass.builder()
                .className(className + kid)
                .description("local knowledge")
                .vectorIndexType("hnsw")
                .vectorizer(VECTORIZER_MODULE)
                .shardingConfig(shardingConfig)
                .vectorIndexConfig(vectorIndexConfig)
                .replicationConfig(replicationConfig)
                .moduleConfig(classModuleConfig)
                .properties(properties)
                .build();

        Result<Boolean> result = client.schema().classCreator().withClass(clazz).run();
        if (result.hasErrors()) {
            log.error("{}", result.getError());
        } else {
            log.info("{}", result.getResult());
        }
        return result;
    }

    /**
     * Creates the Weaviate class for the given knowledge base; see {@link #createSchema(String)}.
     *
     * @param kid knowledge id
     */
    @Override
    public void newSchema(String kid) {
        createSchema(kid);
    }

    /**
     * Deletes every object of the knowledge base whose {@code fid} property equals {@code fid}.
     *
     * @param kid knowledge id selecting the Weaviate class
     * @param fid fragment id to match
     */
    @Override
    public void removeByKidAndFid(String kid, String fid) {
        deleteByProperty(kid, "fid", fid);
    }

    /**
     * Stores one object per chunk, using the caller-supplied vector for each chunk.
     *
     * <p>Nothing is stored when {@code vectorList} is {@code null}. A failed insert is
     * logged and the remaining chunks are still processed.
     *
     * @param chunkList  text chunks, written to the {@code content} property
     * @param vectorList one vector per chunk, in the same order as {@code chunkList}
     * @param kid        knowledge id selecting the Weaviate class
     * @param docId      document id written to every object
     * @param fidList    one fragment id per chunk, in the same order as {@code chunkList}
     * @throws IllegalArgumentException if {@code vectorList} or {@code fidList} has fewer
     *                                  entries than {@code chunkList}
     */
    @Override
    public void storeEmbeddings(List<String> chunkList, List<List<Double>> vectorList,
                                String kid, String docId, List<String> fidList) {
        if (vectorList == null) {
            log.warn("no vectors supplied for kid {} docId {}, nothing stored", kid, docId);
            return;
        }
        int count = chunkList.size();
        // Fail before the first write so a short list cannot leave a half-stored document.
        if (vectorList.size() < count || fidList.size() < count) {
            throw new IllegalArgumentException("chunkList has " + count + " entries but vectorList has "
                    + vectorList.size() + " and fidList has " + fidList.size());
        }

        WeaviateClient client = getClient();
        String weaviateClass = className + kid;
        for (int i = 0; i < count; i++) {
            String uuid = UUID.randomUUID(true).toString();

            Map<String, Object> properties = new HashMap<>();
            properties.put("content", chunkList.get(i));
            properties.put("kid", kid);
            properties.put("docId", docId);
            properties.put("fid", fidList.get(i));
            properties.put("uuid", uuid);

            Result<WeaviateObject> result = client.data().creator()
                    .withClassName(weaviateClass)
                    .withID(uuid)
                    .withVector(toFloatArray(vectorList.get(i)))
                    .withProperties(properties)
                    .run();
            if (result.hasErrors()) {
                log.error("failed to store fragment {} of doc {} in {}: {}",
                        fidList.get(i), docId, weaviateClass, result.getError());
            }
        }
    }

    /**
     * Deletes every object of the knowledge base whose {@code docId} property equals {@code docId}.
     *
     * @param kid   knowledge id selecting the Weaviate class
     * @param docId document id to match
     */
    @Override
    public void removeByDocId(String kid, String docId) {
        deleteByProperty(kid, "docId", docId);
    }

    /**
     * Drops the Weaviate class of the given knowledge base and logs the outcome.
     *
     * @param kid knowledge id selecting the Weaviate class
     */
    @Override
    public void removeByKid(String kid) {
        Result<Boolean> result = dropClass(kid);
        if (result.hasErrors()) {
            log.error("删除schema失败{}", result.getError());
        } else {
            log.info("删除schema成功{}", result.getResult());
        }
        log.info("drop schema by kid, result = {}", result);
    }

    /**
     * Returns the {@code content} of the objects nearest to the given vector.
     *
     * <p>The result count is limited by the knowledge base's retrieve limit. An empty list
     * is returned when {@code kid} is blank, the vector is {@code null} or empty, the
     * knowledge base cannot be found, or the query fails.
     *
     * @param queryVector query embedding
     * @param kid         knowledge id selecting the Weaviate class
     * @return matching contents in the order returned by Weaviate; never {@code null}
     * @throws NumberFormatException if {@code kid} is not blank and not a valid long
     */
    @Override
    public List<String> nearest(List<Double> queryVector, String kid) {
        if (StringUtils.isBlank(kid) || queryVector == null || queryVector.isEmpty()) {
            return new ArrayList<>();
        }
        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
        if (knowledgeInfoVo == null) {
            log.warn("knowledge base {} not found, returning no matches", kid);
            return new ArrayList<>();
        }

        NearVectorArgument nearVector = NearVectorArgument.builder()
                .vector(toFloatArray(queryVector))
                .distance(MAX_DISTANCE)
                .build();

        String weaviateClass = className + kid;
        Result<GraphQLResponse> result = getClient().graphQL().get()
                .withClassName(weaviateClass)
                .withFields(contentFields())
                .withNearVector(nearVector)
                .withLimit(knowledgeInfoVo.getRetrieveLimit())
                .run();
        return extractField(result, weaviateClass, "content");
    }

    /**
     * Returns the {@code content} of the objects nearest to the given text, using a
     * {@code nearText} search.
     *
     * <p>The result count is limited by the knowledge base's retrieve limit. An empty list
     * is returned when {@code kid} is blank, the knowledge base cannot be found, or the
     * query fails.
     *
     * @param query text to search for
     * @param kid   knowledge id selecting the Weaviate class
     * @return matching contents in the order returned by Weaviate; never {@code null}
     * @throws NumberFormatException if {@code kid} is not blank and not a valid long
     */
    @Override
    public List<String> nearest(String query, String kid) {
        if (StringUtils.isBlank(kid)) {
            return new ArrayList<>();
        }
        KnowledgeInfoVo knowledgeInfoVo = knowledgeInfoService.queryById(Long.valueOf(kid));
        if (knowledgeInfoVo == null) {
            log.warn("knowledge base {} not found, returning no matches", kid);
            return new ArrayList<>();
        }

        WeaviateClient client = getClient();
        NearTextArgument nearText = client.graphQL().arguments().nearTextArgBuilder()
                .concepts(new String[]{query})
                .distance(MAX_DISTANCE)
                .build();

        String weaviateClass = className + kid;
        Result<GraphQLResponse> result = client.graphQL().get()
                .withClassName(weaviateClass)
                .withFields(contentFields())
                .withNearText(nearText)
                .withLimit(knowledgeInfoVo.getRetrieveLimit())
                .run();
        return extractField(result, weaviateClass, "content");
    }

    /**
     * Drops the Weaviate class of the given knowledge base.
     *
     * @param kid knowledge id selecting the Weaviate class
     * @return the raw client result, including any error
     */
    public Result<Boolean> deleteSchema(String kid) {
        Result<Boolean> result = dropClass(kid);
        if (result.hasErrors()) {
            log.error("{}", result.getError());
        } else {
            log.info("{}", result.getResult());
        }
        return result;
    }

    /** Issues the class-delete request for {@code className + kid} without logging. */
    private Result<Boolean> dropClass(String kid) {
        return getClient().schema().classDeleter().withClassName(className + kid).run();
    }

    /** Builds the per-property module configuration with the given {@code skip} flag. */
    private static JSONObject propertyModuleConfig(boolean skip) {
        JSONObject settings = new JSONObject();
        settings.put("vectorizePropertyName", false);
        settings.put("skip", skip);
        JSONObject moduleConfig = new JSONObject();
        moduleConfig.put(VECTORIZER_MODULE, settings);
        return moduleConfig;
    }

    /** Builds a {@code text} property definition with the given name, description and module config. */
    private static Property textProperty(String name, String description, Object moduleConfig) {
        List<String> dataType = new ArrayList<>();
        dataType.add(DataType.TEXT);
        return Property.builder()
                .dataType(dataType)
                .name(name)
                .description(description)
                .moduleConfig(moduleConfig)
                .build();
    }

    /** Returns the fields requested by similarity searches: {@code content} and {@code _additional.distance}. */
    private static Field[] contentFields() {
        Field content = Field.builder().name("content").build();
        Field additional = Field.builder()
                .name("_additional")
                .fields(new Field[]{Field.builder().name("distance").build()})
                .build();
        return new Field[]{content, additional};
    }

    /** Converts a list of doubles to the boxed float array expected by the client. */
    private static Float[] toFloatArray(List<Double> vector) {
        Float[] converted = new Float[vector.size()];
        for (int i = 0; i < converted.length; i++) {
            converted[i] = vector.get(i).floatValue();
        }
        return converted;
    }

    /**
     * Deletes all objects of a knowledge base whose property equals the given value.
     *
     * <p>The lookup query sets no explicit limit, so a single query may return only part
     * of the matches (NOTE(review): presumably capped by the server's default query
     * limit — confirm for the deployed Weaviate version). The lookup is therefore repeated
     * until a round deletes nothing.
     */
    private void deleteByProperty(String kid, String propertyName, String propertyValue) {
        WeaviateClient client = getClient();
        String weaviateClass = className + kid;
        Field uuidField = Field.builder().name("uuid").build();
        WhereFilter where = WhereFilter.builder()
                .path(new String[]{propertyName})
                .operator(Operator.Equal)
                .valueString(propertyValue)
                .build();

        int deleted;
        do {
            Result<GraphQLResponse> result = client.graphQL().get()
                    .withClassName(weaviateClass)
                    .withFields(uuidField)
                    .withWhere(where)
                    .run();

            deleted = 0;
            for (String uuid : extractField(result, weaviateClass, "uuid")) {
                Result<Boolean> deleteResult = client.data().deleter()
                        .withID(uuid)
                        .withClassName(weaviateClass)
                        .withConsistencyLevel(ConsistencyLevel.ALL) // default QUORUM
                        .run();
                if (!deleteResult.hasErrors() && Boolean.TRUE.equals(deleteResult.getResult())) {
                    deleted++;
                } else {
                    log.error("failed to delete object {} from {}: {}",
                            uuid, weaviateClass, deleteResult.getError());
                }
            }
            // Stop as soon as a round makes no progress, so failing deletes cannot loop forever.
        } while (deleted > 0);
    }

    /**
     * Reads one field from every row of a GraphQL {@code Get} response.
     *
     * <p>Expects the response data to look like {@code {"Get": {<class>: [{<field>: value}]}}}.
     * Any missing level, a failed request, or a {@code null} field value yields no entry
     * instead of an exception.
     *
     * @return the field values as strings; empty when nothing could be read
     */
    private static List<String> extractField(Result<GraphQLResponse> result, String weaviateClass,
                                             String fieldName) {
        List<String> values = new ArrayList<>();
        if (result == null || result.hasErrors() || result.getResult() == null) {
            log.error("weaviate query on {} failed: {}", weaviateClass,
                    result == null ? null : result.getError());
            return values;
        }
        Object data = result.getResult().getData();
        if (!(data instanceof Map)) {
            // The request itself succeeded but carried no data section.
            log.warn("weaviate query on {} returned no data: {}", weaviateClass, result.getResult());
            return values;
        }
        Object classes = ((Map<?, ?>) data).get("Get");
        if (!(classes instanceof Map)) {
            return values;
        }
        Object rows = ((Map<?, ?>) classes).get(weaviateClass);
        if (!(rows instanceof List)) {
            return values;
        }
        for (Object row : (List<?>) rows) {
            if (row instanceof Map) {
                Object value = ((Map<?, ?>) row).get(fieldName);
                if (value != null) {
                    values.add(value.toString());
                }
            }
        }
        return values;
    }
}