本文旨在讲解如何使用 Spring Boot + Ollama + Qdrant + DeepSeek 来实现知识库及大模型问答。
话不多说直接上手

一、本地安装ollama服务(相信此项安装步骤大家近期都已经接触过,这里就不再过多赘述了)

#我本地安装的是0.5.7版本
ollama version is 0.5.7
安装deepseek-r1:8b 模型(用于对话问答)
安装nomic-embed-text:latest 模型
该模型的作用是:将文本生成为向量(embedding),供向量数据库存储和检索

在这里插入图片描述在这里插入图片描述

二、安装qdrant服务

登录qdrant官网根据提示进行部署安装
https://qdrant.tech/documentation/guides/installation/

三、本地搭建SpringBoot环境。

pom中引入以下依赖:

<!-- HTTP client used to call both the Ollama API and the vector database -->
		<dependency>
			<groupId>com.squareup.okhttp3</groupId>
			<artifactId>okhttp</artifactId>
			<version>4.8.0</version>
		</dependency>
		<!-- Qdrant Java client. NOTE(review): the QdrantService shown in this article
		     talks to Qdrant over plain HTTP via OkHttp and does not reference any class
		     from this artifact; confirm whether the dependency is actually needed. -->
		<dependency>
			<groupId>io.qdrant</groupId>
			<artifactId>client</artifactId>
			<version>1.7.0</version>
		</dependency>
		<!-- Jackson: JSON serialization of request bodies and parsing of responses -->
		<dependency>
			<groupId>com.fasterxml.jackson.core</groupId>
			<artifactId>jackson-databind</artifactId>
			<version>2.10.0</version>
		</dependency>

在配置文件中添加以下配置:
在这里插入图片描述

Ollama 客户端
/**
 * Minimal client for an Ollama text-generation HTTP endpoint.
 *
 * <p>The endpoint URL and model name are injected from the
 * {@code ollama.api-url} and {@code ollama.model} properties. Each call posts a
 * non-streaming JSON request and returns the {@code response} field of the reply.
 */
@Service
public class OllamaService {
    /** Content type attached to every request body. */
    private static final MediaType JSON_MEDIA_TYPE = MediaType.parse("application/json");

    private final OkHttpClient client;
    private final ObjectMapper mapper = new ObjectMapper();
    private final String apiUrl;
    private final String model;

    /**
     * @param apiUrl URL of the generation endpoint ({@code ollama.api-url})
     * @param model  name of the model to query ({@code ollama.model})
     */
    public OllamaService(
            @Value("${ollama.api-url}") String apiUrl,
            @Value("${ollama.model}") String model
    ) {
        this.apiUrl = apiUrl;
        this.model = model;

        // OkHttp defaults to 10 s connect / 10 s read timeouts, which is too short
        // for large-model inference: allow 30 s to connect and 5 minutes to read.
        OkHttpClient.Builder httpBuilder = new OkHttpClient.Builder();
        httpBuilder.connectTimeout(30, TimeUnit.SECONDS);
        httpBuilder.readTimeout(300, TimeUnit.SECONDS);
        this.client = httpBuilder.build();
    }

    /**
     * Sends {@code prompt} to the configured model and returns the generated text.
     *
     * @param prompt the full prompt to send
     * @return the {@code response} field of the JSON reply
     * @throws IOException if the call fails or the server answers with a non-2xx status
     */
    public String generateResponse(String prompt) throws IOException {
        Map<String, Object> payload = new HashMap<>();
        payload.put("model", model);
        payload.put("prompt", prompt);
        payload.put("stream", false); // ask for one complete JSON reply, not a stream

        String json = mapper.writeValueAsString(payload);
        Request httpRequest = new Request.Builder()
                .url(apiUrl)
                .post(RequestBody.create(json, JSON_MEDIA_TYPE))
                .build();

        try (Response httpResponse = client.newCall(httpRequest).execute()) {
            String raw = httpResponse.body().string();
            if (httpResponse.isSuccessful()) {
                OllamaResponse parsed = mapper.readValue(raw, OllamaResponse.class);
                return parsed.getResponse();
            }
            throw new IOException("Ollama API error: " + raw);
        }
    }

    /** Subset of the reply that is mapped; unknown JSON fields are ignored. */
    @Data
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class OllamaResponse {
        @JsonProperty("model")
        private String model;

        @JsonProperty("created_at")
        private String createdAt;

        @JsonProperty("response")
        private String response;
    }
}
Qdrant 客户端(同步 HTTP 调用)
/**
 * Synchronous HTTP client for a single Qdrant collection.
 *
 * <p>The server address and collection name are injected from the
 * {@code qdrant.host}, {@code qdrant.port} and {@code qdrant.collection}
 * properties. All calls are blocking and use JSON request bodies.
 */
@Service
public class QdrantService {
    /** Content type attached to every request body. */
    private static final MediaType JSON = MediaType.parse("application/json");

    private final OkHttpClient client = new OkHttpClient();
    private final ObjectMapper mapper = new ObjectMapper();
    private final String baseUrl;
    private final String collection;

    /**
     * @param host       Qdrant host name or IP ({@code qdrant.host})
     * @param port       Qdrant HTTP port ({@code qdrant.port})
     * @param collection name of the collection every operation targets
     */
    public QdrantService(
            @Value("${qdrant.host}") String host,
            @Value("${qdrant.port}") int port,
            @Value("${qdrant.collection}") String collection
    ) {
        this.baseUrl = "http://" + host + ":" + port;
        this.collection = collection;
    }

    /**
     * Creates the collection with cosine distance unless it already exists.
     *
     * @param vectorSize dimension of the vectors that will be stored
     * @throws IOException if the create request fails with a status other than 400
     */
    public void createCollection(int vectorSize) throws IOException {
        // A successful GET on the collection URL means there is nothing to create.
        Request checkRequest = new Request.Builder()
                .url(collectionUrl())
                .get()
                .build();
        try (Response response = client.newCall(checkRequest).execute()) {
            if (response.isSuccessful()) {
                System.out.println("Collection " + collection + " already exists.");
                return;
            }
        }

        Map<String, Object> vectors = new HashMap<>();
        vectors.put("size", vectorSize);
        vectors.put("distance", "Cosine");
        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("vectors", vectors);

        Request request = new Request.Builder()
                .url(collectionUrl())
                .put(jsonBody(requestBody))
                .build();
        try (Response response = client.newCall(request).execute()) {
            // HTTP 400 is deliberately tolerated here.
            // NOTE(review): presumably this covers an "already exists" answer when the
            // collection appears between the check and the create; it also hides other
            // bad-request errors - confirm against the Qdrant version in use.
            if (!response.isSuccessful() && response.code() != 400) {
                throw new IOException("Failed to create collection: " + response.body().string());
            }
        }
    }

    /**
     * Inserts or overwrites one point, storing {@code text} in its payload.
     *
     * @param id     point id
     * @param vector embedding of {@code text}
     * @param text   original text, stored under the payload key {@code text}
     * @throws IOException if the request fails or the server answers with a non-2xx status
     */
    public void upsertVector(String id, List<Double> vector, String text) throws IOException {
        Map<String, Object> point = new HashMap<>();
        point.put("id", id);
        point.put("vector", vector);
        point.put("payload", Collections.singletonMap("text", text));

        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("points", Collections.singletonList(point));

        Request request = new Request.Builder()
                .url(collectionUrl() + "/points")
                .put(jsonBody(requestBody))
                .build();
        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Failed to upsert vector: " + response.body().string());
            }
        }
    }

    /**
     * Runs a similarity search and returns the stored texts of the hits.
     *
     * <p>Hits without a payload are reported on {@code System.err} and skipped;
     * hits whose payload has no {@code text} are skipped as well, so the returned
     * list never contains {@code null}. A reply without a {@code result} array
     * yields an empty list.
     *
     * @param vector query embedding
     * @param limit  maximum number of hits to request
     * @return texts of the hits in the order returned by the server, possibly empty
     * @throws IOException if the request fails or the server answers with a non-2xx status
     */
    public List<String> searchVectors(List<Double> vector, int limit) throws IOException {
        Map<String, Object> requestBody = new HashMap<>();
        requestBody.put("vector", vector);
        requestBody.put("limit", limit);
        requestBody.put("with_payload", true); // required so that hits carry their payload

        Request request = new Request.Builder()
                .url(collectionUrl() + "/points/search")
                .post(jsonBody(requestBody))
                .build();

        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Search failed: " + response.body().string());
            }
            QdrantSearchResponse searchResponse = mapper.readValue(
                    response.body().string(), QdrantSearchResponse.class
            );

            List<String> results = new ArrayList<>();
            if (searchResponse == null || searchResponse.getResult() == null) {
                return results;
            }
            for (QdrantResult result : searchResponse.getResult()) {
                QdrantPayload payload = (result == null) ? null : result.getPayload();
                if (payload == null) {
                    System.err.println("Payload 为空");
                    continue;
                }
                if (payload.getText() != null) {
                    results.add(payload.getText());
                }
            }
            return results;
        }
    }

    /** Returns the URL of the configured collection. */
    private String collectionUrl() {
        return baseUrl + "/collections/" + collection;
    }

    /** Serializes {@code payload} to JSON and wraps it as a request body. */
    private RequestBody jsonBody(Object payload) throws IOException {
        return RequestBody.create(mapper.writeValueAsString(payload), JSON);
    }

    /** Top-level search reply; unknown JSON fields are ignored. */
    @Data
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class QdrantSearchResponse {
        private List<QdrantResult> result;
    }

    /** One search hit; unknown JSON fields are ignored. */
    @Data
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class QdrantResult {
        private String id;
        private double score;
        private QdrantPayload payload;
    }

    /** Payload of a hit; only the {@code text} key is mapped. */
    @Data
    @JsonIgnoreProperties(ignoreUnknown = true)
    private static class QdrantPayload {
        @JsonProperty("text")
        private String text;
    }
}
向量生成服务
/**
 * Generates text embeddings by calling an Ollama embedding HTTP endpoint.
 *
 * <p>The endpoint URL and model name are injected from the
 * {@code ollama.embed.api} and {@code ollama.embed.model} properties.
 */
@Service
public class EmbeddingService {
    private static final MediaType JSON = MediaType.parse("application/json");

    private final OkHttpClient client = new OkHttpClient();
    private final ObjectMapper mapper = new ObjectMapper();
    private final String ollamaEmbedApi;
    private final String embeddingModel;

    /**
     * @param ollamaEmbedApi URL of the embedding endpoint ({@code ollama.embed.api})
     * @param embeddingModel name of the embedding model ({@code ollama.embed.model})
     */
    public EmbeddingService(
            @Value("${ollama.embed.api}") String ollamaEmbedApi,
            @Value("${ollama.embed.model}") String embeddingModel
    ) {
        this.ollamaEmbedApi = ollamaEmbedApi;
        this.embeddingModel = embeddingModel;
    }

    /**
     * Asks Ollama for the embedding vector of {@code text}.
     *
     * @param text the text to embed
     * @return the non-empty embedding vector read from the reply's {@code embedding} field
     * @throws IOException if the call fails, the server answers with a non-2xx status,
     *                     or the reply carries no embedding
     */
    public List<Double> embed(String text) throws IOException {
        Map<String, Object> requestBodyMap = new HashMap<>();
        requestBodyMap.put("model", embeddingModel);
        requestBodyMap.put("prompt", text);
        String jsonBody = mapper.writeValueAsString(requestBodyMap);

        Request request = new Request.Builder()
                .url(ollamaEmbedApi)
                .post(RequestBody.create(jsonBody, JSON))
                .build();

        try (Response response = client.newCall(request).execute()) {
            if (!response.isSuccessful()) {
                throw new IOException("Ollama 嵌入服务调用失败: " + response.body().string());
            }
            OllamaEmbedResponse embedResponse = mapper.readValue(
                    response.body().string(), OllamaEmbedResponse.class
            );
            List<Double> embedding = (embedResponse == null) ? null : embedResponse.getEmbedding();
            // Fail here rather than letting callers forward a null/empty vector
            // to the vector database, where the error would be far less obvious.
            if (embedding == null || embedding.isEmpty()) {
                throw new IOException("Ollama 嵌入服务未返回向量: model=" + embeddingModel);
            }
            return embedding;
        }
    }

    /** Reply of the embedding endpoint; only the {@code embedding} field is mapped. */
    @Data
    private static class OllamaEmbedResponse {
        private List<Double> embedding;
    }
}
向量数据初始化
@Component
public class DataInitializer {
    private final QdrantService qdrantService;
    private final EmbeddingService embeddingService;

    @Autowired
    public DataInitializer(QdrantService qdrantService, EmbeddingService embeddingService) {
        this.qdrantService = qdrantService;
        this.embeddingService = embeddingService;
    }

    @EventListener(ApplicationReadyEvent.class)
    public void initData() throws Exception {
        qdrantService.createCollection(768); // 根据嵌入模型维度设置(如 nomic-embed-text 是 768 维)

        // 存储示例数据
        Arrays.asList(
                new Doc("123e4567-e89b-12d3-a456-426614174000", "DeepSeek 是一家中国的人工智能公司"),
                new Doc("123e4567-e89b-12d3-a456-426614174001", "Ollama 支持在本地运行大语言模型"),
        ).forEach(doc -> {
            try {
                List<Double> vector = embeddingService.embed(doc.text);
                qdrantService.upsertVector(doc.id, vector, doc.text);
            } catch (Exception e) {
                e.printStackTrace();
            }
        });
    }
RAG 控制器
/**
 * REST entry point of the RAG flow:
 * user request -> Spring Boot (REST API) -> Qdrant vector search -> Ollama (DeepSeek model) -> answer.
 */
@Slf4j
@RestController
@RequestMapping(value = "/ragController")
public class RagController {
    private final OllamaService ollamaService;
    private final QdrantService qdrantService;
    private final EmbeddingService embeddingService;

    @Autowired
    public RagController(
            OllamaService ollamaService,
            QdrantService qdrantService,
            EmbeddingService embeddingService
    ) {
        this.ollamaService = ollamaService;
        this.qdrantService = qdrantService;
        this.embeddingService = embeddingService;
    }

    /**
     * Answers a question using the most similar stored texts as context.
     *
     * @param question the raw request body, used as the question
     * @return the model's answer, or a fixed fallback message if any step fails
     */
    @PostMapping("/ask")
    public String askQuestion(@RequestBody String question) {
        String answer;
        try {
            // Embed the question, then fetch the 3 most similar stored texts
            // (the second argument is the maximum number of hits to return).
            List<String> hits = qdrantService.searchVectors(embeddingService.embed(question), 3);

            // Build the prompt: retrieved texts (one per line) followed by the question.
            String prompt = String.format(
                    "基于以下上下文回答问题:\n%s\n问题:%s",
                    String.join("\n", hits),
                    question
            );

            answer = ollamaService.generateResponse(prompt);
        } catch (Exception e) {
            log.error("处理请求失败", e);
            answer = "服务暂时不可用";
        }
        return answer;
    }
}

四、完成以上配置及代码编写后,即可启动springboot服务与deepseek进行聊天了。

服务启动后,会初始化qdrant向量数据,可登录qdrant服务的控制台查看:
http://IP:6333/dashboard#/collections
在这里插入图片描述
一切就绪通过postman进行访问:
在这里插入图片描述
这里既可以提问向量数据库中已初始化的内容,也可以问一些其他问题,由大模型来回答。如果想让回答更加精确,就持续向qdrant中补充知识数据吧(注意:这只是扩充检索用的知识库,并不是对模型进行训练),祝你早日完成专业知识库的搭建。

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐