MySQL 5种 批量更新介绍以及性能对比测试
本文对比分析了MySQL中五种批量更新方式的性能差异: INSERT ON DUPLICATE KEY UPDATE - 适用于重复值较多的场景,仅更新冲突字段索引 CASE WHEN - 最推荐的批量更新方式,单条语句完成多行更新 临时表+JOIN - 适合大数据量更新,但需额外创建临时表 REPLACE INTO - 性能较差,先删除再插入所有索引 事务批量提交 - 减少事务开销但本质上仍是单
文章目录
MySQL批量更新性能对比测试
本文通过具体的测试案例,对比分析MySQL中5种常见的批量更新方式的性能差异,并给出各自的适用场景。
1、INSERT ON DUPLICATE KEY UPDATE
2、CASE WHEN
3、临时表+JOIN
4、REPLACE INTO
5、事务批量提交
INSERT ON DUPLICATE KEY UPDATE分析
- 尝试插入新行
- 如果遇到重复键冲突:
a. 直接更新指定字段(UPDATE操作)
b. 只维护受影响字段的索引更新 - 如果没有冲突:
a. 插入成功
b. 维护所有相关索引的插入
例子:
INSERT INTO users (id, name) VALUES
(1, 'John Updated'),
(2, 'Jane'),
(3, 'Bob')
ON DUPLICATE KEY UPDATE name = VALUES(name);
CASE WHEN 分析
第1步:WHERE 条件筛选
SELECT * FROM table_name WHERE id IN (1, 2, 3);
– 假设找到3行数据
第2步:对每行应用 CASE WHEN 计算新值
Row 1: id=1 → CASE 1 WHEN 1 THEN ‘A’ → ‘A’
Row 2: id=2 → CASE 2 WHEN 2 THEN ‘B’ → ‘B’
Row 3: id=3 → CASE 3 WHEN 3 THEN ‘C’ → ‘C’
第3步:批量执行更新
UPDATE row1 SET column = ‘A’
UPDATE row2 SET column = ‘B’
UPDATE row3 SET column = ‘C’
但这些更新是在一个操作中完成的,不是分别执行
例子:
UPDATE table_name
SET column = CASE id
WHEN 1 THEN 'A'
WHEN 2 THEN 'B'
WHEN 3 THEN 'C'
END
WHERE id IN (1, 2, 3);
临时表+JOIN分析
-- 第一步:创建临时表结构
CREATE TEMPORARY TABLE temp_updates (
id INT,
new_value VARCHAR(255)
);
-- 第二步:把所有要更新的数据装入临时表
INSERT INTO temp_updates VALUES
(1, 'A'), -- 要把 id=1 的记录更新为 'A'
(2, 'B'), -- 要把 id=2 的记录更新为 'B'
(3, 'C'), -- 要把 id=3 的记录更新为 'C'
(100, 'Z'), -- 要把 id=100 的记录更新为 'Z'
-- ... 可能有成千上万条数据
(50000, 'Last');
-- 第三步:通过 JOIN 进行批量更新
UPDATE table_name t -- 原始表(目标表)
JOIN temp_updates tu -- 临时表(数据源表)
ON t.id = tu.id -- 连接条件:通过 id 匹配
SET t.column = tu.new_value; -- 更新操作:用临时表的值更新原表
本质就是先创建临时表 然后填充需要更新的数据 最后将临时表和原始表连接 然后 更新原始表
例子:
-- 创建临时表
CREATE TEMPORARY TABLE temp_updates (
id INT,
new_value VARCHAR(255)
);
-- 插入要更新的数据
INSERT INTO temp_updates VALUES
(1, 'A'), (2, 'B'), (3, 'C');
-- 批量更新
UPDATE table_name t
JOIN temp_updates tu ON t.id = tu.id
SET t.column = tu.new_value;
-- 清理临时表
DROP TEMPORARY TABLE temp_updates;
REPLACE INTO 分析
对于每一行数据
- 查找是否存在重复键
- 如果存在:
a. 标记旧行为删除(DELETE操作)
b. 维护所有相关索引的删除
c. 插入新行(INSERT操作)
d. 维护所有相关索引的插入 - 如果不存在:
a. 直接插入新行
b. 维护所有相关索引的插入
- 先DELETE再INSERT的双重操作
- 需要重建所有索引
例子:
REPLACE INTO table_name (id, column1, column2) VALUES
(1, 'value1', 'value2'),
(2, 'value3', 'value4'),
(3, 'value5', 'value6');
事务批量提交分析
包裹在一个事务中 去执行多个update 语句 本质不是批量更新 只是减少了每一update都会开始事务的消耗
例子:
START TRANSACTION;
UPDATE table_name SET column = 'A' WHERE id = 1;
UPDATE table_name SET column = 'B' WHERE id = 2;
UPDATE table_name SET column = 'C' WHERE id = 3;
COMMIT;
以下是一个简单的测试性能的过程 可以当作简单参考
1. 测试环境准备
1.1 创建测试表
-- 创建用户表,模拟真实业务场景
CREATE TABLE users (
id INT PRIMARY KEY AUTO_INCREMENT,
name VARCHAR(50) NOT NULL,
email VARCHAR(100) NOT NULL UNIQUE,
phone VARCHAR(20),
department_id INT,
salary DECIMAL(10,2),
status ENUM('active', 'inactive') DEFAULT 'active',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP,
INDEX idx_department (department_id),
INDEX idx_status (status),
INDEX idx_name (name)
) ENGINE=InnoDB;
-- 创建部门表,用于多表关联测试
CREATE TABLE departments (
id INT PRIMARY KEY,
name VARCHAR(50),
bonus_rate DECIMAL(3,2) DEFAULT 1.00
);
1.2 插入测试数据
-- 插入部门数据
INSERT INTO departments (id, name, bonus_rate) VALUES
(1, '技术部', 1.20),
(2, '销售部', 1.15),
(3, '市场部', 1.10),
(4, '人事部', 1.05),
(5, '财务部', 1.08);
-- 插入10万条用户测试数据
DELIMITER $$
CREATE PROCEDURE insert_test_data()
BEGIN
DECLARE i INT DEFAULT 1;
DECLARE batch_size INT DEFAULT 1000;
WHILE i <= 100000 DO
INSERT INTO users (name, email, phone, department_id, salary)
SELECT
CONCAT('User_', n + i - 1),
CONCAT('user', n + i - 1, '@test.com'),
CONCAT('138', LPAD(n + i - 1, 8, '0')),
((n + i - 1) % 5) + 1,
ROUND(5000 + RAND() * 15000, 2)
FROM (
SELECT @row := @row + 1 as n
FROM information_schema.tables t1,
information_schema.tables t2,
(SELECT @row := 0) r
LIMIT batch_size
) numbers
WHERE i + batch_size - 1 <= 100000;
SET i = i + batch_size;
END WHILE;
END$$
DELIMITER ;
-- 执行数据插入
CALL insert_test_data();
-- 验证数据
SELECT COUNT(*) as total_users,
MIN(id) as min_id,
MAX(id) as max_id
FROM users;
2. 性能测试脚本
2.1 测试方法1:CASE WHEN(推荐方式)
-- 测试更新1万条记录的salary字段
SET @start_time = NOW(6);
UPDATE users
SET salary = CASE id
WHEN 1 THEN 8000.00
WHEN 2 THEN 8100.00
WHEN 3 THEN 8200.00
-- ... 这里可以继续添加更多条件
-- 为了测试,我们生成一个包含10000个WHEN条件的语句
END,
updated_at = NOW()
WHERE id BETWEEN 1 AND 10000;
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as 'CASE WHEN方式耗时(毫秒)';
2.2 测试方法2:INSERT … ON DUPLICATE KEY UPDATE
SET @start_time = NOW(6);
INSERT INTO users (id, name, email, phone, department_id, salary)
SELECT
id,
name,
email,
phone,
department_id,
ROUND(salary * 1.1, 2)
FROM users
WHERE id BETWEEN 10001 AND 20000
ON DUPLICATE KEY UPDATE
salary = VALUES(salary),
updated_at = NOW();
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as 'INSERT ON DUPLICATE方式耗时(毫秒)';
2.3 测试方法3:REPLACE INTO
SET @start_time = NOW(6);
REPLACE INTO users (id, name, email, phone, department_id, salary, status, created_at)
SELECT
id,
name,
email,
phone,
department_id,
ROUND(salary * 1.1, 2),
status,
created_at
FROM users
WHERE id BETWEEN 20001 AND 30000;
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as 'REPLACE INTO方式耗时(毫秒)';
2.4 测试方法4:临时表 + JOIN更新
SET @start_time = NOW(6);
-- 创建临时表
CREATE TEMPORARY TABLE temp_salary_updates (
user_id INT PRIMARY KEY,
new_salary DECIMAL(10,2)
);
-- 插入要更新的数据
INSERT INTO temp_salary_updates (user_id, new_salary)
SELECT id, ROUND(salary * 1.1, 2)
FROM users
WHERE id BETWEEN 30001 AND 40000;
-- 批量更新
UPDATE users u
JOIN temp_salary_updates tsu ON u.id = tsu.user_id
SET u.salary = tsu.new_salary,
u.updated_at = NOW();
-- 清理临时表
DROP TEMPORARY TABLE temp_salary_updates;
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as '临时表+JOIN方式耗时(毫秒)';
2.5 测试方法5:事务批量提交
SET @start_time = NOW(6);
START TRANSACTION;
-- 使用存储过程批量更新
DELIMITER $$
CREATE PROCEDURE batch_update_with_transaction()
BEGIN
DECLARE done INT DEFAULT FALSE;
DECLARE user_id INT;
DECLARE new_salary DECIMAL(10,2);
DECLARE cur CURSOR FOR
SELECT id, ROUND(salary * 1.1, 2)
FROM users
WHERE id BETWEEN 40001 AND 50000;
DECLARE CONTINUE HANDLER FOR NOT FOUND SET done = TRUE;
OPEN cur;
update_loop: LOOP
FETCH cur INTO user_id, new_salary;
IF done THEN
LEAVE update_loop;
END IF;
UPDATE users
SET salary = new_salary, updated_at = NOW()
WHERE id = user_id;
END LOOP;
CLOSE cur;
END$$
DELIMITER ;
CALL batch_update_with_transaction();
COMMIT;
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as '事务批量提交方式耗时(毫秒)';
-- 清理存储过程
DROP PROCEDURE batch_update_with_transaction;
3. 复杂场景测试
3.1 多表关联更新测试(临时表方式)
SET @start_time = NOW(6);
-- 根据部门调整员工薪资
CREATE TEMPORARY TABLE temp_dept_updates (
user_id INT PRIMARY KEY,
new_salary DECIMAL(10,2),
bonus_amount DECIMAL(10,2)
);
-- 准备更新数据:薪资 * 部门奖金系数 + 固定奖金
INSERT INTO temp_dept_updates (user_id, new_salary, bonus_amount)
SELECT
u.id,
ROUND(u.salary * d.bonus_rate, 2),
ROUND(u.salary * 0.1, 2)
FROM users u
JOIN departments d ON u.department_id = d.id
WHERE u.id BETWEEN 50001 AND 60000;
-- 批量更新
UPDATE users u
JOIN temp_dept_updates tdu ON u.id = tdu.user_id
SET u.salary = tdu.new_salary,
u.updated_at = NOW();
DROP TEMPORARY TABLE temp_dept_updates;
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as '多表关联更新耗时(毫秒)';
3.2 条件性批量更新测试
SET @start_time = NOW(6);
-- 只更新薪资低于平均值的员工
UPDATE users
SET salary = CASE
WHEN salary < 8000 THEN salary * 1.15
WHEN salary < 12000 THEN salary * 1.10
ELSE salary * 1.05
END,
status = CASE
WHEN salary < 6000 THEN 'inactive'
ELSE 'active'
END,
updated_at = NOW()
WHERE id BETWEEN 60001 AND 70000
AND salary < (SELECT AVG(salary) FROM (SELECT salary FROM users) as avg_table);
SET @end_time = NOW(6);
SELECT TIMESTAMPDIFF(MICROSECOND, @start_time, @end_time) / 1000 as '条件性批量更新耗时(毫秒)';
4. 完整性能测试脚本
-- 一键执行所有性能测试
-- 首先关闭安全模式(仅用于测试)
SET SQL_SAFE_UPDATES = 0;
DELIMITER $
CREATE PROCEDURE full_performance_test()
BEGIN
DECLARE method1_time, method2_time, method3_time, method4_time, method5_time INT;
-- 重置测试数据
UPDATE users SET salary = ROUND(5000 + RAND() * 15000, 2) WHERE id <= 70000 AND id > 0;
-- 方法1: CASE WHEN
SET @start = NOW(6);
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 1 AND 10000;
SET method1_time = TIMESTAMPDIFF(MICROSECOND, @start, NOW(6)) / 1000;
-- 方法2: INSERT ON DUPLICATE
SET @start = NOW(6);
INSERT INTO users (id, name, email, phone, department_id, salary)
SELECT id, name, email, phone, department_id, salary * 1.1
FROM users WHERE id BETWEEN 10001 AND 20000
ON DUPLICATE KEY UPDATE salary = VALUES(salary);
SET method2_time = TIMESTAMPDIFF(MICROSECOND, @start, NOW(6)) / 1000;
-- 方法3: REPLACE INTO (注意:会重置created_at)
SET @start = NOW(6);
REPLACE INTO users (id, name, email, phone, department_id, salary, status, created_at)
SELECT id, name, email, phone, department_id, salary * 1.1, status, created_at
FROM users WHERE id BETWEEN 20001 AND 30000;
SET method3_time = TIMESTAMPDIFF(MICROSECOND, @start, NOW(6)) / 1000;
-- 方法4: 临时表 + JOIN
SET @start = NOW(6);
CREATE TEMPORARY TABLE temp_updates (user_id INT PRIMARY KEY, new_salary DECIMAL(10,2));
INSERT INTO temp_updates SELECT id, salary * 1.1 FROM users WHERE id BETWEEN 30001 AND 40000;
UPDATE users u JOIN temp_updates tu ON u.id = tu.user_id SET u.salary = tu.new_salary;
DROP TEMPORARY TABLE temp_updates;
SET method4_time = TIMESTAMPDIFF(MICROSECOND, @start, NOW(6)) / 1000;
-- 方法5: 事务批量(简化版)
SET @start = NOW(6);
START TRANSACTION;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 40001 AND 41000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 41001 AND 42000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 42001 AND 43000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 43001 AND 44000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 44001 AND 45000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 45001 AND 46000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 46001 AND 47000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 47001 AND 48000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 48001 AND 49000;
UPDATE users SET salary = salary * 1.1 WHERE id BETWEEN 49001 AND 50000;
COMMIT;
SET method5_time = TIMESTAMPDIFF(MICROSECOND, @start, NOW(6)) / 1000;
-- 输出结果
SELECT
'CASE WHEN' as method, method1_time as time_ms
UNION ALL SELECT 'INSERT ON DUPLICATE', method2_time
UNION ALL SELECT 'REPLACE INTO', method3_time
UNION ALL SELECT '临时表+JOIN', method4_time
UNION ALL SELECT '事务批量提交', method5_time
ORDER BY time_ms;
END$$
DELIMITER ;
-- 执行完整测试
CALL full_performance_test();
-- 测试完成后恢复安全模式
SET SQL_SAFE_UPDATES = 1;
5. 预期测试结果分析
基于一般的测试环境,预期的性能排序(从快到慢):
| 排名 | 方法 | 预期耗时 | 说明 |
|---|---|---|---|
| 1 | INSERT ON DUPLICATE KEY UPDATE | 50-150ms | 最优化的批量操作 |
| 2 | CASE WHEN | 80-200ms | 单SQL语句,但CASE表达式有开销 |
| 3 | 临时表+JOIN | 100-300ms | 多步骤,但批量处理 |
| 4 | REPLACE INTO | 200-500ms | 删除+插入开销大 |
| 5 | 事务批量提交 | 300-800ms | 多个独立UPDATE语句 |
6. 方法适用场景总结
6.1 INSERT … ON DUPLICATE KEY UPDATE
最佳场景:
- 简单的字段更新(1-10个字段)
- 中小批量数据(1000-50000行)
- 需要处理INSERT和UPDATE的混合场景
- 对性能要求最高的场景
示例:
-- 用户信息同步
INSERT INTO users (id, name, email, last_login) VALUES
(1001, 'John', 'john@new.com', NOW()),
(1002, 'Jane', 'jane@new.com', NOW())
ON DUPLICATE KEY UPDATE
name = VALUES(name),
email = VALUES(email),
last_login = VALUES(last_login);
6.2 CASE WHEN
最佳场景:
- 根据不同条件设置不同值
- 批量状态转换
- 简单的条件映射
- SQL语句长度可控的情况
示例:
-- 批量状态转换
UPDATE orders
SET status = CASE status
WHEN 'pending' THEN 'processing'
WHEN 'processing' THEN 'shipped'
WHEN 'shipped' THEN 'delivered'
END,
updated_at = NOW()
WHERE status IN ('pending', 'processing', 'shipped');
6.3 临时表 + JOIN
最佳场景:
- 复杂的业务逻辑计算
- 需要多表关联的更新
- 超大批量数据更新(>50000行)
- 需要复杂条件筛选的更新
示例:
-- 复杂的薪资调整:根据部门、绩效、工龄等多个因素
CREATE TEMPORARY TABLE salary_adjustments AS
SELECT
u.id,
CASE
WHEN p.score >= 90 AND u.years_exp >= 5 THEN u.salary * 1.20
WHEN p.score >= 80 AND u.years_exp >= 3 THEN u.salary * 1.15
WHEN p.score >= 70 THEN u.salary * 1.10
ELSE u.salary * 1.05
END * d.bonus_rate as new_salary
FROM users u
JOIN departments d ON u.dept_id = d.id
JOIN performance p ON u.id = p.user_id
WHERE u.status = 'active';
UPDATE users u
JOIN salary_adjustments sa ON u.id = sa.id
SET u.salary = sa.new_salary;
6.4 REPLACE INTO
最佳场景:
- 需要完全替换记录的场景
- 数据导入/同步场景
- 不在意created_at等时间戳字段重置的场景
- 没有复杂外键约束的表
示例:
-- 配置数据同步
REPLACE INTO system_configs (key, value, description) VALUES
('max_upload_size', '100MB', '最大上传文件大小'),
('session_timeout', '3600', '会话超时时间(秒)'),
('api_rate_limit', '1000', 'API每小时请求限制');
6.5 事务批量提交
最佳场景:
- 需要跨表更新的业务场景
- 复杂的业务逻辑需要多步骤操作
- 需要保证数据一致性的场景
- 更新操作涉及不同类型的SQL语句
示例:
-- 订单完成的复杂业务流程
START TRANSACTION;
-- 更新订单状态
UPDATE orders SET status = 'completed', completed_at = NOW() WHERE id = 12345;
-- 更新库存
UPDATE inventory SET quantity = quantity - 1 WHERE product_id = 67890;
-- 增加用户积分
UPDATE users SET points = points + 100 WHERE id = 1001;
-- 记录积分变更日志
INSERT INTO point_logs (user_id, change_amount, reason, created_at)
VALUES (1001, 100, '订单完成奖励', NOW());
-- 更新用户统计
UPDATE user_stats SET total_orders = total_orders + 1,
total_spent = total_spent + 299.99
WHERE user_id = 1001;
COMMIT;
总结
通过以上测试,可以根据具体的业务场景和数据量选择最适合的批量更新方式,在保证数据正确性的前提下获得最佳的性能表现。
1、INSERT ON DUPLICATE KEY UPDATE
2、CASE WHEN
3、临时表+JOIN
4、REPLACE INTO
5、事务批量提交
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐
所有评论(0)