//一个表有一个ID字段,是主键并且是递增的,现在需要一条SQL把这个表没有的ID查出来

//例如ID的记录如下:

ID

1

2

4

5

7

9

//我们SQL需要把3,6,8这三个值查出来

//这是一个典型的求集合的差集问题:

with ta as(

select 1 id from dual union all

select 2 from dual union all

select 4 from dual union all

select 5 from dual union all

select 7 from dual union all

select 9 from dual)

select level id

from dual

connect by level<=(select max(id)-min(id)+1 from ta)

minus

select id from ta

/

ID

----------

3

6

8

//对于此问题,这里ta中数据很小量的,如果ta的最后一个元素为1000001呢?

//那么怎么样的查询才算高效呢?

//求集合的差集还有其他方法吗?

//下面我们来看看类似情况(求连续递增的数中没有出现的数)的差集怎么的:

create table t as

select 1 id from dual union all

select 2 from dual union all

select 3 from dual union all

select 4 from dual union all

select 456 from dual union all

select 10145 from dual union all

select 1044653 from dual

/

//方法一:使用minus

select count(*)

from (

select level id

from dual

connect by level<=(select max(id)-min(id)+1 from t)

minus

select id from t)

/

COUNT(*)

----------

1044646

Executed in 1.547 seconds

//方法二:使用not exists

select count(*)

from (

select *

from (

select level id

from dual

connect by level<=(select max(id)-min(id)+1 from t)) a

where not exists(

select 1

from t b

where a.id=b.id))

/

COUNT(*)

----------

1044646

Executed in 2.157 seconds

//方法三:使用not in

select count(*)

from (

select *

from (

select level id

from dual

connect by level<=(select max(id)-min(id)+1 from t)) a

where a.id not in(

select b.id

from t b

where a.id=b.id))

/

COUNT(*)

----------

1044646

Executed in 8.39 seconds

//从这里看出,在处理大量数据时,相比于exists,not in(in)的效率是相当低的

//所以建议在应用中要尽量使用exists,少用in

//因为in要进行元素匹配,对比,而exists只需要判断存在性即可

//方法四:使用lag()分析函数

select count(*)

from (

with temp as(

select s,e

from (

select lag(id) over(order by id)+1 s,id-1 e from t)

where e - s >= 0)

select a.s + b.rn -1 h

from temp a,

(

select rownum rn

from (select max(e-s+1) gap from temp)

connect by rownum <= gap) b

where a.s + b.rn-1 <= a.e

order by 1)

/

COUNT(*)

----------

1044646

Executed in 10.313 seconds

SQL> set time on;

7:11:37 SQL> /

COUNT(*)

----------

1044646

Executed in 10.156 seconds

7:11:50 SQL>

//此方法效率最低,因为我们这里用到了lag()分析函数,max(),min(),以及with临时表,

//在一个查询中,使用的函数越多,就会增加oracle的负担,

//所以oracle engine在查理此查询时,也需要额外的开销(时间和资源)

Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐