当前位置: 首页 > news >正文

oracle模糊查询:分区局部全文索引方式(四)

DATABASE:dwtest(210开发库)

--按根级行业ID和大洲进行混合分区
DROP TABLE iquery.lpx_kw_tmp02_bak2;
CREATE TABLE iquery.lpx_kw_tmp02_bak2(
 KEYWORDS                                           VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
)PARTITION BY RANGE(CATEGORY_ROOT_ID)
(    PARTITION p1 VALUES LESS THAN(1) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p2 VALUES LESS THAN(2) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p3 VALUES LESS THAN(3) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p4 VALUES LESS THAN(4) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p5 VALUES LESS THAN(5) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p6 VALUES LESS THAN(6) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p7 VALUES LESS THAN(7) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p8 VALUES LESS THAN(8) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p9 VALUES LESS THAN(9) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p10 VALUES LESS THAN(10) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p11 VALUES LESS THAN(11) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p12 VALUES LESS THAN(12) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p13 VALUES LESS THAN(13) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p14 VALUES LESS THAN(14) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p15 VALUES LESS THAN(15) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p16 VALUES LESS THAN(16) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p17 VALUES LESS THAN(17) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p18 VALUES LESS THAN(18) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p19 VALUES LESS THAN(19) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p20 VALUES LESS THAN(20) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p21 VALUES LESS THAN(21) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p22 VALUES LESS THAN(23) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p23 VALUES LESS THAN(26) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p24 VALUES LESS THAN(28) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p25 VALUES LESS THAN(30) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p26 VALUES LESS THAN(32) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p27 VALUES LESS THAN(34) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p28 VALUES LESS THAN(36) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p29 VALUES LESS THAN(39) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p30 VALUES LESS THAN(41) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p31 VALUES LESS THAN(42) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p32 VALUES LESS THAN(43) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p33 VALUES LESS THAN(44) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p34 VALUES LESS THAN(66) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p35 VALUES LESS THAN(80) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p36 VALUES LESS THAN(322) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p37 VALUES LESS THAN(339) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p38 VALUES LESS THAN(502) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p39 VALUES LESS THAN(509) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p40 VALUES LESS THAN(1500) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p41 VALUES LESS THAN(2000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p42 VALUES LESS THAN(3000) TABLESPACE TBS_EN_DSS_2009,
    PARTITION p43 VALUES LESS THAN(999999999) TABLESPACE TBS_EN_DSS_2009
);


INSERT INTO iquery.lpx_kw_tmp02_bak2
SELECT keywords,
       REGIN_NAME,
       CATEGORY_ROOT_ID,
       SUM(SRH_PV_THIS_5MON) AS SRH_PV_THIS_5MON
  FROM iquery.lpx_kw_tmp02
 WHERE CATEGORY_ROOT_ID IS NOT NULL  --去除类目为空的记录
 GROUP BY keywords,
       REGION_ID,
       REGIN_NAME,
       CATEGORY_ROOT_ID;
COMMIT;

7799710 rows created.

SQL>
Commit complete.

SQL> select count(1) from iquery.lpx_kw_tmp02_bak2;


  COUNT(1)
----------
   7799710



--造测试数据
DROP TABLE iquery.lpx_kw_test;
CREATE TABLE iquery.lpx_kw_test TABLESPACE TBS_EN_DSS_2009
AS
SELECT *
  FROM (
        SELECT SUBSTR(keywords, INSTR(keywords, ' ', 1)+1, INSTR(keywords, ' ', 1, 2) - INSTR(keywords, ' ', 1) - 1) AS kw,
               a.*,
               row_number() OVER(PARTITION BY category_root_id ORDER BY dbms_random.value DESC) AS rw
          FROM iquery.lpx_kw_tmp02_bak2  a
         WHERE length(keywords) - length(REPLACE(keywords, ' ', '')) > 2
       )
 WHERE rw < 100 + 1
   AND length(kw) > 5
 ;
 
 SQL> select count(1) from iquery.lpx_kw_test;


  COUNT(1)
----------
      1577

 

--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select kw, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE a.keywords LIKE '%'||ref_cursor.kw||'%'
    AND a.category_root_id = ref_cursor.category_root_id   --根级行业
    AND a.regin_name = ref_cursor.regin_name               --买家所在大洲
  GROUP BY ref_cursor.kw
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/



--无全文索引的性能
total records:1577
total time(ms):35000
avg time(ms):22.19403931515535827520608750792644261256

 

 

 

 

--创建全文索引
grant ctxapp to etl;
exec ctx_ddl.create_preference('lpxuan_kw','BASIC_LEXER');
--exec ctx_ddl.drop_preference('lpxuan_kw');
CREATE INDEX idx_kw_keyw on iquery.lpx_kw_tmp02_bak2(keywords) INDEXTYPE IS CTXSYS.CONTEXT parameters('lexer lpxuan_kw')
LOCAL (partition p1, partition p2, partition p3, partition p4, partition p5,partition p6, partition p7, partition p8, partition p9, partition p10,
       partition p11, partition p12, partition p13, partition p14, partition p15,partition p16, partition p17, partition p18, partition p19, partition p20,
       partition p21, partition p22, partition p23, partition p24, partition p25,partition p26, partition p27, partition p28, partition p29, partition p30,
       partition p31, partition p32, partition p33, partition p34, partition p35,partition p36, partition p37, partition p38, partition p39, partition p40,
       partition p41, partition p42, partition p43);

--eg:
/*
create index html2_idx on HTML2(newsdescription) indextype is ctxsys.context local
(partition indx parameters('lexer my_lexer'),partition indx01 tablespace
users01  parameters('lexer my_lexer'),partition indx02 tablespace users02 param
eters('lexer my_lexer'),partition indx03 tablespace users03  parameters('lexer m
y_lexer') );
*/


CREATE or replace procedure rel_kw_sync_index as
      begin
      ctx_ddl.sync_index('idx_kw_keyw');
      end;
      /

Procedure created.

Elapsed: 00:00:00.08

VARIABLE jobno number;
BEGIN
  DBMS_JOB.SUBMIT(:jobno,'rel_kw_sync_index();',
  SYSDATE, 'SYSDATE + (1/24/4)');
  commit;
  END;
  /


--根据行业和大洲进行匹配
DROP TABLE iquery.lpx_kw_tmp03;
CREATE TABLE iquery.lpx_kw_tmp03(
 KW                                                 VARCHAR2(256)
 ,REGIN_NAME                                         VARCHAR2(128)
 ,CATEGORY_ROOT_ID                                   NUMBER
 ,SRH_PV_THIS_5MON                                   NUMBER
);

set serveroutput ON;

DECLARE 
  cnt               NUMBER;
  start_date        DATE;
  end_date          DATE;
 
CURSOR c1
IS
 select REPLACE(REPLACE(kw, '-', '/-'), '&', '/&') AS kw, kw AS kw_bak, keywords, regin_name, category_root_id FROM iquery.lpx_kw_test;
 
BEGIN

 cnt := 0;
 start_date := SYSDATE;
 
 FOR ref_cursor IN c1
 LOOP 
 INSERT INTO iquery.lpx_kw_tmp03
 SELECT ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id
        ,SUM(a.srh_pv_this_5mon) AS srh_pv_this_5mon
   FROM iquery.lpx_kw_tmp02_bak2 a
  WHERE contains(a.keywords,ref_cursor.kw) > 0             --走全文索引
    AND a.category_root_id = ref_cursor.category_root_id   --根级行业
    AND a.regin_name = ref_cursor.regin_name               --买家所在大洲
  GROUP BY ref_cursor.kw_bak
        ,ref_cursor.regin_name
        ,ref_cursor.category_root_id;
 COMMIT;
 cnt := cnt + 1;
 
 END LOOP;
 end_date := SYSDATE;
 
 DBMS_OUTPUT.PUT_LINE('total records:'||cnt);
 DBMS_OUTPUT.PUT_LINE('total time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000));
 DBMS_OUTPUT.PUT_LINE('avg time(ms):'||ROUND(TO_NUMBER(END_DATE - START_DATE) * 24 * 60 * 60 * 1000)/cnt);
 
END;
/


--有全文索引的性能
total records:1577
total time(ms):9000
avg time(ms):5.70703868103994927076727964489537095751

PL/SQL procedure successfully completed.

相关文章:

  • 动态链接及静态链接
  • BTrace实战
  • windows下安装配置hadoop
  • JavaScript(jQuery)实现打印英文格式日期
  • eclipse运行hadoop wordcount example
  • linux6.5环境下安装python
  • protobuf-2.5.0的下载与安装
  • ibatis入门
  • 将DataTable转换为ListT对象遇到问题:类型“System.Int64”的对象无法转换为类型“System.Int32”。...
  • php无限分类
  • java servlet实例
  • POJ 3154 Graveyard【多解,数论,贪心】
  • C++ 浅谈 strlen 与 sizeof的区别
  • 产品设计的原则之一:精简你的产品
  • 产品设计之二:优化你的产品细节
  • 【技术性】Search知识
  • 【跃迁之路】【477天】刻意练习系列236(2018.05.28)
  • 【跃迁之路】【669天】程序员高效学习方法论探索系列(实验阶段426-2018.12.13)...
  • 11111111
  • CAP理论的例子讲解
  • Hexo+码云+git快速搭建免费的静态Blog
  • java中的hashCode
  • LeetCode刷题——29. Divide Two Integers(Part 1靠自己)
  • 不上全站https的网站你们就等着被恶心死吧
  • 创建一个Struts2项目maven 方式
  • 融云开发漫谈:你是否了解Go语言并发编程的第一要义?
  • 如何编写一个可升级的智能合约
  • 使用iElevator.js模拟segmentfault的文章标题导航
  • 我与Jetbrains的这些年
  • 新书推荐|Windows黑客编程技术详解
  • 运行时添加log4j2的appender
  • 长三角G60科创走廊智能驾驶产业联盟揭牌成立,近80家企业助力智能驾驶行业发展 ...
  • 关于Android全面屏虚拟导航栏的适配总结
  • 组复制官方翻译九、Group Replication Technical Details
  • ​iOS实时查看App运行日志
  • ​软考-高级-系统架构设计师教程(清华第2版)【第20章 系统架构设计师论文写作要点(P717~728)-思维导图】​
  • ​软考-高级-信息系统项目管理师教程 第四版【第23章-组织通用管理-思维导图】​
  • #include到底该写在哪
  • #我与Java虚拟机的故事#连载03:面试过的百度,滴滴,快手都问了这些问题
  • #我与Java虚拟机的故事#连载09:面试大厂逃不过的JVM
  • $.ajax()参数及用法
  • $L^p$ 调和函数恒为零
  • $redis-setphp_redis Set命令,php操作Redis Set函数介绍
  • (4)事件处理——(7)简单事件(Simple events)
  • (DenseNet)Densely Connected Convolutional Networks--Gao Huang
  • (Redis使用系列) SpringBoot 中对应2.0.x版本的Redis配置 一
  • (ZT)一个美国文科博士的YardLife
  • (第9篇)大数据的的超级应用——数据挖掘-推荐系统
  • (附源码)springboot青少年公共卫生教育平台 毕业设计 643214
  • (力扣记录)235. 二叉搜索树的最近公共祖先
  • (生成器)yield与(迭代器)generator
  • (十八)三元表达式和列表解析
  • (十八)用JAVA编写MP3解码器——迷你播放器
  • (四)【Jmeter】 JMeter的界面布局与组件概述
  • (已解决)什么是vue导航守卫