structure

Block split and entry reorder

December 12, 2012 Internals, oracle No comments

今天跟研发的同学讨论到了这个问题,index block split过程中涉及到的重新排序问题.

SQL> create table idx_split (a number, b varchar2(1446), c date);   
 
Table created
 

SQL> create index idx_split_idx on idx_split (a, b) tablespace idx_2k pctfree 10;   
 
Index created
 

 
begin
    for i in 1..1000
    loop
        insert into tx_index_contention (a, b, c) values (i, lpad('A', 10, 'A'), sysdate);
    end loop;
 end;
 /

PL/SQL procedure successfully completed
 
SQL> commit;
 
Commit complete
 

SQL> SELECT height,blocks,lf_blks,br_blks,btree_space,used_space FROM INDEX_STATS WHERE NAME='IDX_SPLIT_IDX';
 
    HEIGHT     BLOCKS    LF_BLKS    BR_BLKS BTREE_SPACE USED_SPACE
---------- ---------- ---------- ---------- ----------- ----------
         1          8          1          0        7996       2500
 
SQL> ANALYZE INDEX idx_split_idx VALIDATE STRUCTURE;
 
Index analyzed
 

 SQL> SELECT height,blocks,lf_blks,br_blks,btree_space,used_space FROM INDEX_STATS WHERE NAME='IDX_SPLIT_IDX';
 
    HEIGHT     BLOCKS    LF_BLKS    BR_BLKS BTREE_SPACE USED_SPACE
---------- ---------- ---------- ------------------- ----------
         2          8          4          1       40012      25924
 
SQL> select extent_id,file_id,block_id,blocks from dba_extents where segment_name='IDX_SPLIT_IDX' AND OWNER='SYS';
 
 EXTENT_ID    FILE_ID   BLOCK_ID     BLOCKS
---------- ---------- ---------- ----------
         0          6        128          8

插入1000条数据,leaf blocks=4 branch block=1

SQL> alter session set events '10224 trace name context forever,level 1';   
 
Session altered
 
SQL>  SELECT height,blocks,lf_blks,br_blks,btree_space,used_space FROM INDEX_STATS WHERE NAME='IDX_SPLIT_IDX' ;
 
    HEIGHT     BLOCKS    LF_BLKS    BR_BLKS BTREE_SPACE USED_SPACE
---------- ---------- ---------- ---------- ----------- ----------
         2          8          4          1       40012      25924
 
SQL> insert into idx_split (a, b, c) values (1, lpad('A', 10, 'A'), sysdate);
 
1 row inserted
 
SQL> commit;
 
Commit complete
 
SQL> 
SQL> select sysdate from dual;
 
SYSDATE
-----------
2012/12/12
------------------------------------------------------------
*** 2012-12-12 13:44:52.311
*** SESSION ID:(668.2355) 2012-12-12 13:44:52.311
*** CLIENT ID:() 2012-12-12 13:44:52.311
*** SERVICE NAME:(yhdtest) 2012-12-12 13:44:52.311
*** MODULE NAME:(PL/SQL Developer) 2012-12-12 13:44:52.311
*** ACTION NAME:(Command Window - New) 2012-12-12 13:44:52.311
 
splitting leaf,dba 0x01800087,time 13:44:52.310
kdisnew_bseg_srch_cbk reject block -mark full,dba 0x01800086,time 13:44:52.311
kdisnew_bseg_srch_cbk rejecting block ,dba 0x01800086,time 13:44:52.311
kdisnew_bseg_srch_cbk reject block -mark full,dba 0x01800085,time 13:44:52.311
kdisnew_bseg_srch_cbk rejecting block ,dba 0x01800085,time 13:44:52.311
kdisnew_bseg_srch_cbk using block,dba 0x0180008f,time 13:44:52.313

---------------------------------------------------------------

leaf,dba 0x01800087 发生了分裂 (第一个leaf block)

dump 第一个branch block

SQL> alter system dump datafile 6 block 131;
 
System altered

----------------------------------------------------------------
Block header dump:  0x01800083
 Object id on Block? Y
 seg/obj: 0x12ef4  csc: 0x00.17756d  itc: 1  flg: E  typ: 2 - INDEX
     brn: 0  bdba: 0x1800080 ver: 0x01 opc: 0
     inc: 0  exflg: 0

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x0009.01d.00000389  0x00c003a5.00a7.02  C---    0  scn 0x0000.0017756d
Branch block dump
=================
header address 140678466630220=0x7ff24207f24c
kdxcolev 1
KDXCOLEV Flags = - - -
kdxcolok 0
kdxcoopc 0x80: opcode=0: iot flags=--- is converted=Y
kdxconco 3
kdxcosdc 1
kdxconro 3
kdxcofbo 34=0x22
kdxcofeo 8029=0x1f5d
kdxcoavs 7995
kdxbrlmc 25165959=0x1800087 -----第一个leaf block
kdxbrsno 2
kdxbrbksz 8056
kdxbr2urrc 0
row#0[8047] dba: 25165956=0x1800084  -----第二个
col 0; len 3; (3):  c2 04 0d
col 1; TERM
row#1[8038] dba: 25165957=0x1800085  ..3
col 0; len 3; (3):  c2 07 14
col 1; TERM
row#2[8029] dba: 25165958=0x1800086  ..4
col 0; len 3; (3):  c2 0a 1b
col 1; TERM
----- end of branch block dump -----
End dump data blocks tsn: 7 file#: 6 minblk 131 maxblk 131

-------------------------------------------------------------------
SQL> variable file# number
variable block# number
execute :file#:=dbms_utility.data_block_address_file(to_number('1800087','xxxxxxx'));
execute :block#:=dbms_utility.data_block_address_block(to_number('1800087','xxxxxxx'));SQL> SQL> 
PL/SQL procedure successfully completed.


PL/SQL procedure successfully completed.

SQL> 
SQL> 
SQL> 
SQL> print file#;

     FILE#
----------
	 6

SQL> print block#;

    BLOCK#
----------
       135

 
SQL> alter system dump datafile 6 block 135;
 
System altered
 
SQL> alter system dump datafile 6 block 132;
 
System altered


SQL> select  display_raw('c2040d','NUMBER') from dual;

DISPLAY_RAW('C2040D','NUMBER')
----------------------------------------------------------------------------------------------------
312

第二个leaf block 的min value = 312

Block header dump:  0x01800084
 Object id on Block? Y
 seg/obj: 0x12ef4  csc: 0x00.177610  itc: 2  flg: E  typ: 2 - INDEX
     brn: 0  bdba: 0x1800080 ver: 0x01 opc: 0
     inc: 0  exflg: 0

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x0009.01e.00000389  0x00c003a7.00a7.02  --U-    1  fsc 0x0000.00177611
0x02   0x000a.006.00000756  0x00c00768.0172.19  C---    0  scn 0x0000.00177571
Leaf block dump
===============
header address 140678466630244=0x7ff24207f264
kdxcolev 0
KDXCOLEV Flags = - - -
kdxcolok 1
kdxcoopc 0x89: opcode=9: iot flags=--- is converted=Y
kdxconco 3
kdxcosdc 2
kdxconro 307
kdxcofbo 650=0x28a
kdxcofeo 667=0x29b
kdxcoavs 17
kdxlespl 0
kdxlende 0
kdxlenxt 25165957=0x1800085
kdxleprv 25165967=0x180008f
kdxledsz 0
kdxlebksz 8032
row#0[667] flag: ------, lock: 0, len=24
col 0; len 3; (3):  c2 04 0d          ------------>312
col 1; len 10; (10):  41 41 41 41 41 41 41 41 41 41
col 2; len 6; (6):  00 41 74 8a 00 30
row#1[691] flag: ------, lock: 0, len=24
.....

第一个leaf block 的range <312

Block header dump:  0x01800087
 Object id on Block? Y
 seg/obj: 0x12ef4  csc: 0x00.177576  itc: 2  flg: E  typ: 2 – INDEX
     brn: 0  bdba: 0x1800080 ver: 0x01 opc: 0
     inc: 0  exflg: 0

 Itl           Xid                  Uba         Flag  Lck        Scn/Fsc
0x01   0x0009.01e.00000389  0x00c003a6.00a7.01  -B–    1  fsc 0x0000.00000000
0x02   0x000a.010.00000758  0x00c00770.0172.28  –U-    1  fsc 0x0000.00177649
Leaf block dump
===============
header address 140678466630244=0x7ff24207f264
kdxcolev 0
KDXCOLEV Flags = – – –
kdxcolok 1
kdxcoopc 0x80: opcode=0: iot flags=— is converted=Y
kdxconco 3
kdxcosdc 2
kdxconro 163
kdxcofbo 362=0x16a
kdxcofeo 4221=0x107d
kdxcoavs 3859
kdxlespl 0
kdxlende 0
kdxlenxt 25165967=0x180008f
kdxleprv 0=0x0
kdxledsz 0
kdxlebksz 8032
row#0[4221] flag: ——, lock: 0, len=23
col 0; len 2; (2):  c1 02
col 1; len 10; (10):  41 41 41 41 41 41 41 41 41 41
col 2; len 6; (6):  00 41 74 89 00 00
row#1[4244] flag: ——, lock: 2, len=23
col 0; len 2; (2):  c1 02
col 1; len 10; (10):  41 41 41 41 41 41 41 41 41 41
……

row#0,row#1 的偏移量发生了重组,[4221]->[4244] 按照len=23递增 即物理address 发生了排序
row#0,row#1 被reorder为1,1,2,x 而row#1对应的rowid为max block address

SQL> SELECT height,blocks,lf_blks,br_blks,btree_space,used_space FROM INDEX_STATS WHERE NAME='IDX_SPLIT_IDX';
 
    HEIGHT     BLOCKS    LF_BLKS    BR_BLKS BTREE_SPACE USED_SPACE
---------- ---------- ---------- ---------- ----------- ----------
         2          8          4          1       40012      25924
 
SQL>  ANALYZE INDEX idx_split_idx VALIDATE STRUCTURE;
 
Index analyzed
 
SQL> 
SQL>  SELECT height,blocks,lf_blks,br_blks,btree_space,used_space FROM INDEX_STATS WHERE NAME='IDX_SPLIT_IDX'

  2  ;
 
    HEIGHT     BLOCKS    LF_BLKS    BR_BLKS BTREE_SPACE USED_SPACE
---------- ---------- ---------- ---------- ----------- ----------
         2         16          5          1       48008      25960

leaf block 从4增加到5,第一个leaf block 发生了分裂,具体过程为:

1. Allocate new index block from index freelist
2. Redistribute block so the lower half (by volume) of index entries remain in current block and move the other half into the new block
3. Insert the new index entry into appropriate leaf block
4. Update the previously full block such that its “next leaf block pointer” (kdxlenxt) references the new block
5. Update the leaf block that was the right of the previously full block such that its “previous leaf block pointer”(kdxleprv) also points to the new block
6. Update the branch block that references the full block and add a new entry to point to the new leaf block (effectively the lowest value in the new leaf block)
7. –add this one , split leaf block will reorder by value this is a behavior like rebuild.

处理坏块一例(2)

August 17, 2012 Internals, maintain, oracle 2 comments

帮朋友check一次中国银联数据库的坏块问题(并不是严格意义上的坏块),大致情况为smon需要trascation recover —>特定的block 发现无法读取 —>transaction recover出现问题 —>smon terminated

Wed Aug 15 19:02:57 2012
SMON: Parallel transaction recovery slave got internal error
SMON: Downgrading transaction recovery to serial
Wed Aug 15 19:02:57 2012
Errors in file /oracle/admin/settora/bdump/settora_smon_4755904.trc:
ORA-00600: internal error code, arguments: [kdourp_inorder2], [43], [43], [7], [44], [], [], []
Doing block recovery for file 10 block 176769
Doing block recovery for file 26 block 3259716

Block recovery from logseq 108030, block 49455 to scn 41320651147
Wed Aug 15 19:02:58 2012
Recovery of Online Redo Log: Thread 1 Group 5 Seq 108030 Reading mem 0
Mem# 0: /orasvr/settora/redo05.log
Block recovery completed at rba 108030.49555.16, scn 9.2665945485
ORACLE Instance settora (pid = 8) – Error 600 encountered while recovering transaction (7, 19) on object 331910.

过一段时间数据库就会down掉,从日志信息里面可以看到:

Wed Aug 15 19:23:18 2012
SMON: Parallel transaction recovery slave got internal error
SMON: Downgrading transaction recovery to serial
Wed Aug 15 19:23:18 2012
Errors in file /oracle/admin/settora/bdump/settora_smon_4755904.trc:
ORA-00600: internal error code, arguments: [kdourp_inorder2], [43], [43], [7], [44], [], [], []
Wed Aug 15 19:23:20 2012
Errors in file /oracle/admin/settora/bdump/settora_pmon_4690184.trc:
ORA-00474: SMON process terminated with error
Wed Aug 15 19:23:20 2012
PMON: terminating instance due to error 474
Instance terminated by PMON, pid = 4690184

执行下面SQL

select /*+full(a)*/ from PRO_SETTLE3.SHOPACCOUNT a;

returns:ORA-00600: internal error code, arguments: [kdourp_inorder2], [43], [43], [7], [44], [], [], []

使用dbv工具来检测:

$ dbv file=/orasvr1/settle5/settle000.dbf blocksize=8192

DBVERIFY: Release 10.2.0.4.0 - Production on Thu Aug 16 09:40:49 2012

Copyright (c) 1982, 2007, Oracle.  All rights reserved.

DBVERIFY - Verification starting : FILE = /orasvr1/settle5/settle000.dbf
kdrchk:  row is marked as both Last and Next continue
          prow=0x1101aec98 flag=0x07
Block Checking: DBA = 42119809, Block Type = KTB-managed data block
data header at 0x1101ae07c
kdbchk: bad row tab 0, slot 51
Page 176769 failed with check code 6253


DBVERIFY - Verification complete

Total Pages Examined         : 386688
Total Pages Processed (Data) : 282817
Total Pages Failing   (Data) : 1
Total Pages Processed (Index): 98760
Total Pages Failing   (Index): 0
Total Pages Processed (Other): 2707
Total Pages Processed (Seg)  : 0
Total Pages Failing   (Seg)  : 0
Total Pages Empty            : 2404
Total Pages Marked Corrupt   : 0
Total Pages Influx           : 0
Highest block SCN            : 2666197038 (9.2666197038)

Total Pages Failing (Data) : 1

Page 176769 被标识为corruption

Total Pages Examined         : 512      Number of blocks looked at
Total Pages Processed (Data) : 1        Number of TABLE blocks seen
Total Pages Failing   (Data) : 1        Number of TABLE blocks with internal inconsistencies
Total Pages Processed (Index): 0        Number of INDEX blocks seen
Total Pages Failing   (Index): 0        Number of INDEX block with internal inconsistencies
Total Pages Empty            : 507      Number of unused blocks seen
Total Pages Marked Corrupt   : 2        Number of blocks with corrupt cache wrappers
Total Pages Influx           : 0        Number of pages we re-read as the page looked like it was being modified when it was first read.
SQL> select file#,block#,blocks from v$database_block_corruption;
 
no rows selected

在数据库中并没有关于坏块的记录存在。这并不是一个普通意义上的坏块。对于这种块使用corrupt skip是没有用的skip_corrupt_blocks只能使oracle跳过Oracle能够读出的块,而如果在操作系统层read调用就失败的,则不能跳过该块。
oracle 在read page 176769时发生了error 6253 (cannot read arguments from address file,其实这并不是真正的root cause,我在和huangyong讨论过后基本认定为一个内部的inconsistent block) oracle 不能 mark 为 corrupt block.

这里的思路为使用rowid来跳过这些有问题的block 大致方法如下

SQL> SELECT dbms_rowid.rowid_create(1,331910,10,176769,0) LOW_RID from DUAL;

LOW_RID
——————
AABRCGAAKAAArKBAAA

SQL> SELECT dbms_rowid.rowid_create(1,331910,10,176770,0) HI_RID from dual;

HI_RID
——————
AABRCGAAKAAArKCAAA

SQL> SELECT dbms_rowid.rowid_create(1,331910,26,3259716,0) LOW_RID from DUAL;

LOW_RID
——————
AABRCGAAaAAMb1EAAA

SQL> SELECT dbms_rowid.rowid_create(1,331910,26,3259717,0) HI_RID from DUAL;

HI_RID
——————
AABRCGAAaAAMb1FAAA

SQL> create table PRO_SETTLE3.SHOPACCOUNT_1 as select /*+ rowid(a) */ * from PRO_SETTLE3.SHOPACCOUNT a where rowid < 'AABRCGAAKAAArKBAAA'; Table created. SQL> insert into PRO_SETTLE3.SHOPACCOUNT_1 select /*+ rowid(a) */ * from PRO_SETTLE3.SHOPACCOUNT a where rowid >= ‘AABRCGAAKAAArKCAAA’ and rowid < 'AABRCGAAaAAMb1EAAA'; 49626 rows created. SQL> insert into PRO_SETTLE3.SHOPACCOUNT_1 select /*+ rowid(a) */ * from PRO_SETTLE3.SHOPACCOUNT a where rowid >= ‘AABRCGAAaAAMb1FAAA’;

208837 rows created.

SQL> select count(*) from PRO_SETTLE3.SHOPACCOUNT_1;

COUNT(*)
———-
262256

之后drop掉这张有问题的table restart database 恢复正常.

同样我们可以采用Kerry Osborne的脚本来实现.这里稍微修改了下 增加了选择tablespace 的功能temp

eg:

[oracle@testdb ~]$ dd if=/dev/zero  of=/data/oracle/oradata/wuxuan1/liu.dbf   bs=8192 seek=1798  count=1 conv=notrunc
1+0 records in
1+0 records out
8192 bytes (8.2 kB) copied, 5.2e-05 seconds, 158 MB/s
[oracle@testdb ~]$ sqlplus liu/liu

SQL*Plus: Release 11.2.0.3.0 Production on Fri Aug 17 02:32:20 2012

Copyright (c) 1982, 2011, Oracle.  All rights reserved.

Error accessing PRODUCT_USER_PROFILE
Warning:  Product user profile information not loaded!
You may need to run PUPBLD.SQL as SYSTEM

Connected to:
Oracle Database 11g Enterprise Edition Release 11.2.0.3.0 - 64bit Production
With the Partitioning, OLAP, Data Mining and Real Application Testing options

SQL> select count(*) from temp;
select count(*) from temp
                     *
ERROR at line 1:
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'


SQL> @temp	

WARNING: This script may issue a DROP TABLE command. Do not execute it unless you have read through it
and are comfortable you know what it does.

Ready? (hit ctl-C to quit)  
Enter value for owner_name: LIU
old   8: v_owner_name varchar2(30) := upper('&owner_name');
new   8: v_owner_name varchar2(30) := upper('LIU');
Enter value for table_name: TEMP
old   9: v_table_name varchar2(30) := upper('&table_name');
new   9: v_table_name varchar2(30) := upper('TEMP');
Enter value for tablespace: LIU
old  11: v_tablespace varchar2(30) := upper('&tablespace');
new  11: v_tablespace varchar2(30) := upper('LIU');


Saved 13247 records in TEMP_SAVED.
201 bads records in TEMP_BAD.

PL/SQL procedure successfully completed.

SQL> 
SQL> select count(*) from temp_saved;

  COUNT(*)
----------
     13247
     
SQL> select  * from  v$database_block_corruption;

     FILE#     BLOCK#	  BLOCKS CORRUPTION_CHANGE# CORRUPTIO
---------- ---------- ---------- ------------------ ---------
	 4	  258	       1		  0 ALL ZERO
	 4	 1798	       1		  0 ALL ZERO

block 1798 已经被标识为media corruption

手动构造rowid

SQL> select * from temp_bad where rownum<5; 

OLD_ROWID			 OLD_FILE OLD_OBJECT  OLD_BLOCK    OLD_ROW
------------------------------ ---------- ---------- ---------- ----------
ERROR_MESSAGE
--------------------------------------------------------------------------------
4.1798.181			    13646	   4	   1798        181
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'

4.1798.182			    13646	   4	   1798        182
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'

4.1798.183			    13646	   4	   1798        183

OLD_ROWID			 OLD_FILE OLD_OBJECT  OLD_BLOCK    OLD_ROW
------------------------------ ---------- ---------- ---------- ----------
ERROR_MESSAGE
--------------------------------------------------------------------------------
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'

4.1798.184			    13646	   4	   1798        184
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'

SQL> select dbms_rowid.rowid_create(1,13646,4,1798,181) from dual;

DBMS_ROWID.ROWID_C
------------------
AAADVOAAEAAAAcGAC1

SQL> select dbms_rowid.rowid_create(1,13646,4,1798,182) from dual;

DBMS_ROWID.ROWID_C
------------------
AAADVOAAEAAAAcGAC2




SQL> select * from temp where rowid ='AAADVOAAEAAAAcGAC1';
select * from temp where rowid ='AAADVOAAEAAAAcGAC1'
              *
ERROR at line 1:
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'


SQL> select * from temp where rowid ='AAADVOAAEAAAAcGAC2';
select * from temp where rowid ='AAADVOAAEAAAAcGAC2'
              *
ERROR at line 1:
ORA-01578: ORACLE data block corrupted (file # 4, block # 1798)
ORA-01110: data file 4: '/data/oracle/oradata/wuxuan1/liu.dbf'

Reference:
Extracting Data from a Corrupt Table using ROWID Range Scans in Oracle8 and higher [ID 61685.1]
Extracting Data from a Corrupt Table using DBMS_REPAIR or Event 10231 [ID 33405.1]
Extract rows from a CORRUPT table creating ROWID from DBA_EXTENTS [ID 422547.1]

Bitmap index tips

June 7, 2012 Internals, oracle No comments

对于BITMAP index的使用 一直存在一些争议 我们来看看下面几个观点:

1. Low cardinality – Some dabase vendor, like Oracle, provides very practical suggestion

If the number of distinct values of a column is less than 1% of the number of rows in the table, or if the values in a column are repeated more than 100 times, then the column is a candidate for a bitmap index.
B-tree indexes are most effective for high-cardinality data: that is, data with many possible values, such as CUSTOMER_NAME or PHONE_NUMBER.
There are 100 or more rows for each distinct value in the indexed column. When this limit is met, the bitmap index will be much smaller than a regular index, and you will be able to create the index much faster than a regular index.

2. No or little insert/update

Updating bitmap indexes take a lot of resources. Here are the suggestions
Building and maintaining an index structure can be expensive, and it can consume resources such as disk space, CPU, and I/O capacity. Designers must ensure that the benefits of any index outweigh the negatives of index maintenance.
Use this simple estimation guide for the cost of index maintenance: each index maintained by an INSERT, DELETE, or UPDATE of the indexed keys requires about three times as much resource as the actual DML operation on the table. What this means is that if you INSERT into a table with three indexes, then it will be approximately 10 times slower than an INSERT into a table with no indexes. For DML, and particularly for INSERT-heavy applications, the index design should be seriously reviewed, which might require a compromise between the query and INSERT performance.

3. Multiple Columns

One of the advantage is that multiple bitmap indexes can be merged and the column does not have to selective!More than one column in the table has an index that the optimizer can use to improve performance on a table scan Combining bitmap indexes on non-selective columns allows efficient AND and OR operations with a great number of rowids with minimal I/O





对于观点1 观点2 其实就是说明bitmap index 适合cardinality distinct小,并且并发DML很少的环境(OLAP) 但是对于大量的插入更新操作在cardinality很小的情况下 无论是索引的大小 还是插入的时间 bitmap index 还是占有一定的优势的 但是随着distinct的上升 无论index size还是插入的时间 都会发生恐怖的变化,我们可以做一个简单的对比:

SQL*Plus: Release 11.2.0.3.0 Production on Thu Jun 7 16:59:35 2012

Copyright (c) 1982, 2011, Oracle. All rights reserved.

Connected to:
Oracle Database 11g Enterprise Edition Release 11.2.0.3.0 – 64bit Production
With the Partitioning, OLAP, Data Mining and Real Application Testing options

SQL>

Elapsed: 00:00:00.00
SQL> create index user_data2.idx_temp_liu_id on user_data2.temp_liu(id);

Index created.

Elapsed: 00:00:00.46
SQL> insert into user_data2.temp_liu select 1 from dual connect by level < 40000001; 40000000 rows created. Elapsed: 00:03:36.70 SQL> commit;

Commit complete.

b-tree index distinct count =1 耗时 3:36.70

Elapsed: 00:00:01.14
SQL> select sum(bytes)/1024/1024/1024 from dba_segments where segment_name =’IDX_TEMP_LIU_ID’;

SUM(BYTES)/1024/1024/1024
————————-
1.06738281

Elapsed: 00:00:00.85
SQL> drop index user_data2.IDX_TEMP_LIU_ID;

Index dropped.

Elapsed: 00:00:01.75
SQL> create bitmap index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:06.79
SQL> drop index user_data2.IDX_TEMP_LIU_ID;

Index dropped.

Elapsed: 00:00:00.01
SQL> truncate table user_data2.temp_liu;

Table truncated.

Elapsed: 00:00:02.02
SQL> create bitmap index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.01
SQL> select count(*) from user_data2.temp_liu;

COUNT(*)
———-
0

Elapsed: 00:00:00.00
SQL> insert into user_data2.temp_liu select 1 from dual connect by level < 40000001; 40000000 rows created. Elapsed: 00:03:10.28 SQL> commit;

Commit complete.

Elapsed: 00:00:00.06
SQL> select sum(bytes)/1024/1024/1024 from dba_segments where segment_name =’IDX_TEMP_LIU_ID’;

SUM(BYTES)/1024/1024/1024
————————-
.005859375

Elapsed: 00:00:01.95





可以看到 耗时小于b-tree 但是index size << b-tree index size 下面测试下distinct count =2的对比


SQL> truncate table user_data2.temp_liu;

Table truncated.

Elapsed: 00:00:00.11
SQL> drop index user_data2.IDX_TEMP_LIU_ID;

Index dropped.

Elapsed: 00:00:00.01
SQL> create index user_data2.idx_temp_liu_id on user_data2.temp_liu(id);

Index created.

Elapsed: 00:00:00.18
SQL>
SQL> insert into user_data2.temp_liu select mod(rownum,2) from dual connect by level < 40000001; 40000000 rows created. Elapsed: 00:10:33.48 SQL> commit;

Commit complete.

Elapsed: 00:00:01.09

SQL> truncate table user_data2.temp_liu ;

Table truncated.

Elapsed: 00:00:02.31
SQL> drop index user_data2.IDX_TEMP_LIU_ID;

Index dropped.

Elapsed: 00:00:00.01
SQL> create bitmap index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.17
SQL> insert into user_data2.temp_liu select mod(rownum,2) from dual connect by level < 40000001; 40000000 rows created. Elapsed: 00:03:43.77



b-tree 耗时接近10分钟 bitmap index 耗时 3:43s 差距继续扩大 bitmap_index << b-tree index


SQL> drop index user_data2.IDX_TEMP_LIU_ID;

Index dropped.

Elapsed: 00:00:02.12
SQL> truncate table user_data2.temp_liu ;

Table truncated.

Elapsed: 00:00:01.04

SQL> insert into user_data2.temp_liu select ceil (rownum/2000000) from dual connect by level < 10000001; 10000000 rows created. Elapsed: 00:00:19.68 SQL> truncate table user_data2.temp_liu;

Table truncated.

Elapsed: 00:00:00.27
SQL> create bitmap index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.01
SQL> insert into user_data2.temp_liu select ceil (rownum/2000000) from dual connect by level < 10000001; 10000000 rows created. Elapsed: 00:00:45.71 SQL> select sum(bytes) from dba_segments where segment_name=’IDX_TEMP_LIU_ID’;

SUM(BYTES)
———-
2097152

Elapsed: 00:00:00.13
SQL> truncate table user_data2.temp_liu;

Table truncated.

Elapsed: 00:00:00.28
SQL> drop index user_data2.IDX_TEMP_LIU_ID
2 ;

Index dropped.

Elapsed: 00:00:00.01
SQL> create index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.01
SQL> insert into user_data2.temp_liu select ceil (rownum/2000000) from dual connect by level < 10000001; 10000000 rows created. Elapsed: 00:00:41.29 SQL> select sum(bytes) from dba_segments where segment_name=’IDX_TEMP_LIU_ID’;

SUM(BYTES)
———-
277872640

Elapsed: 00:00:00.21
SQL> select distinct id from user_data2.temp_liu ;

ID
———-
3
4
1
2
5

Elapsed: 00:00:01.60





可以看到 插入速度并没有得到明显的提升 但是size 仍然bitmap << b-tree


SQL> truncate table user_data2.temp_liu;

Table truncated.

Elapsed: 00:00:00.24
SQL> create bitmap index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.02
SQL>
SQL>
SQL>
SQL> insert into user_data2.temp_liu select ceil (rownum/5) from dual connect by level < 10000001; 10000000 rows created. Elapsed: 00:01:39.25 SQL> commit;

Commit complete.

Elapsed: 00:00:00.22
SQL> select sum(bytes) from dba_segments where segment_name=’IDX_TEMP_LIU_ID’;

SUM(BYTES)
———-
104857600

Elapsed: 00:00:00.01





对比之前的插入操作 插入1000万数据 耗费时间 distinct count=5 << distinct count=2000000


SQL> truncate table user_data2.temp_liu ;

Table truncated.

Elapsed: 00:00:00.37
SQL> drop index user_data2.IDX_TEMP_LIU_ID
2 ;

Index dropped.

Elapsed: 00:00:00.01
SQL> create index user_data2.IDX_TEMP_LIU_ID on user_data2.temp_liu (id);

Index created.

Elapsed: 00:00:00.02
SQL> insert into user_data2.temp_liu select ceil (rownum/5) from dual connect by level < 10000001; 10000000 rows created. Elapsed: 00:00:38.16 SQL> select sum(bytes) from dba_segments where segment_name=’IDX_TEMP_LIU_ID’;

SUM(BYTES)
———-
177209344





可以看到 插入速度已经远远大于 bitmap-index 此时的size 也已经接近b-tree size



总结一下这个小实验 在distinct 大量增加的环境下bitmap 索引完全失去了优势 大量的维护代价 不但让索引的大小成倍增加 维护索引的代价也是剧增 并且在OLTP这种高并发的环境下BITMAP显得那么格格不入 BITMAP index 只能被用在DW DSS等低并发环境,当一条insert 执行的时候,如果column value=x的,由于要更新bitmap,需要lock value=x的所有行,所以千万别在oltp中使用bitmap index

可以给个流程图:

FROM Richard Foote

Bitmap indexes should only be considered in Data Warehouse, low concurrency DML type environments due to their locking implications and certainly pre 10g, Bitmap indexes had growth issues after significant DML changes. However it’s a complete nonsense to suggest that Bitmap indexes should only be considered with columns with “few” distinct values, else things will run 100s of times slower.





关于DML 操作对于bitmap index的影响 我们可以先看一下bitmap index block的结构:



In Oracle 9.2 and below, insertion is very inefficient
When a row is inserted
if leaf row already exists for ROWID then bitmap is updated
otherwise a new leaf row is inserted for the eight bits around the new row

In Oracle 10g, insertion is more efficient
rows are inserted into existing leaf rows where possible
if necessary start ROWID or end ROWID is adjusted





大量的DML 操作造成的 ROWID adjusted 的代价是很大的 所以在9i的库 我们经常发现bitmap index变的十分巨大(曾经见过一个2T的bitmap index)

上面的写了很多 看来bitmap index 真是一无是处了,但是在很多特定情况下 bitmap 还是具有一定优势的 就如上面第三个观点所说的 Multiple Columns的情况下
bitmap index merge :
Combining bitmap indexes on non-selective columns allows efficient AND and OR operations with a great number of rowids with minimal I/O

其实可以通过下面的图客观的反映出来

另外bitmap join index也是DW环境一个很不错的选择

总结 when to use bitmap index ?


1.Low cardinality

2.little concurrency DML statement

3.Multiple Columns using index merge in DW

4.bitmap join index in DW

5.just use it when you think your OLTP system is not bad enough 🙂

创建index之前如何确定其大小

March 8, 2012 Internals, maintain, oracle 1 comment

昨天看了roger的一篇文章,如何在创建index 之前估算它的大小,这里有一种更简单的方法实现:

主要思路就是采用了dbms_space.create_index_cost这个包

–create a testing table named t
SQL> create table t as select * from dba_objects;

Table created.

SQL> declare
begin
for i in 1..10 loop
insert /*+ APPEND */ into t select * from t;
commit;
end loop;
end;
/

PL/SQL procedure successfully completed.

SQL> select count(*) from t;

COUNT(*)
———-
38171648

1 row selected.

–As told by Oracle docs, you have to have updated statistics in order to allow CREATE_INDEX_COST to produce accurate estimation
SQL> exec dbms_stats.gather_table_stats(‘LIU’,’T’);

PL/SQL procedure successfully completed.

SQL> variable used_bytes number
SQL> variable allocated_bytes number
SQL> set autoprint on
SQL> exec dbms_space.create_index_cost( ‘create index t_idx on t(object_id,object_name,object_type) tablespace PURCHASE’, :used_bytes, :allocated_bytes );

PL/SQL procedure successfully completed.

ALLOCATED_BYTES
—————
1946157056

USED_BYTES
———-
1259664384

Verification
SQL> create index t_idx on t(object_id,object_name,object_type);

Index created.

SQL> select segment_name ,sum(bytes) from dba_segments where owner=’LIU’ and segment_name=’T_IDX’ group by segment_name;

SEGMENT_NAME SUM(BYTES)
——————————————————————————— ———-
T_IDX 1930428416

1 row selected.

可以看到预计空间为 1946157056byte 实际空间为 1930428416byte 结果极其的接近。

按照roger的方法 我们再计算一次:

对于8k的block:

block_size(8192)= = kcbh + ktbbh + kdxle + kd_off + DATA + block_size * Pctfree + 4
= 20 + 72 + 32 + 2 * Y + Y * ( 20 + 5 + 8 + rowid ) + 8192 * 0.1 + 4
8192 = 92 + 32 + 45Y + 819.2 + 4

SQL> SELECT (8192-92-32-819.2-4)/45 FROM dual;

(8192-92-32-819.2-4)/45
———————–
160.995

SQL> SELECT 38171648/((8192-92-32-819.2-4)/45) *8192 from dual;

38171648/((8192-92-32-819.2-4)/45)*8192
————————————————-
1942249319.354037

可以看到如果单用leaf block计算 同样很接近真实值,当然如果对一个very big的index而言 仅仅用leaf block去计算 还是会出现一定的偏差

Reference:http://www.killdb.com/2012/02/26/how-to-calculate-indexes-size-when-you-create-index.html?wpc=dlc#comment-805

Fractured blocks when Rman backup is running

February 20, 2012 backup, oracle, replication No comments

系统alert在主库出现下面错误 OS为linux 5.5


Stopping background process CJQ0
Sat Feb 11 03:41:07 2012
Hex dump of (file 9, block 561424) in trace file /data/oracle/diag/rdbms/yhdstd/yhddb1/trace/yhddb1_ora_11327.trc
Corrupt block relative dba: 0x02489110 (file 9, block 561424)
Fractured block found during backing up datafile
Data in bad block:
type: 6 format: 2 rdba: 0x02489110
last change scn: 0x0007.a9d2831f seq: 0x1 flg: 0x06
spare1: 0x0 spare2: 0x0 spare3: 0x0
consistency value in tail: 0xe6b90601
check value in block header: 0x8c60
computed block checksum: 0x82af
Reread of blocknum=561424, file=/data/oracle/oradata/yhddb1/md_data01.dbf. found valid data

这个情况在9i比较常见,rman备份时datafile 正在处在剧烈的io操作,如大批量的写入等等,oracle判断此块为Fractured,但是这并不是真正意义上的Corrupt

oracle会再次check这个块 ‘Reread of blocknum=561424, file=/data/oracle/oradata/yhddb1/md_data01.dbf. found valid data’ 发现这个块是valid的,数据库版本是11.2.0.3

[oracle@rac03 ~]$ crontab -l
0 03 * * 2 sh /home/oracle/monitor/script/rman_level0.sh >> /home/oracle/monitor/script/rman_level0.log
0 03 * * 0,1,3,4,5,6 sh /home/oracle/monitor/script/rman_level1.sh >> /home/oracle/monitor/script/rman_level1.log

rman备份确实是放在3点左右,和alert log 中的时间相吻合

由此发现在11g中我们还是建议rman备份尽量不要放在数据库繁忙的阶段

reference:

fact: Oracle Server – Enterprise Edition 8
fact: Oracle Server – Enterprise Edition 9
fact: Recovery Manager (RMAN)
symptom: Fractured block found during backup up datafile
symptom: Reread of blocknum found some corrupt data
symptom: Analyze table validate structure cascade returns no errors
change: NOTE ROLE:

The messages are of the form

Reread of blocknum=36256, file=/pdscdata/pdsclive/data1/dispatch_data_large2.
dbf. found same corrupt data
***
Corrupt block relative dba: 0xfc008dc0 (file 63, block 36288)
Fractured block found during backing up datafile
Data in bad block –
type: 0 format: 0 rdba: 0x00000000
last change scn: 0x0000.00000000 seq: 0x0 flg: 0x00
consistency value in tail: 0x53494e53
check value in block header: 0x0, block checksum disabled
spare1: 0x0, spare2: 0x0, spare3: 0x0
cause: RMAN backups of datafile are being performed while the datafile is
involved in heavy I/O.

RMAN reads Oracle blocks from disk. If it finds that the block is fractured,
which means it is being actively used, it performs a reread of the block. If
that fails again then the block is assumed to be corrupt.

By identifying the object that these blocks belong to by following
Handling Oracle Block Corruptions in Oracle7/8/8i and
performing an analyze .. validate structure cascade on the object involved you
can confirm that
the object is not corrupt.

fix:

Run the backups when the tablespace has less I/O activity.

HugePages on Linux

January 16, 2012 linux, system 5 comments

Regular Pages and HugePages

This section aims to give a general picture about memory access in virtual memory systems and how pages are referenced.
When a single process works with a piece of memory, the pages that the process uses are reference in a local page table for the specific process. The entries in this table also contain references to the System-Wide Page Table which actually has references to actual physical memory addresses. So theoretically a user mode process (i.e. Oracle processes), follows its local page table to access to the system page table and then can reference the actual physical table virtually. As you can see below, it is also possible (and very common to Oracle RDBMS due to SGA use) that two different O/S processes can point to the same entry in the system-wide page table.

When HugePages are in the play, the usual page tables are employed. The very basic difference is that the entries in both process page table and the system page table has attributes about huge pages. So any page in a page table can be a huge page or a regular page. The following diagram illustrates 4096K hugepages but the diagram would be the same for any huge page size.

Some HugePages Facts/Features

HugePages can be allocated on-the-fly but they must be reserved during system startup. Otherwise the allocation might fail as the memory is already paged in 4K mostly.
HugePage sizes vary from 2MB to 256MB based on kernel version and HW architecture (See related section below.)
HugePages are not subject to reservation / release after the system startup unless there is system administrator intervention, basically changing the hugepages configuration (i.e. number of pages available or pool size)

HugePages and Oracle 11g Automatic Memory Management (AMM)

The AMM and HugePages are not compatible. One needs to disable AMM on 11g to be able to use HugePages. See hugepage in 11g for further information.

设置大页内存

[oracle@db-36 ~]$ cat /etc/sysctl.conf |grep nr_hugepages
vm.nr_hugepages=33792

vm.nr_hugepages>=SGA/2M 如SGA=64G vm.nr_hugepages>=32768

设置limits.conf

cat /etc/security/limits.conf

cat oracle soft nofile 131072
oracle hard nofile 131072
oracle soft nproc 131072
oracle hard nproc 131072
oracle soft core unlimited
oracle hard core unlimited
oracle soft memlock 69206016 –> 大于SGA
oracle hard memlock 69206016 –> 大于SGA


[oracle@db-36 ~]$ more /proc/meminfo |grep -i HugePage
HugePages_Total: 33792
HugePages_Free: 998
HugePages_Rsvd: 38
Hugepagesize: 2048 kB

表示已经使用了大页内存

scripts :用于计算系统所需要的大页

#!/bin/bash
#
# hugepages_settings.sh
#
# Linux bash script to compute values for the
# recommended HugePages/HugeTLB configuration
#
# Note: This script does calculation for all shared memory
# segments available when the script is run, no matter it
# is an Oracle RDBMS shared memory segment or not.
#
# This script is provided by Doc ID 401749.1 from My Oracle Support
# http://support.oracle.com

# Welcome text
echo ”
This script is provided by Doc ID 401749.1 from My Oracle Support
(http://support.oracle.com) where it is intended to compute values for
the recommended HugePages/HugeTLB configuration for the current shared
memory segments. Before proceeding with the execution please make sure
that:
* Oracle Database instance(s) are up and running
* Oracle Database 11g Automatic Memory Management (AMM) is not setup
(See Doc ID 749851.1)
* The shared memory segments can be listed by command:
# ipcs -m

Press Enter to proceed…”

read

# Check for the kernel version
KERN=`uname -r | awk -F. ‘{ printf(“%d.%d\n”,$1,$2); }’`

# Find out the HugePage size
HPG_SZ=`grep Hugepagesize /proc/meminfo | awk ‘{print $2}’`

# Initialize the counter
NUM_PG=0

# Cumulative number of pages required to handle the running shared memory segments
for SEG_BYTES in `ipcs -m | awk ‘{print $5}’ | grep “[0-9][0-9]*”`
do
MIN_PG=`echo “$SEG_BYTES/($HPG_SZ*1024)” | bc -q`
if [ $MIN_PG -gt 0 ]; then
NUM_PG=`echo “$NUM_PG+$MIN_PG+1” | bc -q`
fi
done

RES_BYTES=`echo “$NUM_PG * $HPG_SZ * 1024” | bc -q`

# An SGA less than 100MB does not make sense
# Bail out if that is the case
if [ $RES_BYTES -lt 100000000 ]; then
echo “***********”
echo “** ERROR **”
echo “***********”
echo “Sorry! There are not enough total of shared memory segments allocated for
HugePages configuration. HugePages can only be used for shared memory segments
that you can list by command:

# ipcs -m

of a size that can match an Oracle Database SGA. Please make sure that:
* Oracle Database instance is up and running
* Oracle Database 11g Automatic Memory Management (AMM) is not configured”
exit 1
fi

# Finish with results
case $KERN in
‘2.4’) HUGETLB_POOL=`echo “$NUM_PG*$HPG_SZ/1024” | bc -q`;
echo “Recommended setting: vm.hugetlb_pool = $HUGETLB_POOL” ;;
‘2.6’) echo “Recommended setting: vm.nr_hugepages = $NUM_PG” ;;
*) echo “Unrecognized kernel version $KERN. Exiting.” ;;
esac

# End


example:

[oracle@db-36 ~]$ sh page.sh

This script is provided by Doc ID 401749.1 from My Oracle Support
(http://support.oracle.com) where it is intended to compute values for
the recommended HugePages/HugeTLB configuration for the current shared
memory segments. Before proceeding with the execution please make sure
that:
* Oracle Database instance(s) are up and running
* Oracle Database 11g Automatic Memory Management (AMM) is not setup
(See Doc ID 749851.1)
* The shared memory segments can be listed by command:
# ipcs -m

Press Enter to proceed…

Recommended setting: vm.nr_hugepages = 32835
[oracle@db-36 ~]$ cat /etc/sysctl.conf |grep vm.nr_hugepages
vm.nr_hugepages=33792

可以看出 我们设置的大页是很合理的

How does “Nologging” attribute

January 13, 2012 Internals, oracle No comments

NOLOGGING is supported in only a subset of the locations that support LOGGING. Only the following operations support the NOLOGGING mode:
DML:
1. Direct-path INSERT (serial or parallel) resulting either from an INSERT or a
MERGE statement. NOLOGGING is not applicable to any UPDATE operations
resulting from the MERGE statement.
2. Direct Loader (SQL*Loader)

DDL:
1. CREATE TABLE … AS SELECT
2. CREATE TABLE … LOB_storage_clause … LOB_parameters … NOCACHE |
CACHE READS
3. ALTER TABLE … LOB_storage_clause … LOB_parameters … NOCACHE |
CACHE READS (to specify logging of newly created LOB columns)
4. ALTER TABLE … modify_LOB_storage_clause … modify_LOB_parameters
… NOCACHE | CACHE READS (to change logging of existing LOB columns)
5. ALTER TABLE … MOVE
6. ALTER TABLE … (all partition operations that involve data movement)
– ALTER TABLE … ADD PARTITION (hash partition only)
– ALTER TABLE … MERGE PARTITIONS
– ALTER TABLE … SPLIT PARTITION
– ALTER TABLE … MOVE PARTITION
– ALTER TABLE … MODIFY PARTITION … ADD SUBPARTITION
– ALTER TABLE … MODIFY PARTITION … COALESCE SUBPARTITION
7. CREATE INDEX
8. ALTER INDEX … REBUILD
9. ALTER INDEX … REBUILD [SUB]PARTITION
10. ALTER INDEX … SPLIT PARTITION
For objects other than LOBs, if you omit this clause, then the logging attribute of the
object defaults to the logging attribute of the tablespace in which it resides.
For LOBs, if you omit this clause:
If you specify CACHE, then LOGGING is used (because you cannot have CACHE
NOLOGGING).
If you specify NOCACHE or CACHE READS, then the logging attribute defaults to the
logging attribute of the tablespace in which it resides.
NOLOGGING does not apply to LOBs that are stored inline with row data. That is, if
you specify NOLOGGING for LOBs with values less than 4000 bytes and you have not
disabled STORAGE IN ROW, then Oracle ignores the NOLOGGING specification and
treats the LOB data the same as other table data.

The NOLOGGING clause also specifies that subsequent direct loads using SQL*Loader
and direct load INSERT operations are not logged. Subsequent DML statements
(UPDATE, DELETE, and conventional path insert) are unaffected by the NOLOGGING
attribute of the table and generate redo.

Nologging Table Still generating Lots Of Archive Logs [ID 976722.1]


Modified 28-NOV-2011 Type HOWTO Status MODERATED
In this Document
Goal
Solution
References

This document is being delivered to you via Oracle Support’s Rapid Visibility (RaV) process and therefore has not been subject to an independent technical review.
Applies to:

Oracle Server – Enterprise Edition – Version: 10.2.0.4 to 10.2.0.4 – Release: 10.2 to 10.2
Information in this document applies to any platform.
***Checked for relevance on 28-Nov-2011***
Goal

Why is a Nologging Table still generating lots of archive log.

Solution

The nologging concept is still the same. Some redo still does get generated for objects regardless if the nologging option is set.

When nologging is set, the amount of redo generated is reduced but it is still generated.

For instance the DDL and DML information regarding the creation of any new objects is logged in the REDO logs for recovery purposes .

The situations where no log is generated is covered by:
Note 188691.1 “How to Avoid Generation of Redolog Entries”

Nologging operations are invoked by any of the following:
* SQL*Loader direct load operations
* Direct load INSERT operations from CREATE TABLE | INDEX or INSERT commands
* Loading into an object containing LOB data when its object’s segment characteristic is NOCACHE NOLOGGING

For databases in ARCHIVELOG mode, nologging operations can only occur for a particular object if and only if:

* Database allows for nologging (ALTER DATABASE NO FORCE LOGGING) and
* Tablespace allows for nologging (ALTER TABLESPACE NO FORCE LOGGING)
* Object allows for nologging (ALTER TABLE NOLOGGING)

NOTE:
The above means that if FORCE LOGGING is enabled at a higher level, the NOLOGGING at a lower level has no effect. That means that even for a nologging table redo information may still be logged.

Another aspect you should keep in mind is that, if the database is in NOARCHIVELOG mode, then the NOLOGGING flag has no effect.
For more information refer to :
Note 290161.1 – The Gains and Pains of Nologging Operations

Take also into account that by default the Oracle Database does not provide any supplemental logging which means that by default LogMiner is not usable/useful.

Supplemental logging should be enabled prior to generating log files that will be analyzed by LogMiner.

When you enable supplemental logging, additional information is recorded in the redo stream that is needed to make the information in the redo log files useful to you. Therefore, at the very least, you must enable minimal supplemental logging, as the following SQL statement shows:

SQL> ALTER DATABASE ADD SUPPLEMENTAL LOG DATA;

To determine whether supplemental logging is enabled, query the V$DATABASE view, as the following SQL statement shows:

SQL> SELECT SUPPLEMENTAL_LOG_DATA_MIN FROM V$DATABASE;

If the query returns a value of YES or IMPLICIT, minimal supplemental logging is enabled.

You can turn off Supplemental Logging by following command.

SQL> ALTER DATABASE DROP SUPPLEMENTAL LOG DATA;

Minimal supplemental logging ensures that Logminer (and any products building on Logminer technology) will have sufficient information to support chained rows and various storage arrangements such as cluster tables.

Minimal Supplemental Logging
o is required for Logminer
o includes additional information in redo stream

In summary, enabling supplemental logging would also increase the amount of information gathered in the log because of the increased granularity.

References

NOTE:188691.1 – How to Avoid Generation of Redolog Entries
NOTE:290161.1 – The Gains and Pains of Nologging Operations
NOTE:750198.1 – Effect of Supplemental Logging on LogMiner with Example

Oracle 坏块处理(final)

January 8, 2012 maintain, oracle 2 comments

收到开发人员报告,oracle出现坏块,信息如下:

Hex dump of (file 7, block 1407500) in trace file /data/oracle/admin/orcl/bdump/orcl_p004_7383.trc
Corrupt block relative dba: 0x01d57a0c (file 7, block 1407500)
Fractured block found during crash/instance recovery
Data in bad block:
type: 6 format: 2 rdba: 0x01d57a0c
last change scn: 0x0000.e02e5f2d seq: 0x1 flg: 0x06
spare1: 0x0 spare2: 0x0 spare3: 0x0
consistency value in tail: 0x88150601
check value in block header: 0x462f
computed block checksum: 0xd738
Reread of rdba: 0x01d57a0c (file 7, block 1407500) found same corrupted data

至此,我们确定该坏块是 7月28号上午9:45分数据库server crash导致的。

解决思路:
首先确认该坏块所属对象

Connected to:
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 – 64bit Production
With the Partitioning, OLAP, Data Mining and Real Application Testing options

SQL> set pagesize 100
SQL> set linesize 144
SQL> col segment_name format a50

SQL> select
object_id||’-‘||segment_name||’/’||partition_name segment_name,
segment_type,
RELATIVE_FNO
from dba_extents a
,dba_objects b
where file_id = ‘7’
and 1407500 between block_id and block_id + blocks – 1
and a.SEGMENT_NAME = b.OBJECT_NAME
and (a.PARTITION_NAME = b.SUBOBJECT_NAME or a.PARTITION_NAME is null)
and a.owner = b.owner

SEGMENT_NAME SEGMENT_TYPE RELATIVE_FNO
—————— ————
159761-IDX_D_D_PRODUCT_ID/ INDEX 7

确认该坏块属于索引IDX_D_D_PRODUCT_ID。先重建索引暂时解决问题:

SQL> alter index IDX_D_D_PRODUCT_ID rebuild online tablespace pur_index ;
Index altered.


使用dbv检查:

[oracle@rac1 ~]$ dbv file=/data/oracle/oradata/orcl/purchase03.dbf blocksize=8192
DBVERIFY: Release 10.2.0.1.0 – Production on Thu Jul 29 10:53:48 2010
Copyright (c) 1982, 2005, Oracle. All rights reserved.
DBVERIFY – Verification starting : FILE = /data/oracle/oradata/orcl/purchase03.dbf
DBV-00200: Block, dba 30767628, already marked corrupted
DBVERIFY – Verification complete
Total Pages Examined : 4194302
Total Pages Processed (Data) : 3508146
Total Pages Failing (Data) : 0
Total Pages Processed (Index): 605887
Total Pages Failing (Index): 0
Total Pages Processed (Other): 75876
Total Pages Processed (Seg) : 0
Total Pages Failing (Seg) : 0
Total Pages Empty : 4393
Total Pages Marked Corrupt : 1
Total Pages Influx : 0
Highest block SCN : 3781089739 (0.3781089739)

我们也可以使用如下过程确定坏块属于哪个block:
SQL> select dbms_utility.data_block_address_file(30767628) from dual;
DBMS_UTILITY.DATA_BLOCK_ADDRESS_FILE(30767628)

7
Elapsed: 00:00:00.23
SQL> select dbms_utility.data_block_address_block(30767628) from dual;
DBMS_UTILITY.DATA_BLOCK_ADDRESS_BLOCK(30767628)

1407500

其次要确认坏块类型是物理坏块还是逻辑坏块
for physical corruption check: backup validate datafile ‘filename’;
for logical corruption check: backup check logical validate datafile ‘filename’
then find out whether there is corruption from dynamic view V$DATABASE_BLOCK_CORRUPTION
先进行物理坏块检测
RMAN> backup validate datafile 7;
Starting backup at 29-JUL-10
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=1736 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00007 name=/data/oracle/oradata/orcl/purchase03.dbf
channel ORA_DISK_1: backup set complete, elapsed time: 00:00:35
Finished backup at 29-JUL-10
sys@std01> select * from V$DATABASE_BLOCK_CORRUPTION;
FILE# BLOCK# BLOCKS CORRUPTION_CHANGE# CORRUPTIO
———- ———- —————— ———
7 1407500 1 0 CORRUPT

确认为物理坏块
最后也是最重要的一步,进行坏块恢复,


RMAN> BLOCKRECOVER CORRUPTION LIST;
Starting blockrecover at 29-JUL-10
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=455 devtype=DISK
channel ORA_DISK_1: restoring block(s)
channel ORA_DISK_1: specifying block(s) to restore from backup set
restoring blocks of datafile 00007
channel ORA_DISK_1: reading from backup piece /backup/rman/rac1_ORCL_20100729_eoljvn41_1_1.bak

channel ORA_DISK_1: restored block(s) from backup piece 1
piece handle=/backup/rman/rac1_ORCL_20100729_eoljvn41_1_1.bak tag=TAG20100729T050504
channel ORA_DISK_1: block restore complete, elapsed time: 00:57:56
failover to previous backup
channel ORA_DISK_1: restoring block(s)
channel ORA_DISK_1: specifying block(s) to restore from backup set
restoring blocks of datafile 00007
channel ORA_DISK_1: reading from backup piece /backup/rman/rac1_ORCL_20100728_eiljt2ju_1_1.bak

channel ORA_DISK_1: restored block(s) from backup piece 1
piece handle=/backup/rman/rac1_ORCL_20100728_eiljt2ju_1_1.bak tag=TAG20100728T050253
channel ORA_DISK_1: block restore complete, elapsed time: 00:58:36
starting media recovery
media recovery complete, elapsed time: 00:12:15
Finished blockrecover at 29-JUL-10


命令完成,分别使用DBV和backup validate命令进行验证


[oracle@rac1 ~]$ dbv file=/data/oracle/oradata/orcl/purchase03.dbf blocksize=8192
DBVERIFY: Release 10.2.0.1.0 – Production on Thu Jul 29 19:04:45 2010
Copyright (c) 1982, 2005, Oracle. All rights reserved.
DBVERIFY – Verification starting : FILE = /data/oracle/oradata/orcl/purchase03.dbf

DBVERIFY – Verification complete
Total Pages Examined : 4194302
Total Pages Processed (Data) : 3507791
Total Pages Failing (Data) : 0
Total Pages Processed (Index): 606239
Total Pages Failing (Index): 0
Total Pages Processed (Other): 75879
Total Pages Processed (Seg) : 0
Total Pages Failing (Seg) : 0
Total Pages Empty : 4393
Total Pages Marked Corrupt : 0
Total Pages Influx : 0
Highest block SCN : 3785553739 (0.3785553739)

使用 backup validate datafile 进行验证:


验证前:
SQL> select * from V$DATABASE_BLOCK_CORRUPTION;
FILE# BLOCK# BLOCKS CORRUPTION_CHANGE# CORRUPTIO
———- ———- —————— ———
7 1407500 1 0 CORRUPT
[oracle@rac1 ~]$ rman target /
Recovery Manager: Release 10.2.0.1.0 – Production on Thu Jul 29 21:17:10 2010
Copyright (c) 1982, 2005, Oracle. All rights reserved.
connected to target database: ORCL (DBID=1185342296)
RMAN> backup validate datafile 7;
Starting backup at 29-JUL-10
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=455 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00007 name=/data/oracle/oradata/orcl/purchase03.dbf
channel ORA_DISK_1: backup set complete, elapsed time: 00:02:35
Finished backup at 29-JUL-10
执行完上述命令,坏块从数据字典里消失:
SQL> select * from V$DATABASE_BLOCK_CORRUPTION;
no rows selected

问题得到解决。

—————————————————————————-
下面模拟一种特殊情况,CORRUPTION BLOCKS上面还不存在object 这时候用BLOCKRECOVER CORRUPTION LIST是恢复不了的,我们提供下面一种方法。
首先查找已经format的块,也就是所谓的存在object的块:

SQL>create tablespace block datafile ‘/u01/app/oracle/oradata/axx/block01.dbf’ size 100M;

tablespace created

SQL> select file_id from dba_data_files where tablespace_name=’BLOCK’;

FILE_ID
———-
6

SQL> select OWNER,SEGMENT_NAME from dba_extents a where file_id=6 and 500 between a.block_id and a.block_id+a.blocks;

no rows selected

SQL> create table tmp_1 (id int) tablespace block;

Table created.

找出BLOCK中已经使用的最大块:

SQL> select max(BLOCK_ID+blocks) from dba_extents a where tablespace_name=’BLOCK’;

MAX(BLOCK_ID+BLOCKS)
——————–
489

下面模拟损坏datafile 6 的 第 500个block;

[oracle@liu ~]$ dd of=/u01/app/oracle/oradata/axx/block01.dbf bs=8192 seek=500 conv=notrunc count=1 if=/dev/zero
1+0 records in
1+0 records out

RMAN> backup validate datafile 6;

Starting backup at 08-JAN-12
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=132 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00006 name=/u01/app/oracle/oradata/axx/block01.dbf
channel ORA_DISK_1: backup set complete, elapsed time: 00:00:02
Finished backup at 08-JAN-12

[oracle@liu ~]$ dbv file=/u01/app/oracle/oradata/axx/block01.dbf blocksize=8192;

DBVERIFY: Release 10.2.0.1.0 – Production on Sun Jan 8 20:17:19 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

DBVERIFY – Verification starting : FILE = /u01/app/oracle/oradata/axx/block01.dbf
Page 500 is marked corrupt
Corrupt block relative dba: 0x018001f4 (file 6, block 500)
Completely zero block found during dbv:

DBVERIFY – Verification complete

Total Pages Examined : 12800
Total Pages Processed (Data) : 613
Total Pages Failing (Data) : 0
Total Pages Processed (Index): 0
Total Pages Failing (Index): 0
Total Pages Processed (Other): 61
Total Pages Processed (Seg) : 0
Total Pages Failing (Seg) : 0
Total Pages Empty : 12125
Total Pages Marked Corrupt : 1
Total Pages Influx : 0
Highest block SCN : 1178159 (0.1178159)
[oracle@liu ~]$ sqlplus ‘/as sysdba’

SQL*Plus: Release 10.2.0.1.0 – Production on Sun Jan 8 20:17:30 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

Connected to:
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 – Production
With the Partitioning, OLAP and Data Mining options

SQL> select * from V$DATABASE_BLOCK_CORRUPTION;

FILE# BLOCK# BLOCKS CORRUPTION_CHANGE# CORRUPTIO
———- ———- ———- —————— ———
6 500 1 0 ALL ZERO

SQL> alter system switch logfile;

System altered.

可以看到第500个block已经被标记为 Corrupt,下面我们通过创建object format这个block;

SQL> create table tmp_2 (id int) tablespace block;

Table created.

创建一个monitor trigger 达到第500个block后自动终止操作,由于这次只是模拟,datafile很小,在实际环境中可能出现Corrupt block在datafile的尾部接近max block,这时候可以
通过创建一张temp table allocate extent达到接近这个max block 然后再创建填充数据的table.


SQL> create or replace trigger trigger_monitor after insert on tmp_2 for each row
2 declare
3 check_block_id number;
4 begin
5 select dbms_rowid.rowid_block_number(:new.rowid) into check_block_id from dual;
6 if check_block_id=500 then
7 raise_application_error(-20201,’curroption block has been format’);
8 end if;
9 end;
10
11 /

Trigger created

SQL>
SQL>
SQL> begin
2 for i in 1..1000000 loop
3 insert into tmp_2 values (i);
4 commit;
5 end loop;
6 end;
7 /

begin
for i in 1..1000000 loop
insert into tmp_2 values (i);
commit;
end loop;
end;

ORA-20201: curroption block has been format
ORA-06512: at “LIU.TRIGGER_MONITOR”, line 6
ORA-04088: error during execution of trigger ‘LIU.TRIGGER_MONITOR’
ORA-06512: at line 4

第500个block已经被填充,下面我们来验证结果:


[oracle@liu ~]$ rman target /

Recovery Manager: Release 10.2.0.1.0 – Production on Sun Jan 8 20:06:25 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

connected to target database: AXX (DBID=665578413)

RMAN> backup validate datafile 6;

Starting backup at 08-JAN-12
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=133 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00006 name=/u01/app/oracle/oradata/axx/block01.dbf
channel ORA_DISK_1: backup set complete, elapsed time: 00:00:03
Finished backup at 08-JAN-12

RMAN> exit

Recovery Manager complete.
[oracle@liu ~]$ sqlplus ‘/as sysdba’

SQL*Plus: Release 10.2.0.1.0 – Production on Sun Jan 8 20:07:11 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

Connected to:
Oracle Database 10g Enterprise Edition Release 10.2.0.1.0 – Production
With the Partitioning, OLAP and Data Mining options

SQL> select * from V$DATABASE_BLOCK_CORRUPTION;

no rows selected

SQL> !
[oracle@liu ~]$ dbv file=/u01/app/oracle/oradata/axx/block01.dbf blocksize=8192;

DBVERIFY: Release 10.2.0.1.0 – Production on Sun Jan 8 20:09:07 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

DBVERIFY – Verification starting : FILE = /u01/app/oracle/oradata/axx/block01.dbf

DBVERIFY – Verification complete

Total Pages Examined : 12800
Total Pages Processed (Data) : 614
Total Pages Failing (Data) : 0
Total Pages Processed (Index): 0
Total Pages Failing (Index): 0
Total Pages Processed (Other): 61
Total Pages Processed (Seg) : 0
Total Pages Failing (Seg) : 0
Total Pages Empty : 12125
Total Pages Marked Corrupt : 0
Total Pages Influx : 0
Highest block SCN : 1178159 (0.1178159)

OK 已经恢复了,如果这种情况下再采用rman 的blockrecover 是恢复不了这个block的 我们来测试下:
制造坏块,同时确保max block 小于500;

SQL> select max(BLOCK_ID+blocks) from dba_extents a where tablespace_name=’BLOCK’;

MAX(BLOCK_ID+BLOCKS)
——————–
489

RMAN> backup database ;

Starting backup at 08-JAN-12
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=132 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00001 name=/u01/app/oracle/oradata/axx/system01.dbf
input datafile fno=00003 name=/u01/app/oracle/oradata/axx/sysaux01.dbf
input datafile fno=00005 name=/u01/app/oracle/oradata/axx/tbs_stream01.dbf
input datafile fno=00006 name=/u01/app/oracle/oradata/axx/block01.dbf
input datafile fno=00002 name=/u01/app/oracle/oradata/axx/undotbs01.dbf
input datafile fno=00004 name=/u01/app/oracle/oradata/axx/users01.dbf
channel ORA_DISK_1: starting piece 1 at 08-JAN-12
channel ORA_DISK_1: finished piece 1 at 08-JAN-12
piece handle=/u01/app/oracle/product/10.2.0/db_1/dbs/07n09aa7_1_1 tag=TAG20120108T202335 comment=NONE
channel ORA_DISK_1: backup set complete, elapsed time: 00:01:05
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
including current control file in backupset
including current SPFILE in backupset
channel ORA_DISK_1: starting piece 1 at 08-JAN-12
channel ORA_DISK_1: finished piece 1 at 08-JAN-12
piece handle=/u01/app/oracle/product/10.2.0/db_1/dbs/08n09ac8_1_1 tag=TAG20120108T202335 comment=NONE
channel ORA_DISK_1: backup set complete, elapsed time: 00:00:03
Finished backup at 08-JAN-12

RMAN> exit

[oracle@liu ~]$ dd of=/u01/app/oracle/oradata/axx/block01.dbf bs=8192 seek=500 conv=notrunc count=1 if=/dev/zero
1+0 records in
1+0 records out

[oracle@liu ~]$ rman target /

Recovery Manager: Release 10.2.0.1.0 – Production on Sun Jan 8 20:23:01 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

connected to target database: AXX (DBID=665578413)

RMAN> backup validate datafile 6;

Starting backup at 08-JAN-12
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=131 devtype=DISK
channel ORA_DISK_1: starting full datafile backupset
channel ORA_DISK_1: specifying datafile(s) in backupset
input datafile fno=00006 name=/u01/app/oracle/oradata/axx/block01.dbf
channel ORA_DISK_1: backup set complete, elapsed time: 00:00:01
Finished backup at 08-JAN-12

SQL> select * from V$DATABASE_BLOCK_CORRUPTION;

FILE# BLOCK# BLOCKS CORRUPTION_CHANGE# CORRUPTIO
———- ———- ———- —————— ———
6 500 1 0 ALL ZERO

RMAN> blockrecover corruption list;

Starting blockrecover at 08-JAN-12
using target database control file instead of recovery catalog
allocated channel: ORA_DISK_1
channel ORA_DISK_1: sid=134 devtype=DISK

starting media recovery
media recovery complete, elapsed time: 00:00:00

Finished blockrecover at 08-JAN-12

SQL> select * from V$DATABASE_BLOCK_CORRUPTION;

FILE# BLOCK# BLOCKS CORRUPTION_CHANGE# CORRUPTIO
———- ———- ———- —————— ———
6 500 1 0 ALL ZERO

[oracle@liu ~]$ dbv file=/u01/app/oracle/oradata/axx/block01.dbf blocksize=8192;

DBVERIFY: Release 10.2.0.1.0 – Production on Sun Jan 8 20:26:51 2012

Copyright (c) 1982, 2005, Oracle. All rights reserved.

DBVERIFY – Verification starting : FILE = /u01/app/oracle/oradata/axx/block01.dbf
Page 500 is marked corrupt
Corrupt block relative dba: 0x018001f4 (file 6, block 500)
Completely zero block found during dbv:

DBVERIFY – Verification complete

Total Pages Examined : 12800
Total Pages Processed (Data) : 613
Total Pages Failing (Data) : 0
Total Pages Processed (Index): 0
Total Pages Failing (Index): 0
Total Pages Processed (Other): 61
Total Pages Processed (Seg) : 0
Total Pages Failing (Seg) : 0
Total Pages Empty : 12125
Total Pages Marked Corrupt : 1
Total Pages Influx : 0
Highest block SCN : 1189579 (0.1189579)

可以看到坏块依然存在

分区表中的index 失效及处理

January 4, 2012 maintain, oracle No comments

分区表中 local 索引的维护会在oracle 操作表分区的时候自动进行,需要注意的是global 索引,当global索引所在表执行alter table 涉及下列操作时,会导至该索引失效,需要重新建立:

———————————————————————-

一、测试环境

[oracle@testdb ~]$ sqlplus ‘/as sysdba’

SQL*Plus: Release 11.2.0.2.0 Production on Wed Jan 4 14:52:40 2012

Copyright (c) 1982, 2010, Oracle. All rights reserved.

Connected to:
Oracle Dayangase 11g Enterprise Edition Release 11.2.0.2.0 – 64bit Production
With the Partitioning, OLAP, Data Mining and Real Application Testing options

二、测试步骤
1.全局索引测试步骤


create table yang (id number,name char(100))
partition by range(id)
(partition a values less than(10),
partition b values less than(20),
partition c values less than(30));

insert into yang values(1,’a’);
insert into yang values(11,’a’);
insert into yang values(21,’a’);
commit;

create index idx_yang_id on yang (id);

———————– add测试—————————-

alter table yang add partition d values less than (40);
alter table yang add partition e values less than (maxvalue);

———————– drop测试—————————

insert into yang values(31,’a’);
commit;

alter table yang drop partition e;
alter table yang drop partition d;
alter index idx_yang_id rebuild;

———————– truncate测试—————————-

alter table yang add partition d values less than (40);
alter table yang add partition e values less than (maxvalue);

insert into yang values(31,’a’);
commit;

alter table yang truncate partition e;
alter table yang truncate partition d;
alter index idx_yang_id rebuild;

———————– split测试—————————
alter table yang add partition d values less than (maxvalue);
alter table yang split partition d at (40) into (partition d,partition e);

insert into yang values(45,’a’);
commit;
alter table yang split partition e at (50) into (partition e,partition f);

insert into yang values(65,’a’);
commit;
alter table yang split partition f at (60) into (partition f,partition g);

insert into yang values(75,’a’);
commit;
alter table yang split partition g at (70) into (partition g,partition h);
alter index idx_yang_id rebuild;

2.本地索引测试步骤


create table yang (id number,name char(100))
partition by range(id)
(partition a values less than(10),
partition b values less than(20),
partition c values less than(30));

insert into yang values(1,’a’);
insert into yang values(11,’a’);
insert into yang values(21,’a’);
commit;

create index l_idx on yang (id) local;

———————– add测试—————————-

alter table yang add partition d values less than (40);
alter table yang add partition e values less than (maxvalue);

———————– drop测试—————————

insert into yang values(31,’a’);
commit;

alter table yang drop partition e;
alter table yang drop partition d;

———————– truncate测试—————————-

alter table yang add partition d values less than (40);
alter table yang add partition e values less than (maxvalue);

insert into yang values(31,’a’);
commit;

alter table yang truncate partition e;
alter table yang truncate partition d;

———————– split测试—————————
alter table yang add partition d values less than (maxvalue);
alter table yang split partition d at (40) into (partition d,partition e);

insert into yang values(45,’a’);
commit;
alter table yang split partition e at (50) into (partition e,partition f);

insert into yang values(65,’a’);
commit;
alter table yang split partition f at (60) into (partition f,partition g);

insert into yang values(75,’a’);
commit;
alter table yang split partition g at (70) into (partition g,partition h);

alter index l_idx rebuild partition h;
alter index l_idx rebuild partition g;

三、测试结论 (range, list)


global index:

add partition valid
drop partition 分区中无数据:valid 有数据:unusable
truncate partition 分区中无数据:valid 有数据:unusable

local index:
add partition,drop partition,truncate partition 都不会导致失效

—————————————————-

split partition a->a,b

global index :
a,b无数据 valid
a有数据 b无数据 valid
a无数据 b有数据 valid
a,b都有数据 unusable

local index:
a,b无数据 usable
a有数据 b无数据 usable
a无数据 b有数据 usable
a,b均有数据 unusable

———————————————————

exchang partition (range list)

reference:http://www.itpub.net/thread-1323472-1-1.html

我下面就主要对这两种办法讨论update global indexes。

建表SQL(其中,HAOPART2和HAOPART是一样的结构):


create table haopart (
id number not null,
c1 char(100),
c2 char(200),
c3 char(300)
)
PARTITION BY RANGE(id)
(
PARTITION PART01 VALUES LESS THAN (100),
PARTITION PART02 VALUES LESS THAN (200),
PARTITION PART03 VALUES LESS THAN (500),
PARTITION PART04 VALUES LESS THAN (1000),
PARTITION PARTMAX VALUES LESS THAN (MAXVALUE)
)
tablespace USERS
;

create index haolocal_1 on haopart(c1) local tablespace USERS;
create index haolocal_2 on haopart(c2) local tablespace USERS;
create index haolocal_3 on haopart(c3) local tablespace USERS;
create index haoglobal on haopart(id,c1,c2,c3) global tablespace USERS ;

insert into haopart
select rownum,object_name,object_name,object_name
from dba_objects;

这样HAOPART就有3个local indexes和1个global index。

临时表建表SQL(其中,HAOTMP和HAOTMP2是一样的结构):


create table haotmp
(
id number not null,
c1 char(100),
c2 char(200),
c3 char(300)
) tablespace users;

create index tmphao_1 on haotmp(c1) tablespace USERS;
create index tmphao_2 on haotmp(c2) tablespace USERS;
create index tmphao_3 on haotmp(c3) tablespace USERS;

一.以exchange partition为例,不加update global indexes时:
1. 如果partiton里有数据,global index则会失效
SQL> select count(*) from haopart2 partition(part04);
COUNT(*)
———-
500
SQL> select count(*) from haotmp2;
COUNT(*)
———-
0
SQL> alter table haopart2 exchange partition part04 with table haotmp2 including indexes without validation;
table altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 UNUSABLE NO

2. 如果partition里没有任何数据,新的临时表有数据,global index也会失效。

SQL> select count(*) from haotmp2;
COUNT(*)
———-
500

SQL> select count(*) from haopart2 partition(part04);
COUNT(*)
———-
0
SQL> alter index haoglobal2 rebuild;
Index altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 VALID NO

SQL> alter table haopart2 exchange partition part04 with table haotmp2 including indexes without validation;
table altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where
2 table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 UNUSABLE NO

3.即使partition和临时表都没有数据,也会使global index失效。
SQL> alter table haopart2 truncate partition part04;
table truncated.

SQL> truncate table haotmp2;
table truncated.

SQL> alter index haoglobal2 rebuild;
Index altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 VALID NO

SQL> alter table haopart2 exchange partition part04 with table haotmp2 including indexes without validation;
table altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 UNUSABLE NO

二.以exchange partition为例,加上update global indexes时:
1. 无论任何时候,global index都不会失效。

SQL> select count(*) from haopart2 partition(part04);
COUNT(*)
———-
500
SQL> select count(*) from haotmp2;
COUNT(*)
———-
56

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 VALID NO

SQL> alter table haopart2 exchange partition part04 with table haotmp2 including indexes without validation update global indexes;
table altered.

SQL> select INDEX_NAME,STATUS,PARTITIONED from dba_indexes where
2 table_NAME=’HAOPART2′ and PARTITIONED=’NO’;
INDEX_NAME STATUS PAR
—————————— ——– —
HAOGLOBAL2 VALID NO

2. 会对原表加Mode=3 TM lock,会对原表做ddl的partition加Mode=6 TM lock。

select o.OBJECT_ID,o.OBJECT_NAME,o.SUBOBJECT_NAME,o.OBJECT_TYPE,l.LMODE
from dba_objects o,v$lock l
where o.OBJECT_ID=l.ID1
and l.TYPE=’TM’
and l.sid=1094
;
OBJECT_ID OBJECT_NAM SUBOBJECT_ OBJECT_TYPE LMODE
———- ———- ———- ——————- ———-
10597 HAOPART PART04 table PARTITION 6
10593 HAOPART table 3
10604 HAOTMP table 6

3. exchange partition update global indexes不会block使用global index的select语句,但是由于大量的update index操作,所以会使得查询大量走undo,所以查询会变慢。

在如下exchange partition update global indexes命令进行时:

alter table haopart exchange partition part04 with table haotmp
including indexes without validation
update global indexes;

在另一个session执行如下走global index的select:
select count(*) from haopart where id <=1000; ------------------------------------------------------------------------------- | Id | Operation | Name | Rows | Bytes | Cost (%CPU)| Time | ------------------------------------------------------------------------------- | 0 | SELECT STATEMENT | | 1 | 4 | 2902 (1)| 00:00:35 | | 1 | SORT AGGREGATE | | 1 | 4 | | | |* 2 | INDEX RANGE SCAN| HAOGLOBAL | 31744 | 124K| 2902 (1)| 00:00:35 | ------------------------------------------------------------------------------- Predicate Information (identified by operation id): --------------------------------------------------- 2 - access("ID"<=1000) Statistics ---------------------------------------------------------- 0 recursive calls 0 db block gets 2914 consistent gets 0 physical reads 0 redo size 516 bytes sent via SQL*Net to client 469 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 1 rows processed Statistics ---------------------------------------------------------- 0 recursive calls 0 db block gets 4095 consistent gets 0 physical reads 27052 redo size 516 bytes sent via SQL*Net to client 469 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 1 rows processed Statistics ---------------------------------------------------------- 0 recursive calls 0 db block gets 5130 consistent gets 0 physical reads 49140 redo size 516 bytes sent via SQL*Net to client 469 bytes received via SQL*Net from client 2 SQL*Net roundtrips to/from client 0 sorts (memory) 0 sorts (disk) 1 rows processed

可见,执行计划是不变的,但是逻辑读不断上升,也产生大量的redo。
明显查询了undo。

4. exchange partition update global index会阻碍该partition上的dml,但不会阻碍其他partition上的dml。

根据第二点,由于这条语句会对该partition加Mode=6 TM lock,所以很显然,该partition是无法做dml的。

我们会看到等待事件:enq: TM – contention:TM-3:2:

—————————–

exchange partition –local index 测试

SQL> create table t_partition (id number,name varchar2(50))
2 partition by range(id)(
partition p1 values less than (10) ,
partition p2 values less than (20) ,
partition p3 values less than (30) ,
partition pmax values less than (maxvalue)
)
; 3 4 5 6 7 8

Table created.

SQL> create index partition_idx_name on t_partition (name) local;

Index created.

SQL> create table t_no_partition (id number,name varchar2(50));

Table created.

SQL> create index idx_name on t_no_partition(name);

Index created.

SQL> insert into t_no_partition values(11,’11’);

1 row created.

SQL> insert into t_no_partition values(12,’ddd’);

1 row created.

SQL> insert into t_no_partition values(13,’13dsa’);

1 row created.

SQL> commit;

Commit complete.

SQL>
SQL>
SQL> alter table t_partition exchange partition p2 with table t_no_partition INCLUDING INDEXES with validation;

Table altered.

SQL> select partition_name, status from user_ind_partitions where index_name=upper(‘partition_idx_name’);

PARTITION_NAME STATUS
—————————— ——–
P1 USABLE
P2 USABLE
P3 USABLE
PMAX USABLE

SQL> alter table t_partition exchange partition p1 with table t_no_partition;

Table altered.

SQL> alter table t_partition exchange partition p3 with table t_no_partition;

Table altered.

SQL> select partition_name, status from user_ind_partitions where index_name=upper(‘partition_idx_name’);

PARTITION_NAME STATUS
—————————— ——–
P1 USABLE
P2 USABLE
P3 UNUSABLE
PMAX USABLE

可以看到不带INCLUDING INDEXES with validation 被exchange的local index partition 会失效,对于存在主键的分区表,可以在主键上以DISABLE VALIDATE方式创建unique constraint约束,以代替全局的主键索引。若交换表上存在主键索引的话,那么建议在交换前暂时将该索引drop掉,待交换完成后再重建。

———————————————
hash partition的测试:

SQL> CREATE table liu(id number,name varchar2(20))
PARTITION BY HASH (id)
PARTITIONS 8
STORE IN (users) 2 3 4
5 ;

table created.

SQL>
SQL>
SQL> begin
2 for i in 1..10000 loop
3 insert into liu values (i,null);
4 commit;
5 end loop;
6 end;
7 /
alter table liu add constraint pk_liu_id primary key(id);
CREATE INDEX idx_liu_id ON liu (name) LOCAL ;

Partition Name Max Value Length tablespace Compression Rows Blocks Empty Blocks Last Analyzed Avg Space # Subparts

SYS_P153 0 USERS DISABLED 1,188 46 978 2012/1/4 14:25:41 7257 0
SYS_P152 0 USERS DISABLED 1,201 46 978 2012/1/4 14:25:41 7248 0
SYS_P150 0 USERS DISABLED 1,232 46 978 2012/1/4 14:25:41 7226 0
SYS_P155 0 USERS DISABLED 1,259 46 978 2012/1/4 14:25:41 7208 0
SYS_P151 0 USERS DISABLED 1,262 46 978 2012/1/4 14:25:41 7206 0
SYS_P156 0 USERS DISABLED 1,280 46 978 2012/1/4 14:25:41 7193 0
SYS_P149 0 USERS DISABLED 1,283 46 978 2012/1/4 14:25:41 7191 0
SYS_P154 0 USERS DISABLED 1,295 46 978 2012/1/4 14:25:41 7183 0

alter table liu add partition ;

idx_liu_id 分区变为unusable:

Partition Name tablespace Last Analyzed Max Value Length Rows BLevel Distinct Keys # Subparts

UNUSABLE SYS_P165 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P166 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P167 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P168 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P169 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P170 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P171 USERS 2012/1/4 14:30:15 0 0 0 0 0
VALID SYS_P172 USERS 2012/1/4 14:30:15 0 0 0 0 0
UNUSABLE SYS_P173 PURCHASE 0 0

select status from dba_indexes where index_name=’PK_LIU_ID’;

SQL> select status from dba_indexes where index_name=’PK_LIU_ID’;

STATUS
——–
UNUSABLE

SQL>

同样变为了unusable,hash分区在add partition时候 由于数据会重组,所以local,global index 都会变成unusable

在线重定义一例

December 29, 2011 maintain, oracle 1 comment

创建需要redefinition的表,结构同源表


— Create table
create table yhd_so_temp
(id NUMBER(18) not null,
end_user_id NUMBER(18),
order_amount NUMBER(11,2),
order_code CHAR(12),
order_status NUMBER(4),
order_type NUMBER(4),
order_need_cs NUMBER(1),
order_delivery_fee NUMBER(11,2),
order_source NUMBER(4),
back_operator_id NUMBER(18),
order_paid_by_rebate NUMBER(11,2),
order_pay_cfm_bk_op_id NUMBER(18),
order_payment_confirm_date DATE,
order_payment_signal NUMBER(4),
order_payment_method_id NUMBER(18),
order_payment_code VARCHAR2(500),
order_create_time DATE,
order_to_logistics_time DATE,
order_out_of_inventory_status NUMBER(4),
good_receiver_id NUMBER(18),
order_need_invoice NUMBER(1),
parent_so_id NUMBER(18),
delivery_date DATE,
expect_receive_date DATE,
expect_receive_time NUMBER(4),
receive_date DATE,
order_delivery_method_id NUMBER(18),
order_paid_by_account NUMBER(11,2),
order_paid_by_others NUMBER(11,2),
tracker_session_id VARCHAR2(400),
account_payable NUMBER(11,2),
product_amount NUMBER(11,2),
session_id VARCHAR2(400),
data_exchange_flag NUMBER(1),
good_receiver_name VARCHAR2(500),
good_receiver_address VARCHAR2(400),
good_receiver_province VARCHAR2(400),
good_receiver_city VARCHAR2(400),
good_receiver_county VARCHAR2(400),
good_receiver_post_code VARCHAR2(400),
good_receiver_phone VARCHAR2(400),
supplier_process_status NUMBER(4),
po_id NUMBER(18),
order_paid_by_coupon NUMBER(11,2),
cancel_date DATE,
is_leaf NUMBER(1),
order_cs_remark VARCHAR2(2000),
has_imported NUMBER(1),
good_receiver_mobile VARCHAR2(400),
do_cancel_flag NUMBER(10) not null,
so_item_count_change_flag NUMBER(1) not null,
order_paid_by_card NUMBER(11,2) not null,
reference_point NUMBER,
real_point NUMBER,
recompense_points NUMBER not null,
membership_base_point NUMBER not null,
group_ratio NUMBER,
wlt_point NUMBER not null,
track_info VARCHAR2(2000),
is_fragile NUMBER(1),
is_liquid NUMBER(1),
bought_times NUMBER(18),
spec_proc_flag NUMBER(4),
id_card VARCHAR2(400),
order_import_source NUMBER(18),
is_new_good_receiver_hpone NUMBER(1),
partner_amount NUMBER(11,2),
partner_amount_type NUMBER(1),
is_vip NUMBER(1),
good_receiver_city_id NUMBER(18),
good_receiver_country_id NUMBER(18),
good_receiver_county_id NUMBER(18),
good_receiver_province_id NUMBER(18),
allyes_uid VARCHAR2(300),
payment_gateway_id NUMBER(18),
need_allocation NUMBER(1) not null,
warehouse_id NUMBER(18),
delivery_supplierid NUMBER(18),
order_import_do_flag NUMBER(1),
cancel_operator_id NUMBER(18),
mc_site_id NUMBER(8),
order_need_integral NUMBER(9),
virtual_stock_status NUMBER(2),
business_type NUMBER(2),
flag NUMBER(1),
order_finish_time DATE,
update_time DATE,
good_remark VARCHAR2(300),
order_cs_reason VARCHAR2(400),
treatment_time DATE,
treatment_user_id VARCHAR2(400),
is_half_day_delivery NUMBER(1) default 0,
central_version NUMBER(5,2),
order_promotion_discount NUMBER(11,2),
is_need_double_form NUMBER(1) default 0,
creditcard_owner_name VARCHAR2(120),
is_moved_by_hand NUMBER(1) default 0 not null,
order_delivery_person_mobile VARCHAR2(400),
estimate_receive_date DATE,
delivery_remark VARCHAR2(500),
receive_remark VARCHAR2(500),
supplier_delivery_time VARCHAR2(400),
order_delivery_add_fee NUMBER(11,2),
user_ip VARCHAR2(500),
user_guid VARCHAR2(400),
print_num NUMBER(2),
order_create_pay_time DATE,
delivery_method_type NUMBER(8),
delivery_service_type NUMBER(8),
pay_service_type NUMBER(8),
good_receiver_area VARCHAR2(500),
good_receiver_area_id NUMBER(18),
is_new_deliveryorder NUMBER(3),
delivery_level NUMBER(1),
order_weight NUMBER(11,2)
)
partition by range (ORDER_CREATE_TIME)
(
partition P0811 values less than (TO_DATE(‘ 2008-11-01 00:00:00’, ‘SYYYY-MM-DD HH24:MI:SS’, ‘NLS_CALENDAR=GREGORIAN’))
tablespace data01
pctfree 10
initrans 1
maxtrans 255
storage
(
initial 64K
next 1M
minextents 1
maxextents unlimited
),
partition P0812

……….


Connected to:
Oracle Database 11g Enterprise Edition Release 11.2.0.2.0 – 64bit Production
With the Partitioning, OLAP, Data Mining and Real Application Testing options

SQL> alter session force parallel dml parallel 4;

Session altered.

SQL> alter session force parallel query parallel 4;

Session altered.

SQL> set timing on;

SQL>
SQL>
SQL> begin
2 DBMS_REDEFINITION.START_REDEF_TABLE(uname => ‘EDW1_USER’, orig_table => ‘YHD_SO’,int_table=> ‘YHD_SO_TEMP’,options_flag=>dbms_redefinition.cons_use_rowid);
3 end;
4 /

PL/SQL procedure successfully completed.

Elapsed: 00:06:00.41

花了6分钟 速度很不错

下面copy index 约束等,对于分区表建议不要copy index,约束,手工建立local index

SQL> select count (*) from yhd_so_temp

COUNT (*)
————
13474251

SQL> declare
2 num_errors PLS_INTEGER;
begin
DBMS_REDEFINITION.COPY_TABLE_DEPENDENTS(
uname =>’EDW1_USER’,
orig_table =>’YHD_SO’,
int_table =>’YHD_SO_TEMP’,
copy_indexes => 0,
copy_triggers=>true,
copy_constraints=>false,
copy_privileges=>true,
ignore_errors=>false,
num_errors=>num_errors,
copy_statistics=>true);
end;
/ 3 4 5 6 7 8 9 10 11 12 13 14 15 16

PL/SQL procedure successfully completed.

Elapsed: 00:09:52.79

花费了9分钟

创建local index

alter session set workarea_size_policy=manual;

alter session set sort_area_size=1057600;

alter session set db_file_multiblock_read_count=128;

alter table YHD_SO_TEMP
add constraint YHD_PK_SO_N_1 primary key (ID,order_create_time) using index tablespace data01 local;

create index IDX_YHD_SO_PARENT_SO_ID_N_1 on YHD_SO_TEMP (PARENT_SO_ID)
tablespace data01 local parallel 4

create index YHD_IDX_CREATE_TIME_N_1 on YHD_SO_TEMP (TRUNC(ORDER_CREATE_TIME))
tablespace data01 local parallel 4

create index YHD_IDX_ORDER_STATUS_N_1 on YHD_SO_TEMP (ORDER_STATUS)
tablespace data01 local parallel 4

create index YHD_IDX_SO_CREATE_DATE_N_1 on YHD_SO_TEMP (ORDER_CREATE_TIME)
tablespace data01 local parallel 4

create index YHD_IDX_SO_END_USER_ID_N_1 on YHD_SO_TEMP (END_USER_ID)
tablespace data01 local parallel 4

create index YHD_IDX_SO_ORDER_TYPE_N_1 on YHD_SO_TEMP (ORDER_TYPE)
tablespace data01 local parallel 4

create index YHD_IDX_SO_PAYMENT_ID_N_1 on YHD_SO_TEMP (ORDER_PAYMENT_METHOD_ID)
tablespace data01 local parallel 4

create index YHD_UQ_IDX_SO_CODE_N_1 on YHD_SO_TEMP (ORDER_CODE)
tablespace data01 local parallel 4

SYNC同步表,这一步主要是为了减少finish lock table的时间 ,收集新表的statistics,最后不要忘了alter index noparallel

SQL> begin
DBMS_REDEFINITION.SYNC_INTERIM_TABLE(
uname =>’EDW1_USER’,
orig_table =>’YHD_SO’,
int_table =>’YHD_SO_TEMP’);
end;
/ 2 3 4 5 6 7

PL/SQL procedure successfully completed.

SQL> begin
DBMS_REDEFINITION.FINISH_REDEF_TABLE (
uname =>’EDW1_USER’,
orig_table =>’YHD_SO’,
int_table =>’YHD_SO_TEMP’);
end;
/ 2 3 4 5 6 7

PL/SQL procedure successfully completed.

SQL> exec DBMS_STATS.GATHER_TABLE_STATS (ownname =>’edw1_user’,tabname =>’yhd_so’,estimate_percent=> 10,degree =>4, cascade=>true);

PL/SQL procedure successfully completed.

SQL> alter index IDX_YHD_SO_PARENT_SO_ID_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_CREATE_TIME_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_ORDER_STATUS_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_SO_CREATE_DATE_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_SO_END_USER_ID_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_SO_ORDER_TYPE_N_1 noparallel;

Index altered.

SQL> alter index YHD_IDX_SO_PAYMENT_ID_N_1 noparallel;

Index altered.

SQL> alter index YHD_UQ_IDX_SO_CODE_N_1 noparallel;

Index altered.

SQL>

整个过程没有超过1个小时,1300万的数据也不算小 online redefinition 是heap转分区表的一个不错的方法。