SQLLOAD加载数据优化案例-使用SQLLOAD驱动模式的外部表
Nov142013
某银行在用SQLLOAD向数据库中直接路径加载44G文本文件(约1亿条记录)的时候,速度非常慢,每秒只能加载10M多的数据(3W多条记录),加载完成需要1个多小时,有时需要两个多小时,通过设置一些优化参数后,加载速度提升不明显,甚至比默认参数加载还要慢(其实加载速度并不慢,虽然每秒只能加载10M的数据感觉很慢,但是每秒加载3W多条的记录已经不慢了),除了将SQLLOAD加载改成使用SQLLOAD驱动的外部表加载应该没有什么优化办法,即使将加载速度从每秒3W多条记录提升到4W条,加载速度还是太慢。下面是将SQLLOAD加载改成SQLLOAD驱动方式的外部表做的测试。
[oracle@dm0101 hongye]$ du -sh /dbfs/my_dbfs/flatfile/20130331/EC00006D.CDU 44G /dbfs/my_dbfs/flatfile/20130331/EC00006D.CDU
使用SQLLOAD生成外部表的创建语句。
[oracle@dm0101 hongye]$ sqlldr xxx/xxx control=sqlload.ctl external_table=GENERATE_ONLY SQL*Loader: Release 11.2.0.3.0 - Production on Thu Nov 14 17:29:29 2013 Copyright (c) 1982, 2011, Oracle and/or its affiliates. All rights reserved.
打开sqlload的log文件,找到创建DIRECTORY和建表部分,创建外部表。
SQL> CREATE DIRECTORY SYS_SQLLDR_XT_TMPDIR_00001 AS '/home/oracle/xxx'; Directory created. SQL> CREATE DIRECTORY SYS_SQLLDR_XT_TMPDIR_00000 AS '/dbfs/my_dbfs/flatfile/20130331/'; Directory created. SQL> CREATE TABLE "SYS_SQLLDR_X_EXT_ENMO_ASICDA" 2 ( 3 "APPG_MODE" VARCHAR2(2), 4 "APPG_DATE" DATE, 5 "AS_OF_DATE" DATE, 6 "ACCT_NO" VARCHAR2(80), 7 "PARTY_ID" NUMBER, 8 "CLIT_NO" NUMBER(16), 9 "BRAN_NO" VARCHAR2(60), 10 "DEPT_ID" NUMBER(14), 11 "GL_CODE" VARCHAR2(20), 12 "GL_ACCOUNT_ID" NUMBER(14), 13 "GL_ACCOUNT_ID_DP" NUMBER(14), 14 "GL_ACCOUNT_ID_OD" NUMBER(14), 15 "GL_ACCOUNT_ID_NPL" NUMBER(14), 16 "COMMON_COA_ID" NUMBER(14), 17 "OLD_PROD_CODE" VARCHAR2(20), 18 "OLD_SUB_PROD_CODE" VARCHAR2(20), 19 "ACCT_STATUS" NUMBER(2), 20 "IS_LINK_CARD" VARCHAR2(8), 21 "CARD_NO" VARCHAR2(40), 22 "IS_CHQ" VARCHAR2(2), 23 "ISO_CURRENCY_CD" VARCHAR2(6), 24 "CUR_BOOK_BAL" NUMBER(20,2), 25 "EFFEC_BOOK_BAL" NUMBER(20,2), 26 "ACCRUAL_BASIS_CD" NUMBER(5), 27 "ADJUSTABLE_TYPE_CD" NUMBER(5), 28 "AMRT_TYPE_CD" NUMBER(5), 29 "COMPOUND_BASIS_CD" NUMBER(5), 30 "PRIME_RATE" NUMBER(13,8), 31 "CUR_NET_RATE" NUMBER(13,8), 32 "INT_TYPE" NUMBER(5), 33 "PMT_FREQ" NUMBER(5), 34 "PMT_FREQ_MULT" VARCHAR2(2), 35 "REPRICE_FREQ" NUMBER(5), 36 "REPRICE_FREQ_MULT" VARCHAR2(2), 37 "ORG_TERM" NUMBER(5), 38 "ORG_TERM_MULT" VARCHAR2(2), 39 "LAST_REPRICE_DATE" DATE, 40 "NEXT_REPRICE_DATE" DATE, 41 "ISSUE_DATE" DATE, 42 "ACCOUNT_CLOSE_DATE" DATE, 43 "ORIGINATION_DATE" DATE, 44 "MATURITY_DATE" DATE, 45 "INT_DAY_INCOME" NUMBER(24,4), 46 "INT_DAY_PAYOUT" NUMBER(24,4), 47 "INT_CACL_INCOME" NUMBER(24,4), 48 "INT_CACL_DPAYOUT" NUMBER(24,4), 49 "LOAN_LOSS_RESERVE" NUMBER(20,2), 50 "OVERDRAFT_ACCT_TYPE" NUMBER(4), 51 "OVERDRAFT_SUB_ACCT_TYPE" NUMBER(4), 52 "OVERDRAFT_STATUS" VARCHAR2(2), 53 "OVERDRAFT_START_DATE" DATE, 54 "OVERDRAFT_STOP_DATE" DATE, 55 "OVERDRAFT_LIM_AMT" NUMBER(20,2), 56 "OVERDRAFT_RATE_TYPE" VARCHAR2(2), 57 "OVERDRAFT_RATE" NUMBER(13,8), 58 "OVERDRAFT_BAL" NUMBER(20,2), 59 "DATA_SOURCE" VARCHAR2(20), 60 "INT_MON_INCOME" NUMBER(24,4), 61 "INT_MON_PAYOUT" NUMBER(24,4), 62 "CUR_BOOK_BAL_Y1" NUMBER(20,2), 63 "CUR_BOOK_BAL_Y2" NUMBER(20,2), 64 "CUR_BOOK_BAL_M1" NUMBER(20,2), 65 "CUR_BOOK_BAL_M2" NUMBER(20,2), 66 "CUR_TP_PER_ADB_M" NUMBER(20,2), 67 "PRI_TP_PER_ADB_M" NUMBER(20,2), 68 "AUTO_RENEWAL_FLG" NUMBER, 69 "LAST_MATURITY_DATE" DATE, 70 "RECORD_FLAG" VARCHAR2(2), 71 "ORIGINAL_MATURITY_DATE" DATE 72 ) 73 ORGANIZATION external 74 ( 75 TYPE oracle_loader 76 DEFAULT DIRECTORY SYS_SQLLDR_XT_TMPDIR_00000 77 ACCESS PARAMETERS 78 ( 79 RECORDS DELIMITED BY NEWLINE CHARACTERSET US7ASCII 80 BADFILE 'SYS_SQLLDR_XT_TMPDIR_00001':'EC00006D.bad' 81 LOGFILE 'sqlload.log_xt' 82 READSIZE 1048576 83 FIELDS TERMINATED BY "~|~" LDRTRIM 84 MISSING FIELD VALUES ARE NULL 85 REJECT ROWS WITH ALL NULL FIELDS 86 ( 87 "APPG_MODE" CHAR(255) 88 TERMINATED BY "~|~", 89 "APPG_DATE" CHAR(255) 90 TERMINATED BY "~|~" 91 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 92 "AS_OF_DATE" CHAR(255) 93 TERMINATED BY "~|~" 94 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 95 "ACCT_NO" CHAR(255) 96 TERMINATED BY "~|~", 97 "PARTY_ID" CHAR(255) 98 TERMINATED BY "~|~", 99 "CLIT_NO" CHAR(255) 100 TERMINATED BY "~|~", 101 "BRAN_NO" CHAR(255) 102 TERMINATED BY "~|~", 103 "DEPT_ID" CHAR(255) 104 TERMINATED BY "~|~", 105 "GL_CODE" CHAR(255) 106 TERMINATED BY "~|~", 107 "GL_ACCOUNT_ID" CHAR(255) 108 TERMINATED BY "~|~", 109 "GL_ACCOUNT_ID_DP" CHAR(255) 110 TERMINATED BY "~|~", 111 "GL_ACCOUNT_ID_OD" CHAR(255) 112 TERMINATED BY "~|~", 113 "GL_ACCOUNT_ID_NPL" CHAR(255) 114 TERMINATED BY "~|~", 115 "COMMON_COA_ID" CHAR(255) 116 TERMINATED BY "~|~", 117 "OLD_PROD_CODE" CHAR(255) 118 TERMINATED BY "~|~", 119 "OLD_SUB_PROD_CODE" CHAR(255) 120 TERMINATED BY "~|~", 121 "ACCT_STATUS" CHAR(255) 122 TERMINATED BY "~|~", 123 "IS_LINK_CARD" CHAR(255) 124 TERMINATED BY "~|~", 125 "CARD_NO" CHAR(255) 126 TERMINATED BY "~|~", 127 "IS_CHQ" CHAR(255) 128 TERMINATED BY "~|~", 129 "ISO_CURRENCY_CD" CHAR(255) 130 TERMINATED BY "~|~", 131 "CUR_BOOK_BAL" CHAR(255) 132 TERMINATED BY "~|~", 133 "EFFEC_BOOK_BAL" CHAR(255) 134 TERMINATED BY "~|~", 135 "ACCRUAL_BASIS_CD" CHAR(255) 136 TERMINATED BY "~|~", 137 "ADJUSTABLE_TYPE_CD" CHAR(255) 138 TERMINATED BY "~|~", 139 "AMRT_TYPE_CD" CHAR(255) 140 TERMINATED BY "~|~", 141 "COMPOUND_BASIS_CD" CHAR(255) 142 TERMINATED BY "~|~", 143 "PRIME_RATE" CHAR(255) 144 TERMINATED BY "~|~", 145 "CUR_NET_RATE" CHAR(255) 146 TERMINATED BY "~|~", 147 "INT_TYPE" CHAR(255) 148 TERMINATED BY "~|~", 149 "PMT_FREQ" CHAR(255) 150 TERMINATED BY "~|~", 151 "PMT_FREQ_MULT" CHAR(255) 152 TERMINATED BY "~|~", 153 "REPRICE_FREQ" CHAR(255) 154 TERMINATED BY "~|~", 155 "REPRICE_FREQ_MULT" CHAR(255) 156 TERMINATED BY "~|~", 157 "ORG_TERM" CHAR(255) 158 TERMINATED BY "~|~", 159 "ORG_TERM_MULT" CHAR(255) 160 TERMINATED BY "~|~", 161 "LAST_REPRICE_DATE" CHAR(255) 162 TERMINATED BY "~|~" 163 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 164 "NEXT_REPRICE_DATE" CHAR(255) 165 TERMINATED BY "~|~" 166 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 167 "ISSUE_DATE" CHAR(255) 168 TERMINATED BY "~|~" 169 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 170 "ACCOUNT_CLOSE_DATE" CHAR(255) 171 TERMINATED BY "~|~" 172 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 173 "ORIGINATION_DATE" CHAR(255) 174 TERMINATED BY "~|~" 175 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 176 "MATURITY_DATE" CHAR(255) 177 TERMINATED BY "~|~" 178 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 179 "INT_DAY_INCOME" CHAR(255) 180 TERMINATED BY "~|~", 181 "INT_DAY_PAYOUT" CHAR(255) 182 TERMINATED BY "~|~", 183 "INT_CACL_INCOME" CHAR(255) 184 TERMINATED BY "~|~", 185 "INT_CACL_DPAYOUT" CHAR(255) 186 TERMINATED BY "~|~", 187 "LOAN_LOSS_RESERVE" CHAR(255) 188 TERMINATED BY "~|~", 189 "OVERDRAFT_ACCT_TYPE" CHAR(255) 190 TERMINATED BY "~|~", 191 "OVERDRAFT_SUB_ACCT_TYPE" CHAR(255) 192 TERMINATED BY "~|~", 193 "OVERDRAFT_STATUS" CHAR(255) 194 TERMINATED BY "~|~", 195 "OVERDRAFT_START_DATE" CHAR(255) 196 TERMINATED BY "~|~" 197 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 198 "OVERDRAFT_STOP_DATE" CHAR(255) 199 TERMINATED BY "~|~" 200 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 201 "OVERDRAFT_LIM_AMT" CHAR(255) 202 TERMINATED BY "~|~", 203 "OVERDRAFT_RATE_TYPE" CHAR(255) 204 TERMINATED BY "~|~", 205 "OVERDRAFT_RATE" CHAR(255) 206 TERMINATED BY "~|~", 207 "OVERDRAFT_BAL" CHAR(255) 208 TERMINATED BY "~|~", 209 "DATA_SOURCE" CHAR(255) 210 TERMINATED BY "~|~", 211 "INT_MON_INCOME" CHAR(255) 212 TERMINATED BY "~|~", 213 "INT_MON_PAYOUT" CHAR(255) 214 TERMINATED BY "~|~", 215 "CUR_BOOK_BAL_Y1" CHAR(255) 216 TERMINATED BY "~|~", 217 "CUR_BOOK_BAL_Y2" CHAR(255) 218 TERMINATED BY "~|~", 219 "CUR_BOOK_BAL_M1" CHAR(255) 220 TERMINATED BY "~|~", 221 "CUR_BOOK_BAL_M2" CHAR(255) 222 TERMINATED BY "~|~", 223 "CUR_TP_PER_ADB_M" CHAR(255) 224 TERMINATED BY "~|~", 225 "PRI_TP_PER_ADB_M" CHAR(255) 226 TERMINATED BY "~|~", 227 "AUTO_RENEWAL_FLG" CHAR(255) 228 TERMINATED BY "~|~", 229 "LAST_MATURITY_DATE" CHAR(255) 230 TERMINATED BY "~|~" 231 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss', 232 "RECORD_FLAG" CHAR(255) 233 TERMINATED BY "~|~", 234 "ORIGINAL_MATURITY_DATE" CHAR(255) 235 TERMINATED BY "~|~" 236 DATE_FORMAT DATE MASK 'yyyy-mm-dd hh24:mi:ss' 237 ) 238 ) 239 location 240 ( 241 'EC00006D.CDU' 242 ) 243 )REJECT LIMIT UNLIMITED; Table created.
外部表创建完成后,使用insert into的方法将数据插入到业务表。
SQL> INSERT /*+ parallel 32 */ INTO ENMO_ASICDA 2 ( 3 APPG_MODE, 4 APPG_DATE, 5 AS_OF_DATE, 6 ACCT_NO, 7 PARTY_ID, 8 CLIT_NO, 9 BRAN_NO, 10 DEPT_ID, 11 GL_CODE, 12 GL_ACCOUNT_ID, 13 GL_ACCOUNT_ID_DP, 14 GL_ACCOUNT_ID_OD, 15 GL_ACCOUNT_ID_NPL, 16 COMMON_COA_ID, 17 OLD_PROD_CODE, 18 OLD_SUB_PROD_CODE, 19 ACCT_STATUS, 20 IS_LINK_CARD, 21 CARD_NO, 22 IS_CHQ, 23 ISO_CURRENCY_CD, 24 CUR_BOOK_BAL, 25 EFFEC_BOOK_BAL, 26 ACCRUAL_BASIS_CD, 27 ADJUSTABLE_TYPE_CD, 28 AMRT_TYPE_CD, 29 COMPOUND_BASIS_CD, 30 PRIME_RATE, 31 CUR_NET_RATE, 32 INT_TYPE, 33 PMT_FREQ, 34 PMT_FREQ_MULT, 35 REPRICE_FREQ, 36 REPRICE_FREQ_MULT, 37 ORG_TERM, 38 ORG_TERM_MULT, 39 LAST_REPRICE_DATE, 40 NEXT_REPRICE_DATE, 41 ISSUE_DATE, 42 ACCOUNT_CLOSE_DATE, 43 ORIGINATION_DATE, 44 MATURITY_DATE, 45 INT_DAY_INCOME, 46 INT_DAY_PAYOUT, 47 INT_CACL_INCOME, 48 INT_CACL_DPAYOUT, 49 LOAN_LOSS_RESERVE, 50 OVERDRAFT_ACCT_TYPE, 51 OVERDRAFT_SUB_ACCT_TYPE, 52 OVERDRAFT_STATUS, 53 OVERDRAFT_START_DATE, 54 OVERDRAFT_STOP_DATE, 55 OVERDRAFT_LIM_AMT, 56 OVERDRAFT_RATE_TYPE, 57 OVERDRAFT_RATE, 58 OVERDRAFT_BAL, 59 DATA_SOURCE, 60 INT_MON_INCOME, 61 INT_MON_PAYOUT, 62 CUR_BOOK_BAL_Y1, 63 CUR_BOOK_BAL_Y2, 64 CUR_BOOK_BAL_M1, 65 CUR_BOOK_BAL_M2, 66 CUR_TP_PER_ADB_M, 67 PRI_TP_PER_ADB_M, 68 AUTO_RENEWAL_FLG, 69 LAST_MATURITY_DATE, 70 RECORD_FLAG, 71 ORIGINAL_MATURITY_DATE 72 ) 73 SELECT /*+ parallel (a 32) */ 74 "APPG_MODE", 75 "APPG_DATE", 76 "AS_OF_DATE", 77 "ACCT_NO", 78 "PARTY_ID", 79 "CLIT_NO", 80 "BRAN_NO", 81 "DEPT_ID", 82 "GL_CODE", 83 "GL_ACCOUNT_ID", 84 "GL_ACCOUNT_ID_DP", 85 "GL_ACCOUNT_ID_OD", 86 "GL_ACCOUNT_ID_NPL", 87 "COMMON_COA_ID", 88 "OLD_PROD_CODE", 89 "OLD_SUB_PROD_CODE", 90 "ACCT_STATUS", 91 "IS_LINK_CARD", 92 "CARD_NO", 93 "IS_CHQ", 94 "ISO_CURRENCY_CD", 95 "CUR_BOOK_BAL", 96 "EFFEC_BOOK_BAL", 97 "ACCRUAL_BASIS_CD", 98 "ADJUSTABLE_TYPE_CD", 99 "AMRT_TYPE_CD", 100 "COMPOUND_BASIS_CD", 101 "PRIME_RATE", 102 "CUR_NET_RATE", 103 "INT_TYPE", 104 "PMT_FREQ", 105 "PMT_FREQ_MULT", 106 "REPRICE_FREQ", 107 "REPRICE_FREQ_MULT", 108 "ORG_TERM", 109 "ORG_TERM_MULT", 110 "LAST_REPRICE_DATE", 111 "NEXT_REPRICE_DATE", 112 "ISSUE_DATE", 113 "ACCOUNT_CLOSE_DATE", 114 "ORIGINATION_DATE", 115 "MATURITY_DATE", 116 "INT_DAY_INCOME", 117 "INT_DAY_PAYOUT", 118 "INT_CACL_INCOME", 119 "INT_CACL_DPAYOUT", "LOAN_LOSS_RESERVE", 121 "OVERDRAFT_ACCT_TYPE", 122 "OVERDRAFT_SUB_ACCT_TYPE", 123 "OVERDRAFT_STATUS", 124 "OVERDRAFT_START_DATE", "OVERDRAFT_STOP_DATE", "OVERDRAFT_LIM_AMT", 126 127 "OVERDRAFT_RATE_TYPE", 128 "OVERDRAFT_RATE", "OVERDRAFT_BAL", 130 "DATA_SOURCE", 131 "INT_MON_INCOME", 132 "INT_MON_PAYOUT", 133 "CUR_BOOK_BAL_Y1", 134 "CUR_BOOK_BAL_Y2", 135 "CUR_BOOK_BAL_M1", 136 "CUR_BOOK_BAL_M2", 137 "CUR_TP_PER_ADB_M", 138 "PRI_TP_PER_ADB_M", 139 "AUTO_RENEWAL_FLG", 140 "LAST_MATURITY_DATE", 141 "RECORD_FLAG", 142 "ORIGINAL_MATURITY_DATE" 143 FROM "SYS_SQLLDR_X_EXT_ENMO_ASICDA" a 144 ; 83834875 rows created. Elapsed: 00:10:19.45 SQL> commit; Commit complete. Elapsed: 00:00:00.02
只用了10分钟就将数据加载到数据库了,虽然还有优化空间,加载速度还可以提升,但和以前的一个小时、两个小时的加载时间比起来,已经飞快了。
如果SQLLOAD加载的数据文件是一个大文件,而不是多个小文件,如果字符编码是UTF-8或ASCII,在使用INSERT INTO SELECT的方式开并行后,ORACLE会自动分割这个SQLLOAD的数据文件,实现并行。如果这个文件是GBK编码,那么INSERT INTO SELECT的方式就无法实现并行,要使用并行就必须手动将SQLLOAD加载的数据文件分隔成多个小文件,并修改外部表的数据源。
[oracle@dm0101 20130331]$ file EC00006D.CDU EC00006D.CDU: ASCII text, with very long lines
还好,客户的环境,SQLLOAD加载的数据文件都是ASCII编码。