heniancheng 2020-02-11
最近有需求将数据从mysql同步到oracle,之前有使用kettle将表从oracle同步到mysql,这里使用的插件依然是阿里的dataX
详细见:Githup地址:https://github.com/alibaba/DataX
这里也是根据阿里云的一篇帖子来进行验证
https://yq.aliyun.com/articles/715393?spm=a2c4e.11155472.0.0.3aef4dc1IWo28F
1 环境需要
JDK(1.8以上,推荐1.8)
Python(推荐Python2.6.X)
Apache Maven 3.x (Compile DataX)
2 #服务器配置
[ ~]# yum search java|grep jdk
[ ~]# yum install java-1.8.0-openjdk.x86_64 -y
[ ~]# java -version
openjdk version "1.8.0_242"
OpenJDK Runtime Environment (build 1.8.0_242-b07)
OpenJDK 64-Bit Server VM (build 25.242-b07, mixed mode)
[root@yhq ~]# python --version
Python 2.6.6
下载地址:https://maven.apache.org/download.cgi
[root@yhq soft]# ls apache-maven-3.6.3-bin.tar.gz
apache-maven-3.6.3-bin.tar.gz
[root@yhq soft]# tar -zxvf apache-maven-3.6.3-bin.tar.gz
[ soft]# mkdir /opt/maven
[ soft]# mv apache-maven-3.6.3/* /opt/maven/
[ soft]# ln -s /opt/maven/b
bin/ boot/
[root@yhq soft]# ln -s /opt/maven/bin/mvn /usr/bin/mvn
[ soft]# vim /etc/profile.d/maven.sh
export M2_HOME=/opt/maven
export PATH=${M2_HOME}/bin:${PATH}
[root@yhq soft]# mvn -v
Apache Maven 3.6.3 (cecedd343002696d0abb50b32b541b8a6ba2883f)
Maven home: /opt/maven
Java version: 1.8.0_242, vendor: Oracle Corporation, runtime: /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.242.b07-1.el6_10.x86_64/jre
Default locale: en_US, platform encoding: UTF-8
OS name: "linux", version: "2.6.32-754.el6.x86_64", arch: "amd64", family: "unix"
#安装datax
[ soft]# tar -zxvf datax.tar.gz
[ soft]# ls datax
bin conf job lib plugin script tmp
#自检测试脚本
[ soft]# python datax/bin/datax.py datax/job/job.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
2020-02-09 23:27:35.717 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-09 23:27:35.763 [main] INFO Engine - the machine info =>
osInfo: Oracle Corporation 1.8 25.242-b07
jvmInfo: Linux amd64 2.6.32-754.el6.x86_64
cpu num: 2
2020-02-09 23:27:46.974 [job-0] INFO JobContainer - PerfTrace not enable!
2020-02-09 23:27:46.975 [job-0] INFO StandAloneJobContainerCommunicator - Total 100000 records, 2600000 bytes | Speed 253.91KB/s, 10000 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 0.020s | All Task WaitReaderTime 0.121s | Percentage 100.00%
2020-02-09 23:27:46.976 [job-0] INFO JobContainer -
任务启动时刻 : 2020-02-09 23:27:35
任务结束时刻 : 2020-02-09 23:27:46
任务总计耗时 : 10s
任务平均流量 : 253.91KB/s
记录写入速度 : 10000rec/s
读出记录总数 : 100000
读写失败总数 : 0#测试使用
mysql 表 CREATE TABLE `datax_test` ( `id` int(11) NOT NULL, `name` varchar(20) DEFAULT NULL, `birthday` date DEFAULT NULL, `memo` varchar(32) DEFAULT NULL, PRIMARY KEY (`id`) ) ENGINE=InnoDB DEFAULT CHARSET=utf8; #插入10w数据
#在oracle里面可以先创建好表结构,或者用kettle同步表
create table datax_test (id number, name varchar2(20), birthday date, memo varchar2(32));
#创建配置文件
#可以通过命令查看配置模板: python datax.py -r {YOUR_READER} -w {YOUR_WRITER}
# python datax.py -r streamreader -w streamwriter
[ job]# vim job1.json
{
"job": {
"setting": {
"speed": {
"channel": 5
}
},
"content": [
{
"reader": {
"name": "mysqlreader",
"parameter": {
"username": "system",
"password": "****",
"column": ["*"],
"connection": [
{
"table": ["datax_test"],
"jdbcUrl": ["jdbc:mysql://ip:3306/vision"]
}
]
}
},
"writer": {
"name": "oraclewriter",
"parameter": {
"username": "vision",
"password": "***",
"column": ["*"],
"connection": [
{
"jdbcUrl": "jdbc:oracle:thin:@ip:1521:bol",
"table": ["datax_test"]
}
]
}
}
}
]
}
}#启动数据同步
[ job]# python /data/soft/datax/bin/datax.py job1.json
DataX (DATAX-OPENSOURCE-3.0), From Alibaba !
Copyright (C) 2010-2017, Alibaba Group. All Rights Reserved.
2020-02-10 00:12:03.796 [main] INFO VMInfo - VMInfo# operatingSystem class => sun.management.OperatingSystemImpl
2020-02-10 00:12:03.807 [main] INFO Engine - the machine info =>
osInfo: Oracle Corporation 1.8 25.242-b07
jvmInfo: Linux amd64 2.6.32-754.el6.x86_64
cpu num: 2
totalPhysicalMemory: -0.00G
freePhysicalMemory: -0.00G
maxFileDescriptorCount: -1
currentOpenFileDescriptorCount: -1
GC Names [PS MarkSweep, PS Scavenge]
MEMORY_NAME | allocation_size | init_size
PS Eden Space | 256.00MB | 256.00MB
Code Cache | 240.00MB | 2.44MB
Compressed Class Space | 1,024.00MB | 0.00MB
PS Survivor Space | 42.50MB | 42.50MB
PS Old Gen | 683.00MB | 683.00MB
Metaspace | -0.00MB | 0.00MB
2020-02-10 00:12:03.869 [main] INFO Engine -
2020-02-10 00:13:48.061 [job-0] INFO JobContainer - PerfTrace not enable!
2020-02-10 00:13:48.061 [job-0] INFO StandAloneJobContainerCommunicator - Total 99999 records, 1888875 bytes | Speed 21.96KB/s, 1190 records/s | Error 0 records, 0 bytes | All Task WaitWriterTime 42.323s | All Task WaitReaderTime 29.424s | Percentage 100.00%
2020-02-10 00:13:48.069 [job-0] INFO JobContainer -
任务启动时刻 : 2020-02-10 00:12:05
任务结束时刻 : 2020-02-10 00:13:48
任务总计耗时 : 102s
任务平均流量 : 21.96KB/s
记录写入速度 : 1190rec/s
读出记录总数 : 99999
读写失败总数 : 0#在oracle查看数据
select count(*) from datax_test; #同步正常
#剩下的增量同步,在crontab中定时任务就行。
$ crontab -e
#会进入已有crontab文件编辑界面,继续增加定时任务即可,本示例增加以下内容,并保存
0,10,20,30,40,50 * * * * python /data/soft/datax/bin/datax.py /data/soft/datax/job/mysql2odps.json >>/tmp/log.`date +\%Y\%m\%d\%H\%M\%S` 2>&1
测试环境
#测试环境 ==mysql db 5.7.27 centos 6.10 create database vision;==oracle db 11.2.0.4 centos 6.10 select file#,name from v$datafile; create tablespace vision logging datafile ‘/u01/app/oracle/oradata/bol/vision01.dbf‘ size 100m autoextend on next 500m; create user vision identified by **** default tablespace vision ; grant connect,resource,dba to vision;== delimiter // create procedure insrt_t () begin declare i int default 1; declare j int default 1; while i < 100000 do #insert into test values (i,‘a‘); insert into datax_test (id,name,birthday,memo) VALUES (i,‘yhq‘,now(),‘123‘); set i=i+1; set j=j+1; if j=5000 then set j=1; commit; end if; end while; end // delimiter ;