大数据平台建设系列之数据调度-datax实现定时数据增量同步
本文介绍如何通过 crontab 每 5 分钟调用一次 shell 脚本 syn_product_summary.sh,由脚本计算数据起始时间并作为参数传入 DataX 任务(mysqlreader → groovy transformer → elasticsearchwriter),从而实现 MySQL 到 Elasticsearch 的定时增量数据同步。
·
一、crontab
# Run the product-summary sync shell script once every 5 minutes.
# stdout and stderr of the script itself are discarded (> /dev/null 2>&1).
*/5 * * * * /data/datax/sh/syn_product_summary.sh > /dev/null 2>&1
二、sh 脚本 syn_product_summary.sh
#!/bin/bash
# Incremental-sync driver: launches the DataX job with a start_time covering
# the last 6 minutes and appends the job output to a per-day log file.
source /etc/profile

# Current server time as a Unix timestamp (seconds).
cur_time=$(date +%s)

# Window start = 5-minute schedule interval + 1 minute of overlap, so up to
# one minute of rows may be pushed twice rather than missed.
start_time=$((cur_time - 360))

# Pass the window start to the DataX job as the start_time variable.
# stderr is merged into stdout (2>&1) so errors land in the same log file;
# the previous "3>&1" only redirected the unused fd 3 and left stderr out of
# the log. The job is started in the background (&).
/usr/bin/python /usr/local/datax/bin/datax.py /data/datax/config/mro_summary_max_mysql_elastic_product.json -p "-Dstart_time=$start_time" >>"/var/log/datax/summary_log.$(date +%Y%m%d)" 2>&1 &
三、datax 同步脚本 mro_summary_max_mysql_elastic_product.json 含 groovy脚本
{
  "job": {
    "content": [
      {
        "reader": {
          "name": "mysqlreader",
          "parameter": {
            "username": "mro_account",
            "password": "******",
            "connection": [
              {
                "querySql": ["select product_id as uni_key, product_id,brand_name,model,category_name,color,unit, \" \" as category_attr, concat(brand_name, \" \", model, \" \", category_name, \" \", unit) as summary,bn, \"AAAAA\" as support_sku, 0.00 as support_price, attr_json from beta_product where product_state=1 and update_time > FROM_UNIXTIME($start_time)"],
                "jdbcUrl": ["jdbc:mysql://rm-*.mysql.rds.aliyuncs.com:3306/mro_*"]
              }
            ]
          }
        },
        "transformer": [
          {
            "name": "dx_groovy",
            "parameter": {
              "code": "Column column = record.getColumn(12);\n def attrJson = (column == null) ? null : column.asString();\n def attrStr = new StringBuffer();\n if (attrJson != null && !attrJson.equals(\"\")) {\n def jsonSlurper = new JsonSlurper();\n def map = jsonSlurper.parseText(attrJson);\n map.each{\n attrStr.append(\"$it.attributeValue \");\n }\n}\n record.setColumn(7, new StringColumn(attrStr.toString()));\n return record;",
              "extraPackage": [
                "import groovy.json.JsonSlurper;"
              ]
            }
          }
        ],
        "writer": {
          "name": "elasticsearchwriter",
          "parameter": {
            "endpoint": "http://172.16.166.101:9200",
            "accessId": "elastic",
            "accessKey": "******",
            "index": "product_summary_max",
            "type": "_doc",
            "cleanup": false,
            "settings": {
              "index": {
                "number_of_shards": 2,
                "number_of_replicas": 1
              }
            },
            "discovery": false,
            "batchSize": 20000,
            "splitter": ",",
            "column": [
              {
                "name": "uniKey",
                "type": "id"
              },
              {
                "name": "productId",
                "type": "long"
              },
              {
                "name": "brandName",
                "type": "text",
                "analyzer": "my_ik"
              },
              {
                "name": "model",
                "type": "text",
                "analyzer": "ik_max_word"
              },
              {
                "name": "categoryName",
                "type": "text",
                "analyzer": "my_ik"
              },
              {
                "name": "color",
                "type": "text",
                "analyzer": "ik_max_word"
              },
              {
                "name": "unit",
                "type": "text",
                "analyzer": "ik_max_word"
              },
              {
                "name": "categoryAttr",
                "type": "text",
                "analyzer": "ik_max_word"
              },
              {
                "name": "summary",
                "type": "text",
                "analyzer": "my_ik"
              },
              {
                "name": "bn",
                "type": "text"
              },
              {
                "name": "support_sku",
                "type": "text"
              },
              {
                "name": "support_price",
                "type": "float"
              },
              {
                "name": "remark",
                "type": "text"
              }
            ]
          }
        }
      }
    ],
    "setting": {
      "speed": {
        "channel": 5
      }
    }
  }
}
魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。
更多推荐


所有评论(0)