一、crontab


 # Run the sync shell script every 5 minutes; stdout and stderr of the cron job itself are discarded
 */5 * * * * /data/datax/sh/syn_product_summary.sh > /dev/null 2>&1

二、sh 脚本 syn_product_summary.sh

#!/bin/bash
#
# syn_product_summary.sh
# Launches the DataX job mro_summary_max_mysql_elastic_product.json with a
# start_time parameter so that only recently updated rows are synchronized.
# Scheduled from cron every 5 minutes.

source /etc/profile

# Current server time as a Unix timestamp (seconds).
cur_time=$(date +%s)
# Window start: 5 minutes plus 1 extra minute ago (360 s). The extra minute
# deliberately allows up to one minute of data to be pushed twice.
start_time=$((cur_time - 360))

# One log file per calendar day.
log_file="/var/log/datax/summary_log.$(date +%Y%m%d)"

# Hand the window start to the DataX job as -Dstart_time and run it in the
# background. Both stdout and stderr are appended to the log; the previous
# "3>&1" duplicated fd 3 instead of stderr, so error output never reached it.
/usr/bin/python /usr/local/datax/bin/datax.py \
  /data/datax/config/mro_summary_max_mysql_elastic_product.json \
  -p "-Dstart_time=${start_time}" >>"${log_file}" 2>&1 &

三、datax 同步脚本 mro_summary_max_mysql_elastic_product.json 含 groovy脚本

{
	"job": {
		"content": [{
			"reader": {
				"name": "mysqlreader",
				"parameter": {
					"username": "mro_account",
					"password": "******",
					"connection": [{
						"querySql": ["select product_id as uni_key, product_id,brand_name,model,category_name,color,unit, \" \" as category_attr, concat(brand_name, \" \", model, \" \", category_name, \" \", unit) as summary,bn, \"AAAAA\" as support_sku, 0.00 as support_price, attr_json from beta_product where product_state=1 and update_time > FROM_UNIXTIME($start_time)"],
						"jdbcUrl": ["jdbc:mysql://rm-*.mysql.rds.aliyuncs.com:3306/mro_*"]
					}]
				}
			},

			"transformer": [
			  {
			    "name": "dx_groovy",
			    "parameter": {
			      "code": "Column column = record.getColumn(12);\n def attrJson = (column == null) ? null : column.asString();\n def attrStr = new StringBuffer();\n if (attrJson != null && !attrJson.trim().isEmpty()) {\n def jsonSlurper = new JsonSlurper();\n def map = jsonSlurper.parseText(attrJson);\n map.each{\n attrStr.append(\"$it.attributeValue \");\n }\n}\n record.setColumn(7, new StringColumn(attrStr.toString()));\n return record;",
			      "extraPackage": [
			        "import groovy.json.JsonSlurper;"
			      ]
			    }
			  }
			],

			"writer": {
				"name": "elasticsearchwriter",
				"parameter": {
		            "endpoint": "http://172.16.166.101:9200",
		            "accessId": "elastic",
					"accessKey": "******",
		            "index": "product_summary_max",
		            "type": "_doc",
		            "cleanup": false,
		            "settings": {
		            	"index" :{
		            		"number_of_shards": 2, 
		            		"number_of_replicas": 1
		            	}
		            },
		            "discovery": false,
		            "batchSize": 20000,
		            "splitter": ",",
		            "column": [
		            	{
							"name": "uniKey",
							"type": "id"
						},
				        {
				          "name":"productId",
				          "type" : "long"
				        },
				        {
				          "name":"brandName",
				          "type" : "text",
				          "analyzer" : "my_ik"
				        },
				        {
				        	"name":"model",
				          "type" : "text",
				          "analyzer" : "ik_max_word"
				        },
				        {
				        	"name":"categoryName",
				          "type" : "text",
				          "analyzer" : "my_ik"
				        },
				        {
				        	"name":"color",
				          "type" : "text",
				          "analyzer" : "ik_max_word"
				        },
				        {
				        	"name":"unit",
				          "type" : "text",
				          "analyzer" : "ik_max_word"
				        },
				        {
				        	"name":"categoryAttr",
				          "type" : "text",
				          "analyzer" : "ik_max_word"
				        },
				        {
				        	"name":"summary",
				          "type" : "text",
				          "analyzer" : "my_ik"
				        },

				        {
				        	"name":"bn",
				          "type" : "text"
				        },
				        {
				        	"name":"support_sku",
				          "type" : "text"
				        },
				        {
				        	"name":"support_price",
				          "type" : "float"
				        },
				        {
				        	"name":"remark",
				          "type" : "text"
				        }
		            ]
				}
			}
		}],
		"setting": {
			"speed": {
				"channel": 5
			}
		}
	}
}
Logo

魔乐社区(Modelers.cn) 是一个中立、公益的人工智能社区,提供人工智能工具、模型、数据的托管、展示与应用协同服务,为人工智能开发及爱好者搭建开放的学习交流平台。社区通过理事会方式运作,由全产业链共同建设、共同运营、共同享有,推动国产AI生态繁荣发展。

更多推荐