package com.zsb.test.spout;
import java.util.Arrays;
import java.util.Map;
import backtype.storm.spout.MultiScheme;
import backtype.storm.spout.SchemeAsMultiScheme;
import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;
/**
 * Kafka data source (spout).
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public class KafkaSpoutAgent {

    private KafkaSpout kafkaSpout;

    public static final String KAFKA_SPOUT_ID = "kafkaSpout";

    /**
     * Kafka topic
     */
    private static final String KAFKA_SPOUT_TOPIC = "kafka.spout.topic";
    /**
     * ZooKeeper server address(es)
     */
    private static final String KAFKA_SPOUT_ZK_SERVER = "kafka.spout.zk.server";
    /**
     * ZooKeeper port
     */
    private static final String KAFKA_SPOUT_ZK_PORT = "kafka.spout.zk.port";
    /**
     * Topology name
     */
    private static final String APP_TOPOLOGY_NAME = "app.topology.name";
    /**
     * Root znode under which the spout records its consumer offsets in ZooKeeper
     */
    private static final String STORM_KAFKA_INPUT = "/stormkfkinput";
    /**
     * true: consume the topic from the beginning; false: resume from the offset recorded in ZooKeeper
     */
    private static final String KAFKA_SPOUT_CONSUMER_FROM_START = "kafka.spout.consumer.fromstart";

    /**
     * Builds the KafkaSpout. When scheme is null, a StringScheme is used.
     * @param conf topology configuration
     * @param scheme deserialization scheme for Kafka messages (may be null)
     */
    public KafkaSpoutAgent(Map<String, Object> conf, MultiScheme scheme) {
        String topic = (String) conf.get(KAFKA_SPOUT_TOPIC);
        String zkServerStr = String.valueOf(conf.get(KAFKA_SPOUT_ZK_SERVER)).replace(" ", "");
        String zkPort = String.valueOf(conf.get(KAFKA_SPOUT_ZK_PORT));
        // Consumer id: <topology name>_<topic>
        String id = conf.get(APP_TOPOLOGY_NAME) + "_" + topic;
        // Append the port to every host: "zk1,zk2" with port 2181 becomes "zk1:2181,zk2:2181"
        String zkAddr = zkServerStr.replace(",", ":" + zkPort + ",") + ":" + zkPort;
        BrokerHosts brokerHosts = new ZkHosts(zkAddr);
        SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, STORM_KAFKA_INPUT, id);
        spoutConf.forceFromStart = Boolean.parseBoolean(((String) conf.get(KAFKA_SPOUT_CONSUMER_FROM_START)).trim());
        spoutConf.zkServers = Arrays.asList(zkServerStr.split(",", -1));
        spoutConf.zkPort = Integer.parseInt(zkPort);
        if (scheme == null) {
            spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
        } else {
            spoutConf.scheme = scheme;
        }
        this.setKafkaSpout(new KafkaSpout(spoutConf));
    }
    public KafkaSpout getKafkaSpout() {
        return kafkaSpout;
    }

    public void setKafkaSpout(KafkaSpout kafkaSpout) {
        this.kafkaSpout = kafkaSpout;
    }
}
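For reference, here is a minimal sketch of building the spout with a key/value scheme instead of the default StringScheme, as hinted at by the commented-out line in StartFlow.defineFlow() below. The class name and conf values are placeholders, not part of the original project.

package com.zsb.test.entry;
import java.util.HashMap;
import java.util.Map;
import com.zsb.test.spout.KafkaSpoutAgent;
import storm.kafka.KafkaSpout;
import storm.kafka.KeyValueSchemeAsMultiScheme;
import storm.kafka.StringKeyValueScheme;
// Illustrative only: conf values are placeholders.
public class KafkaSpoutAgentDemo {
    public static void main(String[] args) {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put("kafka.spout.topic", "my_topic");        // placeholder topic
        conf.put("kafka.spout.zk.server", "127.0.0.1");   // placeholder ZooKeeper host(s)
        conf.put("kafka.spout.zk.port", "2181");
        conf.put("app.topology.name", "MyTest");
        conf.put("kafka.spout.consumer.fromstart", "false");
        // StringKeyValueScheme deserializes each Kafka message into a key/value map
        // instead of the single "str" field produced by StringScheme
        KafkaSpout spout = new KafkaSpoutAgent(conf,
                new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme())).getKafkaSpout();
    }
}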
package com.zsb.test.entry;
import java.util.Map;
import com.zsb.test.spout.KafkaSpoutAgent;
import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.MultiScheme;
import backtype.storm.topology.TopologyBuilder;
/**
 * Topology flow definition; Storm version 0.9.6
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public abstract class FlowDefine {

    private TopologyBuilder builder;
    private Map<String, Object> conf;

    public FlowDefine(Map<String, Object> conf) {
        this.conf = conf;
        builder = new TopologyBuilder();
    }

    public abstract void defineFlow();

    public void addKafkaSpout(MultiScheme scheme) {
        builder.setSpout(KafkaSpoutAgent.KAFKA_SPOUT_ID, new KafkaSpoutAgent(conf, scheme).getKafkaSpout(),
                Integer.parseInt((String) conf.get(KafkaSpoutAgent.class.getSimpleName().toUpperCase() + "_PARALLELISM")))
                .setNumTasks(Integer.parseInt((String) conf.get(KafkaSpoutAgent.class.getSimpleName().toUpperCase() + "_TASK_NUM")));
    }
    public void start() throws AlreadyAliveException, InvalidTopologyException {
        defineFlow();
        if ("local".equals(conf.get("storm.run.type"))) {
            // local mode
            LocalCluster cluster = new LocalCluster();
            cluster.submitTopology((String) conf.get("app.topology.name"), conf, builder.createTopology());
        } else {
            // cluster mode
            StormSubmitter.submitTopology((String) conf.get("app.topology.name"), conf, builder.createTopology());
        }
    }
    public TopologyBuilder getTopologyBuilder() {
        return builder;
    }

    // Parallelism is looked up under the key "<SIMPLECLASSNAME>_PARALLELISM",
    // e.g. MyTest1Bolt -> "MYTEST1BOLT_PARALLELISM".
    public int getBoltParallelism(Class<?> clazz) {
        String key = clazz.getSimpleName().toUpperCase() + "_PARALLELISM";
        return Integer.parseInt((String) conf.get(key));
    }

    // Task count is looked up under "<SIMPLECLASSNAME>_TASK_NUM".
    public int getBoltTaskNum(Class<?> clazz) {
        String key = clazz.getSimpleName().toUpperCase() + "_TASK_NUM";
        return Integer.parseInt((String) conf.get(key));
    }
}
package com.zsb.test.entry;
import java.util.HashMap;
import java.util.Map;
import com.zsb.test.bolt.MyTest11Bolt;
import com.zsb.test.bolt.MyTest1Bolt;
import com.zsb.test.bolt.MyTest12Bolt;
import com.zsb.test.bolt.MyTest21Bolt;
import com.zsb.test.bolt.MyTest22Bolt;
import com.zsb.test.bolt.MyTest2Bolt;
import com.zsb.test.spout.KafkaSpoutAgent;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.tuple.Fields;
public class StartFlow extends FlowDefine {

    public StartFlow(Map<String, Object> conf) {
        super(conf);
    }

    @Override
    public void defineFlow() {
        //this.addKafkaSpout(new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme()));
        this.addKafkaSpout(null);
        /**
         * shuffleGrouping(componentId): tuples from the upstream component are
         * distributed randomly and evenly across the downstream bolt's tasks.
         */
        getTopologyBuilder().setBolt(MyTest1Bolt.class.getSimpleName(), new MyTest1Bolt(), getBoltParallelism(MyTest1Bolt.class))
                .shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
                .setNumTasks(getBoltTaskNum(MyTest1Bolt.class));
        //getTopologyBuilder().setBolt(MyTest2Bolt.class.getSimpleName(), new MyTest2Bolt(), getBoltParallelism(MyTest2Bolt.class))
        //.shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
        //.setNumTasks(getBoltTaskNum(MyTest2Bolt.class));
        getTopologyBuilder().setBolt(MyTest11Bolt.class.getSimpleName(), new MyTest11Bolt(), getBoltParallelism(MyTest11Bolt.class))
                .shuffleGrouping(MyTest1Bolt.class.getSimpleName(), "MyTest11Stream") // subscribe to an explicit stream id
                .setNumTasks(getBoltTaskNum(MyTest11Bolt.class));
        getTopologyBuilder().setBolt(MyTest12Bolt.class.getSimpleName(), new MyTest12Bolt(), getBoltParallelism(MyTest12Bolt.class))
                .shuffleGrouping(MyTest1Bolt.class.getSimpleName()) // subscribe to the default stream id
                .setNumTasks(getBoltTaskNum(MyTest12Bolt.class));
        //getTopologyBuilder().setBolt(MyTest21Bolt.class.getSimpleName(), new MyTest21Bolt(), getBoltParallelism(MyTest21Bolt.class))
        //.shuffleGrouping(MyTest2Bolt.class.getSimpleName(), "MyTest11Stream") // subscribe to an explicit stream id
        // fieldsGrouping routes each tuple by the hash of new Fields("testField"),
        // so tuples with the same field value always go to the same task
        //.fieldsGrouping(MyTest2Bolt.class.getSimpleName(), new Fields("testField"))
        //.setNumTasks(getBoltTaskNum(MyTest21Bolt.class));
        /**
        getTopologyBuilder().setBolt(MyTest22Bolt.class.getSimpleName(), new MyTest22Bolt(), getBoltParallelism(MyTest22Bolt.class))
                .shuffleGrouping(MyTest2Bolt.class.getSimpleName()) // subscribe to the default stream id
                .setNumTasks(getBoltTaskNum(MyTest22Bolt.class));
        **/
    }
    public static void main(String[] args) throws AlreadyAliveException, InvalidTopologyException {
        Map<String, Object> conf = new HashMap<String, Object>();
        conf.put("kafka.spout.topic", "system_monitor_topic");
        conf.put("kafka.spout.zk.server", "127.0.0.1");
        conf.put("kafka.spout.zk.port", "2181");
        conf.put("app.topology.name", "MyTest");
        conf.put("kafka.spout.consumer.fromstart", "false");
        conf.put("KAFKASPOUTAGENT_PARALLELISM", "1");
        conf.put("KAFKASPOUTAGENT_TASK_NUM", "1");
        conf.put("MYTEST1BOLT_PARALLELISM", "1");
        conf.put("MYTEST1BOLT_TASK_NUM", "1");
        conf.put("MYTEST2BOLT_PARALLELISM", "1");
        conf.put("MYTEST2BOLT_TASK_NUM", "1");
        conf.put("MYTEST12BOLT_PARALLELISM", "1");
        conf.put("MYTEST12BOLT_TASK_NUM", "1");
        conf.put("MYTEST11BOLT_PARALLELISM", "1");
        conf.put("MYTEST11BOLT_TASK_NUM", "1");
        conf.put("MYTEST22BOLT_PARALLELISM", "1");
        conf.put("MYTEST22BOLT_TASK_NUM", "1");
        conf.put("MYTEST21BOLT_PARALLELISM", "12");
        conf.put("MYTEST21BOLT_TASK_NUM", "5");
        conf.put("storm.run.type", "local");
        conf.put("topology.workers", Integer.valueOf(1));
        // topology.max.spout.pending (unset, i.e. unlimited, by default) caps the number
        // of spout-emitted tuples that may be pending (not yet fully acked) at once:
        // when the limit is reached the spout pauses until downstream bolts ack some
        // tuples, then resumes reading from the source. Only relevant for reliable
        // (anchored) processing.
        conf.put("topology.max.spout.pending", Integer.valueOf(2));
        // disable the user-defined classloader
        conf.put("topology.enable.classloader", Boolean.valueOf("false"));
        new StartFlow(conf).start();
    }
}
package com.zsb.test.bolt;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
// BaseRichBolt requires explicit ack/fail calls; IBasicBolt acks automatically
// after execute() returns.
public class MyTest1Bolt implements IBasicBolt {

    private static final long serialVersionUID = -6115493059000161669L;

    private static final Logger LOG = LoggerFactory.getLogger(MyTest1Bolt.class);
    private OutputCollector collector; // only used by the commented-out rich-bolt variant below
    /***
    // Rich-bolt style lifecycle (kept for reference): explicit collector and ack.
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context,
            OutputCollector collector) {
        this.collector = collector;
        LOG.info("config=[{}]", stormConf.toString());
        // perform initialization here
    }

    @Override
    public void execute(Tuple input) {
        LOG.info("tuple class=[{}]", input.getClass().getName());
        LOG.info("input1=[{}]", input.getValueByField("str"));
        collector.emit(new Values(input));
        collector.ack(input);
    }
    ***/
    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        //declarer.declare(new Fields("MyTest1Bolt"));
        // "default" is the implicit stream id used when emit() is called without one
        declarer.declareStream("default", new Fields("MyTest1Bolt"));
        declarer.declareStream("MyTest11Stream", new Fields("MyTest1Bolt"));
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }
    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        //LOG.info("TEST1=[{}]", input.getValue(0));
        /***
        LOG.info("####################TEST1 ST#############");
        LOG.info("tuple class=[{}]", input.getClass().getName());
        LOG.info("values from upstream=[{}]", input.getValues());
        LOG.info("fields from upstream=[{}]", input.getFields());
        LOG.info("message id assigned by the topology: msgId=[{}]", input.getMessageId());
        LOG.info("SourceComponent=[{}]", input.getSourceComponent());
        LOG.info("SourceGlobalStreamid=[{}]", input.getSourceGlobalStreamid());
        LOG.info("SourceStreamId=[{}]", input.getSourceStreamId());
        LOG.info("SourceTask=[{}]", input.getSourceTask());
        LOG.info("####################TEST1 END#############");
        **/
        // An explicit stream id may be given when emitting; without one the
        // tuple goes to the "default" stream.
        collector.emit("MyTest11Stream", new Values(input));
    }

    @Override
    public void cleanup() {
    }
}
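For comparison, here is a minimal sketch of the same bolt written against BaseRichBolt, where every tuple must be acked (or failed) explicitly. The class name and body are illustrative only, not part of the original project.

package com.zsb.test.bolt;
import java.util.Map;
import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
// Illustrative only: the rich-bolt variant must ack (or fail) every tuple itself.
public class MyTest1RichBolt extends BaseRichBolt {
    private static final long serialVersionUID = 1L;
    private OutputCollector collector;

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context, OutputCollector collector) {
        this.collector = collector;
    }

    @Override
    public void execute(Tuple input) {
        // Anchor the emit to the input tuple so a downstream failure replays it.
        collector.emit("MyTest11Stream", input, new Values(input.getValue(0)));
        collector.ack(input); // explicit ack; without it the tuple eventually times out
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream("MyTest11Stream", new Fields("MyTest1Bolt"));
    }
}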
The other bolts can follow the same pattern as MyTest1Bolt; a sketch of one follows.
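As an example of such a downstream bolt, here is a minimal sketch of what MyTest11Bolt might look like. The body is hypothetical (the original is not shown): it consumes the "MyTest11Stream" stream, logs what it receives, and emits nothing.

package com.zsb.test.bolt;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.tuple.Tuple;
// Hypothetical terminal bolt: logs each tuple received from MyTest1Bolt.
public class MyTest11Bolt implements IBasicBolt {
    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(MyTest11Bolt.class);

    @SuppressWarnings("rawtypes")
    @Override
    public void prepare(Map stormConf, TopologyContext context) {
    }

    @Override
    public void execute(Tuple input, BasicOutputCollector collector) {
        LOG.info("MyTest11Bolt received=[{}] from stream=[{}]",
                input.getValues(), input.getSourceStreamId());
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        // terminal bolt: declares no output streams
    }

    @Override
    public Map<String, Object> getComponentConfiguration() {
        return null;
    }

    @Override
    public void cleanup() {
    }
}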