Storm Example

This post shows a concrete way to hook a Kafka data source into Apache Storm, covering the configuration parameters, the spout implementation, and the topology flow definition.

package com.zsb.test.spout;

import java.util.Arrays;
import java.util.Map;

import backtype.storm.spout.MultiScheme;
import backtype.storm.spout.SchemeAsMultiScheme;

import storm.kafka.BrokerHosts;
import storm.kafka.KafkaSpout;
import storm.kafka.SpoutConfig;
import storm.kafka.StringScheme;
import storm.kafka.ZkHosts;

/**
 * Kafka data source
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public class KafkaSpoutAgent {

	private KafkaSpout kafkaSpout;
	
	public static final String KAFKA_SPOUT_ID = "kafkaSpout";
	
	/**
	 * kafka topic
	 */
	private static final String KAFKA_SPOUT_TOPIC = "kafka.spout.topic";
	/**
	 * ZooKeeper server addresses
	 */
	private static final String KAFKA_SPOUT_ZK_SERVER = "kafka.spout.zk.server";
	/**
	 * ZooKeeper port
	 */
	private static final String KAFKA_SPOUT_ZK_PORT = "kafka.spout.zk.port";
	/**
	 * Topology name
	 */
	private static final String APP_TOPOLOGY_NAME = "app.topology.name";
	/**
	 * Root ZNode in ZooKeeper under which the spout stores consumer offsets
	 */
	private static final String STORM_KAFKA_INPUT = "/stormkfkinput";
	/**
	 * true: consume from the beginning of the topic; false: resume from the offset recorded in ZooKeeper
	 */
	private static final String KAFKA_SPOUT_CONSUMER_FROM_START = "kafka.spout.consumer.fromstart";
	
	
	
	/**
	 * Builds the KafkaSpout. When scheme is null, StringScheme is used.
	 * @param conf topology configuration
	 * @param scheme tuple deserialization scheme (may be null)
	 */
	public KafkaSpoutAgent(Map<String,Object> conf, MultiScheme scheme){
		
		String topic = (String) conf.get(KAFKA_SPOUT_TOPIC);
		String zkServerStr = String.valueOf(conf.get(KAFKA_SPOUT_ZK_SERVER)).replace(" ", "");
		String zkPort = String.valueOf(conf.get(KAFKA_SPOUT_ZK_PORT));
		String id = conf.get(APP_TOPOLOGY_NAME) + "_" + topic;
		
		//build "host1:port,host2:port,..." for ZkHosts from the comma-separated host list
		StringBuilder zkAddr = new StringBuilder();
		for (String host : zkServerStr.split(",")) {
			if (zkAddr.length() > 0) {
				zkAddr.append(",");
			}
			zkAddr.append(host).append(":").append(zkPort);
		}
		BrokerHosts brokerHosts = new ZkHosts(zkAddr.toString());
		
		SpoutConfig spoutConf = new SpoutConfig(brokerHosts, topic, STORM_KAFKA_INPUT, id);
		spoutConf.forceFromStart = Boolean.parseBoolean(((String) conf.get(KAFKA_SPOUT_CONSUMER_FROM_START)).trim());
		spoutConf.zkServers = Arrays.asList(zkServerStr.split(",", -1));
		spoutConf.zkPort = Integer.parseInt(zkPort);
		if (scheme == null) {
			//no scheme supplied: fall back to plain UTF-8 strings
			spoutConf.scheme = new SchemeAsMultiScheme(new StringScheme());
		} else {
			spoutConf.scheme = scheme;
		}
		this.setKafkaSpout(new KafkaSpout(spoutConf));
	}

	
	
	
	
	public KafkaSpout getKafkaSpout() {
		return kafkaSpout;
	}


	public void setKafkaSpout(KafkaSpout kafkaSpout) {
		this.kafkaSpout = kafkaSpout;
	}

	
	
}
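
The constructor accepts any MultiScheme, so the raw Kafka bytes can be decoded however the topology needs (the commented-out KeyValueSchemeAsMultiScheme in StartFlow below is one option). As a minimal sketch of a custom scheme (the class name LineScheme and its "line" output field are illustrative assumptions, not part of the original project), a backtype.storm.spout.Scheme for Storm 0.9.x could look like this:

package com.zsb.test.spout;

import java.io.UnsupportedEncodingException;
import java.util.List;

import backtype.storm.spout.Scheme;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Values;

/**
 * Hypothetical example: decodes each Kafka message as a UTF-8 string
 * and emits it as a single "line" field.
 */
public class LineScheme implements Scheme {

	private static final long serialVersionUID = 1L;

	@Override
	public List<Object> deserialize(byte[] ser) {
		try {
			//one Kafka message becomes one single-field tuple
			return new Values(new String(ser, "UTF-8"));
		} catch (UnsupportedEncodingException e) {
			throw new RuntimeException(e);
		}
	}

	@Override
	public Fields getOutputFields() {
		return new Fields("line");
	}
}

It would then be wired in as new KafkaSpoutAgent(conf, new SchemeAsMultiScheme(new LineScheme())).getKafkaSpout().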

package com.zsb.test.entry;

import java.util.Map;

import com.zsb.test.spout.KafkaSpoutAgent;

import backtype.storm.LocalCluster;
import backtype.storm.StormSubmitter;
import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.spout.MultiScheme;
import backtype.storm.topology.TopologyBuilder;

/**
 * Topology flow definition (Storm version 0.9.6)
 * Date: 2017-03-13 <br>
 * @author zhoushanbin
 */
public abstract class FlowDefine {
		
	private TopologyBuilder builder;
	
	private Map<String,Object> conf;
	
	public FlowDefine(Map<String,Object> conf){
		this.conf = conf;
		builder = new TopologyBuilder();
	}
	
	public abstract void defineFlow();
	
	public void addKafkaSpout(MultiScheme scheme){
		
		builder.setSpout(KafkaSpoutAgent.KAFKA_SPOUT_ID, new KafkaSpoutAgent(conf, scheme).getKafkaSpout(),
				Integer.parseInt((String) conf.get(KafkaSpoutAgent.class.getSimpleName().toUpperCase() + "_PARALLELISM")))
				.setNumTasks(Integer.parseInt((String) conf.get(KafkaSpoutAgent.class.getSimpleName().toUpperCase() + "_TASK_NUM")));
		
	}
	
	
	public void start() throws AlreadyAliveException, InvalidTopologyException{
		defineFlow();
		if("local".equals(conf.get("storm.run.type"))){
			//local mode
			LocalCluster cluster = new LocalCluster();
			cluster.submitTopology((String)conf.get("app.topology.name"), conf, builder.createTopology());
		}
		else{
			//cluster mode
			StormSubmitter.submitTopology((String)conf.get("app.topology.name"), conf, builder.createTopology());
		}
		
	}
	
	
	
	
	public TopologyBuilder getTopologyBuilder(){
		return builder;
	}
	
	public int getBoltParallelism(Class<?> clazz){
		String key = clazz.getSimpleName().toUpperCase()+"_PARALLELISM";
		
		return Integer.parseInt((String) conf.get(key));
	}
	
	public int getBoltTaskNum(Class<?> clazz){
		
		String key = clazz.getSimpleName().toUpperCase()+"_TASK_NUM";
		
		return Integer.parseInt((String) conf.get(key));
	}

}




package com.zsb.test.entry;

import java.util.HashMap;
import java.util.Map;

import com.zsb.test.bolt.MyTest11Bolt;
import com.zsb.test.bolt.MyTest1Bolt;
import com.zsb.test.bolt.MyTest12Bolt;
import com.zsb.test.bolt.MyTest21Bolt;
import com.zsb.test.bolt.MyTest22Bolt;
import com.zsb.test.bolt.MyTest2Bolt;
import com.zsb.test.spout.KafkaSpoutAgent;

import backtype.storm.generated.AlreadyAliveException;
import backtype.storm.generated.InvalidTopologyException;
import backtype.storm.tuple.Fields;

public class StartFlow extends FlowDefine{

	public StartFlow(Map<String, Object> conf) {
		super(conf);
	}

	@Override
	public void defineFlow() {
	
		//this.addKafkaSpout(new KeyValueSchemeAsMultiScheme(new StringKeyValueScheme()));
		
		this.addKafkaSpout(null);
		/**
		 * shuffleGrouping(componentId): tuples from the upstream component are
		 * distributed randomly and roughly evenly among this bolt's tasks.
		 */
		getTopologyBuilder().setBolt(MyTest1Bolt.class.getSimpleName(), new MyTest1Bolt(), getBoltParallelism(MyTest1Bolt.class))
		.shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
		.setNumTasks(getBoltTaskNum(MyTest1Bolt.class));
		
		//getTopologyBuilder().setBolt(MyTest2Bolt.class.getSimpleName(), new MyTest2Bolt(), getBoltParallelism(MyTest2Bolt.class))
		//.shuffleGrouping(KafkaSpoutAgent.KAFKA_SPOUT_ID)
		//.setNumTasks(getBoltTaskNum(MyTest2Bolt.class));
		
		
		getTopologyBuilder().setBolt(MyTest11Bolt.class.getSimpleName(), new MyTest11Bolt(), getBoltParallelism(MyTest11Bolt.class))
		.shuffleGrouping(MyTest1Bolt.class.getSimpleName(), "MyTest11Stream") //subscribe to the named stream ID
		.setNumTasks(getBoltTaskNum(MyTest11Bolt.class));
		
		getTopologyBuilder().setBolt(MyTest12Bolt.class.getSimpleName(), new MyTest12Bolt(), getBoltParallelism(MyTest12Bolt.class))
		.shuffleGrouping(MyTest1Bolt.class.getSimpleName()) //subscribe to the default stream
		.setNumTasks(getBoltTaskNum(MyTest12Bolt.class));
		
		
		//getTopologyBuilder().setBolt(MyTest21Bolt.class.getSimpleName(), new MyTest21Bolt(), getBoltParallelism(MyTest21Bolt.class))
		//.shuffleGrouping(MyTest2Bolt.class.getSimpleName(),"MyTest11Stream") //subscribe to the named stream ID
		//route by the hash of new Fields("testField"): equal field values always go to the same task
		//.fieldsGrouping(MyTest2Bolt.class.getSimpleName(), new Fields("testField"))
		//.setNumTasks(getBoltTaskNum(MyTest21Bolt.class));
		
		/**
		getTopologyBuilder().setBolt(MyTest22Bolt.class.getSimpleName(), new MyTest22Bolt(), getBoltParallelism(MyTest22Bolt.class))
		.shuffleGrouping(MyTest2Bolt.class.getSimpleName()) //使用默认流ID
		.setNumTasks(getBoltTaskNum(MyTest22Bolt.class));
		**/
	}

	public static void main(String args[]) throws AlreadyAliveException, InvalidTopologyException{
		Map<String,Object> conf = new HashMap<String,Object>();
		
		conf.put("kafka.spout.topic", "system_monitor_topic");
		conf.put("kafka.spout.zk.server", "127.0.0.1");
		conf.put("kafka.spout.zk.port", "2181");
		conf.put("app.topology.name", "MyTest");
		conf.put("kafka.spout.consumer.fromstart", "false");
		conf.put("KAFKASPOUTAGENT_PARALLELISM", "1");
		conf.put("KAFKASPOUTAGENT_TASK_NUM", "1");
		conf.put("MYTEST1BOLT_PARALLELISM", "1");
		conf.put("MYTEST1BOLT_TASK_NUM", "1");
		conf.put("MYTEST2BOLT_PARALLELISM", "1");
		conf.put("MYTEST2BOLT_TASK_NUM", "1");
		conf.put("MYTEST12BOLT_PARALLELISM", "1");
		conf.put("MYTEST12BOLT_TASK_NUM", "1");
		conf.put("MYTEST11BOLT_PARALLELISM", "1");
		conf.put("MYTEST11BOLT_TASK_NUM", "1");
		
		conf.put("MYTEST22BOLT_PARALLELISM", "1");
		conf.put("MYTEST22BOLT_TASK_NUM", "1");
		conf.put("MYTEST21BOLT_PARALLELISM", "12");
		conf.put("MYTEST21BOLT_TASK_NUM", "5");
		
		conf.put("storm.run.type", "local");
		conf.put("topology.workers", Integer.valueOf(1));
		conf.put("topology.max.spout.pending", Integer.valueOf(2));//默认是1;topology.max.spout.pending 的意义在于 ,缓存spout 发送出去的tuple,当下流的bolt还有topology.max.spout.pending 个 tuple 没有消费完时,spout会停下来,等待下游bolt去消费,当tuple 的个数少于topology.max.spout.pending个数时,spout 会继续从消息源读取消息。(这个属性只对可靠消息处理有用)
		conf.put("topology.enable.classloader", Boolean.valueOf("false"));//禁用了用户自定义的类加载器

		
		
		new StartFlow(conf).start();
	}
}

package com.zsb.test.bolt;

import java.util.Map;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.task.OutputCollector;
import backtype.storm.task.TopologyContext;
import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.IBasicBolt;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseRichBolt;
import backtype.storm.tuple.Fields;
import backtype.storm.tuple.Tuple;
import backtype.storm.tuple.Values;
//BaseRichBolt requires explicit ack/fail; IBasicBolt acks automatically
public class MyTest1Bolt implements IBasicBolt{
	
	private static final Logger LOG = LoggerFactory.getLogger(MyTest1Bolt.class);
	private OutputCollector collector; //only used by the commented-out BaseRichBolt variant below
	
	/**
	 * 
	 */
	private static final long serialVersionUID = -6115493059000161669L;
	/***
	@SuppressWarnings("rawtypes")
	@Override
	public void prepare(Map stormConf, TopologyContext context,
			OutputCollector collector) {
		this.collector = collector;
		LOG.info("配置=【{}】",stormConf.toString());
		//此处做初始化操作
	}

	@Override
	public void execute(Tuple input) {
		
		LOG.info("Tuple类型=【{}】",input.getClass().getName());
		LOG.info("input1=【{}】",input.getValueByField("str"));

		
		collector.emit(new Values(input));
		collector.ack(input);
	}
	***/
	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		//declarer.declare(new Fields("MyTest1Bolt"));
		declarer.declareStream("default",new Fields("MyTest1Bolt"));
		declarer.declareStream("MyTest11Stream", new Fields("MyTest1Bolt"));
		
	}

	@Override
	public Map<String, Object> getComponentConfiguration() {
		
		return null;
	}

	@SuppressWarnings("rawtypes")
	@Override
	public void prepare(Map stormConf, TopologyContext context) {
	
	}

	@Override
	public void execute(Tuple input, BasicOutputCollector collector) {
		
		//LOG.info("TEST1=【{}】",input.getValue(0));
		
		/***
		LOG.info("####################TEST1 ST#############");
		LOG.info("Tuple类型=【{}】",input.getClass().getName());
		LOG.info("源于上游的数据=【{}】",input.getValues());
		LOG.info("源于上游的域=【{}】",input.getFields());
		LOG.info("拓扑分配的消息ID:msgId=【{}】",input.getMessageId());
		LOG.info("SourceComponent=【{}】",input.getSourceComponent());
		LOG.info("SourceGlobalStreamid=【{}】",input.getSourceGlobalStreamid());
		LOG.info("SourceStreamId=【{}】",input.getSourceStreamId());
		LOG.info("SourceTask=【{}】",input.getSourceTask());
		LOG.info("####################TEST1 END#############");
		
		**/
		collector.emit("MyTest11Stream",new Values(input));//消息发送时,可指定发送的流ID,不指定默认为default
	}

	@Override
	public void cleanup() {
		
	}

}

The other bolts can follow the pattern of MyTest1Bolt.
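
For instance, a terminal bolt that only consumes and logs might look like the sketch below (the body is an assumption for illustration, since the source of MyTest11Bolt is not shown here; BaseBasicBolt acks automatically, like IBasicBolt):

package com.zsb.test.bolt;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import backtype.storm.topology.BasicOutputCollector;
import backtype.storm.topology.OutputFieldsDeclarer;
import backtype.storm.topology.base.BaseBasicBolt;
import backtype.storm.tuple.Tuple;

/**
 * Hypothetical sketch of a terminal bolt in the style of MyTest11Bolt:
 * it subscribes to MyTest1Bolt's "MyTest11Stream" and only logs what
 * it receives, emitting nothing downstream.
 */
public class MyTest11Bolt extends BaseBasicBolt {

	private static final long serialVersionUID = 1L;
	private static final Logger LOG = LoggerFactory.getLogger(MyTest11Bolt.class);

	@Override
	public void execute(Tuple input, BasicOutputCollector collector) {
		//BaseBasicBolt acks automatically after execute() returns
		LOG.info("MyTest11Bolt received: [{}]", input.getValue(0));
	}

	@Override
	public void declareOutputFields(OutputFieldsDeclarer declarer) {
		//terminal bolt: declares no output streams
	}
}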