jdk配置
/etc/profile 修改環境變量
export JAVA_HOME=/usr/java/jdk1.8.0_77
export HADOOP_HOME=/lj/hadoop-2.2.0
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin
source /etc/profile
解壓
tar -zxvf apache-flume-1.5.0-bin.tar.gz -C 路徑
/lj/apache-flume-1.5.0-bin/conf
重命名 mv flume-env.sh.template flume-env.sh
flume-env.sh下配置
JAVA_HOME=/usr/java/jdk1.8.0_77
需要hadoop包,使用scp將
/lj/hadoop-2.2.0/share/hadoop/common/hadoop-common-2.2.0.jar
/lj/hadoop-2.2.0/share/hadoop/common/lib/commons-configuration-1.6.jar、hadoop-auth-2.2.0.jar、htrace-core-3.0.4.jar
/lj/hadoop-2.2.0/share/hadoop/hdfs/hadoop-hdfs-2.2.0.jar
拷貝到/lj/apache-flume-1.5.0-bin/lib下
將/lj/hadoop-2.2.0/etc/hadoop/{core-site.xml,hdfs-site.xml} 拷貝到/lj/apache-flume-1.5.0-bin/conf下
vim /etc/hosts
10.185.28.92 THadoop1
10.185.28.94 THadoop2
10.185.28.95 THadoop3
10.185.28.23 THadoop4
10.185.28.58 THadoop5
10.185.28.16 THadoop6
10.185.28.26 THadoop7
10.185.28.50 THadoop8
然後創建要監聽的目錄 mkdir /home/hadoop/logs(需與 a4.conf 中 spoolDir 配置的路徑一致)
啟動
bin/flume-ng agent -n a4 -c conf -f conf/a4.conf -Dflume.root.logger=INFO,console
bin/flume-ng agent -n a2 -f /home/hadoop/a2.conf -c conf -Dflume.root.logger=INFO,console
-D指定運行時要打印在控制台的日志級別
常用級別(log4j):DEBUG、INFO、WARN、ERROR
接下來將文件放入指定目錄
配置文件a4
# Agent a4: declare the names of its source, channel and sink
a4.sources = r1
a4.channels = c1
a4.sinks = k1
# Source: spooling-directory source — files dropped into spoolDir are ingested
a4.sources.r1.type = spooldir
a4.sources.r1.spoolDir = /home/hadoop/logs
# Channel: in-memory buffer between source and sink
a4.channels.c1.type = memory
# Channel capacity, in events
a4.channels.c1.capacity = 10000
# Max events handed over per transaction
a4.channels.c1.transactionCapacity = 100
# Interceptor: stamps every event with a timestamp header.
# Required here so the %Y%m%d escape in hdfs.path below can resolve.
a4.sources.r1.interceptors = i1
a4.sources.r1.interceptors.i1.type = org.apache.flume.interceptor.TimestampInterceptor$Builder
# Sink: write events to HDFS
a4.sinks.k1.type = hdfs
# ns1 is the HDFS HA nameservice (resolved via the copied core-site.xml/hdfs-site.xml);
# %Y%m%d is filled in from the timestamp header added by the interceptor
a4.sinks.k1.hdfs.path = hdfs://ns1/flume/%Y%m%d
# File name prefix; resulting files look like events-.<timestamp>
a4.sinks.k1.hdfs.filePrefix = events-
# DataStream = plain text output (no SequenceFile wrapping)
a4.sinks.k1.hdfs.fileType = DataStream
# 0 disables rolling by event count
a4.sinks.k1.hdfs.rollCount = 0
# A file is rolled when EITHER of the two conditions below is met:
# roll when the file on HDFS reaches 128 MB (134217728 bytes)
a4.sinks.k1.hdfs.rollSize = 134217728
# roll after the file has been open for 60 seconds
a4.sinks.k1.hdfs.rollInterval = 60
# Wire source and sink to the channel
a4.sources.r1.channels = c1
a4.sinks.k1.channel = c1
配置文件a2
# Agent a2: declare the names of its source, channel and sink
a2.sources = r1
a2.channels = c1
a2.sinks = k1
# Source: exec source — run a command and ingest its stdout line by line
a2.sources.r1.type = exec
# tail -F keeps following the file even across rotation/recreation
a2.sources.r1.command = tail -F /home/hadoop/a.log
# Channel: in-memory buffer between source and sink
a2.channels.c1.type = memory
a2.channels.c1.capacity = 1000
a2.channels.c1.transactionCapacity = 100
# Sink: logger sink — prints events to the console
# (visible because the agent is started with -Dflume.root.logger=INFO,console)
a2.sinks.k1.type = logger
# Wire source and sink to the channel
a2.sources.r1.channels = c1
a2.sinks.k1.channel = c1