Reading a File from HDFS
First, establish the remote connection; the setup is described at http://blog.csdn.net/qq_36020545/article/details/79062200.
Start by checking the file contents on Hadoop HDFS:
bin/hdfs dfs -text /user/davidghs/mapreduce/wordcount/input/wc.input
After the remote connection is made, obtain a FileSystem object.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;

/**
 * Get the file system.
 * @return fileSystem the HDFS file system
 * @throws Exception
 */
public static FileSystem getFileSystem() throws Exception {
    // By default this loads core-site.xml, core-default.xml, hdfs-site.xml and hdfs-default.xml
    Configuration configuration = new Configuration();
    // Get the file system
    FileSystem fileSystem = FileSystem.get(configuration);
    return fileSystem;
}
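Note that new Configuration() only finds the NameNode if the *-site.xml files are on the client's classpath. If they are not, one option is to set fs.defaultFS programmatically; the sketch below assumes a placeholder NameNode address ("namenode-host:8020"), so replace it with the address from your own core-site.xml or the linked post.

// Sketch only: point the client at a remote NameNode when the *-site.xml
// files are not on the classpath. The host and port are placeholders.
Configuration configuration = new Configuration();
configuration.set("fs.defaultFS", "hdfs://namenode-host:8020");
FileSystem fileSystem = FileSystem.get(configuration);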
/**
 * Read a file and print its contents to standard output.
 * @param fileName path of the file on HDFS
 * @throws Exception
 */
public static void readFile(String fileName) throws Exception {
    FileSystem fileSystem = getFileSystem();
    Path readPath = new Path(fileName);
    // Open an input stream on the file
    FSDataInputStream inputStream = fileSystem.open(readPath);
    try {
        // Copy the stream to stdout with a 4 KB buffer; do not close the streams here
        IOUtils.copyBytes(inputStream, System.out, 4096, false);
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // Close the stream
        IOUtils.closeStream(inputStream);
    }
}
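The manual try/finally above works, but since FSDataInputStream is Closeable, try-with-resources can handle the cleanup instead. This is only an alternative sketch with the same behaviour, not the code from the original post:

public static void readFile(String fileName) throws Exception {
    FileSystem fileSystem = getFileSystem();
    // try-with-resources closes the stream automatically, even if copyBytes throws
    try (FSDataInputStream inputStream = fileSystem.open(new Path(fileName))) {
        IOUtils.copyBytes(inputStream, System.out, 4096, false);
    }
}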
public static void main(String[] args) throws Exception {
    // Path of the file to read
    String fileName = "/user/davidghs/mapreduce/wordcount/input/wc.input";
    readFile(fileName);
}
Hope this helps.