Windows 10 + Eclipse + Hadoop 2.5.0 Environment Setup

2017-12-27 08:01:46 | Source: CSDN | Author: As_Yang

I. Configure the Hadoop cluster environment on Linux.

II. Windows base environment setup

1. Configure the JDK: after installing the JDK, set the JAVA_HOME environment variable to the JDK install directory, add the JDK's bin directory to the Path variable, and set the classpath variable to .;%JAVA_HOME%/lib;%JAVA_HOME%/lib/tools.jar;
2. Download hadoop-2.5.0.tar.gz: https://pan.baidu.com/s/1i4WhKnb (extraction code: r3am)
3. Download hadoop-eclipse-plugin-2.5.1.jar: https://pan.baidu.com/s/1sl7Lloh (extraction code: w7cz)
4. Download Eclipse, version 4.5: https://pan.baidu.com/s/1b7t84I (extraction code: 1qeg)
5. Download hadoop.dll and winutils.exe: https://pan.baidu.com/s/1gffFqmB (extraction code: t8mn)
6. Edit the Windows hosts file to add the entry 192.168.1.101 Hadoop01, which binds the Linux machine's IP to its hostname; a sketch of the finished settings follows this list.
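For reference, the finished values from steps 1 and 6 might look like the following. The JDK path is only an example; substitute your own install location:

    JAVA_HOME = C:\Program Files\Java\jdk1.7.0_79
    Path      = ...;%JAVA_HOME%\bin
    classpath = .;%JAVA_HOME%/lib;%JAVA_HOME%/lib/tools.jar;

    # C:\Windows\System32\drivers\etc\hosts
    192.168.1.101   Hadoop01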
III. Eclipse environment configuration

1. Unpack hadoop-2.5.0.tar.gz and copy hadoop.dll and winutils.exe into the hadoop-2.5.0/bin directory (a note on how winutils.exe is located at runtime follows step 5 below).
2. Copy hadoop-eclipse-plugin-2.5.1.jar into Eclipse's plugins directory and restart Eclipse.
3. Open Window - Preferences - Hadoop Map/Reduce and point it at the directory where hadoop-2.5.0.tar.gz was unpacked on Windows.
4. Configure the connection to Hadoop: open Window - Show View and fill in the connection information as described below.

Host: the IP of the HDFS host; here it is simply Hadoop01, because the Windows hosts file already binds that hostname to the IP. The left Port is the HDFS web access port; the right Port is the HDFS internal access port.

5. If the connection succeeds, the view displays all of the files in the HDFS file system. A quick way to double-check the connection from code is the sketch below.
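As a sanity check of the connection settings, a few lines against the HDFS Java API can list the root directory. This is a minimal sketch, assuming the NameNode listens at hdfs://Hadoop01:8020; substitute your own host and port:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsSmokeTest {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // assumption: the NameNode address; replace with your own
        conf.set("fs.defaultFS", "hdfs://Hadoop01:8020");
        FileSystem fs = FileSystem.get(conf);
        // print every entry directly under the HDFS root
        for (FileStatus status : fs.listStatus(new Path("/"))) {
            System.out.println(status.getPath());
        }
        fs.close();
    }
}

If this prints the same entries that the Map/Reduce view shows, the connection details are correct. One further note from experience rather than from this article: if Eclipse was started without a HADOOP_HOME environment variable, the MapReduce client may fail to find the winutils.exe copied in step 1; a common workaround is to set the hadoop.home.dir system property before any Hadoop class runs (the D:\hadoop-2.5.0 path is an assumption; use your own unpack directory):

    // assumption: hadoop-2.5.0 was unpacked to D:\hadoop-2.5.0
    System.setProperty("hadoop.home.dir", "D:\\hadoop-2.5.0");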
6. Create a Java project and write the WordCount program:

package com.bigdata.mapreduce;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class MapReduceModule {
    // step 1: Mapper class
    public static class MapReduceMapper extends Mapper<LongWritable, Text, Text, IntWritable> {

        private Text mapOutputKey = new Text();
        // every occurrence of a word is counted once
        private IntWritable mapOutputValue = new IntWritable(1);

        @Override
        public void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // read one line of the file, converting Text to String
            String lineValue = value.toString();
            // split the line into words on spaces
            String[] strs = lineValue.split(" ");
            // emit each word as a <key, value> pair, e.g. <hadoop, 1>
            for (String str : strs) {
                mapOutputKey.set(str);
                context.write(mapOutputKey, mapOutputValue);
            }
        }
    }
    // step 2: Reducer class
    public static class MapReduceReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        private IntWritable outputValue = new IntWritable();

        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            // running total for this key
            int sum = 0;
            // iterate over the values and accumulate the count
            for (IntWritable value : values) {
                sum += value.get();
            }
            // set and emit the final count
            outputValue.set(sum);
            context.write(key, outputValue);
        }
    }
    // step 3: Driver
    public int run(String[] args) throws Exception {
        // pick up the cluster configuration
        Configuration configuration = new Configuration();

        // create a Job; setJarByClass tells Hadoop which class in the jar to run
        Job job = Job.getInstance(configuration, this.getClass().getSimpleName());
        job.setJarByClass(this.getClass());

        // input path
        Path inpath = new Path(args[0]);
        FileInputFormat.addInputPath(job, inpath);

        // output path
        Path outpath = new Path(args[1]);
        FileOutputFormat.setOutputPath(job, outpath);

        // set the Mapper
        job.setMapperClass(MapReduceMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // set the Reducer
        job.setReducerClass(MapReduceReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // submit the job to YARN and wait for completion
        boolean isSuccess = job.waitForCompletion(true);
        return isSuccess ? 0 : 1;
    }
    public static void main(String[] args) throws Exception {
        args = new String[] {
                "hdfs://bigdata-senior01.liuhongyang.com:8020/user/admin/mapreduce/input",
                "hdfs://bigdata-senior01.liuhongyang.com:8020/user/admin/mapreduce/output3" };

        // run the job
        int status = new MapReduceModule().run(args);
        // exit with the job's status code
        System.exit(status);
    }
}

If running this class throws Exception in thread "main" java.lang.UnsatisfiedLinkError: org.apache.hadoop.io.nativeio.NativeIO$Windows.access0(Ljava/lang/String;I), take the source of org.apache.hadoop.util.DiskChecker from the hadoop-common package, create an org.apache.hadoop.util.DiskChecker.java file with it in your project so that it shadows the class in the jar, and comment out line 94, the call to checkDirAccess(dir).
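For reference, the commented-out call in the copied DiskChecker.java would look roughly like this. The exact method body and line number vary by Hadoop version, so treat this as a sketch rather than the literal file:

    public static void checkDir(File dir) throws DiskErrorException {
        if (!mkdirsWithExistsCheck(dir)) {
            throw new DiskErrorException("Cannot create directory: " + dir.toString());
        }
        // checkDirAccess(dir); // commented out: this check is what ends up in NativeIO$Windows.access0 on Windows
    }

After that, rerunning the program with an input file containing, say, "hadoop hive hadoop" should leave part files in the output directory with the lines "hadoop 2" and "hive 1".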
