Parallelising MapReduce

私は並列プログラミングとHadoop MapReduceの初心者です。次の例は以下のチュートリアルサイトから引用したものです。マルチスレッドを導入してMapperとReducerを同時に実行できるように、このMapReduceを並列化する（並列プログラミングを適用する）にはどうすればよいでしょうか？

https://www.tutorialspoint.com/hadoop/hadoop_mapreduce.htm

1台のマシン上でMapperとReducerを同時に実行することはできますか？

うまく説明できていなかったら申し訳ありません。

package hadoop; 

import java.util.*; 

import java.io.IOException; 
import java.io.IOException; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.conf.*; 
import org.apache.hadoop.io.*; 
import org.apache.hadoop.mapred.*; 
import org.apache.hadoop.util.*; 

public class ProcessUnits 
{ 
    //Mapper class 
    public static class E_EMapper extends MapReduceBase implements 
    Mapper<LongWritable ,/*Input key Type */ 
    Text,    /*Input value Type*/ 
    Text,    /*Output key Type*/ 
    IntWritable>  /*Output value Type*/ 
    { 

     //Map function 
     public void map(LongWritable key, Text value, 
     OutputCollector<Text, IntWritable> output, 
     Reporter reporter) throws IOException 
     { 
     String line = value.toString(); 
     String lasttoken = null; 
     StringTokenizer s = new StringTokenizer(line,"\t"); 
     String year = s.nextToken(); 

     while(s.hasMoreTokens()) 
      { 
       lasttoken=s.nextToken(); 
      } 

     int avgprice = Integer.parseInt(lasttoken); 
     output.collect(new Text(year), new IntWritable(avgprice)); 
     } 
    } 


    //Reducer class 
    public static class E_EReduce extends MapReduceBase implements 
    Reducer< Text, IntWritable, Text, IntWritable > 
    { 

     //Reduce function 
     public void reduce(Text key, Iterator <IntWritable> values, 
     OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException 
     { 
      int maxavg=30; 
      int val=Integer.MIN_VALUE; 

      while (values.hasNext()) 
      { 
       if((val=values.next().get())>maxavg) 
       { 
        output.collect(key, new IntWritable(val)); 
       } 
      } 

     } 
    } 


    //Main function 
    public static void main(String args[])throws Exception 
    { 
     JobConf conf = new JobConf(ProcessUnits.class); 

     conf.setJobName("max_eletricityunits"); 
     conf.setOutputKeyClass(Text.class); 
     conf.setOutputValueClass(IntWritable.class); 
     conf.setMapperClass(E_EMapper.class); 
     conf.setCombinerClass(E_EReduce.class); 
     conf.setReducerClass(E_EReduce.class); 
     conf.setInputFormat(TextInputFormat.class); 
     conf.setOutputFormat(TextOutputFormat.class); 

     FileInputFormat.setInputPaths(conf, new Path(args[0])); 
     FileOutputFormat.setOutputPath(conf, new Path(args[1])); 

     JobClient.runJob(conf); 
    } 
}

出典

2017-05-01 Novice Programmer

うまく説明できているかどうかは分かりませんが、私も同じレベルでHadoopに苦労しているので、投稿される回答を読ませてもらいます。 – strash

@strash それはよかったです。ええ、すみません、正しく説明しようとしたのですが、知識があまりないためです。何か回答が得られることを期待しましょう –

作業の並列化はHadoopが行ってくれます。hadoop jarを実行する以外に、何かをする必要はありません。

MapReduce全般について言えば、reduceはmapの結果に依存するため、mapフェーズとreduceフェーズは（並列ではなく）順次実行されることを念頭に置いておく必要があります。ただし、複数のmapperを並列に動作させることができ、それらが終了すると、複数のreducerが並列に動作します（もちろんタスク次第ですが）。繰り返しになりますが、それらの起動と調整はHadoopが行ってくれます。

出典

2017-05-01 13:44:34

答えて

関連する問題