異なる長さの部分文字列を生成するためのhadoop mapreduce

-1

Hadoop MapReduceを使って、異なる長さの部分文字列を取得するコードを書いています。例として、文字列 "ZYXCBA" と長さ3が与えられたとします（テキストファイルでは "3 ZYXCBA" と入力します）。私のコードは、長さ3（"ZYX"、"YXC"、"XCB"、"CBA"）、長さ4（"ZYXC"、"YXCB"、"XCBA"）、そして長さ5（"ZYXCB"、"YXCBA"）の部分文字列をすべて返す必要があります。

mapフェーズでは、次のようにしました。

キー = 取得したい部分文字列の長さ

値 = "ZYXCBA"

そのため、マッパーの出力は次のようになります。

3,"ZYXCBA" 
4,"ZYXCBA" 
5,"ZYXCBA"

reduceフェーズでは、文字列（"ZYXCBA"）とキー3を受け取り、長さ3の部分文字列をすべて取得します。キー4、5についても同様です。結果は1つの文字列に連結されます。

[email protected]:~/Documents$ hadoop jar Saishingles.jar hadoopshingles.Saishingles Behara/Shingles/input Behara/Shingles/output

私のコードは以下のとおりです：

package hadoopshingles; 

import java.io.IOException; 
//import java.util.ArrayList; 

import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 
import org.apache.hadoop.util.GenericOptionsParser; 


public class Saishingles{ 

public static class shinglesmapper extends Mapper<Object, Text, IntWritable, Text>{ 

     public void map(Object key, Text value, Context context 
       ) throws IOException, InterruptedException { 

      String str = new String(value.toString()); 
      String[] list = str.split(" "); 
      int x = Integer.parseInt(list[0]); 
      String val = list[1]; 
      int M = val.length(); 
      int X = M-1; 


      for(int z = x; z <= X; z++) 
      { 
       context.write(new IntWritable(z), new Text(val)); 
      } 

     } 

    } 


public static class shinglesreducer extends Reducer<IntWritable,Text,IntWritable,Text> { 


    public void reduce(IntWritable key, Text value, Context context 
      ) throws IOException, InterruptedException { 
     int z = key.get(); 
     String str = new String(value.toString()); 
     int M = str.length(); 
     int Tz = M - z; 
     String newvalue = ""; 
     for(int position = 0; position <= Tz; position++) 
     { 
      newvalue = newvalue + " " + str.substring(position,position + z); 
     } 

     context.write(new IntWritable(z),new Text(newvalue)); 
    } 
} 




public static void main(String[] args) throws Exception { 
     GenericOptionsParser parser = new GenericOptionsParser(args); 
     Configuration conf = parser.getConfiguration(); 
     String[] otherArgs = parser.getRemainingArgs(); 

     if (otherArgs.length != 2) 
     { 
      System.err.println("Usage: Saishingles <inputFile> <outputDir>"); 
      System.exit(2); 
     } 
     Job job = Job.getInstance(conf, "Saishingles"); 
     job.setJarByClass(hadoopshingles.Saishingles.class); 
     job.setMapperClass(shinglesmapper.class); 
     //job.setCombinerClass(shinglesreducer.class); 
     job.setReducerClass(shinglesreducer.class); 
     //job.setMapOutputKeyClass(IntWritable.class); 
     //job.setMapOutputValueClass(Text.class); 
     job.setOutputKeyClass(IntWritable.class); 
     job.setOutputValueClass(Text.class); 
     FileInputFormat.addInputPath(job, new Path(args[0])); 
     FileOutputFormat.setOutputPath(job, new Path(args[1])); 
     System.exit(job.waitForCompletion(true) ? 0 : 1); 

} 

}

reduceの出力は次のようになるはずです。

3 "ZYX YXC XCB CBA" 
4 "ZYXC YXCB XCBA" 
5 "ZYXCB YXCBA"

コードは上記のコマンドで実行しています。reduceの出力は、期待している次の結果

3 "ZYX YXC XCB CBA" 
4 "ZYXC YXCB XCBA" 
5 "ZYXCB YXCBA"

ではなく、実際には次の結果が返されます。

3 "ZYXCBA" 
4 "ZYXCBA" 
5 "ZYXCBA"

つまり、マッパーと同じ出力が得られてしまいます。なぜこうなるのか分かりません。 ;) :) :)

出典

2016-07-21 Jyothi Kumar

レデューサーを実行しなくても、これは実現できます。map/reduceのロジックが間違っています。変換はMapperで行うべきです。

Reduce：このフェーズでは、グループ化された入力内の各<key, (list of values)>ペアに対して、reduce(WritableComparable, Iterator, OutputCollector, Reporter)メソッドが呼び出されます。

あなたのreduceのシグネチャ：public void reduce(IntWritable key, Text value, Context context)

は、public void reduce(IntWritable key, Iterable<Text> values, Context context)

でなければなりません。シグネチャが一致しないとReducerのreduceをオーバーライドしたことにならず、デフォルトのreduce（入力をそのまま出力する）が実行されます。

また、reduceメソッドの最後の行を、context.write(new IntWritable(z),new Text(newvalue));からcontext.write(key,new Text(newvalue));へ変更してください。マッパーから受け取ったIntWritableのキーがすでにあるので、新しく作成する必要はありません。

与えられた入力：

3 "ZYXCBA" 
4 "ZYXCBA" 
5 "ZYXCBA"

に対して、MapReduceジョブの出力は次のとおりです：

3 "XCB YXC ZYX" 
4 "XCBA YXCB ZYXC" 
5 "YXCBA ZYXCB"

MapReduceJob：

import java.io.IOException; 
import java.util.ArrayList; 
import org.apache.hadoop.conf.Configuration; 
import org.apache.hadoop.fs.FileSystem; 
import org.apache.hadoop.fs.Path; 
import org.apache.hadoop.io.IntWritable; 
import org.apache.hadoop.io.Text; 
import org.apache.hadoop.mapreduce.Job; 
import org.apache.hadoop.mapreduce.Mapper; 
import org.apache.hadoop.mapreduce.Reducer; 
import org.apache.hadoop.mapreduce.Reducer.Context; 
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; 
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; 

/**
 * MapReduce job that lists all contiguous substrings of a given length.
 *
 * <p>Each input line has the form {@code <length> <string>}; the string may be wrapped
 * in double quotes, e.g. {@code 3 "ZYXCBA"}. The job writes one record per length whose
 * value is the space-separated substrings wrapped in double quotes.
 */
public class SubStrings {

    /** Cuts the string on each line into substrings and emits them keyed by length. */
    public static class SubStringsMapper extends Mapper<Object, Text, IntWritable, Text> {

        /**
         * @param key     input key supplied by the input format (unused)
         * @param value   one input line: {@code <length> <string>}
         * @param context sink for the {@code (length, substring)} pairs
         * @throws NumberFormatException if the first token is not an integer
         */
        @Override
        public void map(Object key, Text value, Context context)
                throws IOException, InterruptedException {

            String[] fields = value.toString().trim().split(" ");
            if (fields.length < 2) {
                // Blank or incomplete line: nothing to cut.
                return;
            }

            int len = Integer.parseInt(fields[0].trim());
            String str = fields[1].replace("\"", "").trim();

            // Slide the window over the whole string. The bound has to depend on the
            // string length: bounding the start index by len (as in "i < len") drops
            // every window that starts at index >= len, e.g. "CBA" of "ZYXCBA".
            IntWritable lengthKey = new IntWritable(len);
            for (int start = 0; start + len <= str.length(); start++) {
                context.write(lengthKey, new Text(str.substring(start, start + len)));
            }
        }
    }

    /** Joins all substrings of one length into a single double-quoted value. */
    public static class SubStringsReducer extends Reducer<IntWritable, Text, IntWritable, Text> {

        /**
         * @param key     substring length
         * @param values  substrings emitted by the mappers for this length
         * @param context sink for the {@code (length, "s1 s2 ...")} record
         */
        @Override
        public void reduce(IntWritable key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {

            StringBuilder joined = new StringBuilder("\""); // opening quote
            boolean first = true;
            for (Text value : values) {
                if (!first) {
                    joined.append(' ');
                }
                joined.append(value.toString());
                first = false;
            }
            joined.append('"'); // closing quote

            context.write(key, new Text(joined.toString()));
        }
    }

    /**
     * Configures and runs the job. An existing output directory is deleted first.
     *
     * @param args {@code <inputPath> <outputDir>}
     */
    public static void main(String[] args) throws Exception {

        if (args.length != 2) {
            System.err.println("Usage: SubStrings <inputPath> <outputDir>");
            System.exit(2);
        }

        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "get-possible-strings-by-length");

        job.setJarByClass(SubStrings.class);
        job.setMapperClass(SubStringsMapper.class);
        job.setReducerClass(SubStringsReducer.class);

        job.setMapOutputKeyClass(IntWritable.class);
        job.setMapOutputValueClass(Text.class);

        job.setOutputKeyClass(IntWritable.class);
        job.setOutputValueClass(Text.class);

        Path inputPath = new Path(args[0]);
        Path outputPath = new Path(args[1]);
        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Remove a stale output directory so the job can be re-run. A failure here is
        // propagated rather than printed and ignored: the job cannot succeed while the
        // directory still exists.
        FileSystem fs = outputPath.getFileSystem(conf);
        if (fs.exists(outputPath)) {
            fs.delete(outputPath, true);
        }

        // Report job success or failure through the process exit status.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

出典

2016-07-21 12:57:02

ええ、私もレデューサーなしで同じことをしました。なぜ自分のコードでレデューサーが動作しないのかを知りたいのです @BigDataLearner –

回答を更新したので試してみてください - 'reduce'メソッドの実装を変更する必要があります。 –

異なる長さの部分文字列を生成するためのhadoop mapreduce

答えて

関連する問題