Java Hadoop MapReduce mapper programming

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;


public class ADDMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, LongWritable> {

    @Override
    public void map(LongWritable key, Text value,
            OutputCollector<Text, LongWritable> output, Reporter r) throws IOException {
        String s = value.toString();
        char[] words = s.toCharArray();
        int wno = 0;
        int ino = 0;
        for (int i = 0; i < words.length; i++) {
            String temp = "";
            for (int j = ino; j < words.length; j++) {
                if (words[j] != ' ') {
                    temp += words[j];
                } else {
                    wno = j;
                    if (temp != "") {
                        ino = ino + key; ////// POINT OF ERROR
                        output.collect(new Text(temp), new LongWritable(ino));
                    }
                    temp = "";
                    ino = wno + 1;
                    break;
                }
            }
        }
    }
}

I want to get the index value of each string, sorted by string.
The code above neither gives the index values nor sorts the strings. Sample input file: hi how are you hi i am right. how is your job. hi are you fine

Expected output: am 50, is 7,33, hi 0,30,44, how 3,14 ...
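
For reference, the line marked POINT OF ERROR does not compile because key is a LongWritable (under TextInputFormat it holds the byte offset of the current line), which cannot be added to an int directly. A minimal, hedged correction of just that emit step, assuming the goal is the word's position within the whole file, might be:

// key.get() returns the byte offset of this line within the file (TextInputFormat)
long wordIndex = key.get() + ino;   // file-wide position of the current word
output.collect(new Text(temp), new LongWritable(wordIndex));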


3 Answers

  1. Answer #1

    Please run the code below; it gives the expected output.

        import java.io.IOException;
        import java.util.*;
        import java.util.Map.Entry;

        import org.apache.hadoop.fs.Path;
        import org.apache.hadoop.conf.*;
        import org.apache.hadoop.io.*;
        import org.apache.hadoop.mapreduce.*;
        import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
        import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
        import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
        import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

        public class Index {

            public static class Map extends Mapper<LongWritable, Text, Text, IntWritable> {

                public void map(LongWritable key, Text value, Context context)
                        throws IOException, InterruptedException {
                    String str = value.toString();
                    String[] tokens = str.split(" "); // split into words
                    // create a hashmap of the unique words
                    HashMap<String, Integer> uniqueString = new HashMap<String, Integer>();
                    for (int i = 0; i < tokens.length; i++) {
                        uniqueString.put(tokens[i], 1);
                    }
                    // for sorting, create a TreeMap from the hash map above
                    TreeMap<String, Integer> map = new TreeMap<String, Integer>(uniqueString);
                    Configuration conf = context.getConfiguration();
                    int strIndex = 0;
                    for (Entry<String, Integer> entry : map.entrySet()) {
                        // running offset of the current line, kept in the configuration
                        // so that indexes continue across the lines seen by this mapper
                        strIndex = conf.getInt("index", 0);
                        // find every index of the word within the line
                        int index = str.indexOf(entry.getKey());
                        while (index >= 0) {
                            index += strIndex;
                            context.write(new Text(entry.getKey()), new IntWritable(index));
                            index = str.indexOf(entry.getKey(), index + 1);
                        }
                    }
                    conf.setInt("index", strIndex + str.length());
                }
            }

            public static class Reduce extends Reducer<Text, IntWritable, Text, IntWritable> {

                public void reduce(Text key, Iterable<IntWritable> values, Context context)
                        throws IOException, InterruptedException {
                    for (IntWritable val : values) {
                        context.write(key, new IntWritable(val.get()));
                    }
                }
            }

            public static void main(String[] args) throws Exception {
                Configuration conf = new Configuration();
                conf.setInt("index", 0);

                Job job = new Job(conf, "index");
                job.setOutputKeyClass(Text.class);
                job.setOutputValueClass(IntWritable.class);

                job.setMapperClass(Map.class);
                job.setReducerClass(Reduce.class);

                job.setInputFormatClass(TextInputFormat.class);
                job.setOutputFormatClass(TextOutputFormat.class);

                FileInputFormat.addInputPath(job, new Path("input"));
                FileOutputFormat.setOutputPath(job, new Path("output"));

                job.waitForCompletion(true);
            }

        }
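
    The Reduce class above writes one output line per (word, index) pair. If the comma-separated layout from the question (e.g. hi 0,30,44) is preferred, a reducer along the lines of the sketch below could be used instead; the class name ConcatReduce is made up here, and the driver would also need job.setMapOutputValueClass(IntWritable.class) and job.setOutputValueClass(Text.class).

        public static class ConcatReduce extends Reducer<Text, IntWritable, Text, Text> {

            public void reduce(Text key, Iterable<IntWritable> values, Context context)
                    throws IOException, InterruptedException {
                // join all index values of this word into one comma-separated string
                StringBuilder sb = new StringBuilder();
                for (IntWritable val : values) {
                    if (sb.length() > 0) {
                        sb.append(",");
                    }
                    sb.append(val.get());
                }
                context.write(key, new Text(sb.toString()));
            }
        }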
    
  2. Answer #2

    Please run the code below; it works fine and gives your expected output.

    Provide the input and output paths as command-line arguments (args[0], args[1]).

    import java.io.IOException;
    import java.util.*;
    import java.util.Map.Entry;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.*;
    import org.apache.hadoop.mapred.*;
    
    
        public class IndexCount {
    
           public static class Map extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
             public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    
               String str=value.toString();
               String[] tokens = str.split(" "); //split into words
               //create hashmap for unique word
               HashMap<String,Integer> uniqueString = new HashMap<String,Integer>();
               for(int i=0;i<tokens.length;i++){
                   uniqueString.put(tokens[i],1);
               }       
               //for sorting create TreeMap from above hash map
               TreeMap<String, Integer> map = new TreeMap<String,Integer>(uniqueString); 
                for (Entry<String, Integer> entry : map.entrySet()) {
                   int index=0;
               //find the index of the word
                   index = str.indexOf((String)entry.getKey());
                   while (index >= 0) {
                           output.collect(new Text((String)entry.getKey()),new IntWritable(index));
                           index = str.indexOf((String)entry.getKey(), index + 1);
                   }
               }
           }
        }
           public static class Reduce extends MapReduceBase implements Reducer<Text, IntWritable, Text, IntWritable> {
             public void reduce(Text key, Iterator<IntWritable> values, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
    
               while (values.hasNext()) {
                   output.collect(key, new IntWritable(values.next().get()));
               }
    
             } 
        }
           public static void main(String[] args) throws Exception {
             JobConf conf = new JobConf(IndexCount.class);
             conf.setJobName("indexfinder");
    
             conf.setOutputKeyClass(Text.class);
             conf.setOutputValueClass(IntWritable.class);
             conf.setMapperClass(Map.class);
             conf.setCombinerClass(Reduce.class);
             conf.setReducerClass(Reduce.class);    
             conf.setInputFormat(TextInputFormat.class);
             conf.setOutputFormat(TextOutputFormat.class);
    
             FileInputFormat.setInputPaths(conf, new Path(args[0]));
             FileOutputFormat.setOutputPath(conf, new Path(args[1]));
    
             JobClient.runJob(conf);
           }
        }
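
    Note that this mapper computes indexes per input line only: str.indexOf restarts at 0 on every line, while the expected output in the question counts positions across the whole file. One way to approximate file-wide positions is to add the byte offset that TextInputFormat passes to the mapper as its key. The class below is only a sketch under that assumption (the name OffsetMap is made up); the rest of the driver stays the same apart from conf.setMapperClass(OffsetMap.class).

        public static class OffsetMap extends MapReduceBase implements Mapper<LongWritable, Text, Text, IntWritable> {
            public void map(LongWritable key, Text value, OutputCollector<Text, IntWritable> output, Reporter reporter) throws IOException {
                String str = value.toString();
                // TextInputFormat supplies the byte offset of the line start as the key,
                // so adding it to the in-line index gives a file-wide position
                // (assuming single-byte characters)
                long base = key.get();
                TreeMap<String, Integer> sorted = new TreeMap<String, Integer>();
                for (String token : str.split(" ")) {
                    sorted.put(token, 1);
                }
                for (String word : sorted.keySet()) {
                    int index = str.indexOf(word);
                    while (index >= 0) {
                        output.collect(new Text(word), new IntWritable((int) (base + index)));
                        index = str.indexOf(word, index + 1);
                    }
                }
            }
        }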
    
  3. Answer #3

    Hi Shivendra, I have written the mapper logic below; it will help you find the index of each string, with sorted output. The output of this code is the sorted strings with their indexes; you can then run a reducer on this output (a sketch of one follows the sample output below).

    String str = value.toString();
    String[] tokens = str.split(" "); // split into words
    // create a hashmap of the unique words
    Map<String, Integer> uniqueString = new HashMap<String, Integer>();
    for (int i = 0; i < tokens.length; i++) {
        uniqueString.put(tokens[i], 1);
    }
    // for sorting, create a TreeMap from the hash map above
    Map<String, Integer> map = new TreeMap<String, Integer>(uniqueString);
    for (Map.Entry<String, Integer> entry : map.entrySet()) {
        // find every index of the word within the line
        int index = str.indexOf(entry.getKey());
        while (index >= 0) {
            output.collect(new Text(entry.getKey()), new LongWritable(index));
            index = str.indexOf(entry.getKey(), index + 1);
        }
    }
    

    The output of this logic: am:20, are:7, are:50, hi:0, hi:15, hi:47, how:3, how:30, i:1, i:16, i:18, i:24, i:34, i:48, is:34, job:42, fine.:58, right.:23, you:11, you:37, you:54, your:37
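
    Since the snippet covers only the map side and emits LongWritable indexes, a matching reducer in the same old mapred API that groups the indexes of each word into a comma-separated list might look like the sketch below; the class name IndexReduce is made up, and the driver would need conf.setMapOutputValueClass(LongWritable.class) together with conf.setOutputValueClass(Text.class).

        public static class IndexReduce extends MapReduceBase implements Reducer<Text, LongWritable, Text, Text> {
            public void reduce(Text key, Iterator<LongWritable> values, OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
                // collect all indexes of this word into one comma-separated list
                StringBuilder sb = new StringBuilder();
                while (values.hasNext()) {
                    if (sb.length() > 0) {
                        sb.append(",");
                    }
                    sb.append(values.next().get());
                }
                output.collect(key, new Text(sb.toString()));
            }
        }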

    Hope this helps.