IDEA上的倒排索引案例

IIMapper1类:

package com.atguigu.invertindex;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;

import java.io.IOException;

public class IIMapper1 extends Mapper<LongWritable, Text,Text, IntWritable> {
    /** Reusable output key holding "word--filename". */
    private final Text outKey = new Text();
    /** Constant count of 1 emitted for every token occurrence. */
    private final IntWritable one = new IntWritable(1);

    /** Name of the input file backing the current split. */
    private String fileName;

    /**
     * Caches the current split's file name once per task so that map()
     * can tag every word with its source file.
     */
    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        FileSplit split = (FileSplit) context.getInputSplit();
        fileName = split.getPath().getName();
    }

    /**
     * Emits ("word--filename", 1) for each space-separated token of the line.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        for (String word : value.toString().split(" ")) {
            outKey.set(word + "--" + fileName);
            context.write(outKey, one);
        }
    }
}

IIReducer1类:

package com.atguigu.invertindex;

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

public class IIReducer1 extends Reducer<Text, IntWritable,Text,IntWritable> {
    /** Reusable output value holding the summed occurrence count. */
    private final IntWritable total = new IntWritable();

    /**
     * Sums all partial counts for a single "word--filename" key and
     * emits the key together with its total.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
        int sum = 0;
        for (IntWritable count : values) {
            sum += count.get();
        }
        total.set(sum);
        context.write(key, total);
    }
}

IIMapper2类:

package com.atguigu.invertindex;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;


import java.io.IOException;

public class IIMapper2 extends Mapper<LongWritable,Text,Text, Text> {
    /** Reusable output key: the word itself. */
    private final Text outKey = new Text();
    /** Reusable output value: "filename-->count". */
    private final Text outValue = new Text();

    /**
     * Re-keys job 1's output by word.
     *
     * Input line format (from job 1): "word--filename\tcount".
     * Output: key = word, value = "filename-->count".
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] wordAndRest = value.toString().split("--");
        String[] fileAndCount = wordAndRest[1].split("\t");
        outKey.set(wordAndRest[0]);
        outValue.set(fileAndCount[0] + "-->" + fileAndCount[1]);
        context.write(outKey, outValue);
    }
}

IIReducer2类:

package com.atguigu.invertindex;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;


public class IIReducer2 extends Reducer<Text,Text,Text, Text> {
    /** Reusable output value holding the joined posting list. */
    private final Text outValue = new Text();
    // Reused across reduce() calls to avoid per-key allocation;
    // cleared at the start of every call.
    private final StringBuilder postings = new StringBuilder();

    /**
     * Concatenates every "filename-->count" entry for one word, each
     * followed by a single space, and emits the resulting posting list.
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
        postings.setLength(0);
        for (Text entry : values) {
            postings.append(entry.toString()).append(" ");
        }
        outValue.set(postings.toString());
        context.write(key, outValue);
    }
}

IIDriver类:

package com.atguigu.invertindex;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

import java.io.IOException;

public class IIDriver {
    /**
     * Chains two MapReduce jobs to build an inverted index.
     *
     * <p>Job 1 (F:/input -> F:/output) counts occurrences of each
     * "word--filename" pair; job 2 (F:/output -> F:/output1) regroups those
     * counts per word into "filename-->count" posting lists.
     *
     * <p>Exits with status 0 on success and 1 if either job fails.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Job job1 = Job.getInstance(new Configuration());
        job1.setJarByClass(IIDriver.class);
        job1.setMapperClass(IIMapper1.class);
        job1.setReducerClass(IIReducer1.class);

        job1.setMapOutputKeyClass(Text.class);
        job1.setMapOutputValueClass(IntWritable.class);
        job1.setOutputKeyClass(Text.class);
        job1.setOutputValueClass(IntWritable.class);

        FileInputFormat.setInputPaths(job1, new Path("F:/input"));
        FileOutputFormat.setOutputPath(job1, new Path("F:/output"));

        // BUG FIX: the original silently fell through (implicit exit code 0)
        // when job 1 failed, hiding the failure from callers/schedulers.
        // Report it explicitly and skip job 2, whose input would be missing.
        if (!job1.waitForCompletion(true)) {
            System.exit(1);
        }

        Job job2 = Job.getInstance(new Configuration());
        job2.setJarByClass(IIDriver.class);
        job2.setMapperClass(IIMapper2.class);
        job2.setReducerClass(IIReducer2.class);

        job2.setMapOutputKeyClass(Text.class);
        job2.setMapOutputValueClass(Text.class);
        job2.setOutputKeyClass(Text.class);
        job2.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job2, new Path("F:/output"));
        FileOutputFormat.setOutputPath(job2, new Path("F:/output1"));
        System.exit(job2.waitForCompletion(true) ? 0 : 1);
    }
}