使用 Partitioner 將一個文本中不同長度的字串分區存儲
輸入文本如下:
Kaka 1 28
hua 0 26
chao 1
tao 1 22
mao 0 29 22
想要將不同長度的字串分在三個檔案中存盤,代碼如下:
public class TestPartionar {
// Map Method
/**
 * Mapper that tags every input line with a label derived from the number of
 * whitespace-delimited tokens it contains: two tokens -> "Short",
 * three -> "Right", four -> "Long". Lines with any other token count
 * produce no output.
 */
public static class Map extends Mapper<LongWritable, Text, Text, Text> {
    /**
     * Emits the unchanged line as the value, keyed by its length tag.
     *
     * @param key     input key supplied by the framework (unused here)
     * @param value   one line of the input text
     * @param context sink for the (tag, line) pair
     */
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        final String tag;
        switch (new StringTokenizer(value.toString()).countTokens()) {
        case 2:
            tag = "Short";
            break;
        case 3:
            tag = "Right";
            break;
        case 4:
            tag = "Long";
            break;
        default:
            // Any other token count is ignored, exactly as before.
            return;
        }
        context.write(new Text(tag), value);
    }
}
// Partionar Method
/**
 * Partitioner that routes each length tag produced by the mapper to its own
 * reducer, so that "Short", "Right" and "Long" records end up in separate
 * output files.
 *
 * Why not hash the key: distinct keys can share the same value of
 * {@code hash % partionNum}. With three reducers the hash-based scheme sent
 * "Short" and "Right" to the same partition, mixing 2-token and 3-token lines
 * in one file while another partition stayed empty. An explicit mapping
 * removes that dependence on hash values.
 */
public static class getPartionar extends Partitioner<Text, Text> {
    /**
     * @param key        the tag emitted by the mapper
     * @param value      the original input line (not used for routing)
     * @param partionNum number of reduce tasks configured for the job
     * @return a partition index in {@code [0, partionNum)}
     */
    public int getPartition(Text key, Text value, int partionNum) {
        String tag = key.toString();
        int slot;
        if ("Short".equals(tag)) {
            slot = 0;
        } else if ("Right".equals(tag)) {
            slot = 1;
        } else if ("Long".equals(tag)) {
            slot = 2;
        } else {
            // Unknown tag: keep the previous hash-based behaviour as a fallback.
            slot = key.hashCode() & Integer.MAX_VALUE;
        }
        // The modulo keeps the index valid when fewer than three reducers run.
        return slot % partionNum;
    }
}
// Reducer Method
/**
 * Reducer that passes every value of a group through to the output,
 * replacing the grouping key with an empty {@code Text}.
 */
public static class Reduce extends Reducer<Text, Text, Text, Text> {
    /**
     * Writes each line of the group once, under an empty key.
     *
     * @param key     the length tag shared by the group (not written out)
     * @param value   all lines that were emitted under this tag
     * @param context sink for the output records
     */
    protected void reduce(Text key, Iterable<Text> value, Context context)
            throws IOException, InterruptedException {
        for (Text line : value) {
            context.write(new Text(""), line);
        }
    }
}
/**
 * Configures and submits the partitioning job, then exits with 0 on success
 * or 1 on failure.
 *
 * Input and output paths are taken from the command line after Hadoop's
 * generic options have been removed. When no paths are given, the defaults
 * "test_in" and "test_out" are used. Any other argument count prints the
 * usage line and exits with status 2 instead of continuing.
 *
 * @param args optional generic options followed by {@code <in> <out>}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    conf.set("mapred.job.tracker", "192.168.108.101:9001");
    conf.set("fs.default.name", "hdfs://192.168.108.101:9000");
    conf.set("mapred.jar", "TestPartionar.jar");

    // Parse before creating the Job so that generic options applied to conf
    // are visible to the job.
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length == 0) {
        // No paths supplied: fall back to the original hard-coded locations.
        otherArgs = new String[] { "test_in", "test_out" };
    }
    if (otherArgs.length != 2) {
        System.err.println("Usage: Test Partionar <in> <out>");
        System.exit(2);
    }

    Job job = new Job(conf, "Test Partionar");
    job.setJarByClass(TestPartionar.class);
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);
    job.setPartitionerClass(getPartionar.class);
    // One reduce task per length tag, so each tag gets its own output file.
    job.setNumReduceTasks(3);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
}
但是輸出的時候,長度為2和長度為3的字串混在了一起,請問我的代碼哪裡是錯誤的?
轉載請註明出處,本文鏈接:https://www.uj5u.com/qita/87191.html
標籤:云存儲
上一篇:請教大俠們怎么弄??????
