重讀 Avro 文件,並對文件進行簡單的 MapReduce(MR)計算
/**
 * MapReduce job that reads {@code UserActionLog} records from an Avro file and
 * counts how many records exist per province.
 *
 * <p>Usage: {@code ReadAvroInput [inputPath [outputPath]]}. When an argument is
 * omitted, the corresponding default path below is used, so running the job
 * without arguments behaves exactly as before.
 */
public class ReadAvroInput {

    /** Input file used when no input path is passed on the command line. */
    private static final String DEFAULT_INPUT_PATH = "/ReducerJoin/part-r-00000.avro";

    /** Output directory used when no output path is passed on the command line. */
    private static final String DEFAULT_OUTPUT_PATH = "/ReadAvroInput";

    /**
     * Emits {@code (province, 1)} for every Avro record it receives.
     */
    public static class ReadAvroInputMap
            extends Mapper<AvroKey<UserActionLog>, NullWritable, Text, IntWritable> {

        /** Constant count written for each record; never mutated. */
        private static final IntWritable ONE = new IntWritable(1);

        /** Reused output key, so a new Text is not allocated per record. */
        private final Text oKey = new Text();

        /**
         * Writes the record's province as the key with a count of one.
         *
         * @param key     wrapper holding the current {@code UserActionLog} record
         * @param value   unused; the input format supplies no value
         * @param context sink for the emitted key/value pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void map(AvroKey<UserActionLog> key, NullWritable value, Context context)
                throws IOException, InterruptedException {
            UserActionLog record = key.datum();
            // "getProvience" is the accessor name exposed by the generated class.
            oKey.set(record.getProvience().toString());
            context.write(oKey, ONE);
        }
    }

    /**
     * Sums the counts for each key. The driver registers this class both as the
     * combiner and as the reducer.
     */
    public static class ReadAvroInputReducer extends Reducer<Text, IntWritable, Text, IntWritable> {

        /** Reused output value, so a new IntWritable is not allocated per key. */
        private final IntWritable oValue = new IntWritable();

        /**
         * Adds up all values of one key and writes the total.
         *
         * @param key     the key being aggregated
         * @param values  partial counts for that key
         * @param context sink for the emitted key/total pair
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if the task is interrupted while writing
         */
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable value : values) {
                sum += value.get();
            }
            oValue.set(sum);
            context.write(key, oValue);
        }
    }

    /**
     * Configures and runs the job, then exits with status 0 on success and 1 on failure.
     *
     * @param args optional {@code [inputPath [outputPath]]}; missing entries fall
     *             back to the default paths
     * @throws IOException            if job setup or file system access fails
     * @throws ClassNotFoundException if a job class cannot be resolved
     * @throws InterruptedException   if waiting for the job is interrupted
     */
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        String inputLocation = args.length > 0 ? args[0] : DEFAULT_INPUT_PATH;
        String outputLocation = args.length > 1 ? args[1] : DEFAULT_OUTPUT_PATH;

        Configuration configuration = new Configuration();
        Job job = Job.getInstance(configuration);
        job.setJarByClass(ReadAvroInput.class);
        job.setJobName("重讀avro文件進行mr計算");

        job.setMapperClass(ReadAvroInputMap.class);
        job.setCombinerClass(ReadAvroInputReducer.class);
        job.setReducerClass(ReadAvroInputReducer.class);

        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(AvroKeyInputFormat.class);
        AvroJob.setInputKeySchema(job, UserActionLog.getClassSchema());

        FileInputFormat.addInputPath(job, new Path(inputLocation));

        Path outputPath = new Path(outputLocation);
        // Recursively remove any previous output before the job runs.
        outputPath.getFileSystem(configuration).delete(outputPath, true);
        FileOutputFormat.setOutputPath(job, outputPath);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
UserActionLog 是通過 mvn 指令根據 Avro schema 生成的類。
最後更新:2017-10-20 09:33:24
上一篇:
如何遠程連接阿裏雲主機服務器(Linux係統)
下一篇:
通過avro合並大文件 並計算文件詞頻
Samba 係列(二):在 Linux 命令行下管理 Samba4 AD 架構
Android Camera學習博客
阿裏雲雙11活動擼福利攻略雲市場篇 1.1億紅包賺不停
Ecs支持密鑰對登錄(openapi篇)
對標穀歌TPU,比特大陸第一代深度學習專用處理器全球首發
Android開發6——布局中的wrap_content和fill_parent以及match_parent
全國第一家互聯網法院來了,今後打電商官司會像網購一樣方便
Ztorg木馬分析: 從Android root木馬演變到短信吸血鬼
點播帶皮膚播放器,不帶皮膚播放器的區別
零點之戰——阿裏雲2017雙11技術探訪