多路输入输出示例__示例程序_MapReduce_大数据计算服务-阿里云

目前ODPS支持多表的输入及输出。

测试准备

（1）准备好测试程序jar包，假设名字为mapreduce-examples.jar;

（2）准备好多路输入输出的测试表和资源；

创建表

create table wc_in1(key string, value string);
create table wc_in2(key string, value string);
create table mr_multiinout_out1 (key string, cnt bigint);
create table mr_multiinout_out2 (key string, cnt bigint) partitioned by (a string, b string);
alter table mr_multiinout_out2 add partition (a='1', b='1');
alter table mr_multiinout_out2 add partition (a='2', b='2');

添加资源
add jar mapreduce-examples.jar -f;

（3）使用tunnel导入数据；


   tunnel upload data1 wc_in1;
   tunnel upload data2 wc_in2;
导入wc_in1表的数据文件data1内容为：
    hello,odps
导入wc_in2表的数据文件data2内容为：
    hello,world
测试步骤
在odpscmd中执行MultipleInOut
jar -resources mapreduce-examples.jar -classpath mapreduce-examples.jar
    com.aliyun.odps.mapred.open.example.MultipleInOut wc_in1,wc_in2 mr_multiinout_out1,mr_multiinout_out2|a=1/b=1|out1,mr_multiinout_out2|a=2/b=2|out2;
预期结果
作业成功结束。mr_multiinout_out1中内容为：
+------------+------------+
| key        | cnt        |
+------------+------------+
| default    | 1          |
+------------+------------+
mr_multiinout_out2中内容为：
+--------+------------+---+---+
| key    | cnt        | a | b |
+--------+------------+---+---+
| odps   | 1          | 1 | 1 |
| world  | 1          | 1 | 1 |
| out1   | 1          | 1 | 1 |
| hello  | 2          | 2 | 2 |
| out2   | 1          | 2 | 2 |
+--------+------------+---+---+
代码示例
    package com.aliyun.odps.mapred.open.example;
    import java.io.IOException;
    import java.util.Iterator;
    import java.util.LinkedHashMap;
    import com.aliyun.odps.data.Record;
    import com.aliyun.odps.data.TableInfo;
    import com.aliyun.odps.mapred.JobClient;
    import com.aliyun.odps.mapred.MapperBase;
    import com.aliyun.odps.mapred.ReducerBase;
    import com.aliyun.odps.mapred.TaskContext;
    import com.aliyun.odps.mapred.conf.JobConf;
    import com.aliyun.odps.mapred.utils.InputUtils;
    import com.aliyun.odps.mapred.utils.OutputUtils;
    import com.aliyun.odps.mapred.utils.SchemaUtils;
    /**
     * Multi input & output example.
     **/
    public class MultipleInOut {
      public static class TokenizerMapper extends MapperBase {
        Record word;
        Record one;
        @Override
        public void setup(TaskContext context) throws IOException {
          word = context.createMapOutputKeyRecord();
          one = context.createMapOutputValueRecord();
          one.set(new Object[] { 1L });
        }
        @Override
        public void map(long recordNum, Record record, TaskContext context)
            throws IOException {
          for (int i = 0; i < record.getColumnCount(); i++) {
            word.set(new Object[] { record.get(i).toString() });
            context.write(word, one);
          }
        }
      }
      public static class SumReducer extends ReducerBase {
        private Record result;
        private Record result1;
        private Record result2;
        @Override
        public void setup(TaskContext context) throws IOException {
          result = context.createOutputRecord();
          result1 = context.createOutputRecord("out1");
          result2 = context.createOutputRecord("out2");
        }
        @Override
        public void reduce(Record key, Iterator<Record> values, TaskContext context)
            throws IOException {
          long count = 0;
          while (values.hasNext()) {
            Record val = values.next();
            count += (Long) val.get(0);
          }
          long mod = count % 3;
          if (mod == 0) {
            result.set(0, key.get(0));
            result.set(1, count);
            //不指定label，输出的默认(default)输出
            context.write(result);
          } else if (mod == 1) {
            result1.set(0, key.get(0));
            result1.set(1, count);
            context.write(result1, "out1");
          } else {
            result2.set(0, key.get(0));
            result2.set(1, count);
            context.write(result2, "out2");
          }
        }
        @Override
        public void cleanup(TaskContext context) throws IOException {
          Record result = context.createOutputRecord();
          result.set(0, "default");
          result.set(1, 1L);
          context.write(result);
          Record result1 = context.createOutputRecord("out1");
          result1.set(0, "out1");
          result1.set(1, 1L);
          context.write(result1, "out1");
          Record result2 = context.createOutputRecord("out2");
          result2.set(0, "out2");
          result2.set(1, 1L);
          context.write(result2, "out2");
        }
      }
      public static LinkedHashMap<String, String> convertPartSpecToMap(
          String partSpec) {
        LinkedHashMap<String, String> map = new LinkedHashMap<String, String>();
        if (partSpec != null && !partSpec.trim().isEmpty()) {
          String[] parts = partSpec.split("/");
          for (String part : parts) {
            String[] ss = part.split("=");
            if (ss.length != 2) {
              throw new RuntimeException("ODPS-0730001: error part spec format: "
                  + partSpec);
            }
            map.put(ss[0], ss[1]);
          }
        }
        return map;
      }
      public static void main(String[] args) throws Exception {
        String[] inputs = null;
        String[] outputs = null;
        if (args.length == 2) {
          inputs = args[0].split(",");
          outputs = args[1].split(",");
        } else {
          System.err.println("MultipleInOut in... out...");
          System.exit(1);
        }
        JobConf job = new JobConf();
        job.setMapperClass(TokenizerMapper.class);
        job.setReducerClass(SumReducer.class);
        job.setMapOutputKeySchema(SchemaUtils.fromString("word:string"));
        job.setMapOutputValueSchema(SchemaUtils.fromString("count:bigint"));
        //解析用户的输入表字符串
        for (String in : inputs) {
          String[] ss = in.split("\|");
          if (ss.length == 1) {
            InputUtils.addTable(TableInfo.builder().tableName(ss[0]).build(), job);
          } else if (ss.length == 2) {
            LinkedHashMap<String, String> map = convertPartSpecToMap(ss[1]);
            InputUtils.addTable(TableInfo.builder().tableName(ss[0]).partSpec(map).build(), job);
          } else {
            System.err.println("Style of input: " + in + " is not right");
            System.exit(1);
          }
        }
        //解析用户的输出表字符串
        for (String out : outputs) {
          String[] ss = out.split("\|");
          if (ss.length == 1) {
            OutputUtils.addTable(TableInfo.builder().tableName(ss[0]).build(), job);
          } else if (ss.length == 2) {
            LinkedHashMap<String, String> map = convertPartSpecToMap(ss[1]);
            OutputUtils.addTable(TableInfo.builder().tableName(ss[0]).partSpec(map).build(), job);
          } else if (ss.length == 3) {
            if (ss[1].isEmpty()) {
              LinkedHashMap<String, String> map = convertPartSpecToMap(ss[2]);
              OutputUtils.addTable(TableInfo.builder().tableName(ss[0]).partSpec(map).build(), job);
            } else {
              LinkedHashMap<String, String> map = convertPartSpecToMap(ss[1]);
              OutputUtils.addTable(TableInfo.builder().tableName(ss[0]).partSpec(map)
                  .label(ss[2]).build(), job);
            }
          } else {
            System.err.println("Style of output: " + out + " is not right");
            System.exit(1);
          }
        }
        JobClient.runJob(job);
      }
    }
  最后更新：2016-09-22 10:05:19
  上一篇： MapOnly示例__示例程序_MapReduce_大数据计算服务-阿里云
  下一篇： 多任务示例__示例程序_MapReduce_大数据计算服务-阿里云
相关内容
 OpenIdConnectConfig__数据类型_API_API 网关-阿里云
 DeleteRole__角色管理接口_RAM API文档_访问控制-阿里云
 提交备案初审流程__流程引导图_备案流程_备案-阿里云
 大数据早报：阿里云与中科院宣布合作发布量子计算云平台 10.12
 短信发送频率上有什么限制？__常见问题_短信服务-阿里云
 修改路由器属性__路由器相关接口_API 参考_云服务器 ECS-阿里云
 DRDS自定义注释__开发手册_分布式关系型数据库 DRDS-阿里云
 ALIYUN::ECS::EIP__资源列表_资源编排-阿里云
 附录一 元数据库数据字典__附录_使用手册_分析型数据库-阿里云
 key分区__分区_SQL语法参考_云数据库 OceanBase-阿里云
热门内容
 常见错误说明__附录_大数据计算服务-阿里云
 发送短信接口__API使用手册_短信服务-阿里云
 接口文档__Android_安全组件教程_移动安全-阿里云
 运营商错误码（联通）__常见问题_短信服务-阿里云
 设置短信模板__使用手册_短信服务-阿里云
 OSS 权限问题及排查__常见错误及排除_最佳实践_对象存储 OSS-阿里云
 消息通知__操作指南_批量计算-阿里云
 设备端快速接入(MQTT)__快速开始_阿里云物联网套件-阿里云
 查询API调用流量数据__API管理相关接口_API_API 网关-阿里云
 使用STS访问__JavaScript-SDK_SDK 参考_对象存储 OSS-阿里云
最新内容
 阿里云服务器ECS和阿里牛云有什么区别？深度解析云服务器选择
 阿里云服务器活跃排名查询及解读：揭秘云服务器性能与市场地位
 阿里云搭建专属云桌面：方案详解及最佳实践
 彻底关闭电脑版阿里云盘：方法详解及常见问题解答
 阿里云云效平台访问及使用指南
 阿里云私有云深度管理指南：从部署到运维全流程详解
 阿里云充值：深度解读背后的云计算资源与商业策略
 阿里云语音技术全解析：从基础服务到应用场景
 阿里云公有云申请全流程详解：从账号注册到资源部署
 阿里云混合云深度解读：架构、优势与应用场景

多路输入输出示例__示例程序_MapReduce_大数据计算服务-阿里云

测试准备

测试步骤

预期结果

代码示例

上一篇： MapOnly示例__示例程序_MapReduce_大数据计算服务-阿里云

下一篇： 多任务示例__示例程序_MapReduce_大数据计算服务-阿里云

相关内容

热门内容

最新内容

下一篇：多任务示例__示例程序_MapReduce_大数据计算服务-阿里云