使用hadoop restful api實現對集群信息的一些統計
(適用於hadoop 2.7及以上版本)
涉及到RESTful API
ResourceManager REST API’s:
https://hadoop.apache.org/docs/stable/hadoop-yarn/hadoop-yarn-site/ResourceManagerRest.html
WebHDFS REST API:
https://hadoop.apache.org/docs/stable/hadoop-project-dist/hadoop-hdfs/WebHDFS.html
MapReduce History Server REST API’s:
https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html
Spark Monitoring and Instrumentation:
https://spark.apache.org/docs/latest/monitoring.html
1. 統計HDFS文件系統實時使用情況
- URL
https://emr-header-1:50070/webhdfs/v1/?user.name=hadoop&op=GETCONTENTSUMMARY
- 返回結果:
{ "ContentSummary": { "directoryCount": 2, "fileCount" : 1, "length" : 24930, "quota" : -1, "spaceConsumed" : 24930, "spaceQuota" : -1 } }
- 關於返回結果的說明:
{ "name" : "ContentSummary", "properties": { "ContentSummary": { "type" : "object", "properties": { "directoryCount": { "description": "The number of directories.", "type" : "integer", "required" : true }, "fileCount": { "description": "The number of files.", "type" : "integer", "required" : true }, "length": { "description": "The number of bytes used by the content.", "type" : "integer", "required" : true }, "quota": { "description": "The namespace quota of this directory.", "type" : "integer", "required" : true }, "spaceConsumed": { "description": "The disk space consumed by the content.", "type" : "integer", "required" : true }, "spaceQuota": { "description": "The disk space quota.", "type" : "integer", "required" : true } } } } }
注意length與spaceConsumed的關係:length是文件內容的字節數,spaceConsumed是實際佔用的磁盤空間,約等於length乘以HDFS副本數。
如果要統計各個組工作目錄的使用情況,使用如下請求:
https://emr-header-1:50070/webhdfs/v1/user/feed_aliyun?user.name=hadoop&op=GETCONTENTSUMMARY
2. 查看集群的實時信息和狀態
- URL
https://emr-header-1:8088/ws/v1/cluster
- 返回結果
{
"clusterInfo": {
"id": 1495123166259,
"startedOn": 1495123166259,
"state": "STARTED",
"haState": "ACTIVE",
"rmStateStoreName": "org.apache.hadoop.yarn.server.resourcemanager.recovery.NullRMStateStore",
"resourceManagerVersion": "2.7.2",
"resourceManagerBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum c63f7cc71b8f63249e35126f0f7492d",
"resourceManagerVersionBuiltOn": "2017-04-17T12:28Z",
"hadoopVersion": "2.7.2",
"hadoopBuildVersion": "2.7.2 from 4bee04d3d1c27d7ef559365d3bdd2a8620807bfc by root source checksum 3329b146070a2bc9e249fa9ba9fb55",
"hadoopVersionBuiltOn": "2017-04-17T12:18Z",
"haZooKeeperConnectionState": "ResourceManager HA is not enabled."
}
}
3. 查看資源隊列的實時信息,包括隊列的配額信息、資源使用實時情況
- URL
https://emr-header-1:8088/ws/v1/cluster/scheduler
- 返回結果
{
"scheduler": {
"schedulerInfo": {
"type": "capacityScheduler",
"capacity": 100,
"usedCapacity": 0,
"maxCapacity": 100,
"queueName": "root",
"queues": {
"queue": [
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "algorithm_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "dcps_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 31,
"usedCapacity": 0,
"maxCapacity": 100,
"absoluteCapacity": 31,
"absoluteMaxCapacity": 100,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "default",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 3100,
"maxApplicationsPerUser": 3100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 13088,
"vCores": 8
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 4064,
"vCores": 3
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 15.000001,
"usedCapacity": 0,
"maxCapacity": 100,
"absoluteCapacity": 15.000001,
"absoluteMaxCapacity": 100,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "feed_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 1500,
"maxApplicationsPerUser": 7500,
"userLimit": 100,
"users": null,
"userLimitFactor": 5,
"AMResourceLimit": {
"memory": 12320,
"vCores": 8
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 9856,
"vCores": 7
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 51,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 51,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "hot_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 5100,
"maxApplicationsPerUser": 5100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 6688,
"vCores": 5
},
"preemptionDisabled": true
},
{
"type": "capacitySchedulerLeafQueueInfo",
"capacity": 1,
"usedCapacity": 0,
"maxCapacity": 90,
"absoluteCapacity": 1,
"absoluteMaxCapacity": 90,
"absoluteUsedCapacity": 0,
"numApplications": 0,
"queueName": "push_aliyun",
"state": "RUNNING",
"resourcesUsed": {
"memory": 0,
"vCores": 0
},
"hideReservationQueues": false,
"nodeLabels": [
"*"
],
"numActiveApplications": 0,
"numPendingApplications": 0,
"numContainers": 0,
"maxApplications": 100,
"maxApplicationsPerUser": 100,
"userLimit": 100,
"users": null,
"userLimitFactor": 1,
"AMResourceLimit": {
"memory": 11776,
"vCores": 7
},
"usedAMResource": {
"memory": 0,
"vCores": 0
},
"userAMResourceLimit": {
"memory": 160,
"vCores": 1
},
"preemptionDisabled": true
}
]
}
}
}
}
4. 查看實時的作業列表,列表信息中也包含了作業運行的詳情信息,包括作業名稱、id、運行狀態、起止時間,資源使用情況。
- URL
https://emr-header-1:8088/ws/v1/cluster/apps
- 返回結果
{
"apps":
{
"app":
[
{
"finishedTime" : 1326815598530,
"amContainerLogs" : "https://host.domain.com:8042/node/containerlogs/container_1326815542473_0001_01_000001",
"trackingUI" : "History",
"state" : "FINISHED",
"user" : "user1",
"id" : "application_1326815542473_0001",
"clusterId" : 1326815542473,
"finalStatus" : "SUCCEEDED",
"amHostHttpAddress" : "host.domain.com:8042",
"progress" : 100,
"name" : "word count",
"startedTime" : 1326815573334,
"elapsedTime" : 25196,
"diagnostics" : "",
"trackingUrl" : "https://host.domain.com:8088/proxy/application_1326815542473_0001/jobhistory/job/job_1326815542473_1_1",
"queue" : "default",
"allocatedMB" : 0,
"allocatedVCores" : 0,
"runningContainers" : 0,
"memorySeconds" : 151730,
"vcoreSeconds" : 103
},
{
"finishedTime" : 1326815789546,
"amContainerLogs" : "https://host.domain.com:8042/node/containerlogs/container_1326815542473_0002_01_000001",
"trackingUI" : "History",
"state" : "FINISHED",
"user" : "user1",
"id" : "application_1326815542473_0002",
"clusterId" : 1326815542473,
"finalStatus" : "SUCCEEDED",
"amHostHttpAddress" : "host.domain.com:8042",
"progress" : 100,
"name" : "Sleep job",
"startedTime" : 1326815641380,
"elapsedTime" : 148166,
"diagnostics" : "",
"trackingUrl" : "https://host.domain.com:8088/proxy/application_1326815542473_0002/jobhistory/job/job_1326815542473_2_2",
"queue" : "default",
"allocatedMB" : 0,
"allocatedVCores" : 0,
"runningContainers" : 1,
"memorySeconds" : 640064,
"vcoreSeconds" : 442
}
]
}
}
- 如果要統計固定時間段的,可以加上"?finishedTimeBegin={時間戳}&finishedTimeEnd={時間戳}"參數,例如 https://emr-header-1:8088/ws/v1/cluster/apps?finishedTimeBegin=1496742124000&finishedTimeEnd=1496742134000
5. 統計作業掃描的數據量情況
job掃描的數據量,需要通過History Server的RESTful API查詢,MapReduce的和Spark的又有一些差異。
5.1 Mapreduce job掃描數據量
- URL
https://emr-header-1:19888/ws/v1/history/mapreduce/jobs/job_1495123166259_0962/counters
- 返回結果(以下為示例數據,其中的job id與上面URL中的job id不同)
{
"jobCounters" : {
"id" : "job_1326381300833_2_2",
"counterGroup" : [
{
"counterGroupName" : "Shuffle Errors",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BAD_ID"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "CONNECTION"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "IO_ERROR"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_LENGTH"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_MAP"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "WRONG_REDUCE"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.FileSystemCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2483,
"name" : "FILE_BYTES_READ"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 108525,
"name" : "FILE_BYTES_WRITTEN"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_LARGE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FILE_WRITE_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 48,
"name" : "HDFS_BYTES_READ"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_BYTES_WRITTEN"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "HDFS_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_LARGE_READ_OPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "HDFS_WRITE_OPS"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.TaskCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "MAP_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "MAP_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 4800,
"name" : "MAP_OUTPUT_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2235,
"name" : "MAP_OUTPUT_MATERIALIZED_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 48,
"name" : "SPLIT_RAW_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "COMBINE_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "COMBINE_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "REDUCE_INPUT_GROUPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2235,
"name" : "REDUCE_SHUFFLE_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1200,
"name" : "REDUCE_INPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "REDUCE_OUTPUT_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2400,
"name" : "SPILLED_RECORDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "SHUFFLED_MAPS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "FAILED_SHUFFLE"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1,
"name" : "MERGED_MAP_OUTPUTS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 113,
"name" : "GC_TIME_MILLIS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 1830,
"name" : "CPU_MILLISECONDS"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 478068736,
"name" : "PHYSICAL_MEMORY_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 2159284224,
"name" : "VIRTUAL_MEMORY_BYTES"
},
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 378863616,
"name" : "COMMITTED_HEAP_BYTES"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BYTES_READ"
}
]
},
{
"counterGroupName" : "org.apache.hadoop.mapreduce.lib.output.FileOutputFormatCounter",
"counter" : [
{
"reduceCounterValue" : 0,
"mapCounterValue" : 0,
"totalCounterValue" : 0,
"name" : "BYTES_WRITTEN"
}
]
}
]
}
}
其中org.apache.hadoop.mapreduce.lib.input.FileInputFormatCounter裡面的BYTES_READ為job掃描的數據量
具體參數:https://hadoop.apache.org/docs/stable/hadoop-mapreduce-client/hadoop-mapreduce-client-hs/HistoryServerRest.html#Job_Counters_API
5.2 Spark job掃描數據量
- URL
https://emr-header-1:18080/api/v1/applications/application_1495123166259_1050/executors
每個executor的totalInputBytes總和為整個job的數據掃描量。
更多參考:https://spark.apache.org/docs/latest/monitoring.html
最後更新:2017-06-06 21:38:01