Show the Stage ID and Task ID that correspond to the max metric
// Query-plan graph. Each numbered node is one operator; each "cluster"
// subgraph groups the operators labelled with the same WholeStageCodegen id.
// Node labels are HTML snippets carrying the metrics shown for that operator.
digraph G {
// Node 0: final operator; every edge chain declared below ends here.
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
// Codegen stage 5: SortMergeJoin (node 3) of the two sorted inputs
// (nodes 5 and 12), followed by the final Project (node 2).
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (5)";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 11"];
}
// Codegen stage 2: Sort over the output of Exchange node 6.
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (2)";
5 [labelType="html" label="<b>Sort</b><br><br>sort time: 28 ms<br>peak memory: 72.0 MiB<br>spill size: 0.0 B"];
}
// Exchange for the larger input (33,832,162 shuffle records written).
// Its "max" metrics are attributed to tasks of stage 45.0.
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 33,832,162<br>shuffle write time total (min, med, max (stageId: taskId))<br>1.8 s (43 ms, 63 ms, 101 ms (stage 45.0: task 1585))<br>records read: 133,284<br>local bytes read: 1576.2 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 2.0 MiB<br>local blocks read: 12<br>remote blocks read: 16<br>data size total (min, med, max (stageId: taskId))<br>3.3 GiB (99.2 MiB, 119.1 MiB, 127.8 MiB (stage 45.0: task 1559))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>877.2 MiB (26.2 MiB, 31.2 MiB, 33.4 MiB (stage 45.0: task 1559))"];
// Codegen stage 1: Scan -> Filter -> Project chain that feeds Exchange node 6.
// The cluster label also carries the stage's duration metric.
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.5 m (3.5 s, 6.0 s, 6.6 s (stage 45.0: task 1559))";
8 [labelType="html" label="<br><b>Project</b><br><br>"];
9 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 33,832,162"];
10 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 33,832,162"];
}
// Codegen stage 4: Sort over the output of Exchange node 13.
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (4)";
12 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 64.1 MiB<br>spill size: 0.0 B"];
}
// Exchange for the smaller input (86,537 shuffle records written).
// Its "max" metrics are attributed to tasks of stage 46.0.
13 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 86,537<br>shuffle write time total (min, med, max (stageId: taskId))<br>36 ms (17 ms, 19 ms, 19 ms (stage 46.0: task 1587))<br>records read: 425<br>local bytes read: 9.8 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 9.1 KiB<br>local blocks read: 1<br>remote blocks read: 1<br>data size total (min, med, max (stageId: taskId))<br>6.4 MiB (3.2 MiB, 3.2 MiB, 3.2 MiB (stage 46.0: task 1588))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>3.8 MiB (1924.7 KiB, 1944.1 KiB, 1944.1 KiB (stage 46.0: task 1587))"];
// Codegen stage 3: Scan -> Filter chain that feeds Exchange node 13.
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n681 ms (303 ms, 378 ms, 378 ms (stage 46.0: task 1588))";
15 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 86,537"];
16 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 86,537"];
}
// Edges run from each operator to the operator drawn downstream of it,
// i.e. from the two scans (10 and 16) towards CollectLimit (0).
// First: join output -> final projection -> limit.
2->0;
3->2;
// Left join input: 10 -> 9 -> 8 -> 6 -> 5 -> 3.
5->3;
6->5;
8->6;
9->8;
10->9;
// Right join input: 16 -> 15 -> 13 -> 12 -> 3.
12->3;
13->12;
15->13;
16->15;
}
17
CollectLimit 11
Project [cast(movieId#1L as string) AS movieId#229, cast(userId#0L as string) AS userId#230, cast(rating#2 as string) AS rating#231, cast(timestamp_str#3L as string) AS timestamp_str#232, date#39, cast(month#120 as string) AS month#234, cast(year#127 as string) AS year#235, time_of_rating#135, title#201, genres#202]
Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) AS date#39, month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS month#120, year(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS year#127, CASE WHEN ((month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) >= 1) AND (month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) <= 6)) THEN Early Year ELSE Late Year END AS time_of_rating#135]
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(movieId#1L as string) AS movieId#229, cast(userId#0L as string) AS userId#230, cast(rating#2 as string) AS rating#231, cast(timestamp_str#3L as string) AS timestamp_str#232, cast(date#39 as string) AS date#233, cast(month#120 as string) AS month#234, cast(year#127 as string) AS year#235, cast(time_of_rating#135 as string) AS time_of_rating#236, cast(title#201 as string) AS title#237, cast(genres#202 as string) AS genres#238]
      +- Project [movieId#1L, userId#0L, rating#2, timestamp_str#3L, date#39, month#120, year#127, time_of_rating#135, title#201, genres#202]
         +- Join Inner, (movieId#1L = movieId#200L)
            :- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month#120, year#127, CASE WHEN ((month#120 >= 1) AND (month#120 <= 6)) THEN Early Year ELSE Late Year END AS time_of_rating#135]
            :  +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month#120, year(cast(date#39 as date)) AS year#127]
            :     +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month(cast(date#39 as date)) AS month#120]
            :        +- Sort [date#39 ASC NULLS FIRST], true
            :           +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) AS date#39]
            :              +- LogicalRDD [userId#0L, movieId#1L, rating#2, timestamp_str#3L], false
            +- Project [movieId#200L, title#201, genres#202]
               +- LogicalRDD [movieId#200L, title#201, genres#202], false
== Analyzed Logical Plan ==
movieId: string, userId: string, rating: string, timestamp_str: string, date: string, month: string, year: string, time_of_rating: string, title: string, genres: string
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(movieId#1L as string) AS movieId#229, cast(userId#0L as string) AS userId#230, cast(rating#2 as string) AS rating#231, cast(timestamp_str#3L as string) AS timestamp_str#232, cast(date#39 as string) AS date#233, cast(month#120 as string) AS month#234, cast(year#127 as string) AS year#235, cast(time_of_rating#135 as string) AS time_of_rating#236, cast(title#201 as string) AS title#237, cast(genres#202 as string) AS genres#238]
      +- Project [movieId#1L, userId#0L, rating#2, timestamp_str#3L, date#39, month#120, year#127, time_of_rating#135, title#201, genres#202]
         +- Join Inner, (movieId#1L = movieId#200L)
            :- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month#120, year#127, CASE WHEN ((month#120 >= 1) AND (month#120 <= 6)) THEN Early Year ELSE Late Year END AS time_of_rating#135]
            :  +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month#120, year(cast(date#39 as date)) AS year#127]
            :     +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, date#39, month(cast(date#39 as date)) AS month#120]
            :        +- Sort [date#39 ASC NULLS FIRST], true
            :           +- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) AS date#39]
            :              +- LogicalRDD [userId#0L, movieId#1L, rating#2, timestamp_str#3L], false
            +- Project [movieId#200L, title#201, genres#202]
               +- LogicalRDD [movieId#200L, title#201, genres#202], false
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(movieId#1L as string) AS movieId#229, cast(userId#0L as string) AS userId#230, cast(rating#2 as string) AS rating#231, cast(timestamp_str#3L as string) AS timestamp_str#232, date#39, cast(month#120 as string) AS month#234, cast(year#127 as string) AS year#235, time_of_rating#135, title#201, genres#202]
      +- Join Inner, (movieId#1L = movieId#200L)
         :- Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) AS date#39, month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS month#120, year(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS year#127, CASE WHEN ((month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) >= 1) AND (month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) <= 6)) THEN Early Year ELSE Late Year END AS time_of_rating#135]
         :  +- Filter isnotnull(movieId#1L)
         :     +- LogicalRDD [userId#0L, movieId#1L, rating#2, timestamp_str#3L], false
         +- Filter isnotnull(movieId#200L)
            +- LogicalRDD [movieId#200L, title#201, genres#202], false
== Physical Plan ==
CollectLimit 11
+- *(5) Project [cast(movieId#1L as string) AS movieId#229, cast(userId#0L as string) AS userId#230, cast(rating#2 as string) AS rating#231, cast(timestamp_str#3L as string) AS timestamp_str#232, date#39, cast(month#120 as string) AS month#234, cast(year#127 as string) AS year#235, time_of_rating#135, title#201, genres#202]
   +- *(5) SortMergeJoin [movieId#1L], [movieId#200L], Inner
      :- *(2) Sort [movieId#1L ASC NULLS FIRST], false, 0
      :  +- Exchange hashpartitioning(movieId#1L, 200), true, [id=#259]
      :     +- *(1) Project [userId#0L, movieId#1L, rating#2, timestamp_str#3L, from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) AS date#39, month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS month#120, year(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) AS year#127, CASE WHEN ((month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) >= 1) AND (month(cast(from_unixtime(timestamp_str#3L, yyyy-MM-dd, Some(GMT)) as date)) <= 6)) THEN Early Year ELSE Late Year END AS time_of_rating#135]
      :        +- *(1) Filter isnotnull(movieId#1L)
      :           +- *(1) Scan ExistingRDD[userId#0L,movieId#1L,rating#2,timestamp_str#3L]
      +- *(4) Sort [movieId#200L ASC NULLS FIRST], false, 0
         +- Exchange hashpartitioning(movieId#200L, 200), true, [id=#265]
            +- *(3) Filter isnotnull(movieId#200L)
               +- *(3) Scan ExistingRDD[movieId#200L,title#201,genres#202]