digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (7)\n \nduration: total (min, med, max (stageId: taskId))\n73 ms (0 ms, 0 ms, 5 ms (stage 59.0: task 1825))";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>35 ms (0 ms, 0 ms, 4 ms (stage 59.0: task 1825))<br>peak memory total (min, med, max (stageId: taskId))<br>735.3 MiB (256.0 KiB, 256.0 KiB, 64.3 MiB (stage 59.0: task 1825))<br>number of output rows: 11<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 59.0: task 1825))"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3,560<br>shuffle write time total (min, med, max (stageId: taskId))<br>861 ms (2 ms, 4 ms, 8 ms (stage 50.0: task 1784))<br>records read: 2,200<br>local bytes read total (min, med, max (stageId: taskId))<br>160.2 KiB (0.0 B, 0.0 B, 15.9 KiB (stage 63.0: task 1845))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 59.0: task 1825))<br>local blocks read: 2,200<br>data size total (min, med, max (stageId: taskId))<br>117.2 KiB (568.0 B, 608.0 B, 608.0 B (stage 50.0: task 1621))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>257.2 KiB (1250.0 B, 1332.0 B, 1340.0 B (stage 50.0: task 1689))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n20.7 s (31 ms, 97 ms, 254 ms (stage 50.0: task 1701))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>19.4 s (27 ms, 91 ms, 248 ms (stage 50.0: task 1701))<br>peak memory total (min, med, max (stageId: taskId))<br>50.0 MiB (256.0 KiB, 256.0 KiB, 256.0 KiB (stage 50.0: task 1620))<br>number of output rows: 3,560"];
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 90,497,183"];
}
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n20.4 s (0 ms, 96 ms, 257 ms (stage 50.0: task 1620))";
9 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 50.0: task 1620))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.1 MiB, 64.1 MiB, 64.1 MiB (stage 50.0: task 1620))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 50.0: task 1620))"];
}
10 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 145,033<br>shuffle write time total (min, med, max (stageId: taskId))<br>32 ms (14 ms, 18 ms, 18 ms (stage 49.0: task 1619))<br>records read: 145,033<br>local bytes read total (min, med, max (stageId: taskId))<br>1393.5 KiB (6.0 KiB, 6.9 KiB, 7.9 KiB (stage 50.0: task 1813))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 50.0: task 1620))<br>local blocks read: 400<br>data size total (min, med, max (stageId: taskId))<br>4.6 MiB (2.2 MiB, 2.4 MiB, 2.4 MiB (stage 49.0: task 1618))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1393.5 KiB (669.7 KiB, 723.7 KiB, 723.7 KiB (stage 49.0: task 1618))"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n593 ms (289 ms, 304 ms, 304 ms (stage 49.0: task 1619))";
12 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 145,033"];
}
13 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 152,288"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n637 ms (306 ms, 331 ms, 331 ms (stage 49.0: task 1618))";
15 [labelType="html" label="<br><b>Project</b><br><br>"];
16 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 86,537"];
17 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 86,537"];
}
subgraph cluster18 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: total (min, med, max (stageId: taskId))\n19.1 s (0 ms, 92 ms, 251 ms (stage 50.0: task 1701))";
19 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>742 ms (1 ms, 3 ms, 30 ms (stage 50.0: task 1620))<br>peak memory total (min, med, max (stageId: taskId))<br>14.0 GiB (66.0 MiB, 72.0 MiB, 80.0 MiB (stage 50.0: task 1624))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 50.0: task 1620))"];
}
20 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 33,832,162<br>shuffle write time total (min, med, max (stageId: taskId))<br>719 ms (20 ms, 24 ms, 39 ms (stage 48.0: task 1600))<br>records read: 33,832,162<br>local bytes read total (min, med, max (stageId: taskId))<br>67.0 MiB (141.1 KiB, 332.1 KiB, 756.8 KiB (stage 50.0: task 1701))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 50.0: task 1620))<br>remote bytes read total (min, med, max (stageId: taskId))<br>49.8 MiB (105.9 KiB, 246.4 KiB, 561.6 KiB (stage 50.0: task 1701))<br>local blocks read: 3,200<br>remote blocks read: 2,400<br>data size total (min, med, max (stageId: taskId))<br>516.2 MiB (15.3 MiB, 18.3 MiB, 19.7 MiB (stage 48.0: task 1590))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>116.8 MiB (3.5 MiB, 4.2 MiB, 4.4 MiB (stage 48.0: task 1590))"];
subgraph cluster21 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: total (min, med, max (stageId: taskId))\n2.4 m (4.2 s, 4.6 s, 6.3 s (stage 48.0: task 1595))";
22 [labelType="html" label="<br><b>Project</b><br><br>"];
23 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 33,832,162"];
24 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 33,832,162"];
}
2->0;
3->2;
5->3;
6->5;
7->6;
9->7;
10->9;
12->10;
13->12;
15->13;
16->15;
17->16;
19->7;
20->19;
22->20;
23->22;
24->23;
}
25
CollectLimit 11
HashAggregate(keys=[genre#261], functions=[count(1)])
WholeStageCodegen (7)
Exchange hashpartitioning(genre#261, 200), true, [id=#355]
HashAggregate(keys=[genre#261], functions=[partial_count(1)])
Project [genre#261]
SortMergeJoin [movieId#200L], [movieId#1L], Inner
WholeStageCodegen (6)
Sort [movieId#200L ASC NULLS FIRST], false, 0
WholeStageCodegen (3)
Exchange hashpartitioning(movieId#200L, 200), true, [id=#340]
Filter genre#261 INSET (Crime,Fantasy,Comedy,Children,Adventure,Musical,Romance,Mystery,War,Horror,Film-Noir,Documentary,Drama,Action,Sci-Fi,Animation,Western,Thriller)
WholeStageCodegen (2)
Generate explode(split(genres#202, \|, -1)), [movieId#200L], false, [genre#261]
Project [movieId#200L, genres#202]
Filter isnotnull(movieId#200L)
Scan ExistingRDD[movieId#200L,title#201,genres#202]
WholeStageCodegen (1)
Sort [movieId#1L ASC NULLS FIRST], false, 0
WholeStageCodegen (5)
Exchange hashpartitioning(movieId#1L, 200), true, [id=#346]
Project [movieId#1L]
Filter isnotnull(movieId#1L)
Scan ExistingRDD[userId#0L,movieId#1L,rating#2,timestamp_str#3L]
WholeStageCodegen (4)