digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (7)\n \nduration: total (min, med, max (stageId: taskId))\n137 ms (0 ms, 0 ms, 8 ms (stage 28.0: task 1589))";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>80 ms (0 ms, 0 ms, 7 ms (stage 28.0: task 1594))<br>peak memory total (min, med, max (stageId: taskId))<br>1202.0 MiB (256.0 KiB, 256.0 KiB, 64.3 MiB (stage 28.0: task 1589))<br>number of output rows: 18<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 28.0: task 1589))"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3,560<br>shuffle write time total (min, med, max (stageId: taskId))<br>817 ms (3 ms, 3 ms, 7 ms (stage 19.0: task 1489))<br>records read: 3,560<br>local bytes read total (min, med, max (stageId: taskId))<br>257.2 KiB (0.0 B, 0.0 B, 15.9 KiB (stage 32.0: task 1609))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 28.0: task 1589))<br>local blocks read: 3,560<br>data size total (min, med, max (stageId: taskId))<br>117.2 KiB (568.0 B, 608.0 B, 608.0 B (stage 19.0: task 1385))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>257.2 KiB (1250.0 B, 1332.0 B, 1340.0 B (stage 19.0: task 1453))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n17.5 s (35 ms, 82 ms, 223 ms (stage 19.0: task 1429))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>16.3 s (29 ms, 75 ms, 215 ms (stage 19.0: task 1429))<br>peak memory total (min, med, max (stageId: taskId))<br>50.0 MiB (256.0 KiB, 256.0 KiB, 256.0 KiB (stage 19.0: task 1384))<br>number of output rows: 3,560"];
6 [labelType="html" label="<br><b>Project</b><br><br>"];
7 [labelType="html" label="<b>SortMergeJoin</b><br><br>number of output rows: 90,497,183"];
}
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: total (min, med, max (stageId: taskId))\n17.2 s (0 ms, 81 ms, 262 ms (stage 19.0: task 1384))";
9 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 19.0: task 1384))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.1 MiB, 64.1 MiB, 64.1 MiB (stage 19.0: task 1384))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 19.0: task 1384))"];
}
10 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 145,033<br>shuffle write time total (min, med, max (stageId: taskId))<br>55 ms (26 ms, 28 ms, 28 ms (stage 18.0: task 1383))<br>records read: 145,033<br>local bytes read total (min, med, max (stageId: taskId))<br>1393.5 KiB (6.0 KiB, 6.9 KiB, 7.9 KiB (stage 19.0: task 1577))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 19.0: task 1384))<br>local blocks read: 400<br>data size total (min, med, max (stageId: taskId))<br>4.6 MiB (2.2 MiB, 2.4 MiB, 2.4 MiB (stage 18.0: task 1382))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1393.5 KiB (669.7 KiB, 723.7 KiB, 723.7 KiB (stage 18.0: task 1382))"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n835 ms (403 ms, 432 ms, 432 ms (stage 18.0: task 1382))";
12 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 145,033"];
}
13 [labelType="html" label="<b>Generate</b><br><br>number of output rows: 152,288"];
subgraph cluster14 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n875 ms (406 ms, 469 ms, 469 ms (stage 18.0: task 1382))";
15 [labelType="html" label="<br><b>Project</b><br><br>"];
16 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 86,537"];
17 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 86,537"];
}
subgraph cluster18 {
isCluster="true";
label="WholeStageCodegen (5)\n \nduration: total (min, med, max (stageId: taskId))\n16.1 s (0 ms, 78 ms, 227 ms (stage 19.0: task 1384))";
19 [labelType="html" label="<b>Sort</b><br><br>sort time total (min, med, max (stageId: taskId))<br>673 ms (1 ms, 3 ms, 23 ms (stage 19.0: task 1384))<br>peak memory total (min, med, max (stageId: taskId))<br>14.0 GiB (66.0 MiB, 72.0 MiB, 80.0 MiB (stage 19.0: task 1388))<br>spill size total (min, med, max (stageId: taskId))<br>0.0 B (0.0 B, 0.0 B, 0.0 B (stage 19.0: task 1384))"];
}
20 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 33,832,162<br>shuffle write time total (min, med, max (stageId: taskId))<br>726 ms (20 ms, 25 ms, 43 ms (stage 17.0: task 1357))<br>records read: 33,832,162<br>local bytes read total (min, med, max (stageId: taskId))<br>50.5 MiB (106.3 KiB, 249.7 KiB, 569.1 KiB (stage 19.0: task 1465))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 19.0: task 1384))<br>remote bytes read total (min, med, max (stageId: taskId))<br>66.3 MiB (140.6 KiB, 328.0 KiB, 749.3 KiB (stage 19.0: task 1465))<br>local blocks read: 2,400<br>remote blocks read: 3,200<br>data size total (min, med, max (stageId: taskId))<br>516.2 MiB (15.3 MiB, 18.3 MiB, 19.7 MiB (stage 17.0: task 1354))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>116.8 MiB (3.5 MiB, 4.2 MiB, 4.4 MiB (stage 17.0: task 1354))"];
subgraph cluster21 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: total (min, med, max (stageId: taskId))\n2.5 m (4.0 s, 5.0 s, 6.4 s (stage 17.0: task 1354))";
22 [labelType="html" label="<br><b>Project</b><br><br>"];
23 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 33,832,162"];
24 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 33,832,162"];
}
2->0;
3->2;
5->3;
6->5;
7->6;
9->7;
10->9;
12->10;
13->12;
15->13;
16->15;
17->16;
19->7;
20->19;
22->20;
23->22;
24->23;
}
25
CollectLimit 21
HashAggregate(keys=[genre#106], functions=[count(1)])
WholeStageCodegen (7)
Exchange hashpartitioning(genre#106, 200), true, [id=#243]
HashAggregate(keys=[genre#106], functions=[partial_count(1)])
Project [genre#106]
SortMergeJoin [movieId#86L], [movieId#1L], Inner
WholeStageCodegen (6)
Sort [movieId#86L ASC NULLS FIRST], false, 0
WholeStageCodegen (3)
Exchange hashpartitioning(movieId#86L, 200), true, [id=#228]
Filter genre#106 INSET (Crime,Fantasy,Comedy,Children,Adventure,Musical,Romance,Mystery,War,Horror,Film-Noir,Documentary,Drama,Action,Sci-Fi,Animation,Western,Thriller)
WholeStageCodegen (2)
Generate explode(split(genres#88, \|, -1)), [movieId#86L], false, [genre#106]
Project [movieId#86L, genres#88]
Filter isnotnull(movieId#86L)
Scan ExistingRDD[movieId#86L,title#87,genres#88]
WholeStageCodegen (1)
Sort [movieId#1L ASC NULLS FIRST], false, 0
WholeStageCodegen (5)
Exchange hashpartitioning(movieId#1L, 200), true, [id=#234]
Project [movieId#1L]
Filter isnotnull(movieId#1L)
Scan ExistingRDD[userId#0L,movieId#1L,rating#2,timestamp_str#3L]
WholeStageCodegen (4)