digraph G {
subgraph cluster0 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 2 ms";
1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 2 ms<br>number of output rows: 1"];
}
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 200<br>shuffle write time total (min, med, max (stageId: taskId))<br>44 ms (0 ms, 0 ms, 0 ms (stage 2.0: task 87))<br>records read: 200<br>local bytes read: 7.0 KiB<br>fetch wait time: 0 ms<br>remote bytes read: 4.6 KiB<br>local blocks read: 121<br>remote blocks read: 79<br>data size total (min, med, max (stageId: taskId))<br>3.1 KiB (16.0 B, 16.0 B, 16.0 B (stage 2.0: task 29))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>11.5 KiB (59.0 B, 59.0 B, 59.0 B (stage 2.0: task 29))"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n1.1 s (2 ms, 4 ms, 32 ms (stage 2.0: task 30))";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>1.0 s (2 ms, 4 ms, 30 ms (stage 2.0: task 30))<br>number of output rows: 200"];
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>564 ms (0 ms, 2 ms, 16 ms (stage 2.0: task 30))<br>peak memory total (min, med, max (stageId: taskId))<br>12.5 GiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 2.0: task 29))<br>number of output rows: 330,975<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 2.0: task 29))"];
}
6 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 331,002<br>shuffle write time total (min, med, max (stageId: taskId))<br>662 ms (14 ms, 19 ms, 70 ms (stage 1.0: task 1))<br>records read: 331,002<br>local bytes read total (min, med, max (stageId: taskId))<br>1102.7 KiB (4.2 KiB, 6.0 KiB, 6.6 KiB (stage 2.0: task 106))<br>fetch wait time total (min, med, max (stageId: taskId))<br>60 ms (0 ms, 0 ms, 11 ms (stage 2.0: task 209))<br>remote bytes read total (min, med, max (stageId: taskId))<br>1037.6 KiB (4.2 KiB, 4.7 KiB, 6.7 KiB (stage 2.0: task 225))<br>local blocks read: 2,884<br>remote blocks read: 2,716<br>data size total (min, med, max (stageId: taskId))<br>5.1 MiB (155.4 KiB, 186.1 KiB, 196.1 KiB (stage 1.0: task 8))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>2.1 MiB (60.1 KiB, 69.6 KiB, 111.6 KiB (stage 1.0: task 5))"];
subgraph cluster7 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n2.5 m (4.0 s, 5.0 s, 6.4 s (stage 1.0: task 7))";
8 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>2.5 m (4.0 s, 5.0 s, 6.3 s (stage 1.0: task 7))<br>peak memory total (min, med, max (stageId: taskId))<br>1806.0 MiB (64.5 MiB, 64.5 MiB, 64.5 MiB (stage 1.0: task 2))<br>number of output rows: 331,002<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1.4, 1.4, 1.4 (stage 1.0: task 2))"];
9 [labelType="html" label="<br><b>Project</b><br><br>"];
10 [labelType="html" label="<b>Scan ExistingRDD</b><br><br>number of output rows: 33,832,162"];
}
2->1;
4->2;
5->4;
6->5;
8->6;
9->8;
10->9;
}
11
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (3)
Exchange SinglePartition, true, [id=#33]
HashAggregate(keys=[], functions=[partial_count(1)])
HashAggregate(keys=[userId#0L], functions=[])
WholeStageCodegen (2)
Exchange hashpartitioning(userId#0L, 200), true, [id=#28]
HashAggregate(keys=[userId#0L], functions=[])
Project [userId#0L]
Scan ExistingRDD[userId#0L,movieId#1L,rating#2,timestamp_str#3L]
WholeStageCodegen (1)