digraph G {
  subgraph cluster0 {
    isCluster="true";
    label="WholeStageCodegen (2)\n \nduration: 10 ms";
    1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 9 ms<br>number of output rows: 1"];
  }
  2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 5<br>shuffle write time total (min, med, max (stageId: taskId))<br>23 ms (0 ms, 1 ms, 11 ms (stage 3.0: task 7))<br>records read: 5<br>local bytes read: 177.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 118.0 B<br>local blocks read: 3<br>remote blocks read: 2<br>data size total (min, med, max (stageId: taskId))<br>80.0 B (16.0 B, 16.0 B, 16.0 B (stage 3.0: task 8))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>295.0 B (59.0 B, 59.0 B, 59.0 B (stage 3.0: task 8))"];
  subgraph cluster3 {
    isCluster="true";
    label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n8.3 s (707 ms, 1.8 s, 2.1 s (stage 3.0: task 7))";
    4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>8.2 s (706 ms, 1.7 s, 2.1 s (stage 3.0: task 7))<br>number of output rows: 5"];
  }
  5 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 559.8 MiB<br>number of output rows: 14,262,517"];
  2->1;
  4->2;
  5->4;
}
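The DOT text above is what the Spark UI's SQL tab renders as the query DAG: nodes 1, 2, 4 and 5 are the physical operators, the two cluster subgraphs are the whole-stage-codegen stages, and the edges run from child to parent (5->4->2->1, i.e. scan, partial aggregate, shuffle, final aggregate). The per-operator metrics are embedded in the HTML labels, so they can be tabulated with a few lines of Python; a minimal sketch, assuming the graph has been saved to a (hypothetical) plan.dot file:

import re

# Spark UI node lines look like:
#   1 [labelType="html" label="<b>HashAggregate</b><br><br>metric: value<br>..."];
dot_text = open("plan.dot").read()
node_re = re.compile(r'(\d+)\s+\[labelType="html" label="<b>(.*?)</b>(.*?)"\];')
edge_re = re.compile(r'(\d+)->(\d+);')

for node_id, operator, body in node_re.findall(dot_text):
    metrics = [m for m in body.split("<br>") if m]  # <br> separates the metric lines
    print(f"node {node_id}: {operator.strip()}")
    for metric in metrics:
        print(f"  {metric}")

print(edge_re.findall(dot_text))  # child -> parent pairs

Recent Spark releases also expose the same per-operator metrics as JSON through the monitoring REST API's sql endpoint, which avoids scraping the DOT at all.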
== Parsed Logical Plan ==
Aggregate [count(1) AS count#50L]
+- Relation[longitude#16,latitude#17,timestamp#18L,timezone#19] csv
== Analyzed Logical Plan ==
count: bigint
Aggregate [count(1) AS count#50L]
+- Relation[longitude#16,latitude#17,timestamp#18L,timezone#19] csv
== Optimized Logical Plan ==
Aggregate [count(1) AS count#50L]
+- Project
   +- Relation[longitude#16,latitude#17,timestamp#18L,timezone#19] csv
== Physical Plan ==
*(2) HashAggregate(keys=[], functions=[count(1)], output=[count#50L])
+- Exchange SinglePartition, true, [id=#46]
   +- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#53L])
      +- FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Twitter/twitter.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>
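The four dumps above are in the format that PySpark's DataFrame.explain(extended=True) prints. A minimal sketch that would produce a plan of this shape; the read options and app name are assumptions, since the Relation node only tells us the CSV was loaded with named, typed columns (longitude, latitude, timestamp, timezone):

from pyspark.sql import SparkSession

spark = SparkSession.builder.appName("twitter-count").getOrCreate()

# Assumption: header/inferSchema produced the named, typed columns seen in
# the Relation node; an explicit StructType schema would work equally well.
df = (spark.read
      .option("header", "true")
      .option("inferSchema", "true")
      .csv("s3a://data-repository-bkt/ECS765/Twitter/twitter.csv"))

# count() runs the two-stage plan above: a per-partition partial_count(1),
# an Exchange down to a single partition, then the final count(1).
print(df.count())  # the scan metric reports 14,262,517 output rows

# Prints the same == Parsed/Analyzed/Optimized/Physical Plan == sections.
df.groupBy().count().explain(extended=True)

Note how the optimized plan inserts an empty Project and the scan ends up with ReadSchema: struct<>: count(1) needs no columns, so none are materialized from the CSV, although every record is still parsed in order to be counted. On Spark 3.0+, explain(mode="codegen") would additionally dump the Java source generated for the two WholeStageCodegen stages.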