digraph G {
subgraph cluster0 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 11 ms";
1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 10 ms<br>number of output rows: 1"];
}
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 3<br>shuffle write time total (min, med, max (stageId: taskId))<br>28 ms (1 ms, 11 ms, 16 ms (stage 2.0: task 5))<br>records read: 3<br>local bytes read: 59.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 118.0 B<br>local blocks read: 1<br>remote blocks read: 2<br>data size total (min, med, max (stageId: taskId))<br>48.0 B (16.0 B, 16.0 B, 16.0 B (stage 2.0: task 5))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>177.0 B (59.0 B, 59.0 B, 59.0 B (stage 2.0: task 5))"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n1.2 s (123 ms, 516 ms, 533 ms (stage 2.0: task 4))";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>1.1 s (122 ms, 491 ms, 512 ms (stage 2.0: task 4))<br>number of output rows: 3"];
}
5 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 7<br>metadata time: 0 ms<br>size of files read: 47.0 MiB<br>number of output rows: 466,523"];
2->1;
4->2;
5->4;
}
6
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (2)
Exchange SinglePartition, true, [id=#34]
HashAggregate(keys=[], functions=[partial_count(1)])
WholeStageCodegen (1)
FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/green_tripdata/2023/green_tripdata_20..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>
== Parsed Logical Plan ==
Aggregate [count(1) AS count#68L]
+- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
== Analyzed Logical Plan ==
count: bigint
Aggregate [count(1) AS count#68L]
+- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
== Optimized Logical Plan ==
Aggregate [count(1) AS count#68L]
+- Project
+- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
== Physical Plan ==
*(2) HashAggregate(keys=[], functions=[count(1)], output=[count#68L])
+- Exchange SinglePartition, true, [id=#34]
+- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#71L])
+- FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/green_tripdata/2023/green_tripdata_20..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>