digraph G {
subgraph cluster0 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 16 ms";
1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 16 ms<br>number of output rows: 1"];
}
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 20<br>shuffle write time total (min, med, max (stageId: taskId))<br>42 ms (0 ms, 0 ms, 18 ms (stage 5.0: task 25))<br>records read: 20<br>local bytes read: 590.0 B<br>fetch wait time: 0 ms<br>remote bytes read: 590.0 B<br>local blocks read: 10<br>remote blocks read: 10<br>data size total (min, med, max (stageId: taskId))<br>320.0 B (16.0 B, 16.0 B, 16.0 B (stage 5.0: task 24))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1180.0 B (59.0 B, 59.0 B, 59.0 B (stage 5.0: task 24))"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n28.6 s (479 ms, 1.6 s, 1.7 s (stage 5.0: task 25))";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>28.5 s (478 ms, 1.6 s, 1.7 s (stage 5.0: task 27))<br>number of output rows: 20"];
}
5 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 7<br>metadata time: 0 ms<br>size of files read: 2.2 GiB<br>number of output rows: 22,400,728"];
2->1;
4->2;
5->4;
}
6
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (2)
Exchange SinglePartition, true, [id=#65]
HashAggregate(keys=[], functions=[partial_count(1)])
WholeStageCodegen (1)
FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>
== Parsed Logical Plan ==
Aggregate [count(1) AS count#110L]
+- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv
== Analyzed Logical Plan ==
count: bigint
Aggregate [count(1) AS count#110L]
+- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv
== Optimized Logical Plan ==
Aggregate [count(1) AS count#110L]
+- Project
+- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv
== Physical Plan ==
*(2) HashAggregate(keys=[], functions=[count(1)], output=[count#110L])
+- Exchange SinglePartition, true, [id=#65]
+- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#113L])
+- FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>