digraph G {
subgraph cluster0 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 15 ms";
1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 14 ms<br>number of output rows: 1"];
}
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 20<br>shuffle write time total (min, med, max (stageId: taskId))<br>32 ms (0 ms, 0 ms, 10 ms (stage 4.0: task 24))<br>records read: 20<br>local bytes read: 590.0 B<br>fetch wait time: 4 ms<br>remote bytes read: 590.0 B<br>local blocks read: 10<br>remote blocks read: 10<br>data size total (min, med, max (stageId: taskId))<br>320.0 B (16.0 B, 16.0 B, 16.0 B (stage 4.0: task 24))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1180.0 B (59.0 B, 59.0 B, 59.0 B (stage 4.0: task 24))"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n20.8 s (356 ms, 1.2 s, 1.3 s (stage 4.0: task 31))";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>20.8 s (356 ms, 1.2 s, 1.3 s (stage 4.0: task 31))<br>number of output rows: 20"];
}
5 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 7<br>metadata time: 0 ms<br>size of files read: 2.2 GiB<br>number of output rows: 22,400,728"];
2->1;
4->2;
5->4;
}
6
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (2)
Exchange SinglePartition, true, [id=#53]
HashAggregate(keys=[], functions=[partial_count(1)])
WholeStageCodegen (1)
FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>
== Parsed Logical Plan ==
Aggregate [count(1) AS count#89L]
+- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv
== Analyzed Logical Plan ==
count: bigint
Aggregate [count(1) AS count#89L]
+- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv
== Optimized Logical Plan ==
Aggregate [count(1) AS count#89L]
+- Project
+- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv
== Physical Plan ==
*(2) HashAggregate(keys=[], functions=[count(1)], output=[count#89L])
+- Exchange SinglePartition, true, [id=#53]
+- *(1) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#92L])
+- FileScan csv [] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<>