== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(Month#500 as string) AS Month#739, cast(route#477 as string) AS route#740, cast(total_tip_amount#661 as string) AS total_tip_amount#741, cast(total_passenger_count#663 as string) AS total_passenger_count#742, cast(average_tip_per_passenger#668 as string) AS average_tip_per_passenger#743] +- Sort [average_tip_per_passenger#668 DESC NULLS LAST], true +- Project [Month#500, route#477, total_tip_amount#661, total_passenger_count#663, round((total_tip_amount#661 / total_passenger_count#663), 2) AS average_tip_per_passenger#668] +- Aggregate [Month#500, route#477], [Month#500, route#477, round(sum(cast(tip_amount#71 as double)), 2) AS total_tip_amount#661, sum(cast(passenger_count#63 as double)) AS total_passenger_count#663] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, concat_ws( to , Pickup_Borough#230, Dropoff_Borough#384) AS route#477] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Dropoff_Zone#408, service_zone#19 AS Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Zone#18 AS Dropoff_Zone#408, service_zone#19] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Borough#17 AS Dropoff_Borough#384, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Borough#17 AS Pickup_Borough#230, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == Month: string, route: string, total_tip_amount: string, total_passenger_count: string, average_tip_per_passenger: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(Month#500 as string) AS Month#739, cast(route#477 as string) AS route#740, cast(total_tip_amount#661 as string) AS total_tip_amount#741, cast(total_passenger_count#663 as string) AS total_passenger_count#742, cast(average_tip_per_passenger#668 as string) AS average_tip_per_passenger#743] +- Sort [average_tip_per_passenger#668 DESC NULLS LAST], true +- Project [Month#500, route#477, total_tip_amount#661, total_passenger_count#663, round((total_tip_amount#661 / total_passenger_count#663), 2) AS average_tip_per_passenger#668] +- Aggregate [Month#500, route#477], [Month#500, route#477, round(sum(cast(tip_amount#71 as double)), 2) AS total_tip_amount#661, sum(cast(passenger_count#63 as double)) AS total_passenger_count#663] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, concat_ws( to , Pickup_Borough#230, Dropoff_Borough#384) AS route#477] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Dropoff_Zone#408, service_zone#19 AS Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Zone#18 AS Dropoff_Zone#408, service_zone#19] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Borough#17 AS Dropoff_Borough#384, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Borough#17 AS Pickup_Borough#230, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(Month#500 as string) AS Month#739, route#477, cast(total_tip_amount#661 as string) AS total_tip_amount#741, cast(total_passenger_count#663 as string) AS total_passenger_count#742, cast(average_tip_per_passenger#668 as string) AS average_tip_per_passenger#743] +- Sort [average_tip_per_passenger#668 DESC NULLS LAST], true +- Aggregate [Month#500, route#477], [Month#500, route#477, round(sum(cast(tip_amount#71 as double)), 2) AS total_tip_amount#661, sum(cast(passenger_count#63 as double)) AS total_passenger_count#663, round((round(sum(cast(tip_amount#71 as double)), 2) / sum(cast(passenger_count#63 as double))), 2) AS average_tip_per_passenger#668] +- Project [passenger_count#63, tip_amount#71, concat_ws( to , Pickup_Borough#230, Borough#17) AS route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, passenger_count#63, DOLocationID#66, tip_amount#71, Borough#17 AS Pickup_Borough#230] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, passenger_count#63, PULocationID#65, DOLocationID#66, tip_amount#71] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Project [LocationID#16, Borough#17] : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Borough#17] +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == TakeOrderedAndProject(limit=11, orderBy=[average_tip_per_passenger#668 DESC NULLS LAST], output=[Month#739,route#477,total_tip_amount#741,total_passenger_count#742,average_tip_per_passenger#743]) +- *(4) HashAggregate(keys=[Month#500, route#477], functions=[sum(cast(tip_amount#71 as double)), sum(cast(passenger_count#63 as double))], output=[Month#500, route#477, total_tip_amount#661, total_passenger_count#663, average_tip_per_passenger#668]) +- Exchange hashpartitioning(Month#500, route#477, 200), true, [id=#522] +- *(3) HashAggregate(keys=[Month#500, route#477], functions=[partial_sum(cast(tip_amount#71 as double)), partial_sum(cast(passenger_count#63 as double))], output=[Month#500, route#477, sum#691, sum#692]) +- *(3) Project [passenger_count#63, tip_amount#71, concat_ws( to , Pickup_Borough#230, Borough#17) AS route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- *(3) BroadcastHashJoin [cast(DOLocationID#66 as int)], [LocationID#16], LeftOuter, BuildRight :- *(3) Project [tpep_pickup_datetime#61, passenger_count#63, DOLocationID#66, tip_amount#71, Borough#17 AS Pickup_Borough#230] : +- *(3) BroadcastHashJoin [cast(PULocationID#65 as int)], [LocationID#16], LeftOuter, BuildRight : :- FileScan csv [tpep_pickup_datetime#61,passenger_count#63,PULocationID#65,DOLocationID#66,tip_amount#71] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<tpep_pickup_datetime:string,passenger_count:string,PULocationID:string,DOLocationID:string... : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#481] : +- *(1) Project [LocationID#16, Borough#17] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string> +- ReusedExchange [LocationID#16, Borough#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#481]