== Parsed Logical Plan == GlobalLimit 21 +- LocalLimit 21 +- Project [cast(Month#464 as string) AS Month#697, cast(route#442 as string) AS route#698, cast(total_tip_amount#619 as string) AS total_tip_amount#699, cast(total_passenger_count#621 as string) AS total_passenger_count#700, cast(average_tip_per_passenger#626 as string) AS average_tip_per_passenger#701] +- GlobalLimit 10 +- LocalLimit 10 +- Sort [average_tip_per_passenger#626 DESC NULLS LAST], true +- Project [Month#464, route#442, total_tip_amount#619, total_passenger_count#621, (total_tip_amount#619 / total_passenger_count#621) AS average_tip_per_passenger#626] +- Aggregate [Month#464, route#442], [Month#464, route#442, sum(cast(tip_amount#50 as double)) AS total_tip_amount#619, sum(cast(passenger_count#42 as double)) AS total_passenger_count#621] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, concat_ws( to , Pickup_Borough#206, Dropoff_Borough#353) AS route#442] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Dropoff_Zone#376, service_zone#19 AS Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Zone#18 AS Dropoff_Zone#376, service_zone#19] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Borough#17 AS Dropoff_Borough#353, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Borough#17 AS Pickup_Borough#206, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == Month: string, route: string, total_tip_amount: string, total_passenger_count: string, average_tip_per_passenger: string GlobalLimit 21 +- LocalLimit 21 +- Project [cast(Month#464 as string) AS Month#697, cast(route#442 as string) AS route#698, cast(total_tip_amount#619 as string) AS total_tip_amount#699, cast(total_passenger_count#621 as string) AS total_passenger_count#700, cast(average_tip_per_passenger#626 as string) AS average_tip_per_passenger#701] +- GlobalLimit 10 +- LocalLimit 10 +- Sort [average_tip_per_passenger#626 DESC NULLS LAST], true +- Project [Month#464, route#442, total_tip_amount#619, total_passenger_count#621, (total_tip_amount#619 / total_passenger_count#621) AS average_tip_per_passenger#626] +- Aggregate [Month#464, route#442], [Month#464, route#442, sum(cast(tip_amount#50 as double)) AS total_tip_amount#619, sum(cast(passenger_count#42 as double)) AS total_passenger_count#621] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, concat_ws( to , Pickup_Borough#206, Dropoff_Borough#353) AS route#442] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Dropoff_Zone#376, service_zone#19 AS Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Zone#18 AS Dropoff_Zone#376, service_zone#19] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Borough#17 AS Dropoff_Borough#353, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Borough#17 AS Pickup_Borough#206, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 21 +- LocalLimit 21 +- Project [cast(Month#464 as string) AS Month#697, route#442, cast(total_tip_amount#619 as string) AS total_tip_amount#699, cast(total_passenger_count#621 as string) AS total_passenger_count#700, cast(average_tip_per_passenger#626 as string) AS average_tip_per_passenger#701] +- GlobalLimit 10 +- LocalLimit 10 +- Sort [average_tip_per_passenger#626 DESC NULLS LAST], true +- Aggregate [Month#464, route#442], [Month#464, route#442, sum(cast(tip_amount#50 as double)) AS total_tip_amount#619, sum(cast(passenger_count#42 as double)) AS total_passenger_count#621, (sum(cast(tip_amount#50 as double)) / sum(cast(passenger_count#42 as double))) AS average_tip_per_passenger#626] +- Project [passenger_count#42, tip_amount#50, concat_ws( to , Pickup_Borough#206, Borough#17) AS route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50, Borough#17 AS Pickup_Borough#206] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#40, passenger_count#42, PULocationID#44, DOLocationID#45, tip_amount#50] : : +- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Project [LocationID#16, Borough#17] : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Borough#17] +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 21 +- *(5) Project [cast(Month#464 as string) AS Month#697, route#442, cast(total_tip_amount#619 as string) AS total_tip_amount#699, cast(total_passenger_count#621 as string) AS total_passenger_count#700, cast(average_tip_per_passenger#626 as string) AS average_tip_per_passenger#701] +- TakeOrderedAndProject(limit=10, orderBy=[average_tip_per_passenger#626 DESC NULLS LAST], output=[Month#464,route#442,total_tip_amount#619,total_passenger_count#621,average_tip_per_passenger#626]) +- *(4) HashAggregate(keys=[Month#464, route#442], functions=[sum(cast(tip_amount#50 as double)), sum(cast(passenger_count#42 as double))], output=[Month#464, route#442, total_tip_amount#619, total_passenger_count#621, average_tip_per_passenger#626]) +- Exchange hashpartitioning(Month#464, route#442, 200), true, [id=#521] +- *(3) HashAggregate(keys=[Month#464, route#442], functions=[partial_sum(cast(tip_amount#50 as double)), partial_sum(cast(passenger_count#42 as double))], output=[Month#464, route#442, sum#649, sum#650]) +- *(3) Project [passenger_count#42, tip_amount#50, concat_ws( to , Pickup_Borough#206, Borough#17) AS route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- *(3) BroadcastHashJoin [cast(DOLocationID#45 as int)], [LocationID#16], LeftOuter, BuildRight :- *(3) Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50, Borough#17 AS Pickup_Borough#206] : +- *(3) BroadcastHashJoin [cast(PULocationID#44 as int)], [LocationID#16], LeftOuter, BuildRight : :- FileScan csv [tpep_pickup_datetime#40,passenger_count#42,PULocationID#44,DOLocationID#45,tip_amount#50] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<tpep_pickup_datetime:string,passenger_count:string,PULocationID:string,DOLocationID:string... : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#476] : +- *(1) Project [LocationID#16, Borough#17] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string> +- ReusedExchange [LocationID#16, Borough#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#476]