== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(Travel_Month#390 as string) AS Travel_Month#459, cast(Travel_Route#368 as string) AS Travel_Route#460, cast(Total_Tips#436 as string) AS Total_Tips#461, cast(Total_Passengers#438L as string) AS Total_Passengers#462, cast(Avg_Tip_Per_Passenger#443 as string) AS Avg_Tip_Per_Passenger#463] +- Project [Travel_Month#390, Travel_Route#368, Total_Tips#436, Total_Passengers#438L, Avg_Tip_Per_Passenger#443] +- Project [Travel_Month#390, Travel_Route#368, Total_Tips#436, Total_Passengers#438L, round((Total_Tips#436 / cast(Total_Passengers#438L as double)), 2) AS Avg_Tip_Per_Passenger#443] +- Aggregate [Travel_Month#390, Travel_Route#368], [Travel_Month#390, Travel_Route#368, sum(cast(cast(tip_amount#26 as float) as double)) AS Total_Tips#436, sum(cast(cast(passenger_count#18 as int) as bigint)) AS Total_Passengers#438L] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325, Travel_Route#368, month(cast(tpep_pickup_datetime#16 as date)) AS Travel_Month#390] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325, concat_ws( to , Start_Borough#132, End_Borough#279) AS Travel_Route#368] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, End_Borough#279, End_Zone#302, service_zone#67 AS End_ServiceZone#325] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, End_Borough#279, Zone#66 AS End_Zone#302, service_zone#67] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, Borough#65 AS End_Borough#279, Zone#66, service_zone#67] +- Join Inner, (DOLocationID#21 = LocationID#64) :- SubqueryAlias pickup_join : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Start_Borough#132, Start_Zone#153, service_zone#67 AS Start_ServiceZone#174] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Start_Borough#132, Zone#66 AS Start_Zone#153, service_zone#67] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Borough#65 AS Start_Borough#132, Zone#66, service_zone#67] : +- Join Inner, (PULocationID#20 = LocationID#64) : :- SubqueryAlias trips : : +- Relation[tpep_pickup_datetime#16,tpep_dropoff_datetime#17,passenger_count#18,trip_distance#19,PULocationID#20,DOLocationID#21,payment_type#22,fare_amount#23,extra#24,mta_tax#25,tip_amount#26,tolls_amount#27,total_amount#28,congestion_surcharge#29,airport_fee#30,taxi_type#31] csv : +- SubqueryAlias pickup : +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv +- SubqueryAlias dropoff +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv == Analyzed Logical Plan == Travel_Month: string, Travel_Route: string, Total_Tips: string, Total_Passengers: string, Avg_Tip_Per_Passenger: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(Travel_Month#390 as string) AS Travel_Month#459, cast(Travel_Route#368 as string) AS Travel_Route#460, cast(Total_Tips#436 as string) AS Total_Tips#461, cast(Total_Passengers#438L as string) AS Total_Passengers#462, cast(Avg_Tip_Per_Passenger#443 as string) AS Avg_Tip_Per_Passenger#463] +- Project [Travel_Month#390, Travel_Route#368, Total_Tips#436, Total_Passengers#438L, Avg_Tip_Per_Passenger#443] +- Project [Travel_Month#390, Travel_Route#368, Total_Tips#436, Total_Passengers#438L, round((Total_Tips#436 / cast(Total_Passengers#438L as double)), 2) AS Avg_Tip_Per_Passenger#443] +- Aggregate [Travel_Month#390, Travel_Route#368], [Travel_Month#390, Travel_Route#368, sum(cast(cast(tip_amount#26 as float) as double)) AS Total_Tips#436, sum(cast(cast(passenger_count#18 as int) as bigint)) AS Total_Passengers#438L] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325, Travel_Route#368, month(cast(tpep_pickup_datetime#16 as date)) AS Travel_Month#390] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325, concat_ws( to , Start_Borough#132, End_Borough#279) AS Travel_Route#368] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, End_Borough#279, End_Zone#302, End_ServiceZone#325] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, End_Borough#279, End_Zone#302, service_zone#67 AS End_ServiceZone#325] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, End_Borough#279, Zone#66 AS End_Zone#302, service_zone#67] +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174, LocationID#64, Borough#65 AS End_Borough#279, Zone#66, service_zone#67] +- Join Inner, (DOLocationID#21 = LocationID#64) :- SubqueryAlias pickup_join : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, Start_Borough#132, Start_Zone#153, Start_ServiceZone#174] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Start_Borough#132, Start_Zone#153, service_zone#67 AS Start_ServiceZone#174] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Start_Borough#132, Zone#66 AS Start_Zone#153, service_zone#67] : +- Project [tpep_pickup_datetime#16, tpep_dropoff_datetime#17, passenger_count#18, trip_distance#19, PULocationID#20, DOLocationID#21, payment_type#22, fare_amount#23, extra#24, mta_tax#25, tip_amount#26, tolls_amount#27, total_amount#28, congestion_surcharge#29, airport_fee#30, taxi_type#31, LocationID#64, Borough#65 AS Start_Borough#132, Zone#66, service_zone#67] : +- Join Inner, (PULocationID#20 = LocationID#64) : :- SubqueryAlias trips : : +- Relation[tpep_pickup_datetime#16,tpep_dropoff_datetime#17,passenger_count#18,trip_distance#19,PULocationID#20,DOLocationID#21,payment_type#22,fare_amount#23,extra#24,mta_tax#25,tip_amount#26,tolls_amount#27,total_amount#28,congestion_surcharge#29,airport_fee#30,taxi_type#31] csv : +- SubqueryAlias pickup : +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv +- SubqueryAlias dropoff +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Aggregate [Travel_Month#390, Travel_Route#368], [cast(Travel_Month#390 as string) AS Travel_Month#459, Travel_Route#368, cast(sum(cast(cast(tip_amount#26 as float) as double)) as string) AS Total_Tips#461, cast(sum(cast(cast(passenger_count#18 as int) as bigint)) as string) AS Total_Passengers#462, cast(round((sum(cast(cast(tip_amount#26 as float) as double)) / cast(sum(cast(cast(passenger_count#18 as int) as bigint)) as double)), 2) as string) AS Avg_Tip_Per_Passenger#463] +- Project [passenger_count#18, tip_amount#26, concat_ws( to , Start_Borough#132, Borough#65) AS Travel_Route#368, month(cast(tpep_pickup_datetime#16 as date)) AS Travel_Month#390] +- Join Inner, (DOLocationID#21 = LocationID#64) :- Project [tpep_pickup_datetime#16, passenger_count#18, DOLocationID#21, tip_amount#26, Borough#65 AS Start_Borough#132] : +- Join Inner, (PULocationID#20 = LocationID#64) : :- Project [tpep_pickup_datetime#16, passenger_count#18, PULocationID#20, DOLocationID#21, tip_amount#26] : : +- Filter (isnotnull(PULocationID#20) AND isnotnull(DOLocationID#21)) : : +- Relation[tpep_pickup_datetime#16,tpep_dropoff_datetime#17,passenger_count#18,trip_distance#19,PULocationID#20,DOLocationID#21,payment_type#22,fare_amount#23,extra#24,mta_tax#25,tip_amount#26,tolls_amount#27,total_amount#28,congestion_surcharge#29,airport_fee#30,taxi_type#31] csv : +- Project [LocationID#64, Borough#65] : +- Filter isnotnull(LocationID#64) : +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv +- Project [LocationID#64, Borough#65] +- Filter isnotnull(LocationID#64) +- Relation[LocationID#64,Borough#65,Zone#66,service_zone#67] csv == Physical Plan == CollectLimit 11 +- *(4) HashAggregate(keys=[Travel_Month#390, Travel_Route#368], functions=[sum(cast(cast(tip_amount#26 as float) as double)), sum(cast(cast(passenger_count#18 as int) as bigint))], output=[Travel_Month#459, Travel_Route#368, Total_Tips#461, Total_Passengers#462, Avg_Tip_Per_Passenger#463]) +- Exchange hashpartitioning(Travel_Month#390, Travel_Route#368, 200), true, [id=#147] +- *(3) HashAggregate(keys=[Travel_Month#390, Travel_Route#368], functions=[partial_sum(cast(cast(tip_amount#26 as float) as double)), partial_sum(cast(cast(passenger_count#18 as int) as bigint))], output=[Travel_Month#390, Travel_Route#368, sum#471, sum#472L]) +- *(3) Project [passenger_count#18, tip_amount#26, concat_ws( to , Start_Borough#132, Borough#65) AS Travel_Route#368, month(cast(tpep_pickup_datetime#16 as date)) AS Travel_Month#390] +- *(3) BroadcastHashJoin [DOLocationID#21], [LocationID#64], Inner, BuildRight :- *(3) Project [tpep_pickup_datetime#16, passenger_count#18, DOLocationID#21, tip_amount#26, Borough#65 AS Start_Borough#132] : +- *(3) BroadcastHashJoin [PULocationID#20], [LocationID#64], Inner, BuildRight : :- *(3) Project [tpep_pickup_datetime#16, passenger_count#18, PULocationID#20, DOLocationID#21, tip_amount#26] : : +- *(3) Filter (isnotnull(PULocationID#20) AND isnotnull(DOLocationID#21)) : : +- FileScan csv [tpep_pickup_datetime#16,passenger_count#18,PULocationID#20,DOLocationID#21,tip_amount#26] Batched: false, DataFilters: [isnotnull(PULocationID#20), isnotnull(DOLocationID#21)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [IsNotNull(PULocationID), IsNotNull(DOLocationID)], ReadSchema: struct<tpep_pickup_datetime:string,passenger_count:string,PULocationID:string,DOLocationID:string... : +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])), [id=#106] : +- *(1) Project [LocationID#64, Borough#65] : +- *(1) Filter isnotnull(LocationID#64) : +- FileScan csv [LocationID#64,Borough#65] Batched: false, DataFilters: [isnotnull(LocationID#64)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:string,Borough:string> +- ReusedExchange [LocationID#64, Borough#65], BroadcastExchange HashedRelationBroadcastMode(List(input[0, string, true])), [id=#106]