== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(month#669 as string) AS month#872, cast(route#647 as string) AS route#873, cast(total_tip_amount#823 as string) AS total_tip_amount#874, cast(total_passenger_count#825 as string) AS total_passenger_count#875, cast(average_tip_per_passenger#830 as string) AS average_tip_per_passenger#876] +- Filter (average_tip_per_passenger#830 = cast(0 as double)) +- Project [month#669, route#647, total_tip_amount#823, total_passenger_count#825, (total_tip_amount#823 / total_passenger_count#825) AS average_tip_per_passenger#830] +- Aggregate [month#669, route#647], [month#669, route#647, sum(cast(tip_amount#50 as double)) AS total_tip_amount#823, sum(cast(passenger_count#42 as double)) AS total_passenger_count#825] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561, route#647, month(to_date('tpep_pickup_datetime, Some(yyyy-MM-dd HH:mm:ss))) AS month#669] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561, concat_ws( to , Pickup_Borough#365, Dropoff_Borough#512) AS route#647] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Dropoff_Borough#512, Dropoff_Zone#536, service_zone#19 AS Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Dropoff_Borough#512, Zone#18 AS Dropoff_Zone#536] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Borough#17 AS Dropoff_Borough#512] +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Pickup_Borough#365, Pickup_Zone#385, service_zone#19 AS Pickup_service_zone#406] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Pickup_Borough#365, Zone#18 AS Pickup_Zone#385] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Borough#17 AS Pickup_Borough#365] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19] : +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == month: string, route: string, total_tip_amount: string, total_passenger_count: string, average_tip_per_passenger: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(month#669 as string) AS month#872, cast(route#647 as string) AS route#873, cast(total_tip_amount#823 as string) AS total_tip_amount#874, cast(total_passenger_count#825 as string) AS total_passenger_count#875, cast(average_tip_per_passenger#830 as string) AS average_tip_per_passenger#876] +- Filter (average_tip_per_passenger#830 = cast(0 as double)) +- Project [month#669, route#647, total_tip_amount#823, total_passenger_count#825, (total_tip_amount#823 / total_passenger_count#825) AS average_tip_per_passenger#830] +- Aggregate [month#669, route#647], [month#669, route#647, sum(cast(tip_amount#50 as double)) AS total_tip_amount#823, sum(cast(passenger_count#42 as double)) AS total_passenger_count#825] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561, route#647, month(to_date('tpep_pickup_datetime, Some(yyyy-MM-dd HH:mm:ss))) AS month#669] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561, concat_ws( to , Pickup_Borough#365, Dropoff_Borough#512) AS route#647] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, Dropoff_Borough#512, Dropoff_Zone#536, Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Dropoff_Borough#512, Dropoff_Zone#536, service_zone#19 AS Dropoff_service_zone#561] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Dropoff_Borough#512, Zone#18 AS Dropoff_Zone#536] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406, LocationID#16, Borough#17, Zone#18, service_zone#19, Borough#17 AS Dropoff_Borough#512] +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#365, Pickup_Zone#385, Pickup_service_zone#406] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Pickup_Borough#365, Pickup_Zone#385, service_zone#19 AS Pickup_service_zone#406] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Pickup_Borough#365, Zone#18 AS Pickup_Zone#385] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19, Borough#17 AS Pickup_Borough#365] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17, Zone#18, service_zone#19] : +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(month#669 as string) AS month#872, route#647, cast(total_tip_amount#823 as string) AS total_tip_amount#874, cast(total_passenger_count#825 as string) AS total_passenger_count#875, cast((total_tip_amount#823 / total_passenger_count#825) as string) AS average_tip_per_passenger#876] +- Filter ((isnotnull(total_tip_amount#823) AND isnotnull(total_passenger_count#825)) AND ((total_tip_amount#823 / total_passenger_count#825) = 0.0)) +- Aggregate [month#669, route#647], [month#669, route#647, sum(cast(tip_amount#50 as double)) AS total_tip_amount#823, sum(cast(passenger_count#42 as double)) AS total_passenger_count#825] +- Project [passenger_count#42, tip_amount#50, concat_ws( to , Pickup_Borough#365, Borough#17) AS route#647, month(cast(cast(unix_timestamp(tpep_pickup_datetime#40, yyyy-MM-dd HH:mm:ss, Some(GMT)) as timestamp) as date)) AS month#669] +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50, Borough#17 AS Pickup_Borough#365] : +- Join Inner, (cast(DOLocationID#45 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50] : : +- Filter isnotnull(DOLocationID#45) : : +- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Project [LocationID#16, Borough#17] : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Borough#17] +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 11 +- *(4) Project [cast(month#669 as string) AS month#872, route#647, cast(total_tip_amount#823 as string) AS total_tip_amount#874, cast(total_passenger_count#825 as string) AS total_passenger_count#875, cast((total_tip_amount#823 / total_passenger_count#825) as string) AS average_tip_per_passenger#876] +- *(4) Filter ((isnotnull(total_tip_amount#823) AND isnotnull(total_passenger_count#825)) AND ((total_tip_amount#823 / total_passenger_count#825) = 0.0)) +- *(4) HashAggregate(keys=[month#669, route#647], functions=[sum(cast(tip_amount#50 as double)), sum(cast(passenger_count#42 as double))], output=[month#669, route#647, total_tip_amount#823, total_passenger_count#825]) +- Exchange hashpartitioning(month#669, route#647, 200), true, [id=#445] +- *(3) HashAggregate(keys=[month#669, route#647], functions=[partial_sum(cast(tip_amount#50 as double)), partial_sum(cast(passenger_count#42 as double))], output=[month#669, route#647, sum#853, sum#854]) +- *(3) Project [passenger_count#42, tip_amount#50, concat_ws( to , Pickup_Borough#365, Borough#17) AS route#647, month(cast(cast(unix_timestamp(tpep_pickup_datetime#40, yyyy-MM-dd HH:mm:ss, Some(GMT)) as timestamp) as date)) AS month#669] +- *(3) BroadcastHashJoin [cast(DOLocationID#45 as int)], [LocationID#16], Inner, BuildRight :- *(3) Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50, Borough#17 AS Pickup_Borough#365] : +- *(3) BroadcastHashJoin [cast(DOLocationID#45 as int)], [LocationID#16], Inner, BuildRight : :- *(3) Project [tpep_pickup_datetime#40, passenger_count#42, DOLocationID#45, tip_amount#50] : : +- *(3) Filter isnotnull(DOLocationID#45) : : +- FileScan csv [tpep_pickup_datetime#40,passenger_count#42,DOLocationID#45,tip_amount#50] Batched: false, DataFilters: [isnotnull(DOLocationID#45)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [IsNotNull(DOLocationID)], ReadSchema: struct<tpep_pickup_datetime:string,passenger_count:string,DOLocationID:string,tip_amount:string> : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#402] : +- *(1) Project [LocationID#16, Borough#17] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string> +- ReusedExchange [LocationID#16, Borough#17], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#402]