== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#40 as string) AS tpep_pickup_datetime#509, cast(tpep_dropoff_datetime#41 as string) AS tpep_dropoff_datetime#510, cast(passenger_count#42 as string) AS passenger_count#511, cast(trip_distance#43 as string) AS trip_distance#512, cast(payment_type#46 as string) AS payment_type#513, cast(fare_amount#47 as string) AS fare_amount#514, cast(extra#48 as string) AS extra#515, cast(mta_tax#49 as string) AS mta_tax#516, cast(tip_amount#50 as string) AS tip_amount#517, cast(tolls_amount#51 as string) AS tolls_amount#518, cast(total_amount#52 as string) AS total_amount#519, cast(congestion_surcharge#53 as string) AS congestion_surcharge#520, cast(airport_fee#54 as string) AS airport_fee#521, cast(taxi_type#55 as string) AS taxi_type#522, cast(Pickup_Borough#206 as string) AS Pickup_Borough#523, cast(Pickup_Zone#227 as string) AS Pickup_Zone#524, cast(Pickup_service_zone#248 as string) AS Pickup_service_zone#525, cast(Dropoff_Borough#353 as string) AS Dropoff_Borough#526, cast(Dropoff_Zone#376 as string) AS Dropoff_Zone#527, cast(Dropoff_service_zone#399 as string) AS Dropoff_service_zone#528, cast(route#442 as string) AS route#529, cast(Month#464 as string) AS Month#530] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, concat_ws( to , Pickup_Borough#206, Dropoff_Borough#353) AS route#442] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Dropoff_Zone#376, service_zone#19 AS Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Zone#18 AS Dropoff_Zone#376, service_zone#19] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Borough#17 AS Dropoff_Borough#353, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Borough#17 AS Pickup_Borough#206, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == tpep_pickup_datetime: string, tpep_dropoff_datetime: string, passenger_count: string, trip_distance: string, payment_type: string, fare_amount: string, extra: string, mta_tax: string, tip_amount: string, tolls_amount: string, total_amount: string, congestion_surcharge: string, airport_fee: string, taxi_type: string, Pickup_Borough: string, Pickup_Zone: string, Pickup_service_zone: string, Dropoff_Borough: string, Dropoff_Zone: string, Dropoff_service_zone: string, route: string, Month: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#40 as string) AS tpep_pickup_datetime#509, cast(tpep_dropoff_datetime#41 as string) AS tpep_dropoff_datetime#510, cast(passenger_count#42 as string) AS passenger_count#511, cast(trip_distance#43 as string) AS trip_distance#512, cast(payment_type#46 as string) AS payment_type#513, cast(fare_amount#47 as string) AS fare_amount#514, cast(extra#48 as string) AS extra#515, cast(mta_tax#49 as string) AS mta_tax#516, cast(tip_amount#50 as string) AS tip_amount#517, cast(tolls_amount#51 as string) AS tolls_amount#518, cast(total_amount#52 as string) AS total_amount#519, cast(congestion_surcharge#53 as string) AS congestion_surcharge#520, cast(airport_fee#54 as string) AS airport_fee#521, cast(taxi_type#55 as string) AS taxi_type#522, cast(Pickup_Borough#206 as string) AS Pickup_Borough#523, cast(Pickup_Zone#227 as string) AS Pickup_Zone#524, cast(Pickup_service_zone#248 as string) AS Pickup_service_zone#525, cast(Dropoff_Borough#353 as string) AS Dropoff_Borough#526, cast(Dropoff_Zone#376 as string) AS Dropoff_Zone#527, cast(Dropoff_service_zone#399 as string) AS Dropoff_service_zone#528, cast(route#442 as string) AS route#529, cast(Month#464 as string) AS Month#530] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, route#442, month(cast(tpep_pickup_datetime#40 as date)) AS Month#464] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399, concat_ws( to , Pickup_Borough#206, Dropoff_Borough#353) AS route#442] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Dropoff_Borough#353, Dropoff_Zone#376, Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Dropoff_Zone#376, service_zone#19 AS Dropoff_service_zone#399] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Dropoff_Borough#353, Zone#18 AS Dropoff_Zone#376, service_zone#19] +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, LocationID#16, Borough#17 AS Dropoff_Borough#353, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19] : +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, PULocationID#44, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, LocationID#16, Borough#17 AS Pickup_Borough#206, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Borough#17 AS Dropoff_Borough#526, Zone#18 AS Dropoff_Zone#527, service_zone#19 AS Dropoff_service_zone#528, concat_ws( to , Pickup_Borough#206, Borough#17) AS route#529, cast(month(cast(tpep_pickup_datetime#40 as date)) as string) AS Month#530] +- Join LeftOuter, (cast(DOLocationID#45 as int) = LocationID#16) :- Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17 AS Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- Join LeftOuter, (cast(PULocationID#44 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] csv : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 11 +- *(3) Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Pickup_Borough#206, Pickup_Zone#227, Pickup_service_zone#248, Borough#17 AS Dropoff_Borough#526, Zone#18 AS Dropoff_Zone#527, service_zone#19 AS Dropoff_service_zone#528, concat_ws( to , Pickup_Borough#206, Borough#17) AS route#529, cast(month(cast(tpep_pickup_datetime#40 as date)) as string) AS Month#530] +- *(3) BroadcastHashJoin [cast(DOLocationID#45 as int)], [LocationID#16], LeftOuter, BuildRight :- *(3) Project [tpep_pickup_datetime#40, tpep_dropoff_datetime#41, passenger_count#42, trip_distance#43, DOLocationID#45, payment_type#46, fare_amount#47, extra#48, mta_tax#49, tip_amount#50, tolls_amount#51, total_amount#52, congestion_surcharge#53, airport_fee#54, taxi_type#55, Borough#17 AS Pickup_Borough#206, Zone#18 AS Pickup_Zone#227, service_zone#19 AS Pickup_service_zone#248] : +- *(3) BroadcastHashJoin [cast(PULocationID#44 as int)], [LocationID#16], LeftOuter, BuildRight : :- FileScan csv [tpep_pickup_datetime#40,tpep_dropoff_datetime#41,passenger_count#42,trip_distance#43,PULocationID#44,DOLocationID#45,payment_type#46,fare_amount#47,extra#48,mta_tax#49,tip_amount#50,tolls_amount#51,total_amount#52,congestion_surcharge#53,airport_fee#54,taxi_type#55] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<tpep_pickup_datetime:string,tpep_dropoff_datetime:string,passenger_count:string,trip_dista... : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#138] : +- *(1) Project [LocationID#16, Borough#17, Zone#18, service_zone#19] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17,Zone#18,service_zone#19] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string> +- ReusedExchange [LocationID#16, Borough#17, Zone#18, service_zone#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#138]