== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#61 as string) AS tpep_pickup_datetime#547, cast(tpep_dropoff_datetime#62 as string) AS tpep_dropoff_datetime#548, cast(passenger_count#63 as string) AS passenger_count#549, cast(trip_distance#64 as string) AS trip_distance#550, cast(payment_type#67 as string) AS payment_type#551, cast(fare_amount#68 as string) AS fare_amount#552, cast(extra#69 as string) AS extra#553, cast(mta_tax#70 as string) AS mta_tax#554, cast(tip_amount#71 as string) AS tip_amount#555, cast(tolls_amount#72 as string) AS tolls_amount#556, cast(total_amount#73 as string) AS total_amount#557, cast(congestion_surcharge#74 as string) AS congestion_surcharge#558, cast(airport_fee#75 as string) AS airport_fee#559, cast(taxi_type#76 as string) AS taxi_type#560, cast(trip_date#114 as string) AS trip_date#561, cast(Pickup_Borough#230 as string) AS Pickup_Borough#562, cast(Pickup_Zone#252 as string) AS Pickup_Zone#563, cast(Pickup_service_zone#274 as string) AS Pickup_service_zone#564, cast(Dropoff_Borough#384 as string) AS Dropoff_Borough#565, cast(Dropoff_Zone#408 as string) AS Dropoff_Zone#566, cast(Dropoff_service_zone#432 as string) AS Dropoff_service_zone#567, cast(route#477 as string) AS route#568, cast(Month#500 as string) AS Month#569] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, concat_ws( to , Pickup_Borough#230, Dropoff_Borough#384) AS route#477] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Dropoff_Zone#408, service_zone#19 AS Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Zone#18 AS Dropoff_Zone#408, service_zone#19] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Borough#17 AS Dropoff_Borough#384, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Borough#17 AS Pickup_Borough#230, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == tpep_pickup_datetime: string, tpep_dropoff_datetime: string, passenger_count: string, trip_distance: string, payment_type: string, fare_amount: string, extra: string, mta_tax: string, tip_amount: string, tolls_amount: string, total_amount: string, congestion_surcharge: string, airport_fee: string, taxi_type: string, trip_date: string, Pickup_Borough: string, Pickup_Zone: string, Pickup_service_zone: string, Dropoff_Borough: string, Dropoff_Zone: string, Dropoff_service_zone: string, route: string, Month: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#61 as string) AS tpep_pickup_datetime#547, cast(tpep_dropoff_datetime#62 as string) AS tpep_dropoff_datetime#548, cast(passenger_count#63 as string) AS passenger_count#549, cast(trip_distance#64 as string) AS trip_distance#550, cast(payment_type#67 as string) AS payment_type#551, cast(fare_amount#68 as string) AS fare_amount#552, cast(extra#69 as string) AS extra#553, cast(mta_tax#70 as string) AS mta_tax#554, cast(tip_amount#71 as string) AS tip_amount#555, cast(tolls_amount#72 as string) AS tolls_amount#556, cast(total_amount#73 as string) AS total_amount#557, cast(congestion_surcharge#74 as string) AS congestion_surcharge#558, cast(airport_fee#75 as string) AS airport_fee#559, cast(taxi_type#76 as string) AS taxi_type#560, cast(trip_date#114 as string) AS trip_date#561, cast(Pickup_Borough#230 as string) AS Pickup_Borough#562, cast(Pickup_Zone#252 as string) AS Pickup_Zone#563, cast(Pickup_service_zone#274 as string) AS Pickup_service_zone#564, cast(Dropoff_Borough#384 as string) AS Dropoff_Borough#565, cast(Dropoff_Zone#408 as string) AS Dropoff_Zone#566, cast(Dropoff_service_zone#432 as string) AS Dropoff_service_zone#567, cast(route#477 as string) AS route#568, cast(Month#500 as string) AS Month#569] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, route#477, month(cast(tpep_pickup_datetime#61 as date)) AS Month#500] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432, concat_ws( to , Pickup_Borough#230, Dropoff_Borough#384) AS route#477] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Dropoff_Borough#384, Dropoff_Zone#408, Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Dropoff_Zone#408, service_zone#19 AS Dropoff_service_zone#432] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Dropoff_Borough#384, Zone#18 AS Dropoff_Zone#408, service_zone#19] +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, LocationID#16, Borough#17 AS Dropoff_Borough#384, Zone#18, service_zone#19] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19] : +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, LocationID#16, Borough#17 AS Pickup_Borough#230, Zone#18, service_zone#19] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Borough#17 AS Dropoff_Borough#565, Zone#18 AS Dropoff_Zone#566, service_zone#19 AS Dropoff_service_zone#567, concat_ws( to , Pickup_Borough#230, Borough#17) AS route#568, cast(month(cast(tpep_pickup_datetime#61 as date)) as string) AS Month#569] +- Join LeftOuter, (cast(DOLocationID#66 as int) = LocationID#16) :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Borough#17 AS Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- Join LeftOuter, (cast(PULocationID#65 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- Relation[tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] csv : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 11 +- *(3) Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Pickup_Borough#230, Pickup_Zone#252, Pickup_service_zone#274, Borough#17 AS Dropoff_Borough#565, Zone#18 AS Dropoff_Zone#566, service_zone#19 AS Dropoff_service_zone#567, concat_ws( to , Pickup_Borough#230, Borough#17) AS route#568, cast(month(cast(tpep_pickup_datetime#61 as date)) as string) AS Month#569] +- *(3) BroadcastHashJoin [cast(DOLocationID#66 as int)], [LocationID#16], LeftOuter, BuildRight :- *(3) Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, trip_date#114, Borough#17 AS Pickup_Borough#230, Zone#18 AS Pickup_Zone#252, service_zone#19 AS Pickup_service_zone#274] : +- *(3) BroadcastHashJoin [cast(PULocationID#65 as int)], [LocationID#16], LeftOuter, BuildRight : :- *(3) Project [tpep_pickup_datetime#61, tpep_dropoff_datetime#62, passenger_count#63, trip_distance#64, PULocationID#65, DOLocationID#66, payment_type#67, fare_amount#68, extra#69, mta_tax#70, tip_amount#71, tolls_amount#72, total_amount#73, congestion_surcharge#74, airport_fee#75, taxi_type#76, date_format(cast(tpep_pickup_datetime#61 as timestamp), yyyy-MM-dd, Some(GMT)) AS trip_date#114] : : +- FileScan csv [tpep_pickup_datetime#61,tpep_dropoff_datetime#62,passenger_count#63,trip_distance#64,PULocationID#65,DOLocationID#66,payment_type#67,fare_amount#68,extra#69,mta_tax#70,tip_amount#71,tolls_amount#72,total_amount#73,congestion_surcharge#74,airport_fee#75,taxi_type#76] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<tpep_pickup_datetime:string,tpep_dropoff_datetime:string,passenger_count:string,trip_dista... : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#153] : +- *(1) Project [LocationID#16, Borough#17, Zone#18, service_zone#19] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17,Zone#18,service_zone#19] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string> +- ReusedExchange [LocationID#16, Borough#17, Zone#18, service_zone#19], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#153]