== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#55 as string) AS tpep_pickup_datetime#450, cast(tpep_dropoff_datetime#56 as string) AS tpep_dropoff_datetime#451, cast(passenger_count#57 as string) AS passenger_count#452, cast(trip_distance#58 as string) AS trip_distance#453, cast(payment_type#61 as string) AS payment_type#454, cast(fare_amount#62 as string) AS fare_amount#455, cast(extra#63 as string) AS extra#456, cast(mta_tax#64 as string) AS mta_tax#457, cast(tip_amount#65 as string) AS tip_amount#458, cast(tolls_amount#66 as string) AS tolls_amount#459, cast(total_amount#67 as string) AS total_amount#460, cast(congestion_surcharge#68 as string) AS congestion_surcharge#461, cast(airport_fee#69 as string) AS airport_fee#462, cast(taxi_type#70 as string) AS taxi_type#463, cast(Pickup_Borough#147 as string) AS Pickup_Borough#464, cast(Pickup_Zone#168 as string) AS Pickup_Zone#465, cast(Pickup_service_zone#189 as string) AS Pickup_service_zone#466, cast(Dropoff_Borough#294 as string) AS Dropoff_Borough#467, cast(Dropoff_Zone#317 as string) AS Dropoff_Zone#468, cast(Dropoff_service_zone#340 as string) AS Dropoff_service_zone#469, cast(route#383 as string) AS route#470, cast(Month#405 as string) AS Month#471] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340, route#383, month(cast(tpep_pickup_datetime#55 as date)) AS Month#405] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340, concat_ws( to , Pickup_Borough#147, Dropoff_Borough#294) AS route#383] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Dropoff_Borough#294, Dropoff_Zone#317, service_zone#17 AS Dropoff_service_zone#340] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Dropoff_Borough#294, Zone#16 AS Dropoff_Zone#317, service_zone#17] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Borough#15 AS Dropoff_Borough#294, Zone#16, service_zone#17] +- Join LeftOuter, (cast(DOLocationID#60 as int) = LocationID#14) :- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Pickup_Borough#147, Pickup_Zone#168, service_zone#17 AS Pickup_service_zone#189] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Pickup_Borough#147, Zone#16 AS Pickup_Zone#168, service_zone#17] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Borough#15 AS Pickup_Borough#147, Zone#16, service_zone#17] : +- Join LeftOuter, (cast(PULocationID#59 as int) = LocationID#14) : :- Relation[tpep_pickup_datetime#55,tpep_dropoff_datetime#56,passenger_count#57,trip_distance#58,PULocationID#59,DOLocationID#60,payment_type#61,fare_amount#62,extra#63,mta_tax#64,tip_amount#65,tolls_amount#66,total_amount#67,congestion_surcharge#68,airport_fee#69,taxi_type#70] csv : +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false == Analyzed Logical Plan == tpep_pickup_datetime: string, tpep_dropoff_datetime: string, passenger_count: string, trip_distance: string, payment_type: string, fare_amount: string, extra: string, mta_tax: string, tip_amount: string, tolls_amount: string, total_amount: string, congestion_surcharge: string, airport_fee: string, taxi_type: string, Pickup_Borough: string, Pickup_Zone: string, Pickup_service_zone: string, Dropoff_Borough: string, Dropoff_Zone: string, Dropoff_service_zone: string, route: string, Month: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#55 as string) AS tpep_pickup_datetime#450, cast(tpep_dropoff_datetime#56 as string) AS tpep_dropoff_datetime#451, cast(passenger_count#57 as string) AS passenger_count#452, cast(trip_distance#58 as string) AS trip_distance#453, cast(payment_type#61 as string) AS payment_type#454, cast(fare_amount#62 as string) AS fare_amount#455, cast(extra#63 as string) AS extra#456, cast(mta_tax#64 as string) AS mta_tax#457, cast(tip_amount#65 as string) AS tip_amount#458, cast(tolls_amount#66 as string) AS tolls_amount#459, cast(total_amount#67 as string) AS total_amount#460, cast(congestion_surcharge#68 as string) AS congestion_surcharge#461, cast(airport_fee#69 as string) AS airport_fee#462, cast(taxi_type#70 as string) AS taxi_type#463, cast(Pickup_Borough#147 as string) AS Pickup_Borough#464, cast(Pickup_Zone#168 as string) AS Pickup_Zone#465, cast(Pickup_service_zone#189 as string) AS Pickup_service_zone#466, cast(Dropoff_Borough#294 as string) AS Dropoff_Borough#467, cast(Dropoff_Zone#317 as string) AS Dropoff_Zone#468, cast(Dropoff_service_zone#340 as string) AS Dropoff_service_zone#469, cast(route#383 as string) AS route#470, cast(Month#405 as string) AS Month#471] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340, route#383, month(cast(tpep_pickup_datetime#55 as date)) AS Month#405] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340, concat_ws( to , Pickup_Borough#147, Dropoff_Borough#294) AS route#383] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Dropoff_Borough#294, Dropoff_Zone#317, Dropoff_service_zone#340] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Dropoff_Borough#294, Dropoff_Zone#317, service_zone#17 AS Dropoff_service_zone#340] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Dropoff_Borough#294, Zone#16 AS Dropoff_Zone#317, service_zone#17] +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, LocationID#14, Borough#15 AS Dropoff_Borough#294, Zone#16, service_zone#17] +- Join LeftOuter, (cast(DOLocationID#60 as int) = LocationID#14) :- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Pickup_Borough#147, Pickup_Zone#168, service_zone#17 AS Pickup_service_zone#189] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Pickup_Borough#147, Zone#16 AS Pickup_Zone#168, service_zone#17] : +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, PULocationID#59, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, LocationID#14, Borough#15 AS Pickup_Borough#147, Zone#16, service_zone#17] : +- Join LeftOuter, (cast(PULocationID#59 as int) = LocationID#14) : :- Relation[tpep_pickup_datetime#55,tpep_dropoff_datetime#56,passenger_count#57,trip_distance#58,PULocationID#59,DOLocationID#60,payment_type#61,fare_amount#62,extra#63,mta_tax#64,tip_amount#65,tolls_amount#66,total_amount#67,congestion_surcharge#68,airport_fee#69,taxi_type#70] csv : +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Borough#15 AS Dropoff_Borough#467, Zone#16 AS Dropoff_Zone#468, service_zone#17 AS Dropoff_service_zone#469, concat_ws( to , Pickup_Borough#147, Borough#15) AS route#470, cast(month(cast(tpep_pickup_datetime#55 as date)) as string) AS Month#471] +- Join LeftOuter, (cast(DOLocationID#60 as int) = LocationID#14) :- Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Borough#15 AS Pickup_Borough#147, Zone#16 AS Pickup_Zone#168, service_zone#17 AS Pickup_service_zone#189] : +- Join LeftOuter, (cast(PULocationID#59 as int) = LocationID#14) : :- Relation[tpep_pickup_datetime#55,tpep_dropoff_datetime#56,passenger_count#57,trip_distance#58,PULocationID#59,DOLocationID#60,payment_type#61,fare_amount#62,extra#63,mta_tax#64,tip_amount#65,tolls_amount#66,total_amount#67,congestion_surcharge#68,airport_fee#69,taxi_type#70] csv : +- Filter isnotnull(LocationID#14) : +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false +- Filter isnotnull(LocationID#14) +- LogicalRDD [LocationID#14, Borough#15, Zone#16, service_zone#17], false == Physical Plan == CollectLimit 11 +- *(8) Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Pickup_Borough#147, Pickup_Zone#168, Pickup_service_zone#189, Borough#15 AS Dropoff_Borough#467, Zone#16 AS Dropoff_Zone#468, service_zone#17 AS Dropoff_service_zone#469, concat_ws( to , Pickup_Borough#147, Borough#15) AS route#470, cast(month(cast(tpep_pickup_datetime#55 as date)) as string) AS Month#471] +- SortMergeJoin [cast(DOLocationID#60 as int)], [LocationID#14], LeftOuter :- *(5) Sort [cast(DOLocationID#60 as int) ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(cast(DOLocationID#60 as int), 200), true, [id=#139] : +- *(4) Project [tpep_pickup_datetime#55, tpep_dropoff_datetime#56, passenger_count#57, trip_distance#58, DOLocationID#60, payment_type#61, fare_amount#62, extra#63, mta_tax#64, tip_amount#65, tolls_amount#66, total_amount#67, congestion_surcharge#68, airport_fee#69, taxi_type#70, Borough#15 AS Pickup_Borough#147, Zone#16 AS Pickup_Zone#168, service_zone#17 AS Pickup_service_zone#189] : +- SortMergeJoin [cast(PULocationID#59 as int)], [LocationID#14], LeftOuter : :- *(1) Sort [cast(PULocationID#59 as int) ASC NULLS FIRST], false, 0 : : +- Exchange hashpartitioning(cast(PULocationID#59 as int), 200), true, [id=#112] : : +- FileScan csv [tpep_pickup_datetime#55,tpep_dropoff_datetime#56,passenger_count#57,trip_distance#58,PULocationID#59,DOLocationID#60,payment_type#61,fare_amount#62,extra#63,mta_tax#64,tip_amount#65,tolls_amount#66,total_amount#67,congestion_surcharge#68,airport_fee#69,taxi_type#70] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023/yellow_tripdata_..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<tpep_pickup_datetime:string,tpep_dropoff_datetime:string,passenger_count:string,trip_dista... : +- *(3) Sort [LocationID#14 ASC NULLS FIRST], false, 0 : +- Exchange hashpartitioning(LocationID#14, 200), true, [id=#131] : +- *(2) Filter isnotnull(LocationID#14) : +- *(2) Scan ExistingRDD[LocationID#14,Borough#15,Zone#16,service_zone#17] +- *(7) Sort [LocationID#14 ASC NULLS FIRST], false, 0 +- ReusedExchange [LocationID#14, Borough#15, Zone#16, service_zone#17], Exchange hashpartitioning(LocationID#14, 200), true, [id=#131]