== Parsed Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#70 as string) AS tpep_pickup_datetime#962, cast(route#539 as string) AS route#963, cast(Month#561 as string) AS Month#964] +- Project [tpep_pickup_datetime#70, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, coalesce(nanvl(passenger_count#888, cast(null as float)), cast(0 as float)) AS passenger_count#933, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, cast(passenger_count#72 as float) AS passenger_count#888, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, cast(congestion_surcharge#83 as decimal(10,2)) AS congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, cast(tolls_amount#81 as decimal(10,2)) AS tolls_amount#699, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, cast(mta_tax#79 as decimal(10,2)) AS mta_tax#676, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, cast(extra#78 as decimal(10,2)) AS extra#653, mta_tax#79, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, cast(fare_amount#77 as decimal(10,2)) AS fare_amount#630, extra#78, mta_tax#79, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#584, tolls_amount#81, cast(total_amount#82 as decimal(10,2)) AS total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, cast(tip_amount#80 as decimal(10,2)) AS tip_amount#584, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, month(cast(to_timestamp('tpep_pickup_datetime, Some(yyyy-MM-dd HH:mm:ss)) as date)) AS Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, concat_ws( to , Pickup_Borough#264, Dropoff_Borough#438) AS route#539] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448] +- Join Inner, (cast(DOLocationID#75 as int) = LocationID#16) :- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Pickup_Zone#285, service_zone#19 AS Pickup_service_zone#306] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Zone#18 AS Pickup_Zone#285, service_zone#19] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Borough#17 AS Pickup_Borough#264, Zone#18, service_zone#19] : +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Dropoff_Borough#438, Dropoff_Zone#443, service_zone#19 AS Dropoff_service_zone#448] +- Project [LocationID#16, Dropoff_Borough#438, Zone#18 AS Dropoff_Zone#443, service_zone#19] +- Project [LocationID#16, Borough#17 AS Dropoff_Borough#438, Zone#18, service_zone#19] +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == tpep_pickup_datetime: string, route: string, Month: string GlobalLimit 11 +- LocalLimit 11 +- Project [cast(tpep_pickup_datetime#70 as string) AS tpep_pickup_datetime#962, cast(route#539 as string) AS route#963, cast(Month#561 as string) AS Month#964] +- Project [tpep_pickup_datetime#70, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, coalesce(nanvl(passenger_count#888, cast(null as float)), cast(0 as float)) AS passenger_count#933, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, cast(passenger_count#72 as float) AS passenger_count#888, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, tolls_amount#699, total_amount#607, cast(congestion_surcharge#83 as decimal(10,2)) AS congestion_surcharge#722, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, mta_tax#676, tip_amount#584, cast(tolls_amount#81 as decimal(10,2)) AS tolls_amount#699, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, extra#653, cast(mta_tax#79 as decimal(10,2)) AS mta_tax#676, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#630, cast(extra#78 as decimal(10,2)) AS extra#653, mta_tax#79, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, cast(fare_amount#77 as decimal(10,2)) AS fare_amount#630, extra#78, mta_tax#79, tip_amount#584, tolls_amount#81, total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#584, tolls_amount#81, cast(total_amount#82 as decimal(10,2)) AS total_amount#607, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, cast(tip_amount#80 as decimal(10,2)) AS tip_amount#584, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, route#539, month(cast(to_timestamp('tpep_pickup_datetime, Some(yyyy-MM-dd HH:mm:ss)) as date)) AS Month#561] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448, concat_ws( to , Pickup_Borough#264, Dropoff_Borough#438) AS route#539] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306, Dropoff_Borough#438, Dropoff_Zone#443, Dropoff_service_zone#448] +- Join Inner, (cast(DOLocationID#75 as int) = LocationID#16) :- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Pickup_Zone#285, service_zone#19 AS Pickup_service_zone#306] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Zone#18 AS Pickup_Zone#285, service_zone#19] : +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Borough#17 AS Pickup_Borough#264, Zone#18, service_zone#19] : +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) : :- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Dropoff_Borough#438, Dropoff_Zone#443, service_zone#19 AS Dropoff_service_zone#448] +- Project [LocationID#16, Dropoff_Borough#438, Zone#18 AS Dropoff_Zone#443, service_zone#19] +- Project [LocationID#16, Borough#17 AS Dropoff_Borough#438, Zone#18, service_zone#19] +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 11 +- LocalLimit 11 +- Project [tpep_pickup_datetime#70, concat_ws( to , Pickup_Borough#264, Dropoff_Borough#438) AS route#963, cast(month(cast(gettimestamp(tpep_pickup_datetime#70, yyyy-MM-dd HH:mm:ss, Some(GMT)) as date)) as string) AS Month#964] +- Join Inner, (cast(DOLocationID#75 as int) = LocationID#16) :- Project [tpep_pickup_datetime#70, DOLocationID#75, Borough#17 AS Pickup_Borough#264] : +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) : :- Project [tpep_pickup_datetime#70, PULocationID#74, DOLocationID#75] : : +- Filter (isnotnull(PULocationID#74) AND isnotnull(DOLocationID#75)) : : +- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv : +- Project [LocationID#16, Borough#17] : +- Filter isnotnull(LocationID#16) : +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv +- Project [LocationID#16, Borough#17 AS Dropoff_Borough#438] +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 11 +- *(3) Project [tpep_pickup_datetime#70, concat_ws( to , Pickup_Borough#264, Dropoff_Borough#438) AS route#963, cast(month(cast(gettimestamp(tpep_pickup_datetime#70, yyyy-MM-dd HH:mm:ss, Some(GMT)) as date)) as string) AS Month#964] +- *(3) BroadcastHashJoin [cast(DOLocationID#75 as int)], [LocationID#16], Inner, BuildRight :- *(3) Project [tpep_pickup_datetime#70, DOLocationID#75, Borough#17 AS Pickup_Borough#264] : +- *(3) BroadcastHashJoin [cast(PULocationID#74 as int)], [LocationID#16], Inner, BuildRight : :- *(3) Project [tpep_pickup_datetime#70, PULocationID#74, DOLocationID#75] : : +- *(3) Filter (isnotnull(PULocationID#74) AND isnotnull(DOLocationID#75)) : : +- FileScan csv [tpep_pickup_datetime#70,PULocationID#74,DOLocationID#75] Batched: false, DataFilters: [isnotnull(PULocationID#74), isnotnull(DOLocationID#75)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [IsNotNull(PULocationID), IsNotNull(DOLocationID)], ReadSchema: struct<tpep_pickup_datetime:string,PULocationID:string,DOLocationID:string> : +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#373] : +- *(1) Project [LocationID#16, Borough#17] : +- *(1) Filter isnotnull(LocationID#16) : +- FileScan csv [LocationID#16,Borough#17] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string> +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#381] +- *(2) Project [LocationID#16, Borough#17 AS Dropoff_Borough#438] +- *(2) Filter isnotnull(LocationID#16) +- FileScan csv [LocationID#16,Borough#17] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string>