== Parsed Logical Plan == GlobalLimit 6 +- LocalLimit 6 +- Project [cast(tpep_pickup_datetime#70 as string) AS tpep_pickup_datetime#363, cast(tpep_dropoff_datetime#71 as string) AS tpep_dropoff_datetime#364, cast(passenger_count#72 as string) AS passenger_count#365, cast(trip_distance#73 as string) AS trip_distance#366, cast(DOLocationID#75 as string) AS DOLocationID#367, cast(payment_type#76 as string) AS payment_type#368, cast(fare_amount#77 as string) AS fare_amount#369, cast(extra#78 as string) AS extra#370, cast(mta_tax#79 as string) AS mta_tax#371, cast(tip_amount#80 as string) AS tip_amount#372, cast(tolls_amount#81 as string) AS tolls_amount#373, cast(total_amount#82 as string) AS total_amount#374, cast(congestion_surcharge#83 as string) AS congestion_surcharge#375, cast(airport_fee#84 as string) AS airport_fee#376, cast(taxi_type#85 as string) AS taxi_type#377, cast(Pickup_Borough#264 as string) AS Pickup_Borough#378, cast(Pickup_Zone#285 as string) AS Pickup_Zone#379, cast(Pickup_service_zone#306 as string) AS Pickup_service_zone#380] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Pickup_Zone#285, service_zone#19 AS Pickup_service_zone#306] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Zone#18 AS Pickup_Zone#285, service_zone#19] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Borough#17 AS Pickup_Borough#264, Zone#18, service_zone#19] +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) :- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Analyzed Logical Plan == tpep_pickup_datetime: string, tpep_dropoff_datetime: string, passenger_count: string, trip_distance: string, DOLocationID: string, payment_type: string, fare_amount: string, extra: string, mta_tax: string, tip_amount: string, tolls_amount: string, total_amount: string, congestion_surcharge: string, airport_fee: string, taxi_type: string, Pickup_Borough: string, Pickup_Zone: string, Pickup_service_zone: string GlobalLimit 6 +- LocalLimit 6 +- Project [cast(tpep_pickup_datetime#70 as string) AS tpep_pickup_datetime#363, cast(tpep_dropoff_datetime#71 as string) AS tpep_dropoff_datetime#364, cast(passenger_count#72 as string) AS passenger_count#365, cast(trip_distance#73 as string) AS trip_distance#366, cast(DOLocationID#75 as string) AS DOLocationID#367, cast(payment_type#76 as string) AS payment_type#368, cast(fare_amount#77 as string) AS fare_amount#369, cast(extra#78 as string) AS extra#370, cast(mta_tax#79 as string) AS mta_tax#371, cast(tip_amount#80 as string) AS tip_amount#372, cast(tolls_amount#81 as string) AS tolls_amount#373, cast(total_amount#82 as string) AS total_amount#374, cast(congestion_surcharge#83 as string) AS congestion_surcharge#375, cast(airport_fee#84 as string) AS airport_fee#376, cast(taxi_type#85 as string) AS taxi_type#377, cast(Pickup_Borough#264 as string) AS Pickup_Borough#378, cast(Pickup_Zone#285 as string) AS Pickup_Zone#379, cast(Pickup_service_zone#306 as string) AS Pickup_service_zone#380] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Pickup_Borough#264, Pickup_Zone#285, Pickup_service_zone#306] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Pickup_Zone#285, service_zone#19 AS Pickup_service_zone#306] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Pickup_Borough#264, Zone#18 AS Pickup_Zone#285, service_zone#19] +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, LocationID#16, Borough#17 AS Pickup_Borough#264, Zone#18, service_zone#19] +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) :- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Optimized Logical Plan == GlobalLimit 6 +- LocalLimit 6 +- Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Borough#17 AS Pickup_Borough#378, Zone#18 AS Pickup_Zone#379, service_zone#19 AS Pickup_service_zone#380] +- Join Inner, (cast(PULocationID#74 as int) = LocationID#16) :- Filter isnotnull(PULocationID#74) : +- Relation[tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] csv +- Filter isnotnull(LocationID#16) +- Relation[LocationID#16,Borough#17,Zone#18,service_zone#19] csv == Physical Plan == CollectLimit 6 +- *(2) Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85, Borough#17 AS Pickup_Borough#378, Zone#18 AS Pickup_Zone#379, service_zone#19 AS Pickup_service_zone#380] +- *(2) BroadcastHashJoin [cast(PULocationID#74 as int)], [LocationID#16], Inner, BuildRight :- *(2) Project [tpep_pickup_datetime#70, tpep_dropoff_datetime#71, passenger_count#72, trip_distance#73, PULocationID#74, DOLocationID#75, payment_type#76, fare_amount#77, extra#78, mta_tax#79, tip_amount#80, tolls_amount#81, total_amount#82, congestion_surcharge#83, airport_fee#84, taxi_type#85] : +- *(2) Filter isnotnull(PULocationID#74) : +- FileScan csv [tpep_pickup_datetime#70,tpep_dropoff_datetime#71,passenger_count#72,trip_distance#73,PULocationID#74,DOLocationID#75,payment_type#76,fare_amount#77,extra#78,mta_tax#79,tip_amount#80,tolls_amount#81,total_amount#82,congestion_surcharge#83,airport_fee#84,taxi_type#85] Batched: false, DataFilters: [isnotnull(PULocationID#74)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/yellow_tripdata/2023], PartitionFilters: [], PushedFilters: [IsNotNull(PULocationID)], ReadSchema: struct<tpep_pickup_datetime:string,tpep_dropoff_datetime:string,passenger_count:string,trip_dista... +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, int, true] as bigint))), [id=#131] +- *(1) Project [LocationID#16, Borough#17, Zone#18, service_zone#19] +- *(1) Filter isnotnull(LocationID#16) +- FileScan csv [LocationID#16,Borough#17,Zone#18,service_zone#19] Batched: false, DataFilters: [isnotnull(LocationID#16)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [IsNotNull(LocationID)], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>