digraph G {
0 [labelType="html" label="<br><b>CollectLimit</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (3)";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 11"];
4 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 13"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
}
6 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 7<br>metadata time: 0 ms<br>size of files read: 47.0 MiB<br>number of output rows: 13"];
7 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1026.1 KiB<br>time to collect: 142 ms<br>time to build: 10 ms<br>time to broadcast: 5 ms"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 57 ms";
9 [labelType="html" label="<br><b>Project</b><br><br>"];
}
10 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 10.5 KiB<br>number of output rows: 265"];
11 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1040.0 KiB<br>time to collect: 165 ms<br>time to build: 10 ms<br>time to broadcast: 3 ms"];
subgraph cluster12 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 87 ms";
13 [labelType="html" label="<br><b>Project</b><br><br>"];
}
14 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 10.5 KiB<br>number of output rows: 265"];
2->0;
3->2;
4->3;
5->4;
6->5;
7->4;
9->7;
10->9;
11->3;
13->11;
14->13;
}
15
CollectLimit 11
Project [cast(src#123.id as string) AS src.id#215, cast(dst#125.id as string) AS dst.id#216, src#123.Borough AS Borough#217, src#123.service_zone AS service_zone#218]
BroadcastHashJoin [src#123.Borough, src#123.service_zone, edge#121.dst], [dst#125.Borough, dst#125.service_zone, dst#125.id], Inner, BuildRight
BroadcastHashJoin [edge#121.src], [src#123.id], Inner, BuildRight
Project [struct(src, PULocationID#18, dst, DOLocationID#19) AS edge#121]
WholeStageCodegen (3)
FileScan csv [PULocationID#18,DOLocationID#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/green_tripdata/2023/green_tripdata_20..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<PULocationID:int,DOLocationID:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].id as bigint))), [id=#218]
Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS src#123]
WholeStageCodegen (1)
FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>
BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].Borough, input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].service_zone, input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].id)), [id=#224]
Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS dst#125]
WholeStageCodegen (2)
FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>
== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src.id#199 as string) AS src.id#215, cast(dst.id#200 as string) AS dst.id#216, cast(Borough#201 as string) AS Borough#217, cast(service_zone#202 as string) AS service_zone#218]
+- Project [src#123.id AS src.id#199, dst#125.id AS dst.id#200, src#123.Borough AS Borough#201, src#123.service_zone AS service_zone#202]
+- Filter ((src#123.Borough = dst#125.Borough) AND (src#123.service_zone = dst#125.service_zone))
+- Project [src#123, edge#121, dst#125]
+- Join Inner, (edge#121.dst = dst#125.id)
:- Join Inner, (edge#121.src = src#123.id)
: :- Project [struct(src, src#85, dst, dst#86) AS edge#121]
: : +- Project [PULocationID#18 AS src#85, DOLocationID#19 AS dst#86]
: : +- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
: +- Project [struct(id, id#80, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS src#123]
: +- Project [LocationID#72 AS id#80, Borough#73, Zone#74, service_zone#75]
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(id, id#80, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS dst#125]
+- Project [LocationID#72 AS id#80, Borough#73, Zone#74, service_zone#75]
+- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
== Analyzed Logical Plan ==
src.id: string, dst.id: string, Borough: string, service_zone: string
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src.id#199 as string) AS src.id#215, cast(dst.id#200 as string) AS dst.id#216, cast(Borough#201 as string) AS Borough#217, cast(service_zone#202 as string) AS service_zone#218]
+- Project [src#123.id AS src.id#199, dst#125.id AS dst.id#200, src#123.Borough AS Borough#201, src#123.service_zone AS service_zone#202]
+- Filter ((src#123.Borough = dst#125.Borough) AND (src#123.service_zone = dst#125.service_zone))
+- Project [src#123, edge#121, dst#125]
+- Join Inner, (edge#121.dst = dst#125.id)
:- Join Inner, (edge#121.src = src#123.id)
: :- Project [struct(src, src#85, dst, dst#86) AS edge#121]
: : +- Project [PULocationID#18 AS src#85, DOLocationID#19 AS dst#86]
: : +- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
: +- Project [struct(id, id#80, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS src#123]
: +- Project [LocationID#72 AS id#80, Borough#73, Zone#74, service_zone#75]
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(id, id#80, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS dst#125]
+- Project [LocationID#72 AS id#80, Borough#73, Zone#74, service_zone#75]
+- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
+- Project [cast(src#123.id as string) AS src.id#215, cast(dst#125.id as string) AS dst.id#216, src#123.Borough AS Borough#217, src#123.service_zone AS service_zone#218]
+- Join Inner, (((src#123.Borough = dst#125.Borough) AND (src#123.service_zone = dst#125.service_zone)) AND (edge#121.dst = dst#125.id))
:- Join Inner, (edge#121.src = src#123.id)
: :- Project [struct(src, PULocationID#18, dst, DOLocationID#19) AS edge#121]
: : +- Relation[lpep_pickup_datetime#16,lpep_dropoff_datetime#17,PULocationID#18,DOLocationID#19,passenger_count#20,trip_distance#21,fare_amount#22,extra#23,mta_tax#24,tip_amount#25,tolls_amount#26,ehail_fee#27,total_amount#28,payment_type#29,trip_type#30,congestion_surcharge#31,taxi_type#32] csv
: +- Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS src#123]
: +- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
+- Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS dst#125]
+- Relation[LocationID#72,Borough#73,Zone#74,service_zone#75] csv
== Physical Plan ==
CollectLimit 11
+- *(3) Project [cast(src#123.id as string) AS src.id#215, cast(dst#125.id as string) AS dst.id#216, src#123.Borough AS Borough#217, src#123.service_zone AS service_zone#218]
+- *(3) BroadcastHashJoin [src#123.Borough, src#123.service_zone, edge#121.dst], [dst#125.Borough, dst#125.service_zone, dst#125.id], Inner, BuildRight
:- *(3) BroadcastHashJoin [edge#121.src], [src#123.id], Inner, BuildRight
: :- *(3) Project [struct(src, PULocationID#18, dst, DOLocationID#19) AS edge#121]
: : +- FileScan csv [PULocationID#18,DOLocationID#19] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/green_tripdata/2023/green_tripdata_20..., PartitionFilters: [], PushedFilters: [], ReadSchema: struct<PULocationID:int,DOLocationID:int>
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].id as bigint))), [id=#218]
: +- *(1) Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS src#123]
: +- FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>
+- BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].Borough, input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].service_zone, input[0, struct<id:int,Borough:string,Zone:string,service_zone:string>, false].id)), [id=#224]
+- *(2) Project [struct(id, LocationID#72, Borough, Borough#73, Zone, Zone#74, service_zone, service_zone#75) AS dst#125]
+- FileScan csv [LocationID#72,Borough#73,Zone#74,service_zone#75] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/nyc_taxi/taxi_zone_lookup.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<LocationID:int,Borough:string,Zone:string,service_zone:string>