digraph G {
0 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n43.4 s (623 ms, 2.0 s, 3.1 s (stage 1055.0: task 15018))";
2 [labelType="html" label="<br><b>Project</b><br><br>"];
3 [labelType="html" label="<b>Filter</b><br><br>number of output rows: 6,190,277"];
}
4 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 2.7 GiB<br>number of output rows: 6,905,288"];
2->0;
3->2;
4->3;
}
5
TakeOrderedAndProject(limit=6, orderBy=[src#397 DESC NULLS LAST], output=[src#409,dst#410,weight#411])
Project [Pickup Community Area#24 AS src#397, Dropoff Community Area#25 AS dst#398, Trip Miles#21 AS weight#399]
Filter AtLeastNNulls(n, Pickup Community Area#24,Dropoff Community Area#25,Trip Miles#21)
WholeStageCodegen (1)
FileScan csv [Trip Miles#21,Pickup Community Area#24,Dropoff Community Area#25] Batched: false, DataFilters: [AtLeastNNulls(n, Pickup Community Area#24,Dropoff Community Area#25,Trip Miles#21)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Miles:double,Pickup Community Area:int,Dropoff Community Area:int>
== Parsed Logical Plan ==
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(src#397 as string) AS src#409, cast(dst#398 as string) AS dst#410, cast(weight#399 as string) AS weight#411]
+- Sort [src#397 DESC NULLS LAST], true
+- Filter AtLeastNNulls(n, src#397,dst#398,weight#399)
+- Project [Pickup Community Area#24 AS src#397, Dropoff Community Area#25 AS dst#398, cast(Trip Miles#21 as double) AS weight#399]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Analyzed Logical Plan ==
src: string, dst: string, weight: string
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(src#397 as string) AS src#409, cast(dst#398 as string) AS dst#410, cast(weight#399 as string) AS weight#411]
+- Sort [src#397 DESC NULLS LAST], true
+- Filter AtLeastNNulls(n, src#397,dst#398,weight#399)
+- Project [Pickup Community Area#24 AS src#397, Dropoff Community Area#25 AS dst#398, cast(Trip Miles#21 as double) AS weight#399]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Optimized Logical Plan ==
GlobalLimit 6
+- LocalLimit 6
+- Project [cast(src#397 as string) AS src#409, cast(dst#398 as string) AS dst#410, cast(weight#399 as string) AS weight#411]
+- Sort [src#397 DESC NULLS LAST], true
+- Project [Pickup Community Area#24 AS src#397, Dropoff Community Area#25 AS dst#398, Trip Miles#21 AS weight#399]
+- Filter AtLeastNNulls(n, Pickup Community Area#24,Dropoff Community Area#25,Trip Miles#21)
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Physical Plan ==
TakeOrderedAndProject(limit=6, orderBy=[src#397 DESC NULLS LAST], output=[src#409,dst#410,weight#411])
+- *(1) Project [Pickup Community Area#24 AS src#397, Dropoff Community Area#25 AS dst#398, Trip Miles#21 AS weight#399]
+- *(1) Filter AtLeastNNulls(n, Pickup Community Area#24,Dropoff Community Area#25,Trip Miles#21)
+- FileScan csv [Trip Miles#21,Pickup Community Area#24,Dropoff Community Area#25] Batched: false, DataFilters: [AtLeastNNulls(n, Pickup Community Area#24,Dropoff Community Area#25,Trip Miles#21)], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Miles:double,Pickup Community Area:int,Dropoff Community Area:int>