digraph G {
0 [labelType="html" label="<br><b>TakeOrderedAndProject</b><br><br>"];
subgraph cluster1 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n344 ms (0 ms, 1 ms, 48 ms (stage 3.0: task 45))";
2 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>14 ms (0 ms, 0 ms, 13 ms (stage 3.0: task 45))<br>peak memory total (min, med, max (stageId: taskId))<br>114.0 MiB (256.0 KiB, 256.0 KiB, 64.3 MiB (stage 3.0: task 45))<br>number of output rows: 1<br>avg hash probe bucket list iters: 1"];
}
3 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 22<br>shuffle write time total (min, med, max (stageId: taskId))<br>132 ms (2 ms, 4 ms, 27 ms (stage 2.0: task 23))<br>records read: 22<br>local bytes read total (min, med, max (stageId: taskId))<br>600.0 B (0.0 B, 0.0 B, 600.0 B (stage 3.0: task 45))<br>fetch wait time total (min, med, max (stageId: taskId))<br>0 ms (0 ms, 0 ms, 0 ms (stage 3.0: task 45))<br>remote bytes read total (min, med, max (stageId: taskId))<br>720.0 B (0.0 B, 0.0 B, 720.0 B (stage 3.0: task 45))<br>local blocks read: 10<br>remote blocks read: 12<br>data size total (min, med, max (stageId: taskId))<br>528.0 B (24.0 B, 24.0 B, 24.0 B (stage 2.0: task 24))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1320.0 B (60.0 B, 60.0 B, 60.0 B (stage 2.0: task 24))"];
subgraph cluster4 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n37.7 s (716 ms, 1.7 s, 2.4 s (stage 2.0: task 23))";
5 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>37.2 s (705 ms, 1.6 s, 2.2 s (stage 2.0: task 23))<br>peak memory total (min, med, max (stageId: taskId))<br>1413.5 MiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 2.0: task 24))<br>number of output rows: 22<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 2.0: task 24))"];
6 [labelType="html" label="<br><b>Project</b><br><br>"];
}
7 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 2.7 GiB<br>number of output rows: 6,905,288"];
2->0;
3->2;
5->3;
6->5;
7->6;
}
8
TakeOrderedAndProject(limit=21, orderBy=[Start Hour#62 ASC NULLS FIRST], output=[Start Hour#117,count#118])
HashAggregate(keys=[Start Hour#62], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(Start Hour#62, 200), true, [id=#41]
HashAggregate(keys=[Start Hour#62], functions=[partial_count(1)])
Project [cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
WholeStageCodegen (1)
FileScan csv [Trip Start Timestamp#18] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Start Timestamp:string>
== Parsed Logical Plan ==
GlobalLimit 21
+- LocalLimit 21
+- Project [cast(Start Hour#62 as string) AS Start Hour#117, cast(count#112L as string) AS count#118]
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [Trip ID#16, Taxi ID#17, Trip Start Timestamp#18, Trip End Timestamp#19, Trip Seconds#20, Trip Miles#21, Pickup Census Tract#22L, Dropoff Census Tract#23L, Pickup Community Area#24, Dropoff Community Area#25, Fare#26, Tips#27, Tolls#28, Extras#29, Trip Total#30, Payment Type#31, Company#32, Pickup Centroid Latitude#33, Pickup Centroid Longitude#34, Pickup Centroid Location#35, Dropoff Centroid Latitude#36, Dropoff Centroid Longitude#37, Dropoff Centroid Location#38, cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Analyzed Logical Plan ==
Start Hour: string, count: string
GlobalLimit 21
+- LocalLimit 21
+- Project [cast(Start Hour#62 as string) AS Start Hour#117, cast(count#112L as string) AS count#118]
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [Trip ID#16, Taxi ID#17, Trip Start Timestamp#18, Trip End Timestamp#19, Trip Seconds#20, Trip Miles#21, Pickup Census Tract#22L, Dropoff Census Tract#23L, Pickup Community Area#24, Dropoff Community Area#25, Fare#26, Tips#27, Tolls#28, Extras#29, Trip Total#30, Payment Type#31, Company#32, Pickup Centroid Latitude#33, Pickup Centroid Longitude#34, Pickup Centroid Location#35, Dropoff Centroid Latitude#36, Dropoff Centroid Longitude#37, Dropoff Centroid Location#38, cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Optimized Logical Plan ==
GlobalLimit 21
+- LocalLimit 21
+- Project [cast(Start Hour#62 as string) AS Start Hour#117, cast(count#112L as string) AS count#118]
+- Sort [Start Hour#62 ASC NULLS FIRST], true
+- Aggregate [Start Hour#62], [Start Hour#62, count(1) AS count#112L]
+- Project [cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- Relation[Trip ID#16,Taxi ID#17,Trip Start Timestamp#18,Trip End Timestamp#19,Trip Seconds#20,Trip Miles#21,Pickup Census Tract#22L,Dropoff Census Tract#23L,Pickup Community Area#24,Dropoff Community Area#25,Fare#26,Tips#27,Tolls#28,Extras#29,Trip Total#30,Payment Type#31,Company#32,Pickup Centroid Latitude#33,Pickup Centroid Longitude#34,Pickup Centroid Location#35,Dropoff Centroid Latitude#36,Dropoff Centroid Longitude#37,Dropoff Centroid Location#38] csv
== Physical Plan ==
TakeOrderedAndProject(limit=21, orderBy=[Start Hour#62 ASC NULLS FIRST], output=[Start Hour#117,count#118])
+- *(2) HashAggregate(keys=[Start Hour#62], functions=[count(1)], output=[Start Hour#62, count#112L])
+- Exchange hashpartitioning(Start Hour#62, 200), true, [id=#41]
+- *(1) HashAggregate(keys=[Start Hour#62], functions=[partial_count(1)], output=[Start Hour#62, count#122L])
+- *(1) Project [cast(date_format(cast(Trip Start Timestamp#18 as timestamp), HH, Some(GMT)) as int) AS Start Hour#62]
+- FileScan csv [Trip Start Timestamp#18] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Start Timestamp:string>