digraph G {
0 [labelType="html" label="<b>Execute InsertIntoHadoopFsRelationCommand</b><br><br>number of written files: 1<br>written output: 242.0 B<br>number of output rows: 24<br>number of dynamic part: 0"];
1 [labelType="html" label="<br><b>Coalesce</b><br><br>"];
subgraph cluster2 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 57 ms";
3 [labelType="html" label="<b>Sort</b><br><br>sort time: 0 ms<br>peak memory: 1537.5 MiB<br>spill size: 0.0 B"];
}
4 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 24<br>shuffle write time total (min, med, max (stageId: taskId))<br>17 ms (0 ms, 0 ms, 1 ms (stage 7.0: task 284))<br>records read: 24<br>local bytes read: 815.0 B<br>fetch wait time: 5 ms<br>remote bytes read: 693.0 B<br>local blocks read: 13<br>remote blocks read: 11<br>data size total (min, med, max (stageId: taskId))<br>576.0 B (0.0 B, 0.0 B, 48.0 B (stage 7.0: task 272))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>1508.0 B (0.0 B, 0.0 B, 126.0 B (stage 7.0: task 272))"];
subgraph cluster5 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: total (min, med, max (stageId: taskId))\n807 ms (0 ms, 1 ms, 18 ms (stage 5.0: task 68))";
6 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>189 ms (0 ms, 0 ms, 13 ms (stage 5.0: task 68))<br>peak memory total (min, med, max (stageId: taskId))<br>2.8 GiB (256.0 KiB, 256.0 KiB, 64.3 MiB (stage 5.0: task 69))<br>number of output rows: 48<br>avg hash probe bucket list iters (min, med, max (stageId: taskId)):<br>(1, 1, 1 (stage 5.0: task 69))"];
}
7 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 528<br>shuffle write time total (min, med, max (stageId: taskId))<br>159 ms (4 ms, 5 ms, 25 ms (stage 4.0: task 47))<br>records read: 1,056<br>local bytes read total (min, med, max (stageId: taskId))<br>30.6 KiB (0.0 B, 0.0 B, 936.0 B (stage 5.0: task 78))<br>fetch wait time total (min, med, max (stageId: taskId))<br>30 ms (0 ms, 0 ms, 4 ms (stage 7.0: task 283))<br>remote bytes read total (min, med, max (stageId: taskId))<br>30.0 KiB (0.0 B, 0.0 B, 936.0 B (stage 7.0: task 278))<br>local blocks read: 488<br>remote blocks read: 480<br>data size total (min, med, max (stageId: taskId))<br>12.4 KiB (576.0 B, 576.0 B, 576.0 B (stage 4.0: task 46))<br>shuffle bytes written total (min, med, max (stageId: taskId))<br>30.3 KiB (1408.0 B, 1412.0 B, 1412.0 B (stage 4.0: task 46))"];
subgraph cluster8 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: total (min, med, max (stageId: taskId))\n54.3 s (934 ms, 2.5 s, 3.5 s (stage 4.0: task 47))";
9 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build total (min, med, max (stageId: taskId))<br>53.8 s (921 ms, 2.5 s, 3.3 s (stage 4.0: task 47))<br>peak memory total (min, med, max (stageId: taskId))<br>5.5 MiB (256.0 KiB, 256.0 KiB, 256.0 KiB (stage 4.0: task 46))<br>number of output rows: 528"];
10 [labelType="html" label="<br><b>Project</b><br><br>"];
}
11 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 2.7 GiB<br>number of output rows: 6,905,288"];
1->0;
3->1;
4->3;
6->4;
7->6;
9->7;
10->9;
11->10;
}
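The block above is the Graphviz DOT source the Spark UI uses to draw the SQL DAG: one node per physical operator, one cluster per WholeStageCodegen stage, and edges running from each operator to its consumer. A minimal sketch for re-rendering it outside the UI, assuming the DOT text is saved as plan.dot (a hypothetical filename) and the graphviz Python package plus the Graphviz binaries are installed:

from graphviz import Source

# Render the DAG above to plan.svg. labelType="html" is consumed by the
# Spark UI's dagre-d3 renderer; plain Graphviz ignores unknown attributes,
# so the <b>/<br> tags inside the labels will show as literal text.
with open("plan.dot") as f:  # plan.dot: hypothetical file holding the DOT source above
    Source(f.read()).render("plan", format="svg", cleanup=True)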
Execute InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
Coalesce 1
Sort [Start Hour#172 ASC NULLS FIRST], true, 0
WholeStageCodegen (3)
Exchange rangepartitioning(Start Hour#172 ASC NULLS FIRST, 200), true, [id=#77]
HashAggregate(keys=[Start Hour#172], functions=[count(1)])
WholeStageCodegen (2)
Exchange hashpartitioning(Start Hour#172, 200), true, [id=#73]
HashAggregate(keys=[Start Hour#172], functions=[partial_count(1)])
Project [hour(CASE WHEN isnull(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT))) THEN gettimestamp(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)), MM/dd/yyyy HH:mm, Some(GMT)) ELSE gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)) END, Some(GMT)) AS Start Hour#172]
WholeStageCodegen (1)
FileScan csv [Trip Start Timestamp#80] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Start Timestamp:string>
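The twelve lines above reproduce the node labels from the DAG, listed sink-first (from the write command down to the CSV scan). The four == ... == sections that follow are Spark's extended-explain output; for a DataFrame query the same dump can be produced directly. A minimal sketch, assuming a hypothetical DataFrame result holding the aggregation before the write:

# Prints the same four sections: Parsed, Analyzed, and Optimized Logical
# Plan, then the Physical Plan. `result` is a hypothetical DataFrame; the
# InsertIntoHadoopFsRelationCommand node only shows up in the UI's SQL tab
# once the write actually runs.
result.explain(extended=True)
# Spark 3.0+ equivalent: result.explain(mode="extended")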
== Parsed Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#172 ASC NULLS FIRST], true
+- Aggregate [Start Hour#172], [Start Hour#172, count(1) AS count#222L]
+- Project [Trip ID#78, Taxi ID#79, Trip Start Timestamp#148, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100, hour(Trip Start Timestamp#148, Some(GMT)) AS Start Hour#172]
+- Project [Trip ID#78, Taxi ID#79, CASE WHEN isnull(Trip Start Timestamp#124) THEN to_timestamp('Trip Start Timestamp, Some(MM/dd/yyyy HH:mm)) ELSE Trip Start Timestamp#124 END AS Trip Start Timestamp#148, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100]
+- Project [Trip ID#78, Taxi ID#79, to_timestamp('Trip Start Timestamp, Some(MM/dd/yyyy hh:mm:ss a)) AS Trip Start Timestamp#124, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100]
+- Relation[Trip ID#78,Taxi ID#79,Trip Start Timestamp#80,Trip End Timestamp#81,Trip Seconds#82,Trip Miles#83,Pickup Census Tract#84L,Dropoff Census Tract#85L,Pickup Community Area#86,Dropoff Community Area#87,Fare#88,Tips#89,Tolls#90,Extras#91,Trip Total#92,Payment Type#93,Company#94,Pickup Centroid Latitude#95,Pickup Centroid Longitude#96,Pickup Centroid Location#97,Dropoff Centroid Latitude#98,Dropoff Centroid Longitude#99,Dropoff Centroid Location#100] csv
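The two innermost Project nodes above carry the timestamp handling: a first to_timestamp with the 12-hour pattern MM/dd/yyyy hh:mm:ss a, then a CASE WHEN that retries null results with the 24-hour pattern MM/dd/yyyy HH:mm. Because the retry resolves against the already-overwritten column, the optimized plan below ends up with a nested gettimestamp(gettimestamp(...)): the fallback appears to re-parse the (null) parsed value rather than the raw string, so the second format never actually rescues a row. A hedged sketch of the fallback as presumably intended, applying both formats to the raw string column; the column name and format strings come from the plan, the rest is assumed:

from pyspark.sql.functions import coalesce, col, to_timestamp

# Try the 12-hour pattern first; rows it leaves null fall through to the
# 24-hour pattern. Both parses see the original string column, which avoids
# the nested gettimestamp(gettimestamp(...)) in the optimized plan.
parsed = coalesce(
    to_timestamp(col("Trip Start Timestamp"), "MM/dd/yyyy hh:mm:ss a"),
    to_timestamp(col("Trip Start Timestamp"), "MM/dd/yyyy HH:mm"),
)
df = df.withColumn("Trip Start Timestamp", parsed)  # df: hypothetical raw DataFrame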
== Analyzed Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#172 ASC NULLS FIRST], true
+- Aggregate [Start Hour#172], [Start Hour#172, count(1) AS count#222L]
+- Project [Trip ID#78, Taxi ID#79, Trip Start Timestamp#148, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100, hour(Trip Start Timestamp#148, Some(GMT)) AS Start Hour#172]
+- Project [Trip ID#78, Taxi ID#79, CASE WHEN isnull(Trip Start Timestamp#124) THEN to_timestamp('Trip Start Timestamp, Some(MM/dd/yyyy HH:mm)) ELSE Trip Start Timestamp#124 END AS Trip Start Timestamp#148, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100]
+- Project [Trip ID#78, Taxi ID#79, to_timestamp('Trip Start Timestamp, Some(MM/dd/yyyy hh:mm:ss a)) AS Trip Start Timestamp#124, Trip End Timestamp#81, Trip Seconds#82, Trip Miles#83, Pickup Census Tract#84L, Dropoff Census Tract#85L, Pickup Community Area#86, Dropoff Community Area#87, Fare#88, Tips#89, Tolls#90, Extras#91, Trip Total#92, Payment Type#93, Company#94, Pickup Centroid Latitude#95, Pickup Centroid Longitude#96, Pickup Centroid Location#97, Dropoff Centroid Latitude#98, Dropoff Centroid Longitude#99, Dropoff Centroid Location#100]
+- Relation[Trip ID#78,Taxi ID#79,Trip Start Timestamp#80,Trip End Timestamp#81,Trip Seconds#82,Trip Miles#83,Pickup Census Tract#84L,Dropoff Census Tract#85L,Pickup Community Area#86,Dropoff Community Area#87,Fare#88,Tips#89,Tolls#90,Extras#91,Trip Total#92,Payment Type#93,Company#94,Pickup Centroid Latitude#95,Pickup Centroid Longitude#96,Pickup Centroid Location#97,Dropoff Centroid Latitude#98,Dropoff Centroid Longitude#99,Dropoff Centroid Location#100] csv
== Optimized Logical Plan ==
InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Repartition 1, false
+- Sort [Start Hour#172 ASC NULLS FIRST], true
+- Aggregate [Start Hour#172], [Start Hour#172, count(1) AS count#222L]
+- Project [hour(CASE WHEN isnull(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT))) THEN gettimestamp(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)), MM/dd/yyyy HH:mm, Some(GMT)) ELSE gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)) END, Some(GMT)) AS Start Hour#172]
+- Relation[Trip ID#78,Taxi ID#79,Trip Start Timestamp#80,Trip End Timestamp#81,Trip Seconds#82,Trip Miles#83,Pickup Census Tract#84L,Dropoff Census Tract#85L,Pickup Community Area#86,Dropoff Community Area#87,Fare#88,Tips#89,Tolls#90,Extras#91,Trip Total#92,Payment Type#93,Company#94,Pickup Centroid Latitude#95,Pickup Centroid Longitude#96,Pickup Centroid Location#97,Dropoff Centroid Latitude#98,Dropoff Centroid Longitude#99,Dropoff Centroid Location#100] csv
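Relative to the analyzed plan, the optimizer has collapsed the three stacked Project nodes into a single expression and pruned the 23-column relation down to the one referenced column (compare ReadSchema: struct<Trip Start Timestamp:string> in the physical plan below). The same stages can also be inspected programmatically; a hedged sketch via PySpark's JVM-side QueryExecution handle, an internal API that may change between versions, with result the hypothetical DataFrame from the earlier sketch:

# Walk the four plan stages shown in this dump (internal API; unstable).
qe = result._jdf.queryExecution()
print(qe.logical().toString())        # == Parsed Logical Plan ==
print(qe.analyzed().toString())       # == Analyzed Logical Plan ==
print(qe.optimizedPlan().toString())  # == Optimized Logical Plan ==
print(qe.executedPlan().toString())   # == Physical Plan ==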
== Physical Plan ==
Execute InsertIntoHadoopFsRelationCommand s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv, false, CSV, Map(header -> true, path -> s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/trip_counts_by_hour.csv), Overwrite, [Start Hour, count]
+- Coalesce 1
+- *(3) Sort [Start Hour#172 ASC NULLS FIRST], true, 0
+- Exchange rangepartitioning(Start Hour#172 ASC NULLS FIRST, 200), true, [id=#77]
+- *(2) HashAggregate(keys=[Start Hour#172], functions=[count(1)], output=[Start Hour#172, count#222L])
+- Exchange hashpartitioning(Start Hour#172, 200), true, [id=#73]
+- *(1) HashAggregate(keys=[Start Hour#172], functions=[partial_count(1)], output=[Start Hour#172, count#228L])
+- *(1) Project [hour(CASE WHEN isnull(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT))) THEN gettimestamp(gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)), MM/dd/yyyy HH:mm, Some(GMT)) ELSE gettimestamp(Trip Start Timestamp#80, MM/dd/yyyy hh:mm:ss a, Some(GMT)) END, Some(GMT)) AS Start Hour#172]
+- FileScan csv [Trip Start Timestamp#80] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<Trip Start Timestamp:string>
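Read end to end, the plan is consistent with a job of the following shape. A hedged reconstruction in PySpark: the paths, formats, column names, and write options are taken verbatim from the plan, while variable names and everything else are assumptions:

from pyspark.sql import SparkSession
from pyspark.sql.functions import col, hour, to_timestamp, when

spark = SparkSession.builder.appName("trip_counts_by_hour").getOrCreate()

# FileScan csv: header'd read of the source file.
trips = spark.read.option("header", True).csv(
    "s3a://data-repository-bkt/ECS765/Chicago_Taxitrips/chicago_taxi_trips.csv")

# Project: the two-format parse behind the CASE WHEN, then hour().
# NB: this reproduces the plan's CASE WHEN exactly, including the quirk
# noted earlier -- the fallback parses the already-overwritten column.
trips = trips.withColumn(
    "Trip Start Timestamp",
    to_timestamp(col("Trip Start Timestamp"), "MM/dd/yyyy hh:mm:ss a"))
trips = trips.withColumn(
    "Trip Start Timestamp",
    when(col("Trip Start Timestamp").isNull(),
         to_timestamp(col("Trip Start Timestamp"), "MM/dd/yyyy HH:mm"))
    .otherwise(col("Trip Start Timestamp")))
trips = trips.withColumn("Start Hour", hour(col("Trip Start Timestamp")))

# HashAggregate (partial) -> Exchange hashpartitioning -> HashAggregate
# (final) for the groupBy/count; Exchange rangepartitioning + Sort for the
# orderBy; Coalesce 1 for a single output file; then the CSV write that
# becomes InsertIntoHadoopFsRelationCommand.
result = trips.groupBy("Start Hour").count().orderBy("Start Hour")
(result.coalesce(1)
       .write.option("header", True)
       .mode("overwrite")
       .csv("s3a://object-bucket-eex654-45a8c32f-acaa-44b7-aa06-d996030307e6/"
            "trip_counts_by_hour.csv"))

Both Exchange nodes repartition into 200 partitions, the default spark.sql.shuffle.partitions; with only 24 distinct hours in the result, lowering it (e.g. spark.conf.set("spark.sql.shuffle.partitions", "24")) would trim the mostly-empty shuffle tasks visible in the per-task metrics.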