graphframes - Details for Query 2

Details for Query 2

Submitted Time: 2024/10/30 23:02:39
Duration: 3 s
Succeeded Jobs: 2 3 4

Show the Stage ID and Task ID that corresponds to the max metric

digraph G { 0 [labelType="html" label=" CollectLimit "]; subgraph cluster1 { isCluster="true"; label="WholeStageCodegen (3)"; 2 [labelType="html" label=" Project "]; 3 [labelType="html" label="BroadcastHashJoin number of output rows: 11"]; 4 [labelType="html" label="BroadcastHashJoin number of output rows: 11"]; 5 [labelType="html" label=" Project "]; } 6 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 11"]; 7 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 332 ms time to build: 50 ms time to broadcast: 7 ms"]; subgraph cluster8 { isCluster="true"; label="WholeStageCodegen (1)\n \nduration: 163 ms"; 9 [labelType="html" label=" Project "]; } 10 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 11 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 2.6 s time to build: 19 ms time to broadcast: 12 ms"]; subgraph cluster12 { isCluster="true"; label="WholeStageCodegen (2)\n \nduration: 1.5 s"; 13 [labelType="html" label=" Project "]; } 14 [labelType="html" label="Scan csv number of files read: 1 metadata time: 1 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 2->0; 3->2; 4->3; 5->4; 6->5; 7->4; 9->7; 10->9; 11->3; 13->11; 14->13; }

CollectLimit 11

Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]

BroadcastHashJoin [edge#58.dst], [dst#62.id], Inner, BuildRight

BroadcastHashJoin [edge#58.src], [src#60.id], Inner, BuildRight

Project [struct(src, src#0, dst, dst#1) AS edge#58]

WholeStageCodegen (3)

FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>

BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#68]

Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]

WholeStageCodegen (1)

FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...

WholeStageCodegen (2)

Details

== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
      +- Project [src#60, edge#58, dst#62]
         +- Join Inner, (edge#58.dst = dst#62.id)
            :- Join Inner, (edge#58.src = src#60.id)
            :  :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
            :  :  +- Relation[src#0,dst#1] csv
            :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
            :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
            +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
               +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Analyzed Logical Plan ==
src: string, edge: string, dst: string
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
      +- Project [src#60, edge#58, dst#62]
         +- Join Inner, (edge#58.dst = dst#62.id)
            :- Join Inner, (edge#58.src = src#60.id)
            :  :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
            :  :  +- Relation[src#0,dst#1] csv
            :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
            :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
            +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
               +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
      +- Join Inner, (edge#58.dst = dst#62.id)
         :- Join Inner, (edge#58.src = src#60.id)
         :  :- Project [struct(src, src#0, dst, dst#1) AS edge#58]
         :  :  +- Relation[src#0,dst#1] csv
         :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
         :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
         +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
            +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Physical Plan ==
CollectLimit 11
+- *(3) Project [cast(src#60 as string) AS src#89, cast(edge#58 as string) AS edge#90, cast(dst#62 as string) AS dst#91]
   +- *(3) BroadcastHashJoin [edge#58.dst], [dst#62.id], Inner, BuildRight
      :- *(3) BroadcastHashJoin [edge#58.src], [src#60.id], Inner, BuildRight
      :  :- *(3) Project [struct(src, src#0, dst, dst#1) AS edge#58]
      :  :  +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
      :  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#68]
      :     +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS src#60]
      :        +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
      +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#74]
         +- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS dst#62]
            +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...