graphframes - Details for Query 4

Details for Query 4

Submitted Time: 2024/10/30 23:02:44
Duration: 0.5 s
Succeeded Jobs: 8 9 10

Show the Stage ID and Task ID that corresponds to the max metric

digraph G { 0 [labelType="html" label=" CollectLimit "]; subgraph cluster1 { isCluster="true"; label="WholeStageCodegen (3)"; 2 [labelType="html" label=" Project "]; 3 [labelType="html" label="BroadcastHashJoin number of output rows: 11"]; 4 [labelType="html" label="BroadcastHashJoin number of output rows: 522"]; 5 [labelType="html" label=" Project "]; } 6 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 522"]; 7 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 236 ms time to build: 16 ms time to broadcast: 5 ms"]; subgraph cluster8 { isCluster="true"; label="WholeStageCodegen (1)\n \nduration: 111 ms"; 9 [labelType="html" label=" Project "]; } 10 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 11 [labelType="html" label="BroadcastExchange data size: 1088.0 KiB time to collect: 231 ms time to build: 18 ms time to broadcast: 4 ms"]; subgraph cluster12 { isCluster="true"; label="WholeStageCodegen (2)\n \nduration: 121 ms"; 13 [labelType="html" label=" Project "]; } 14 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 2->0; 3->2; 4->3; 5->4; 6->5; 7->4; 9->7; 10->9; 11->3; 13->11; 14->13; }

CollectLimit 11

Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]

BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight

BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight

Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]

WholeStageCodegen (3)

FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>

BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#191]

Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]

WholeStageCodegen (1)

FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...

BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#197]

WholeStageCodegen (2)

Details

== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(id#165 as string) AS id#174, cast(id#166 as string) AS id#175, cast(birthday#167 as string) AS birthday#176]
      +- Project [a#115.id AS id#165, b#117.id AS id#166, b#117.birthday AS birthday#167]
         +- Filter (a#115.birthday = b#117.birthday)
            +- Project [a#115, b#117]
               +- Project [a#115, b#117]
                  +- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
                     :- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
                     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
                     :  :  +- Relation[src#0,dst#1] csv
                     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
                     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
                        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Analyzed Logical Plan ==
id: string, id: string, birthday: string
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(id#165 as string) AS id#174, cast(id#166 as string) AS id#175, cast(birthday#167 as string) AS birthday#176]
      +- Project [a#115.id AS id#165, b#117.id AS id#166, b#117.birthday AS birthday#167]
         +- Filter (a#115.birthday = b#117.birthday)
            +- Project [a#115, b#117]
               +- Project [a#115, b#117]
                  +- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
                     :- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
                     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
                     :  :  +- Relation[src#0,dst#1] csv
                     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
                     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
                        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]
      +- Join Inner, ((a#115.birthday = b#117.birthday) AND (__tmp-4363599943432734077#113.dst = b#117.id))
         :- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
         :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
         :  :  +- Relation[src#0,dst#1] csv
         :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
         :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
         +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
            +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv

== Physical Plan ==
CollectLimit 11
+- *(3) Project [cast(a#115.id as string) AS id#174, cast(b#117.id as string) AS id#175, cast(b#117.birthday as string) AS birthday#176]
   +- *(3) BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight
      :- *(3) BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight
      :  :- *(3) Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
      :  :  +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
      :  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#191]
      :     +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
      :        +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
      +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#197]
         +- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
            +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...