graphframes - Details for Query 5

Details for Query 5

Submitted Time: 2024/10/30 23:02:45
Duration: 10 s
Succeeded Jobs: 11 12 13 14 15 16 17 18

Show the Stage ID and Task ID that corresponds to the max metric

digraph G { 0 [labelType="html" label=" CollectLimit "]; subgraph cluster1 { isCluster="true"; label="WholeStageCodegen (15)\n \nduration: total (min, med, max (stageId: taskId))\n4 ms (1 ms, 1 ms, 2 ms (stage 21.0: task 220))"; 2 [labelType="html" label="HashAggregate time in aggregation build total (min, med, max (stageId: taskId)) 0 ms (0 ms, 0 ms, 0 ms (stage 25.0: task 221)) peak memory total (min, med, max (stageId: taskId)) 192.8 MiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 25.0: task 221)) number of output rows: 15 avg hash probe bucket list iters (min, med, max (stageId: taskId)): (1, 1, 1 (stage 25.0: task 221))"]; } 3 [labelType="html" label="Exchange shuffle records written: 1,285 shuffle write time total (min, med, max (stageId: taskId)) 581 ms (2 ms, 2 ms, 8 ms (stage 20.0: task 147)) records read: 15 local bytes read total (min, med, max (stageId: taskId)) 1883.0 B (264.0 B, 628.0 B, 991.0 B (stage 25.0: task 222)) fetch wait time total (min, med, max (stageId: taskId)) 0 ms (0 ms, 0 ms, 0 ms (stage 25.0: task 221)) local blocks read: 15 data size total (min, med, max (stageId: taskId)) 251.0 KiB (200.0 B, 1200.0 B, 2.9 KiB (stage 20.0: task 164)) shuffle bytes written total (min, med, max (stageId: taskId)) 155.8 KiB (120.0 B, 769.0 B, 1837.0 B (stage 20.0: task 164))"]; subgraph cluster4 { isCluster="true"; label="WholeStageCodegen (14)\n \nduration: total (min, med, max (stageId: taskId))\n1.5 s (4 ms, 5 ms, 139 ms (stage 20.0: task 20))"; 5 [labelType="html" label="HashAggregate time in aggregation build total (min, med, max (stageId: taskId)) 474 ms (0 ms, 1 ms, 97 ms (stage 20.0: task 20)) peak memory total (min, med, max (stageId: taskId)) 12.5 GiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 20.0: task 20)) number of output rows: 1,285 avg hash probe bucket list iters (min, med, max (stageId: taskId)): (1, 1, 1 (stage 20.0: task 20))"]; } 6 [labelType="html" label="SortMergeJoin number of output rows: 1,285"]; subgraph cluster7 { isCluster="true"; label="WholeStageCodegen (6)\n \nduration: total (min, med, max (stageId: taskId))\n4.8 s (14 ms, 16 ms, 442 ms (stage 20.0: task 20))"; 8 [labelType="html" label="Sort sort time total (min, med, max (stageId: taskId)) 0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20)) peak memory total (min, med, max (stageId: taskId)) 12.5 GiB (64.1 MiB, 64.1 MiB, 64.1 MiB (stage 20.0: task 20)) spill size total (min, med, max (stageId: taskId)) 0.0 B (0.0 B, 0.0 B, 0.0 B (stage 20.0: task 20))"]; } 9 [labelType="html" label="Exchange shuffle records written: 2,760 shuffle write time: 23 ms records read: 2,760 local bytes read total (min, med, max (stageId: taskId)) 96.2 KiB (260.0 B, 497.0 B, 737.0 B (stage 20.0: task 201)) fetch wait time total (min, med, max (stageId: taskId)) 0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20)) local blocks read: 200 data size: 539.1 KiB shuffle bytes written: 96.2 KiB"]; subgraph cluster10 { isCluster="true"; label="WholeStageCodegen (5)\n \nduration: 653 ms"; 11 [labelType="html" label=" Project "]; 12 [labelType="html" label="BroadcastHashJoin number of output rows: 2,760"]; 13 [labelType="html" label="BroadcastHashJoin number of output rows: 2,690,019"]; 14 [labelType="html" label=" Project "]; 15 [labelType="html" label="BroadcastHashJoin number of output rows: 88,234"]; 16 [labelType="html" label="BroadcastHashJoin number of output rows: 88,234"]; 17 [labelType="html" label=" Project "]; } 18 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 88,234"]; 19 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 255 ms time to build: 13 ms time to broadcast: 4 ms"]; subgraph cluster20 { isCluster="true"; label="WholeStageCodegen (1)\n \nduration: 124 ms"; 21 [labelType="html" label=" Project "]; } 22 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 23 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 243 ms time to build: 15 ms time to broadcast: 4 ms"]; subgraph cluster24 { isCluster="true"; label="WholeStageCodegen (2)\n \nduration: 108 ms"; 25 [labelType="html" label=" Project "]; } 26 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 27 [labelType="html" label="BroadcastExchange data size: 8.0 MiB time to collect: 599 ms time to build: 65 ms time to broadcast: 9 ms"]; subgraph cluster28 { isCluster="true"; label="WholeStageCodegen (3)\n \nduration: 174 ms"; 29 [labelType="html" label=" Project "]; } 30 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 88,234"]; 31 [labelType="html" label="BroadcastExchange data size: 2.1 MiB time to collect: 385 ms time to build: 35 ms time to broadcast: 6 ms"]; subgraph cluster32 { isCluster="true"; label="WholeStageCodegen (4)\n \nduration: 108 ms"; 33 [labelType="html" label=" Project "]; } 34 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; subgraph cluster35 { isCluster="true"; label="WholeStageCodegen (13)"; 36 [labelType="html" label="Sort sort time total (min, med, max (stageId: taskId)) 389 ms (1 ms, 1 ms, 100 ms (stage 20.0: task 88)) peak memory total (min, med, max (stageId: taskId)) 12.5 GiB (64.3 MiB, 64.3 MiB, 64.3 MiB (stage 20.0: task 20)) spill size total (min, med, max (stageId: taskId)) 0.0 B (0.0 B, 0.0 B, 0.0 B (stage 20.0: task 20))"]; } 37 [labelType="html" label="Exchange shuffle records written: 1,612,010 shuffle write time: 65 ms records read: 1,612,010 fetch wait time total (min, med, max (stageId: taskId)) 0 ms (0 ms, 0 ms, 0 ms (stage 20.0: task 20)) remote bytes read total (min, med, max (stageId: taskId)) 26.8 MiB (133.3 KiB, 137.4 KiB, 141.0 KiB (stage 20.0: task 54)) remote blocks read: 200 data size: 307.5 MiB shuffle bytes written: 26.8 MiB"]; subgraph cluster38 { isCluster="true"; label="WholeStageCodegen (12)\n \nduration: 1.7 s"; 39 [labelType="html" label=" Project "]; 40 [labelType="html" label="BroadcastHashJoin number of output rows: 1,612,010"]; 41 [labelType="html" label=" Project "]; 42 [labelType="html" label="BroadcastHashJoin number of output rows: 2,690,019"]; 43 [labelType="html" label="BroadcastHashJoin number of output rows: 2,690,019"]; 44 [labelType="html" label=" Project "]; 45 [labelType="html" label="BroadcastHashJoin number of output rows: 88,234"]; 46 [labelType="html" label="BroadcastHashJoin number of output rows: 88,234"]; 47 [labelType="html" label=" Project "]; } 48 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 88,234"]; 49 [labelType="html" label="BroadcastExchange data size: 1055.6 KiB time to collect: 607 ms time to build: 18 ms time to broadcast: 3 ms"]; subgraph cluster50 { isCluster="true"; label="WholeStageCodegen (10)\n \nduration: 119 ms"; 51 [labelType="html" label=" Project "]; } 52 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 119.6 KiB number of output rows: 4,039"]; 53 [labelType="html" label="BroadcastExchange data size: 12.0 MiB time to collect: 450 ms time to build: 75 ms time to broadcast: 26 ms"]; subgraph cluster54 { isCluster="true"; label="WholeStageCodegen (11)\n \nduration: 162 ms"; 55 [labelType="html" label=" Project "]; } 56 [labelType="html" label="Scan csv number of files read: 1 metadata time: 0 ms size of files read: 920.5 KiB number of output rows: 88,234"]; 2->0; 3->2; 5->3; 6->5; 8->6; 9->8; 11->9; 12->11; 13->12; 14->13; 15->14; 16->15; 17->16; 18->17; 19->16; 21->19; 22->21; 23->15; 25->23; 26->25; 27->13; 29->27; 30->29; 31->12; 33->31; 34->33; 36->6; 37->36; 39->37; 40->39; 41->40; 42->41; 43->42; 44->43; 45->44; 46->45; 47->46; 48->47; 19->46; 23->45; 27->43; 49->42; 51->49; 52->51; 53->40; 55->53; 56->55; }

CollectLimit 11

HashAggregate(keys=[a#200, b#202, c#227], functions=[])

WholeStageCodegen (15)

Exchange hashpartitioning(a#200, b#202, c#227, 200), true, [id=#590]

HashAggregate(keys=[a#200, b#202, c#227], functions=[])

WholeStageCodegen (14)

SortMergeJoin [coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227)], [coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280)], LeftAnti

Sort [coalesce(a#200, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#200) ASC NULLS FIRST, coalesce(b#202, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#202) ASC NULLS FIRST, coalesce(c#227, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#227) ASC NULLS FIRST], false, 0

WholeStageCodegen (6)

Exchange hashpartitioning(coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227), 200), true, [id=#409]

Project [a#200, b#202, c#227]

BroadcastHashJoin [a#200.education_year_id, a#200.education_school_id, __tmp-1043886091038848698#225.dst], [c#227.education_year_id, c#227.education_school_id, c#227.id], Inner, BuildRight, NOT (a#200.id = c#227.id)

BroadcastHashJoin [b#202.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight

Project [a#200, b#202]

BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#202.id], Inner, BuildRight

BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#200.id], Inner, BuildRight

Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]

WholeStageCodegen (5)

FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>

BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]

Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]

WholeStageCodegen (1)

FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...

WholeStageCodegen (2)

BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]

Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]

WholeStageCodegen (3)

BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_year_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_school_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id)), [id=#404]

WholeStageCodegen (4)

Sort [coalesce(a#278, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#278) ASC NULLS FIRST, coalesce(b#279, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#279) ASC NULLS FIRST, coalesce(c#280, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#280) ASC NULLS FIRST], false, 0

WholeStageCodegen (13)

Exchange hashpartitioning(coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280), 200), true, [id=#525]

Project [a#278, b#279, c#280]

BroadcastHashJoin [a#278.id, c#280.id], [__tmp-430217833014886237#257.src, __tmp-430217833014886237#257.dst], Inner, BuildRight

Project [a#278, b#279, c#280]

BroadcastHashJoin [__tmp-1043886091038848698#225.dst], [c#280.id], Inner, BuildRight

BroadcastHashJoin [b#279.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight

Project [a#278, b#279]

BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#279.id], Inner, BuildRight

BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#278.id], Inner, BuildRight

Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]

WholeStageCodegen (12)

WholeStageCodegen (10)

BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<src:int,dst:int>, false].src as bigint), 32) | (cast(input[0, struct<src:int,dst:int>, false].dst as bigint) & 4294967295)))), [id=#444]

Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]

WholeStageCodegen (11)

Details

== Parsed Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(source person#293 as string) AS source person#311, cast(intermediate friend#294 as string) AS intermediate friend#312, cast(target person#295 as string) AS target person#313, cast(education_school_id#299 as string) AS education_school_id#314, cast(education_year_id#300 as string) AS education_year_id#315]
      +- Project [a#200.id AS source person#293, b#202.id AS intermediate friend#294, c#227.id AS target person#295, a#200.education_school_id AS education_school_id#299, a#200.education_year_id AS education_year_id#300]
         +- Filter NOT (a#200.id = c#227.id)
            +- Filter (a#200.education_year_id = c#227.education_year_id)
               +- Filter (a#200.education_school_id = c#227.education_school_id)
                  +- Project [a#200, b#202, c#227]
                     +- Except false
                        :- Project [a#200, b#202, c#227]
                        :  +- Join Inner, (__tmp-1043886091038848698#225.dst = c#227.id)
                        :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
                        :     :  :- Project [a#200, b#202]
                        :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
                        :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
                        :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
                        :     :  :     :  :  +- Relation[src#0,dst#1] csv
                        :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
                        :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
                        :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
                        :     :     +- Relation[src#0,dst#1] csv
                        :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
                        :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        +- Project [a#278, b#279, c#280]
                           +- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
                              :- Project [a#278, b#279, c#280]
                              :  +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
                              :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
                              :     :  :- Project [a#278, b#279]
                              :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
                              :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
                              :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
                              :     :  :     :  :  +- Relation[src#0,dst#1] csv
                              :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
                              :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
                              :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
                              :     :     +- Relation[src#0,dst#1] csv
                              :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
                              :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              +- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
                                 +- Relation[src#0,dst#1] csv

== Analyzed Logical Plan ==
source person: string, intermediate friend: string, target person: string, education_school_id: string, education_year_id: string
GlobalLimit 11
+- LocalLimit 11
   +- Project [cast(source person#293 as string) AS source person#311, cast(intermediate friend#294 as string) AS intermediate friend#312, cast(target person#295 as string) AS target person#313, cast(education_school_id#299 as string) AS education_school_id#314, cast(education_year_id#300 as string) AS education_year_id#315]
      +- Project [a#200.id AS source person#293, b#202.id AS intermediate friend#294, c#227.id AS target person#295, a#200.education_school_id AS education_school_id#299, a#200.education_year_id AS education_year_id#300]
         +- Filter NOT (a#200.id = c#227.id)
            +- Filter (a#200.education_year_id = c#227.education_year_id)
               +- Filter (a#200.education_school_id = c#227.education_school_id)
                  +- Project [a#200, b#202, c#227]
                     +- Except false
                        :- Project [a#200, b#202, c#227]
                        :  +- Join Inner, (__tmp-1043886091038848698#225.dst = c#227.id)
                        :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
                        :     :  :- Project [a#200, b#202]
                        :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
                        :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
                        :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
                        :     :  :     :  :  +- Relation[src#0,dst#1] csv
                        :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
                        :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
                        :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
                        :     :     +- Relation[src#0,dst#1] csv
                        :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
                        :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                        +- Project [a#278, b#279, c#280]
                           +- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
                              :- Project [a#278, b#279, c#280]
                              :  +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
                              :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
                              :     :  :- Project [a#278, b#279]
                              :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
                              :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
                              :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
                              :     :  :     :  :  +- Relation[src#0,dst#1] csv
                              :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
                              :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
                              :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
                              :     :     +- Relation[src#0,dst#1] csv
                              :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
                              :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
                              +- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
                                 +- Relation[src#0,dst#1] csv

== Optimized Logical Plan ==
GlobalLimit 11
+- LocalLimit 11
   +- Aggregate [a#200, b#202, c#227], [cast(a#200.id as string) AS source person#311, cast(b#202.id as string) AS intermediate friend#312, cast(c#227.id as string) AS target person#313, cast(a#200.education_school_id as string) AS education_school_id#314, cast(a#200.education_year_id as string) AS education_year_id#315]
      +- Join LeftAnti, (((a#200 <=> a#278) AND (b#202 <=> b#279)) AND (c#227 <=> c#280))
         :- Project [a#200, b#202, c#227]
         :  +- Join Inner, ((((a#200.education_year_id = c#227.education_year_id) AND NOT (a#200.id = c#227.id)) AND (a#200.education_school_id = c#227.education_school_id)) AND (__tmp-1043886091038848698#225.dst = c#227.id))
         :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#202.id)
         :     :  :- Project [a#200, b#202]
         :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#202.id)
         :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#200.id)
         :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
         :     :  :     :  :  +- Relation[src#0,dst#1] csv
         :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
         :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
         :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
         :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
         :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
         :     :     +- Relation[src#0,dst#1] csv
         :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
         :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
         +- Project [a#278, b#279, c#280]
            +- Join Inner, ((__tmp-430217833014886237#257.src = a#278.id) AND (__tmp-430217833014886237#257.dst = c#280.id))
               :- Project [a#278, b#279, c#280]
               :  +- Join Inner, (__tmp-1043886091038848698#225.dst = c#280.id)
               :     :- Join Inner, (__tmp-1043886091038848698#225.src = b#279.id)
               :     :  :- Project [a#278, b#279]
               :     :  :  +- Join Inner, (__tmp3640351034883199571#198.dst = b#279.id)
               :     :  :     :- Join Inner, (__tmp3640351034883199571#198.src = a#278.id)
               :     :  :     :  :- Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
               :     :  :     :  :  +- Relation[src#0,dst#1] csv
               :     :  :     :  +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#278]
               :     :  :     :     +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
               :     :  :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#279]
               :     :  :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
               :     :  +- Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
               :     :     +- Relation[src#0,dst#1] csv
               :     +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
               :        +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
               +- Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
                  +- Relation[src#0,dst#1] csv

== Physical Plan ==
CollectLimit 11
+- *(15) HashAggregate(keys=[a#200, b#202, c#227], functions=[], output=[source person#311, intermediate friend#312, target person#313, education_school_id#314, education_year_id#315])
   +- Exchange hashpartitioning(a#200, b#202, c#227, 200), true, [id=#590]
      +- *(14) HashAggregate(keys=[a#200, b#202, c#227], functions=[], output=[a#200, b#202, c#227])
         +- SortMergeJoin [coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227)], [coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280)], LeftAnti
            :- *(6) Sort [coalesce(a#200, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#200) ASC NULLS FIRST, coalesce(b#202, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#202) ASC NULLS FIRST, coalesce(c#227, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#227) ASC NULLS FIRST], false, 0
            :  +- Exchange hashpartitioning(coalesce(a#200, [0,0,0,0,0,0]), isnull(a#200), coalesce(b#202, [0,0,0,0,0,0]), isnull(b#202), coalesce(c#227, [0,0,0,0,0,0]), isnull(c#227), 200), true, [id=#409]
            :     +- *(5) Project [a#200, b#202, c#227]
            :        +- *(5) BroadcastHashJoin [a#200.education_year_id, a#200.education_school_id, __tmp-1043886091038848698#225.dst], [c#227.education_year_id, c#227.education_school_id, c#227.id], Inner, BuildRight, NOT (a#200.id = c#227.id)
            :           :- *(5) BroadcastHashJoin [b#202.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
            :           :  :- *(5) Project [a#200, b#202]
            :           :  :  +- *(5) BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#202.id], Inner, BuildRight
            :           :  :     :- *(5) BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#200.id], Inner, BuildRight
            :           :  :     :  :- *(5) Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
            :           :  :     :  :  +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
            :           :  :     :  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]
            :           :  :     :     +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#200]
            :           :  :     :        +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
            :           :  :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#391]
            :           :  :        +- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#202]
            :           :  :           +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
            :           :  +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]
            :           :     +- *(3) Project [struct(src, src#0, dst, dst#1) AS __tmp-1043886091038848698#225]
            :           :        +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
            :           +- BroadcastExchange HashedRelationBroadcastMode(List(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_year_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].education_school_id, input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id)), [id=#404]
            :              +- *(4) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#227]
            :                 +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
            +- *(13) Sort [coalesce(a#278, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(a#278) ASC NULLS FIRST, coalesce(b#279, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(b#279) ASC NULLS FIRST, coalesce(c#280, [0,0,0,0,0,0]) ASC NULLS FIRST, isnull(c#280) ASC NULLS FIRST], false, 0
               +- Exchange hashpartitioning(coalesce(a#278, [0,0,0,0,0,0]), isnull(a#278), coalesce(b#279, [0,0,0,0,0,0]), isnull(b#279), coalesce(c#280, [0,0,0,0,0,0]), isnull(c#280), 200), true, [id=#525]
                  +- *(12) Project [a#278, b#279, c#280]
                     +- *(12) BroadcastHashJoin [a#278.id, c#280.id], [__tmp-430217833014886237#257.src, __tmp-430217833014886237#257.dst], Inner, BuildRight
                        :- *(12) Project [a#278, b#279, c#280]
                        :  +- *(12) BroadcastHashJoin [__tmp-1043886091038848698#225.dst], [c#280.id], Inner, BuildRight
                        :     :- *(12) BroadcastHashJoin [b#279.id], [__tmp-1043886091038848698#225.src], Inner, BuildRight
                        :     :  :- *(12) Project [a#278, b#279]
                        :     :  :  +- *(12) BroadcastHashJoin [__tmp3640351034883199571#198.dst], [b#279.id], Inner, BuildRight
                        :     :  :     :- *(12) BroadcastHashJoin [__tmp3640351034883199571#198.src], [a#278.id], Inner, BuildRight
                        :     :  :     :  :- *(12) Project [struct(src, src#0, dst, dst#1) AS __tmp3640351034883199571#198]
                        :     :  :     :  :  +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
                        :     :  :     :  +- ReusedExchange [a#278], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#385]
                        :     :  :     +- ReusedExchange [b#279], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#391]
                        :     :  +- ReusedExchange [__tmp-1043886091038848698#225], BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<src:int,dst:int>, false].src as bigint))), [id=#398]
                        :     +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#437]
                        :        +- *(10) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS c#280]
                        :           +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
                        +- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<src:int,dst:int>, false].src as bigint), 32) | (cast(input[0, struct<src:int,dst:int>, false].dst as bigint) & 4294967295)))), [id=#444]
                           +- *(11) Project [struct(src, src#0, dst, dst#1) AS __tmp-430217833014886237#257]
                              +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>