digraph G {
subgraph cluster0 {
isCluster="true";
label="WholeStageCodegen (4)\n \nduration: 7 ms";
1 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 7 ms<br>number of output rows: 1"];
}
2 [labelType="html" label="<b>Exchange</b><br><br>shuffle records written: 1<br>shuffle write time: 7 ms<br>records read: 1<br>local bytes read: 59.0 B<br>fetch wait time: 0 ms<br>local blocks read: 1<br>data size: 16.0 B<br>shuffle bytes written: 59.0 B"];
subgraph cluster3 {
isCluster="true";
label="WholeStageCodegen (3)\n \nduration: 241 ms";
4 [labelType="html" label="<b>HashAggregate</b><br><br>time in aggregation build: 201 ms<br>number of output rows: 1"];
5 [labelType="html" label="<br><b>Project</b><br><br>"];
6 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 100"];
7 [labelType="html" label="<b>BroadcastHashJoin</b><br><br>number of output rows: 88,234"];
8 [labelType="html" label="<br><b>Project</b><br><br>"];
}
9 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 920.5 KiB<br>number of output rows: 88,234"];
10 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1055.6 KiB<br>time to collect: 235 ms<br>time to build: 20 ms<br>time to broadcast: 5 ms"];
subgraph cluster11 {
isCluster="true";
label="WholeStageCodegen (1)\n \nduration: 127 ms";
12 [labelType="html" label="<br><b>Project</b><br><br>"];
}
13 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
14 [labelType="html" label="<b>BroadcastExchange</b><br><br>data size: 1088.0 KiB<br>time to collect: 238 ms<br>time to build: 30 ms<br>time to broadcast: 4 ms"];
subgraph cluster15 {
isCluster="true";
label="WholeStageCodegen (2)\n \nduration: 110 ms";
16 [labelType="html" label="<br><b>Project</b><br><br>"];
}
17 [labelType="html" label="<b>Scan csv </b><br><br>number of files read: 1<br>metadata time: 0 ms<br>size of files read: 119.6 KiB<br>number of output rows: 4,039"];
2->1;
4->2;
5->4;
6->5;
7->6;
8->7;
9->8;
10->7;
12->10;
13->12;
14->6;
16->14;
17->16;
}
18
HashAggregate(keys=[], functions=[count(1)])
WholeStageCodegen (4)
Exchange SinglePartition, true, [id=#143]
HashAggregate(keys=[], functions=[partial_count(1)])
Project
BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight
BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight
Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
WholeStageCodegen (3)
FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#131]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
WholeStageCodegen (1)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#137]
Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
WholeStageCodegen (2)
FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
== Parsed Logical Plan ==
Aggregate [count(1) AS count#147L]
+- Filter (a#115.birthday = b#117.birthday)
+- Project [a#115, b#117]
+- Project [a#115, b#117]
+- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Analyzed Logical Plan ==
count: bigint
Aggregate [count(1) AS count#147L]
+- Filter (a#115.birthday = b#117.birthday)
+- Project [a#115, b#117]
+- Project [a#115, b#117]
+- Join Inner, (__tmp-4363599943432734077#113.dst = b#117.id)
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Optimized Logical Plan ==
Aggregate [count(1) AS count#147L]
+- Project
+- Join Inner, ((a#115.birthday = b#117.birthday) AND (__tmp-4363599943432734077#113.dst = b#117.id))
:- Join Inner, (__tmp-4363599943432734077#113.src = a#115.id)
: :- Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- Relation[src#0,dst#1] csv
: +- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
+- Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- Relation[id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] csv
== Physical Plan ==
*(4) HashAggregate(keys=[], functions=[count(1)], output=[count#147L])
+- Exchange SinglePartition, true, [id=#143]
+- *(3) HashAggregate(keys=[], functions=[partial_count(1)], output=[count#150L])
+- *(3) Project
+- *(3) BroadcastHashJoin [a#115.birthday, __tmp-4363599943432734077#113.dst], [b#117.birthday, b#117.id], Inner, BuildRight
:- *(3) BroadcastHashJoin [__tmp-4363599943432734077#113.src], [a#115.id], Inner, BuildRight
: :- *(3) Project [struct(src, src#0, dst, dst#1) AS __tmp-4363599943432734077#113]
: : +- FileScan csv [src#0,dst#1] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_edges.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<src:int,dst:int>
: +- BroadcastExchange HashedRelationBroadcastMode(List(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint))), [id=#131]
: +- *(1) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS a#115]
: +- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...
+- BroadcastExchange HashedRelationBroadcastMode(List((shiftleft(cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].birthday as bigint), 32) | (cast(input[0, struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education_year_id:int>, false].id as bigint) & 4294967295)))), [id=#137]
+- *(2) Project [struct(id, id#4, birthday, birthday#5, hometown_id, hometown_id#6, work_employer_id, work_employer_id#7, education_school_id, education_school_id#8, education_year_id, education_year_id#9) AS b#117]
+- FileScan csv [id#4,birthday#5,hometown_id#6,work_employer_id#7,education_school_id#8,education_year_id#9] Batched: false, DataFilters: [], Format: CSV, Location: InMemoryFileIndex[s3a://data-repository-bkt/ECS765/graph/stanford_fb_vertices.csv], PartitionFilters: [], PushedFilters: [], ReadSchema: struct<id:int,birthday:int,hometown_id:int,work_employer_id:int,education_school_id:int,education...