How to do it...

We will continue to work in our Python Databricks notebook, but we will add the following Scala cell. At a high level, here is the flow of the code:

%scala
package d3a

import org.apache.spark.sql._
import com.databricks.backend.daemon.driver.EnhancedRDDFunctions.displayHTML

/** A directed edge between two named endpoints, weighted by `count`. */
final case class Edge(src: String, dest: String, count: Long)

/** A graph vertex identified by its display name. */
final case class Node(name: String)

/** A link between two nodes, each referenced by its position in `Graph.nodes`. */
final case class Link(source: Int, target: Int, value: Long)

/** The node/link structure that is serialized to JSON and handed to the d3 page. */
final case class Graph(nodes: Seq[Node], links: Seq[Link])

object graphs {
  // NOTE(review): SQLContext.getOrCreate is deprecated in Spark 2.x in favour of
  // SparkSession.builder.getOrCreate(); kept as-is so the cell behaves exactly as before.
  val sqlContext: SQLContext = SQLContext.getOrCreate(org.apache.spark.SparkContext.getOrCreate())
  import sqlContext.implicits._

  /**
   * Collects the edges to the driver, converts them into a node/link graph and
   * renders it through [[showGraph]].
   *
   * Underscores in endpoint names are replaced with spaces. Every link refers to
   * its endpoints by their index in the node list, and its value is
   * `count / 20 + 1`.
   *
   * @param clicks edges to draw; the whole dataset is collected, so it must fit in driver memory
   * @param height height of the rendered SVG, in pixels
   * @param width  width of the rendered SVG, in pixels
   */
  def force(clicks: Dataset[Edge], height: Int = 100, width: Int = 960): Unit = {
    // Literal character swap; no regex needed.
    def label(raw: String): String = raw.replace('_', ' ')

    val edges = clicks.collect()

    // Distinct endpoint names in first-seen order; the position in this sequence is the node id.
    val names: Seq[String] = edges.flatMap(e => Seq(e.src, e.dest)).map(label).distinct.toSeq
    val nodes: Seq[Node] = names.map(Node(_))

    // Name -> index lookup built once, instead of scanning the node list twice per edge.
    val indexByName: Map[String, Int] = names.zipWithIndex.toMap

    val links: Seq[Link] = edges.toSeq.map { e =>
      Link(indexByName(label(e.src)), indexByName(label(e.dest)), e.count / 20 + 1)
    }

    showGraph(height, width, Seq(Graph(nodes, links)).toDF().toJSON.first())
  }

  /**
   * Builds an HTML page that draws the graph on a US map with d3 and passes it to
   * `displayHTML`: one circle per airport, Voronoi cells for hover detection and
   * great-circle arcs for the links that leave the hovered airport.
   *
   * Node names are matched against the `iata` column of the airports.csv file the
   * page downloads. Nodes without a match, and links that end at such a node, are
   * not drawn.
   *
   * The map projection uses a fixed scale and translation; `width` and `height`
   * only size the SVG element.
   *
   * @param height height of the SVG element, in pixels
   * @param width  width of the SVG element, in pixels
   * @param graph  JSON of the form
   *               {"nodes": [{"name": "..."}], "links": [{"source": 1, "target": 2, "value": 0}]}
   *               It is spliced verbatim into the page's script.
   */
  def showGraph(height: Int, width: Int, graph: String): Unit = {

    displayHTML(s"""<!DOCTYPE html>
<html>
<head>
<link type="text/css" rel="stylesheet" href="https://mbostock.github.io/d3/talk/20111116/style.css"/>
<style type="text/css">
#states path {
fill: #ccc;
stroke: #fff;
}

path.arc {
pointer-events: none;
fill: none;
stroke: #000;
display: none;
}

path.cell {
fill: none;
pointer-events: all;
}

circle {
fill: steelblue;
fill-opacity: .8;
stroke: #fff;
}

#cells.voronoi path.cell {
stroke: brown;
}

#cells g:hover path.arc {
display: inherit;
}
</style>
</head>
<body>
<script src="https://mbostock.github.io/d3/talk/20111116/d3/d3.js"></script>
<script src="https://mbostock.github.io/d3/talk/20111116/d3/d3.csv.js"></script>
<script src="https://mbostock.github.io/d3/talk/20111116/d3/d3.geo.js"></script>
<script src="https://mbostock.github.io/d3/talk/20111116/d3/d3.geom.js"></script>
<script>
var graph = $graph;
var w = $width;
var h = $height;

var linksByOrigin = {};
var countByAirport = {};
var locationByAirport = {};
var positions = [];

var projection = d3.geo.azimuthal()
.mode("equidistant")
.origin([-98, 38])
.scale(1400)
.translate([640, 360]);

var path = d3.geo.path()
.projection(projection);

var svg = d3.select("body")
.insert("svg:svg", "h2")
.attr("width", w)
.attr("height", h);

var states = svg.append("svg:g")
.attr("id", "states");

var circles = svg.append("svg:g")
.attr("id", "circles");

var cells = svg.append("svg:g")
.attr("id", "cells");

var arc = d3.geo.greatArc()
.source(function(d) { return locationByAirport[d.source]; })
.target(function(d) { return locationByAirport[d.target]; });

d3.select("input[type=checkbox]").on("change", function() {
cells.classed("voronoi", this.checked);
});

// Draw US map.
d3.json("https://mbostock.github.io/d3/talk/20111116/us-states.json", function(collection) {
states.selectAll("path")
.data(collection.features)
.enter().append("svg:path")
.attr("d", path);
});

// Parse links
graph.links.forEach(function(link) {
var origin = graph.nodes[link.source].name;
var destination = graph.nodes[link.target].name;

var links = linksByOrigin[origin] || (linksByOrigin[origin] = []);
links.push({ source: origin, target: destination });

countByAirport[origin] = (countByAirport[origin] || 0) + 1;
countByAirport[destination] = (countByAirport[destination] || 0) + 1;
});

d3.csv("https://mbostock.github.io/d3/talk/20111116/airports.csv", function(data) {

// Build list of airports. Nodes with no match in airports.csv are dropped so that
// airports[i] stays aligned with positions[i] (positions only grows on a match).
var airports = graph.nodes.map(function(node) {
return data.find(function(airport) {
if (airport.iata === node.name) {
var location = [+airport.longitude, +airport.latitude];
locationByAirport[airport.iata] = location;
positions.push(projection(location));

return true;
} else {
return false;
}
});
}).filter(function(airport) { return airport !== undefined; });

// Compute the Voronoi diagram of airports' projected positions.
var polygons = d3.geom.voronoi(positions);

var g = cells.selectAll("g")
.data(airports)
.enter().append("svg:g");

g.append("svg:path")
.attr("class", "cell")
.attr("d", function(d, i) { return "M" + polygons[i].join("L") + "Z"; })
.on("mouseover", function(d, i) { d3.select("h2 span").text(d.name); });

g.selectAll("path.arc")
.data(function(d) {
// Skip links whose destination has no known location; the arc cannot be computed.
return (linksByOrigin[d.iata] || []).filter(function(link) {
return locationByAirport[link.target] !== undefined;
});
})
.enter().append("svg:path")
.attr("class", "arc")
.attr("d", function(d) { return path(arc(d)); });

circles.selectAll("circle")
.data(airports)
.enter().append("svg:circle")
.attr("cx", function(d, i) { return positions[i][0]; })
.attr("cy", function(d, i) { return positions[i][1]; })
.attr("r", function(d, i) { return Math.sqrt(countByAirport[d.iata]); })
.sort(function(a, b) { return countByAirport[b.iata] - countByAirport[a.iata]; });
});
</script>
</body>
</html>""")
  }

  /** Displays a short HTML usage note for [[force]], including the expected `Edge` shape. */
  def help(): Unit = {
    displayHTML("""
<p>
Produces a force-directed graph given a collection of edges of the following form:<br/>
<tt><font color="#a71d5d">case class</font> <font color="#795da3">Edge</font>(<font color="#ed6a43">src</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">dest</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">count</font>: <font color="#a71d5d">Long</font>)</tt>
</p>
<p>Usage:<br/>
<tt>%scala</tt><br/>
<tt><font color="#a71d5d">import</font> <font color="#ed6a43">d3a._</font></tt><br/>
<tt><font color="#795da3">graphs.force</font>(<br/>
<font color="#ed6a43">height</font> = <font color="#795da3">500</font>,<br/>
<font color="#ed6a43">width</font> = <font color="#795da3">500</font>,<br/>
<font color="#ed6a43">clicks</font>: <font color="#795da3">Dataset</font>[<font color="#795da3">Edge</font>])</tt>
</p>""")
  }
}

In the next cell, call the visualization with the following Scala code:

%scala
// On-time and early arrivals (delay <= 0): one edge per (src, dst) pair,
// weighted by the number of matching rows.
import d3a._

graphs.force(
  clicks = sql("""select src, dst as dest, count(1) as count from deptsDelays_GEO where delay <= 0 group by src, dst""").as[Edge],
  width = 1200,
  height = 800
)

Which results in the following visualization:

..................Content has been hidden....................

You cannot read the whole page of this ebook; please click here to log in and view the full page.
Reset