How to do it...

We will continue working in our Python Databricks notebook, but we will add the following Scala cell. At a high level, here is the flow of the code:

package d3a

import org.apache.spark.sql._
import com.databricks.backend.daemon.driver.EnhancedRDDFunctions.displayHTML

/**
 * One input edge of the graph.
 *
 * @param src   name of the origin node
 * @param dest  name of the destination node
 * @param count weight of the edge; `graphs.force` scales it down to obtain the link value
 */
case class Edge(
  src: String,
  dest: String,
  count: Long
)

/**
 * A node of the rendered graph.
 *
 * @param name display name of the node
 */
case class Node(
  name: String
)

/**
 * A link between two nodes, expressed as positions in `Graph.nodes`.
 *
 * @param source index of the origin node in the node sequence
 * @param target index of the destination node in the node sequence
 * @param value  weight of the link
 */
case class Link(
  source: Int,
  target: Int,
  value: Long
)

/**
 * The structure that is serialized to JSON and handed to the D3 page.
 *
 * @param nodes all nodes of the graph
 * @param links links that refer to `nodes` by index
 */
case class Graph(
  nodes: Seq[Node],
  links: Seq[Link]
)

object graphs {
// Reuse (or create) the SQLContext bound to the current SparkContext.
val sqlContext = SQLContext.getOrCreate(org.apache.spark.SparkContext.getOrCreate())
// Brings the implicit conversions into scope; `force` relies on them to call `.toDF()` on a Seq[Graph].
import sqlContext.implicits._

/**
 * Collects the edges to the driver, turns them into a node/link graph and renders it.
 *
 * Underscores in node names are replaced by spaces. Each link refers to its endpoints by
 * their index in the node sequence, and its value is `count / 20 + 1`.
 *
 * @param clicks edges to display; they are collected to the driver, so keep the dataset small
 * @param height height passed on to `showGraph`
 * @param width  width passed on to `showGraph`
 */
def force(clicks: Dataset[Edge], height: Int = 100, width: Int = 960): Unit = {
  val data = clicks.collect()
  // Node names must be unique because links address nodes by position.
  val nodes = (data.map(_.src) ++ data.map(_.dest)).map(_.replaceAll("_", " ")).distinct.toSeq.map(Node(_))
  val links = data.map { t =>
    Link(nodes.indexWhere(_.name == t.src.replaceAll("_", " ")), nodes.indexWhere(_.name == t.dest.replaceAll("_", " ")), t.count / 20 + 1)
  }.toSeq
  showGraph(height, width, Seq(Graph(nodes, links)).toDF().toJSON.first())
}

/**
 * Displays a force directed graph using d3
 * input: {"nodes": [{"name": "..."}], "links": [{"source": 1, "target": 2, "value": 0}]}
 */
// Renders the visualization by handing a single interpolated HTML page to displayHTML.
// `graph` (a JSON string), `width` and `height` are spliced into the page's script as
// `$graph`, `$width` and `$height`.
// NOTE(review): this listing looks truncated by text extraction - the stylesheet, script and
// data URLs are empty strings, several JavaScript expressions are missing their receivers, and
// the closing string delimiter and braces are absent. Restore it from the original notebook
// before running - TODO confirm.
def showGraph(height: Int, width: Int, graph: String): Unit = {

displayHTML(s"""<!DOCTYPE html>
<link type="text/css" rel="stylesheet" href=""/>
<style type="text/css">
#states path {
fill: #ccc;
stroke: #fff;

path.arc {
pointer-events: none;
fill: none;
stroke: #000;
display: none;

path.cell {
fill: none;
pointer-events: all;

circle {
fill: steelblue;
fill-opacity: .8;
stroke: #fff;

#cells.voronoi path.cell {
stroke: brown;

#cells g:hover path.arc {
display: inherit;
<script src=""></script>
<script src=""></script>
<script src=""></script>
<script src=""></script>
var graph = $graph;
var w = $width;
var h = $height;

var linksByOrigin = {};
var countByAirport = {};
var locationByAirport = {};
var positions = [];

var projection = d3.geo.azimuthal()
.origin([-98, 38])
.translate([640, 360]);

var path = d3.geo.path()

var svg ="body")
.insert("svg:svg", "h2")
.attr("width", w)
.attr("height", h);

var states = svg.append("svg:g")
.attr("id", "states");

var circles = svg.append("svg:g")
.attr("id", "circles");

var cells = svg.append("svg:g")
.attr("id", "cells");

var arc = d3.geo.greatArc()
.source(function(d) { return locationByAirport[d.source]; })
.target(function(d) { return locationByAirport[]; });"input[type=checkbox]").on("change", function() {
cells.classed("voronoi", this.checked);

// Draw US map.
d3.json("", function(collection) {
.attr("d", path);

// Parse links
graph.links.forEach(function(link) {
var origin = graph.nodes[link.source].name;
var destination = graph.nodes[].name;

var links = linksByOrigin[origin] || (linksByOrigin[origin] = []);
links.push({ source: origin, target: destination });

countByAirport[origin] = (countByAirport[origin] || 0) + 1;
countByAirport[destination] = (countByAirport[destination] || 0) + 1;

d3.csv("", function(data) {

// Build list of airports.
var airports = {
return data.find(function(airport) {
if (airport.iata === {
var location = [+airport.longitude, +airport.latitude];
locationByAirport[airport.iata] = location;

return true;
} else {
return false;

// Compute the Voronoi diagram of airports' projected positions.
var polygons = d3.geom.voronoi(positions);

var g = cells.selectAll("g")

.attr("class", "cell")
.attr("d", function(d, i) { return "M" + polygons[i].join("L") + "Z"; })
.on("mouseover", function(d, i) {"h2 span").text(; });

.data(function(d) { return linksByOrigin[d.iata] || []; })
.attr("class", "arc")
.attr("d", function(d) { return path(arc(d)); });

.attr("cx", function(d, i) { return positions[i][0]; })
.attr("cy", function(d, i) { return positions[i][1]; })
.attr("r", function(d, i) { return Math.sqrt(countByAirport[d.iata]); })
.sort(function(a, b) { return countByAirport[b.iata] - countByAirport[a.iata]; });

/**
 * Displays a short usage note for `graphs.force` as HTML in the notebook.
 *
 * The markup is wrapped in a plain (non-interpolated) triple-quoted string and passed to
 * `displayHTML`. The import shown to the user names the `d3a` package, matching the package
 * this object is declared in.
 */
def help() = {
  displayHTML("""
Produces a force-directed graph given a collection of edges of the following form:</br>
<tt><font color="#a71d5d">case class</font> <font color="#795da3">Edge</font>(<font color="#ed6a43">src</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">dest</font>: <font color="#a71d5d">String</font>, <font color="#ed6a43">count</font>: <font color="#a71d5d">Long</font>)</tt>
<tt><font color="#a71d5d">import</font> <font color="#ed6a43">d3a._</font></tt><br/>
<tt><font color="#795da3">graphs.force</font>(</br>
<font color="#ed6a43">height</font> = <font color="#795da3">500</font>,<br/>
<font color="#ed6a43">width</font> = <font color="#795da3">500</font>,<br/>
<font color="#ed6a43">clicks</font>: <font color="#795da3">Dataset</font>[<font color="#795da3">Edge</font>])</tt>
""")
}

In the next cell, you will call the code defined above using the following Scala cell:

// On-time and Early Arrivals
// Aggregates flights with no delay per (src, dst) pair and renders them with graphs.force.
import d3a._
graphs.force(
  height = 800,
  width = 1200,
  clicks = sql("""select src, dst as dest, count(1) as count from deptsDelays_GEO where delay <= 0 group by src, dst""").as[Edge])

Which results in the following visualization:

