import React from "react";
import Highlight from "react-highlight.js";
import { Link } from "react-router-dom";

import PropTypes from "prop-types";

import { makeStyles } from "@material-ui/core/styles";

import moment from "moment";

import Muted from "components/Typography/Muted.js";

import jpScreen from "assets/img/jupyter.png";
import styles from "assets/jss/hwblog/views/componentsSections/basicsStyle.js";

// Styling hook built from the imported basicsStyle JSS definitions; Article
// calls it on each render to obtain its `classes` map.
const useStyles = makeStyles(styles);

export default function Article(props) {
  const classes = useStyles();

  const title = "Jupyter with Spark Setup";
  const dateStamp = "20161222";
  return (
    <div className={classes.sections}>
      <div className={classes.container}>
        <h2 className={classes.title}>
          <Link to={props.link}>{title}</Link>
        </h2>
        <Muted>Posted on: {moment(dateStamp).format("MMM Do, YYYY")}</Muted>
        <p>
          Recently doing a poc for work, learnt one thing or two about jupyter and spark. Here is the environmental
          summary I experienced
        </p>
        <ul>
          <li>jupyter version 4.2.1</li>
          <li>spark version 2.0.2 with hadoop2.7</li>
          <li>python version 2.7.12</li>
          <li>docker version 1.11.2, build b9f10c9/1.11.2</li>
        </ul>
        <p>
          <b>Simplified version – Docker</b>
        </p>
        <Highlight language="Bash">{`# pull the docker image from jupyter org
docker pull jupyter/all-spark-notebook
# run the image with the name & data dir you want to mount
docker run -d --name hans-notebook -v <local_src_dir>:/home/data -p 8888:8888 jupyter/all-spark-notebook`}</Highlight>
        <p>
          <b>A little bit complicated version – manual setup </b>
        </p>
        <ol>
          <li>
            Manual installation of each components
            <Highlight language="Bash">{`#create virtualenv
virtualenv jupyter_poc
source ./jupyter_poc/bin/activate
   
#install jupyter
pip install jupyter
   
#generate jupyter config file by running
jupyter notebook --generate-config
   
#download spark
wget http://d3kbcqa49mib13.cloudfront.net/spark-2.0.2-bin-hadoop2.7.tgz
   
#un-zip the file
tar -zxvf spark-2.0.2-bin-hadoop2.7.tgz .`}</Highlight>
          </li>
          <li>
            In the jupyter config file, we need to simple initial setup for file{" "}
            <em>~/.jupyter/jupyter_notebook_config.py</em>
            <Highlight language="Python">{`#open up the ip
c.NotebookApp.ip = '*'
#disable browser opening since its not localhost dev
c.NotebookApp.open_browser = False
#specify the port
c.NotebookApp.port = 8888
#temp disable the password token
c.NotebookApp.token = u''`}</Highlight>
          </li>
          <li>
            After downloading spark, we need to export it in the PATH in <em>~/.bash_profile</em> ( or{" "}
            <em>~/.bashrc</em> )<p></p>
            <Highlight language="Bash">{`PATH=$PATH:$HOME/spark-2.0.2-bin-hadoop2.7/bin
  
  export PATH
    
  export SPARK_HOME=$HOME/spark-2.0.2-bin-hadoop2.7
  export PYSPARK_SUBMIT_ARGS='--master local[*] pyspark-shell'`}</Highlight>
          </li>
          <li>
            Also we need to introduce pyspark into jupyter python env in{" "}
            <em>~/.ipython/profile_default/startup/00-default-setup.py</em>
            <Highlight language="Python">{`import os
import sys
  
  
spark_home = os.environ.get("SPARK_HOME")
if not spark_home:
    raise ValueError('SPARK_HOME environment varialble is not set')
  
sys.path.insert(0, os.path.join(spark_home, 'python'))
sys.path.insert(0, os.path.join(spark_home, 'python/lib/py4j-0.9-src.zip'))
execfile(os.path.join(spark_home, 'python/pyspark/shell.py'))`}</Highlight>
          </li>
        </ol>
        <p>
          After these setup, just open the <code>{`<ip>:8888`}</code> on the browser, the jupyter welcome page should be shown.
          Happy coding with spark! <span role="img" aria-label="happy">😛</span>
        </p>
        <img src={jpScreen} alt="Jupyter screenshot" />
      </div>
    </div>
  );
}

// Runtime prop validation for Article.
Article.propTypes = {
  // Optional route; Article passes it as the `to` target of the title link.
  link: PropTypes.string,
};
