SETUP Python for Spark.py
# -*- coding: utf-8 -*-
"""
Make sure you give this file execute privileges
-----------------------------------------------------------------------------
Spark with Python: Setup Spyder IDE for Spark
Copyright : V2 Maestros @2016
Execute this script once each time Spyder is started (paths are set per platform below)
-----------------------------------------------------------------------------
"""
import os
import sys
# NOTE: Please change the folder paths to your current setup.
#Windows
if sys.platform.startswith('win'):
    #Where you downloaded the resource bundle
    os.chdir("C:/Users/Kumaran/Dropbox/V2Maestros/Modules/Apache Spark/Python")
    #Where you installed spark.
    os.environ['SPARK_HOME'] = 'C:/Spark/spark-2.0.0-bin-hadoop2.7'
#other platforms - linux/mac
else:
    os.chdir("/Users/jlyang/Documents/Intern&Job/Spark_Python_Do_Big_Data_Analytics")
    os.environ['SPARK_HOME'] = '/Users/jlyang/Spark/spark-2.1.0-bin-hadoop2.7'
os.curdir
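#Optional sanity check (a sketch, not part of the original script): fail fast
#if SPARK_HOME does not point at a real directory, since every path added to
#sys.path below is built from it.
if not os.path.isdir(os.environ['SPARK_HOME']):
    raise IOError("SPARK_HOME does not exist: " + os.environ['SPARK_HOME'])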
# Create a variable for our root path
SPARK_HOME = os.environ['SPARK_HOME']
#Add the following paths to the system path. Please check your installation
#to make sure that these zip files actually exist. The names might change
#as versions change.
sys.path.insert(0,os.path.join(SPARK_HOME,"python"))
sys.path.insert(0,os.path.join(SPARK_HOME,"python","lib"))
sys.path.insert(0,os.path.join(SPARK_HOME,"python","lib","pyspark.zip"))
sys.path.insert(0,os.path.join(SPARK_HOME,"python","lib","py4j-0.10.4-src.zip"))
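#Optional alternative (a sketch, assuming the standard Spark layout): the py4j
#zip name changes between Spark releases (e.g. py4j-0.10.4 vs py4j-0.10.7), so
#you can locate whatever py4j-*-src.zip ships with your installation instead
#of hard-coding the version above.
import glob
py4j_zips = glob.glob(os.path.join(SPARK_HOME, "python", "lib", "py4j-*-src.zip"))
if py4j_zips:
    sys.path.insert(0, py4j_zips[0])
else:
    print("Warning: no py4j-*-src.zip found under " + os.path.join(SPARK_HOME, "python", "lib"))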
#Initialize SparkSession and SparkContext
from pyspark.sql import SparkSession
from pyspark import SparkContext
#Create a Spark Session
SpSession = SparkSession \
    .builder \
    .master("local[2]") \
    .appName("jlyang_spark") \
    .config("spark.executor.memory", "1g") \
    .config("spark.cores.max", "2") \
    .config("spark.sql.warehouse.dir", "/Users/jlyang/Spark/spark-warehouse") \
    .getOrCreate()
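#Note: getOrCreate() returns the already-active SparkSession if one exists in
#this interpreter, so re-running this script does not start a second session.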
#Get the Spark Context from Spark Session
SpContext = SpSession.sparkContext
#Test Spark
testData = SpContext.parallelize([3, 6, 4, 2])
print(testData.count())     #should print 4 if Spark is working
#check http://localhost:4040 to see if Spark is running
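#Optional extra check (a sketch, not in the original script): build a small
#DataFrame through the SparkSession as well, to confirm that the SQL layer
#(and the warehouse dir configured above) also works. The column names "id"
#and "label" are arbitrary.
testDF = SpSession.createDataFrame([(1, "a"), (2, "b")], ["id", "label"])
testDF.show()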