-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathspark_sql4.py
42 lines (27 loc) · 1.21 KB
/
spark_sql4.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#!/usr/lib/spark/bin/pyspark
# https://spark.apache.org/docs/latest/sql-programming-guide.html#getting-started
from pyspark import SparkContext
from pyspark.sql import SQLContext
from pyspark.sql.types import StringType, StructField, StructType
# Driver script: read a comma-separated text file into an RDD, attach an
# all-string schema to it, register the result as a temporary table and run
# a Spark SQL query against it, printing one line per returned name.
sc = SparkContext( 'local', 'pyspark' )
print( " *** hello world sparkContext created" )
sqlContext = SQLContext(sc)
print( " *** hello world spark sql context created" )
#df = sqlContext.read.json("./taxorpt.json")
#df.show()
#lines = sc.textFile("passwd.txt")

# Path is relative to the working directory the script is launched from.
# (An earlier absolute-path assignment was overwritten immediately by this
# one and has been dropped as dead code.)
lines = sc.textFile("example/people.json")

# NOTE(review): the file is named .json but is parsed as "name, age" text by
# splitting on commas; a line without a comma would raise IndexError on p[1].
# Confirm the actual file format.
parts = lines.map(lambda l: l.split(","))
people = parts.map(lambda p: (p[0], p[1].strip()))

# Build the schema programmatically: every column listed in schemaString
# becomes a nullable string field. StructField/StringType/StructType live in
# pyspark.sql.types, which must be imported explicitly.
schemaString = "name age"
fields = [StructField(field_name, StringType(), True) for field_name in schemaString.split()]
schema = StructType(fields)

schemaPeople = sqlContext.createDataFrame(people, schema)
schemaPeople.registerTempTable("people")

results = sqlContext.sql("SELECT name FROM people")
# Go through .rdd so the map works on both Spark 1.x and 2.x+ DataFrames
# (DataFrame.map itself was removed in 2.0).
names = results.rdd.map(lambda p: "Name: " + p.name)
for name in names.collect():
    print(name)
print( " *** good bye world !!" )