# PySpark tutorial: word-count with reduceByKey, then a per-year tally of
# Daily Show guests from a FiveThirtyEight CSV. (Full script below.)

# PySpark tutorial script.
# Part 1: sum values per key with reduceByKey.
# Part 2: download the FiveThirtyEight daily-show-guests CSV and count
#         guests per year.
#
# Fixes vs. the original: curly "smart quotes" replaced with ASCII quotes
# (they are syntax errors), the IPython `!curl` / `!head` shell magics
# replaced with a stdlib download (invalid in a plain .py file), the
# download URL switched to the raw-file URL (the original pointed at the
# GitHub HTML page, so the file would contain HTML, not CSV), the broken
# two-line method chain wrapped in parentheses, and `print(tally)` /
# `tally.take(tally.count())` replaced with a single `collect()`.
from operator import add
import urllib.request

from pyspark import SparkContext

sc = SparkContext()

# --- Part 1: reduceByKey example -----------------------------------------
rdd1 = sc.parallelize([("a", 1), ("b", 1), ("a", 1)])
# Expected output: [('a', 2), ('b', 1)]
print(sorted(rdd1.reduceByKey(add).collect()))

# --- Part 2: guests per year ---------------------------------------------
# Must be the raw.githubusercontent.com URL; the github.com/.../blob/ URL
# returns an HTML page, not the CSV.
CSV_URL = (
    "https://raw.githubusercontent.com/fivethirtyeight/data/"
    "master/daily-show-guests/daily_show_guests.csv"
)
urllib.request.urlretrieve(CSV_URL, "daily.csv")

raw = sc.textFile("daily.csv")
print(raw.take(5))  # inspect the first five raw lines (includes the header)

# Split each line into fields. NOTE(review): a naive comma split breaks on
# quoted fields that contain commas -- acceptable here because we only use
# column 0 (YEAR), which is never quoted. The header row ("YEAR", 1) will
# also appear in the tally, as in the original.
daily = raw.map(lambda line: line.split(","))
print(daily.take(5))

# Aggregate total count of guests per year (column 0 is YEAR).
tally = (daily.map(lambda x: (x[0], 1))
              .reduceByKey(lambda x, y: x + y))

# reduceByKey is lazy; collect() triggers execution and returns the pairs.
# (Printing the RDD itself only shows the RDD object, and
# `tally.take(tally.count())` would run two Spark jobs for no benefit.)
print(tally.collect())
# Question: How do I sort the tally by year?
# Answer: use tally.sortByKey().collect(), or sorted(tally.collect()) on
# the driver once the result is small enough to fit in memory.
 
“Looking for a Similar Assignment? Get Expert Help at an Amazing Discount!”

"Get 15% discount on your first 3 orders with us"
Use the following coupon
"FIRST15"

Order Now