# Read 'hacker_news.csv' using the reader function from the csv package

from csv import reader
# Load every row of the CSV into a list of lists. The `with` block closes
# the file even if parsing raises, and newline='' lets the csv module do
# its own newline handling (needed for quoted fields with line breaks).
with open('hacker_news.csv', newline='') as opened_file:
    read_file = reader(opened_file)
    hn = list(read_file)

# The first row of the file holds the column names
headers = hn[0]

# Drop the header row so that hn contains only data rows
hn = hn[1:]

# Display headers
print(headers)

# Display the first five data rows
print(hn[:5])

['id', 'title', 'url', 'num_points', 'num_comments', 'author', 'created_at']
[['12224879', 'Interactive Dynamic Video', 'http://www.interactivedynamicvideo.com/', '386', '52', 'ne0phyte', '8/4/2016 11:52'], ['10975351', 'How to Use Open Source and Shut the Fuck Up at the Same Time', 'http://hueniverse.com/2016/01/26/how-to-use-open-source-and-shut-the-fuck-up-at-the-same-time/', '39', '10', 'josep2', '1/26/2016 19:30'], ['11964716', "Florida DJs May Face Felony for April Fools' Water Joke", 'http://www.thewire.com/entertainment/2013/04/florida-djs-april-fools-water-joke/63798/', '2', '1', 'vezycash', '6/23/2016 22:20'], ['11919867', 'Technology ventures: From Idea to Enterprise', 'https://www.amazon.com/Technology-Ventures-Enterprise-Thomas-Byers/dp/0073523429', '3', '1', 'hswarna', '6/17/2016 0:01'], ['10301696', 'Note by Note: The Making of Steinway L1037 (2007)', 'http://www.nytimes.com/2007/11/07/movies/07stein.html?_r=0', '8', '2', 'walterbell', '9/30/2015 4:12']]


# Partition the posts into three lists by title prefix:
# ask_posts ("Ask HN"), show_posts ("Show HN") and other_posts (the rest).

ask_posts = []
show_posts = []
other_posts = []

# hn already had its header row removed when it was loaded, so iterate
# over all of it; slicing with hn[1:] here would drop the first data row.
for row in hn:
    # Lowercase the title (column 1) so the prefix check is case-insensitive
    title = row[1].lower()
    if title.startswith('ask hn'):
        ask_posts.append(row)
    elif title.startswith('show hn'):
        show_posts.append(row)
    else:
        other_posts.append(row)

print('The number of ask_posts is: ', len(ask_posts))
print('The number of show_posts is: ', len(show_posts))
print('The number of other_posts is: ', len(other_posts))

The number of ask_posts is:  1744
The number of show_posts is:  1162
The number of other_posts is:  17193


# Preview the first five rows of the ask posts, then of the show posts
for preview_posts in (ask_posts, show_posts):
    print(preview_posts[:5])

[['12296411', 'Ask HN: How to improve my personal website?', '', '2', '6', 'ahmedbaracat', '8/16/2016 9:55'], ['10610020', 'Ask HN: Am I the only one outraged by Twitter shutting down share counts?', '', '28', '29', 'tkfx', '11/22/2015 13:43'], ['11610310', 'Ask HN: Aby recent changes to CSS that broke mobile?', '', '1', '1', 'polskibus', '5/2/2016 10:14'], ['12210105', 'Ask HN: Looking for Employee #3 How do I do it?', '', '1', '3', 'sph130', '8/2/2016 14:20'], ['10394168', 'Ask HN: Someone offered to buy my browser extension from me. What now?', '', '28', '17', 'roykolak', '10/15/2015 16:38']]
[['10627194', 'Show HN: Wio Link  ESP8266 Based Web of Things Hardware Development Platform', 'https://iot.seeed.cc', '26', '22', 'kfihihc', '11/25/2015 14:03'], ['10646440', 'Show HN: Something pointless I made', 'http://dn.ht/picklecat/', '747', '102', 'dhotson', '11/29/2015 22:46'], ['11590768', 'Show HN: Shanhu.io, a programming playground powered by e8vm', 'https://shanhu.io', '1', '1', 'h8liu', '4/28/2016 18:05'], ['12178806', 'Show HN: Webscope  Easy way for web developers to communicate with Clients', 'http://webscopeapp.com', '3', '3', 'fastbrick', '7/28/2016 7:11'], ['10872799', 'Show HN: GeoScreenshot  Easily test Geo-IP based web pages', 'https://www.geoscreenshot.com/', '1', '9', 'kpsychwave', '1/9/2016 20:45']]


# Total and average number of comments received by ask posts.
# Column 4 of each row holds the comment count as a string.
total_ask_comments = sum(int(post[4]) for post in ask_posts)

# Compute the average once, after the total is known. Fall back to 0.0
# when there are no ask posts so the division and the prints cannot fail.
avg_ask_comments = total_ask_comments / len(ask_posts) if ask_posts else 0.0

# The reported values are comment counts, so the labels say "comments"
print("The total number of comments on ask posts is: ", total_ask_comments)
print("The average number of comments per ask post is: ", "{:.2f}".format(avg_ask_comments))

The total number of ask post is:  24483
The average number of ask post is:  14.04


# Total and average number of comments received by show posts.
# Column 4 of each row holds the comment count as a string.
total_show_comments = sum(int(post[4]) for post in show_posts)

# Compute the average once, after the total is known. Fall back to 0.0
# when there are no show posts so the division and the prints cannot fail.
avg_show_comments = total_show_comments / len(show_posts) if show_posts else 0.0

# The reported values are comment counts, so the labels say "comments"
print("The total number of comments on show posts is: ", total_show_comments)
print("The average number of comments per show post is: ", "{:.2f}".format(avg_show_comments))

The total number of show post is:  11988
The average number of show post is:  10.32


# Import the datetime module as dt
import datetime as dt


# For every ask post, pair its creation timestamp (column 6) with its
# comment count (column 4, converted to int) as a two-element tuple.
result_list = [(post[6], int(post[4])) for post in ask_posts]


# Tally, per hour of day, how many ask posts were created (counts_by_hour)
# and how many comments those posts received (comments_by_hour).
# Keys are the zero-padded hour strings produced by the "%H" format.
counts_by_hour = {}
comments_by_hour = {}

for created_at, comment_count in result_list:
    # Parse the "month/day/year hour:minute" timestamp and keep the hour only
    parsed = dt.datetime.strptime(created_at, "%m/%d/%Y %H:%M")
    hour_key = parsed.strftime("%H")
    # dict.get supplies 0 the first time an hour is seen
    counts_by_hour[hour_key] = counts_by_hour.get(hour_key, 0) + 1
    comments_by_hour[hour_key] = comments_by_hour.get(hour_key, 0) + comment_count

print(counts_by_hour)
print(comments_by_hour)

{'09': 45, '13': 85, '10': 59, '14': 107, '16': 108, '23': 68, '12': 73, '17': 100, '15': 116, '21': 109, '20': 80, '02': 58, '18': 109, '03': 54, '05': 46, '19': 110, '01': 60, '22': 71, '08': 48, '04': 47, '00': 55, '06': 44, '07': 34, '11': 58}
{'09': 251, '13': 1253, '10': 793, '14': 1416, '16': 1814, '23': 543, '12': 687, '17': 1146, '15': 4477, '21': 1745, '20': 1722, '02': 1381, '18': 1439, '03': 421, '05': 464, '19': 1188, '01': 683, '22': 479, '08': 492, '04': 337, '00': 447, '06': 397, '07': 267, '11': 641}


# Average number of comments per ask post for each hour, stored as
# [hour, average] pairs in the same order as comments_by_hour.
avg_by_hour = [
    [hour_key, comment_total / counts_by_hour[hour_key]]
    for hour_key, comment_total in comments_by_hour.items()
]

print(avg_by_hour)

[['09', 5.5777777777777775], ['13', 14.741176470588234], ['10', 13.440677966101696], ['14', 13.233644859813085], ['16', 16.796296296296298], ['23', 7.985294117647059], ['12', 9.41095890410959], ['17', 11.46], ['15', 38.5948275862069], ['21', 16.009174311926607], ['20', 21.525], ['02', 23.810344827586206], ['18', 13.20183486238532], ['03', 7.796296296296297], ['05', 10.08695652173913], ['19', 10.8], ['01', 11.383333333333333], ['22', 6.746478873239437], ['08', 10.25], ['04', 7.170212765957447], ['00', 8.127272727272727], ['06', 9.022727272727273], ['07', 7.852941176470588], ['11', 11.051724137931034]]


# Rank the hours by average comments per ask post.
# swap_avg_by_hour holds [average, hour] rows (the columns of avg_by_hour
# swapped) so that sorting the rows orders them by average first.
swap_avg_by_hour = [[average, hour] for hour, average in avg_by_hour]

# Highest averages first
sorted_swap = sorted(swap_avg_by_hour, reverse=True)

print("Top 5 Hours for Ask Posts Comments(EST):\n")

for average, hour in sorted_swap[:5]:
    # Render the hour key (e.g. "15") as a 12-hour clock time ("03:00 PM")
    time_str = dt.datetime.strptime(hour, "%H").strftime("%I:%M %p")
    # Separate the number from the label with a space; the unit is "post"
    print("{}: {:.2f} average comments per post.".format(time_str, average))

Top 5 Hours for Ask Posts Comments(EST):

03:00 PM: 38.59average comments per cost.
02:00 AM: 23.81average comments per cost.
08:00 PM: 21.52average comments per cost.
04:00 PM: 16.80average comments per cost.
09:00 PM: 16.01average comments per cost.

Exploring Hacker News Posts¶

Exploration of the Dataset¶

Extract Ask HN, Show HN and Other Posts¶

Explore Ask HN and Show HN Posts¶

Determine Average Number of Comments¶

Determine if Ask HN Posts Created At a Certain Time Receive More Comments¶

Calculate Number of Ask HN Posts Created per Hour & Number of Comments Received¶

Calculate Average Number of Comments per Ask HN Post by Hour¶

Conclusion¶