import pandas as pd
# Load the survey export; the file is decoded as ISO-8859-1 rather than the
# default UTF-8.
star_wars = pd.read_csv("star_wars.csv", encoding="ISO-8859-1")
star_wars.head(10)
star_wars.columns
# Keep only the rows that carry a RespondentID.
star_wars = star_wars[star_wars["RespondentID"].notnull()]
# Translate the textual Yes/No answers into booleans. Values that are not a
# key of the mapping come back from Series.map as NaN.
yes_no = {"Yes": True, "No": False}
for col in (
    "Have you seen any of the 6 films in the Star Wars franchise?",
    "Do you consider yourself to be a fan of the Star Wars film franchise?",
):
    star_wars[col] = star_wars[col].map(yes_no)
import numpy as np
# Every film title maps to True; a missing answer (NaN) maps to False.
movie_mapping = dict.fromkeys(
    [
        "Star Wars: Episode I The Phantom Menace",
        "Star Wars: Episode II Attack of the Clones",
        "Star Wars: Episode III Revenge of the Sith",
        "Star Wars: Episode IV A New Hope",
        "Star Wars: Episode V The Empire Strikes Back",
        "Star Wars: Episode VI Return of the Jedi",
    ],
    True,
)
movie_mapping[np.nan] = False

# Columns 3-8 are converted in place, one column at a time.
for col in star_wars.columns[3:9]:
    star_wars[col] = star_wars[col].map(movie_mapping)
# Replace the survey's original headers for the six "seen" columns with the
# short names seen_1 .. seen_6 (old and new names are paired positionally).
star_wars = star_wars.rename(
    columns=dict(
        zip(
            [
                "Which of the following Star Wars films have you seen? Please select all that apply.",
                "Unnamed: 4",
                "Unnamed: 5",
                "Unnamed: 6",
                "Unnamed: 7",
                "Unnamed: 8",
            ],
            ["seen_{}".format(n) for n in range(1, 7)],
        )
    )
)
star_wars.head(10)
# Cast columns 9-14 to float so they can be aggregated numerically.
star_wars = star_wars.astype({name: float for name in star_wars.columns[9:15]})

# Replace the original headers of those columns with ranking_1 .. ranking_6
# (old and new names are paired positionally).
star_wars = star_wars.rename(
    columns=dict(
        zip(
            [
                "Please rank the Star Wars films in order of preference with 1 being your favorite film in the franchise and 6 being your least favorite film.",
                "Unnamed: 10",
                "Unnamed: 11",
                "Unnamed: 12",
                "Unnamed: 13",
                "Unnamed: 14",
            ],
            ["ranking_{}".format(n) for n in range(1, 7)],
        )
    )
)
star_wars.head(10)
%matplotlib inline
# Mean of columns 9-14 and sum of columns 3-8 over all respondents, each
# drawn as a bar chart.
#
# The columns are selected *before* aggregating. Reducing the whole frame
# first (star_wars.mean()[...]) also aggregates every unrelated column, and
# on pandas >= 2.0 that raises TypeError as soon as one of them is
# non-numeric. Selecting first yields the same six values.
ranking_fields = star_wars.columns[9:15]
star_wars[ranking_fields].mean().plot.bar()

seen_fields = star_wars.columns[3:9]
star_wars[seen_fields].sum().plot.bar()
# Split the respondents by their answer to the "fan" question. The explicit
# comparison against True / False is deliberate: a row whose answer is
# neither value matches neither test and lands in neither subset.
sw_fan = star_wars.loc[
    star_wars["Do you consider yourself to be a fan of the Star Wars film franchise?"].eq(True)
]
not_sw_fan = star_wars.loc[
    star_wars["Do you consider yourself to be a fan of the Star Wars film franchise?"].eq(False)
]
# Same two bar charts as for the full data set, restricted to sw_fan.
#
# The columns are selected before aggregating: sw_fan.mean() / sw_fan.sum()
# over the whole frame would also reduce every unrelated column and, on
# pandas >= 2.0, raise TypeError if any of them is non-numeric.
ranking_fields_sw_fan = sw_fan.columns[9:15]
sw_fan[ranking_fields_sw_fan].mean().plot.bar()

seen_fields_sw_fan = sw_fan.columns[3:9]
sw_fan[seen_fields_sw_fan].sum().plot.bar()
# Same two bar charts as for the full data set, restricted to not_sw_fan.
#
# The columns are selected before aggregating: not_sw_fan.mean() /
# not_sw_fan.sum() over the whole frame would also reduce every unrelated
# column and, on pandas >= 2.0, raise TypeError if any of them is
# non-numeric.
ranking_fields_not_sw_fan = not_sw_fan.columns[9:15]
not_sw_fan[ranking_fields_not_sw_fan].mean().plot.bar()

seen_fields_not_sw_fan = not_sw_fan.columns[3:9]
not_sw_fan[seen_fields_not_sw_fan].sum().plot.bar()