
Bar graphs are useful for displaying relationships between categorical data and at least one numerical variable. seaborn.countplot is a bar plot where the dependent variable is the number of occurrences of each value of the independent (categorical) variable.

dataset: IMDB 5000 Movie Dataset

%matplotlib inline
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
# Set the default matplotlib figure size used by the plots that follow.
plt.rcParams['figure.figsize'] = (20.0, 10.0)
# Load the movie metadata CSV into a DataFrame (path is relative to the working directory).
df = pd.read_csv('../../../datasets/movie_metadata.csv')
For the bar plot, let’s look at the number of movies in each category, allowing each movie to be counted more than once.

# Columns describing each movie that must survive the wide-to-long reshape.
id_columns = ['director_name', 'genres', 'duration']

# split each movie's genre list, then form a set from the unwrapped list of all genres
# NOTE(review): assumes df.genres has no missing values -- confirm against the dataset.
categories = {s for genre_list in df.genres.unique() for s in genre_list.split("|")}

# one-hot encode each movie's classification
# Membership is tested against the split genre list rather than the raw
# "|"-joined string, so a genre whose name is a substring of another
# (e.g. "Music" vs "Musical") is not counted by mistake.
for cat in categories:
    df[cat] = df.genres.transform(lambda s: int(cat in s.split("|")))
# drop other columns
df = df[id_columns + list(categories)]

# convert from wide to long format and remove null classifications
# id_vars keeps the per-movie columns on every melted row; without it the
# result holds only Category/Count and the df.duration lookup below fails.
df = pd.melt(df,
             id_vars = id_columns,
             value_vars = list(categories),
             var_name = 'Category',
             value_name = 'Count')
# .copy() so the column assignment below writes to an independent frame
# instead of a slice of the melted one.
df = df.loc[df.Count>0].copy()

# add an indicator whether a movie is short or long, split at 100 minutes runtime
df['islong'] = df.duration.transform(lambda x: int(x > 100))

# sort in descending order
#df = df.loc[df.groupby('Category').transform(sum).sort_values('Count', ascending=False).index]
Basic plot

# One vertical bar per Category value; bar height is the number of rows with that value.
p = sns.countplot(data=df, x = 'Category')


color by a category

# Split each Category bar by the islong indicator, drawing one colored bar per indicator value.
p = sns.countplot(data=df,
                  x = 'Category',
                  hue = 'islong')


make plot horizontal

# Passing the categorical column as y instead of x lays the bars out horizontally.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong')



# NOTE(review): this call was left unterminated (trailing comma, no closing
# parenthesis), which is a syntax error. It is closed here so the file parses;
# an additional keyword argument may have been intended -- confirm.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong')


Various palettes

# Each call below draws the same horizontal, hue-split count plot and varies
# only the palette argument.

# Palette selected by name: 'deep'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'deep')


# Palette selected by name: 'muted'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'muted')


# Palette selected by name: 'pastel'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'pastel')


# Palette selected by name: 'bright'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'bright')


# Palette selected by name: 'dark'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'dark')


# Palette selected by name: 'colorblind'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'colorblind')


# Custom palette: two explicit RGB colors, each channel scaled from 0-255 down
# to 0-1, one color per value of the 0/1 islong indicator.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = ((50/255, 132/255.0, 191/255.0), (255/255.0, 232/255.0, 0/255.0)))


# Palette selected by name: 'Dark2'.
p = sns.countplot(data=df,
                  y = 'Category',
                  hue = 'islong',
                  palette = 'Dark2')


# Redraw the basic vertical count plot, then overlay a large italic caption on it.
p = sns.countplot(data=df, x = 'Category')
# The first two arguments are the x and y position of the text (9, 2000).
plt.text(9,2000, "Color Palettes", fontsize = 95, color='black', fontstyle='italic')
