Study/AI
[DS][Visualization] Additional Plot
생각많은 소심남
2020. 5. 12. 21:29
Categorical Plot Types¶
In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
stripplot() and swarmplot()¶
In [2]:
# Read the 2010 school improvement grants CSV into the working DataFrame
df = pd.read_csv(
    filepath_or_buffer='./dataset/schoolimprovement2010grants.csv',
)
In [3]:
# Strip plot of Award_Amount for each 'Model Selected' value, jitter enabled
sns.stripplot(
    x='Award_Amount',
    y='Model Selected',
    data=df,
    jitter=True,
)
Out[3]:
In [4]:
# Swarm plot of Award_Amount per 'Model Selected', coloured by the Region column
sns.swarmplot(
    x='Award_Amount',
    y='Model Selected',
    hue='Region',
    data=df,
)
Out[4]:
boxplots, violinplots and lvplots (boxenplots)¶
In [5]:
# Box plot of Award_Amount for each 'Model Selected' value
sns.boxplot(
    x='Award_Amount',
    y='Model Selected',
    data=df,
)
Out[5]:
In [7]:
# Violin plot of Award_Amount per 'Model Selected', drawn with the 'husl' palette
sns.violinplot(
    x='Award_Amount',
    y='Model Selected',
    palette='husl',
    data=df,
)
Out[7]:
In [9]:
# Boxen plot (formerly "lvplot") of Award_Amount per 'Model Selected',
# split by Region and coloured with the 'Paired' palette
sns.boxenplot(
    x='Award_Amount',
    y='Model Selected',
    hue='Region',
    palette='Paired',
    data=df,
)
Out[9]:
barplots, pointplots and countplots¶
In [10]:
# Count plot of 'Model Selected' values, one colour per Region
sns.countplot(
    y='Model Selected',
    hue='Region',
    data=df,
)
Out[10]:
In [12]:
# Point plot of Award_Amount per 'Model Selected'; capsize=.1 adds caps
# to the confidence-interval bars
sns.pointplot(
    x='Model Selected',
    y='Award_Amount',
    capsize=.1,
    data=df,
)
Out[12]:
In [13]:
# Bar plot of Award_Amount per 'Model Selected', one colour per Region
sns.barplot(
    x='Model Selected',
    y='Award_Amount',
    hue='Region',
    data=df,
)
Out[13]:
Regression Plots¶
Regression and residual plots¶
In [15]:
# Rebind df to the college dataset used by the regression-plot cells
df = pd.read_csv(
    filepath_or_buffer='./dataset/college_datav3.csv',
)
In [16]:
# Regression plot of Tuition (y) against SAT_AVG_ALL (x),
# drawn with green '^' markers
sns.regplot(
    x='SAT_AVG_ALL',
    y='Tuition',
    data=df,
    marker='^',
    color='g',
)
Out[16]:
In [17]:
# Display the residual plot for the regression of Tuition (y) on
# SAT_AVG_ALL (x). The x/y assignment must match the regression plot of
# these two columns; swapping them would show the residuals of a
# different model (SAT_AVG_ALL regressed on Tuition).
sns.residplot(data=df,
              y='Tuition',
              x='SAT_AVG_ALL',
              color='g')
Out[17]:
Regression plot parameters¶
In [18]:
# Regression plot of Tuition (y) against PCTPELL, the percentage of
# Pell Grants (x)
sns.regplot(
    x='PCTPELL',
    y='Tuition',
    data=df,
)
Out[18]:
In [19]:
# Same Tuition-vs-PCTPELL regression plot, this time with x_bins=5
sns.regplot(
    x='PCTPELL',
    y='Tuition',
    data=df,
    x_bins=5,
)
Out[19]:
In [20]:
# Final variant: keep x_bins=5 and fit a 2nd-order polynomial (order=2)
sns.regplot(
    x='PCTPELL',
    y='Tuition',
    data=df,
    x_bins=5,
    order=2,
)
Out[20]:
Binning data¶
In [21]:
# Plain scatter of Tuition (y) against UG (x): fit_reg=False turns off
# the regression line
sns.regplot(
    x='UG',
    y='Tuition',
    data=df,
    fit_reg=False,
)
Out[21]:
In [22]:
# Tuition (y) against UG (x), with the x values grouped into 5 bins
sns.regplot(
    x='UG',
    y='Tuition',
    data=df,
    x_bins=5,
)
Out[22]:
In [23]:
# Tuition (y) against UG (x), with the x values grouped into 8 bins
sns.regplot(
    x='UG',
    y='Tuition',
    data=df,
    x_bins=8,
)
Out[23]:
Matrix plots¶
Creating heatmaps¶
In [24]:
# Rebind df to the cleaned Daily Show guests dataset
df = pd.read_csv(
    filepath_or_buffer='./dataset/daily_show_guests_cleaned.csv',
)

# Cross-tabulate Group (rows) against YEAR (columns) and print the table
pd_crosstab = pd.crosstab(index=df['Group'], columns=df['YEAR'])
print(pd_crosstab)

# Draw the cross-tabulation as a heatmap
sns.heatmap(pd_crosstab)

# Tick label rotation for readability: y labels at 0 degrees, x labels at 90
plt.yticks(rotation=0)
plt.xticks(rotation=90)
Out[24]:
Customizing heatmaps¶
In [25]:
# Rebuild the Group-by-YEAR cross-tabulation
pd_crosstab = pd.crosstab(index=df['Group'], columns=df['YEAR'])

# Heatmap without a colour bar, using the 'BuGn' colormap and
# 0.3-wide lines between cells
sns.heatmap(
    pd_crosstab,
    cmap='BuGn',
    cbar=False,
    linewidths=0.3,
)

# Tick label rotation for readability: y labels at 0 degrees, x labels at 90
plt.yticks(rotation=0)
plt.xticks(rotation=90)
Out[25]: