티스토리 뷰

Study/AI

[DS][Visualization] Additional Plot

생각많은 소심남 2020. 5. 12. 21:29
Additional Plot Types

Categorical Plot Types

In [1]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

%matplotlib inline

stripplot() and swarmplot()

In [2]:
# Read the school-improvement grants CSV into `df`; the categorical plots use it
df = pd.read_csv(filepath_or_buffer='./dataset/schoolimprovement2010grants.csv')
In [3]:
# Strip plot: Award_Amount on x, one row per 'Model Selected' category on y,
# with jitter switched on
sns.stripplot(x='Award_Amount', y='Model Selected', jitter=True, data=df)
Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d94f5d088>
In [4]:
# Swarm plot of Award_Amount per 'Model Selected' category,
# with the points colored by the Region column
sns.swarmplot(
    x='Award_Amount',
    y='Model Selected',
    hue='Region',
    data=df,
)
Out[4]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d962bc4c8>

boxplots, violinplots and lvplots

In [5]:
# Box plot of Award_Amount (x) for each 'Model Selected' category (y)
sns.boxplot(x='Award_Amount', y='Model Selected', data=df)
Out[5]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d96219788>
In [7]:
# Violin plot of Award_Amount per 'Model Selected' category,
# drawn with the 'husl' palette
sns.violinplot(
    x='Award_Amount',
    y='Model Selected',
    palette='husl',
    data=df,
)
Out[7]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d963de848>
In [9]:
# Letter-value plot (seaborn calls it boxenplot) of Award_Amount per
# 'Model Selected' category, split by Region and using the 'Paired' palette
sns.boxenplot(
    x='Award_Amount',
    y='Model Selected',
    hue='Region',
    palette='Paired',
    data=df,
)
Out[9]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d9648d348>

barplots, pointplots and countplots

In [10]:
# Count plot: number of rows per 'Model Selected' category (on the y axis),
# with a separate color for each Region
sns.countplot(y='Model Selected', hue='Region', data=df)
Out[10]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d963da8c8>
In [12]:
# Point plot of Award_Amount (y) by 'Model Selected' (x); capsize adds caps
# to the confidence-interval bars
sns.pointplot(
    x='Model Selected',
    y='Award_Amount',
    capsize=.1,
    data=df,
)
Out[12]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d95fed808>
In [13]:
# Bar plot of Award_Amount (y) by 'Model Selected' (x),
# one colored bar per Region within each model
sns.barplot(
    x='Model Selected',
    y='Award_Amount',
    hue='Region',
    data=df,
)
Out[13]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d96087048>

Regression Plots

Regression and residual plots

In [15]:
# Rebind `df` to the college dataset; the regression plots use it
df = pd.read_csv(filepath_or_buffer='./dataset/college_datav3.csv')
In [16]:
# Regression plot of Tuition (y) against SAT_AVG_ALL (x),
# drawn with green triangle markers
sns.regplot(
    x='SAT_AVG_ALL',
    y='Tuition',
    marker='^',
    color='g',
    data=df,
)
Out[16]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d96150e08>
In [17]:
# Display the residual plot for the same model as the regression plot:
# SAT_AVG_ALL is the predictor (x) and Tuition is the response (y).
# The axes must match the regplot call; with them swapped the residuals
# would belong to a different regression (SAT_AVG_ALL on Tuition).
sns.residplot(data=df,
              x='SAT_AVG_ALL',
              y='Tuition',
              color='g')
Out[17]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d9613e848>

Regression plot parameters

In [18]:
# Regression plot of Tuition (y) against PCTPELL, the percentage of
# Pell Grants (x)
sns.regplot(x='PCTPELL', y='Tuition', data=df)
Out[18]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d960c6408>
In [19]:
# Same Tuition-vs-PCTPELL regression, with the x values grouped into 5 bins
sns.regplot(
    x='PCTPELL',
    y='Tuition',
    x_bins=5,
    data=df,
)
Out[19]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d975df4c8>
In [20]:
# Tuition vs PCTPELL with 5 x-bins and a 2nd-order polynomial fit line
sns.regplot(
    x='PCTPELL',
    y='Tuition',
    order=2,
    x_bins=5,
    data=df,
)
Out[20]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d9617c388>

Binning data

In [21]:
# Plain scatter of Tuition (y) against UG (x): the regression line is
# turned off via fit_reg=False
sns.regplot(x='UG', y='Tuition', fit_reg=False, data=df)
Out[21]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d976bfe88>
In [22]:
# Tuition vs UG with the x values grouped into 5 bins
sns.regplot(
    x='UG',
    y='Tuition',
    x_bins=5,
    data=df,
)
Out[22]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d97745108>
In [23]:
# Tuition vs UG again, this time with the x values grouped into 8 bins
sns.regplot(
    x='UG',
    y='Tuition',
    x_bins=8,
    data=df,
)
Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x10d960065c8>

Matrix plots

Creating heatmaps

In [24]:
# Rebind `df` to the Daily Show guests dataset
df = pd.read_csv(filepath_or_buffer='./dataset/daily_show_guests_cleaned.csv')

# Cross-tabulate the guests: one row per Group, one column per YEAR
pd_crosstab = pd.crosstab(index=df['Group'], columns=df['YEAR'])
print(pd_crosstab)

# Draw the count table as a heatmap
sns.heatmap(data=pd_crosstab)

# Keep the y tick labels horizontal and turn the x tick labels vertical
# so they stay readable
plt.yticks(rotation=0)
plt.xticks(rotation=90)
YEAR            1999  2000  2001  2002  2003  2004  2005  2006  2007  2008  \
Group                                                                        
Academic           0     0     2     0     4     1    12     9    13     5   
Acting           108   100    92    84    74    51    44    44    25    26   
Advocacy           0     1     0     1     0     4     0     0     2     3   
Athletics          0     3     1     2     0     2     2     5     4     1   
Business           0     1     0     0     0     2     1     1     2     1   
Clergy             0     0     0     1     1     1     0     0     1     0   
Comedy            25    12    11     5    12     7     5     8     9     7   
Consultant         0     0     0     0     1     4     1     4     2     3   
Government         0     0     2     1     2     3     1     3     1     0   
Media             11    21    31    42    41    45    54    47    47    77   
Military           0     0     0     0     0     0     1     1     3     1   
Misc               0     0     2     1     1     0     4     3     2     2   
Musician          17    13    11    10     7     5    11     6     2     1   
Political Aide     0     1     1     2     1     2     3     3     2     6   
Politician         2    13     3     8    14    32    22    25    21    27   
Science            0     0     0     0     1     2     1     1     4     1   

YEAR            2009  2010  2011  2012  2013  2014  2015  
Group                                                     
Academic          11     8    10     8     8    10     2  
Acting            22    45    42    33    60    47    33  
Advocacy           1     1     1     2     2     3     3  
Athletics          7     5     2     7     4     4     3  
Business           4     2     3     3     3     1     1  
Clergy             1     0     1     2     0     0     0  
Comedy             7     7     7     6     6     9     7  
Consultant         2     1     0     0     0     0     0  
Government         5     3     3     3     7     6     0  
Media             59    50    51    52    51    53    24  
Military           1     2     3     1     1     1     1  
Misc               5     4     5     6     2     5     3  
Musician           5     6     6     5     5     8     5  
Political Aide     3     2     1     1     3     2     3  
Politician        26    25    23    29    11    13    14  
Science            4     3     5     2     2     1     1  
Out[24]:
(array([ 0.5,  1.5,  2.5,  3.5,  4.5,  5.5,  6.5,  7.5,  8.5,  9.5, 10.5,
        11.5, 12.5, 13.5, 14.5, 15.5, 16.5]),
 <a list of 17 Text xticklabel objects>)

Customizing heatmaps

In [25]:
# Rebuild the Group-by-YEAR count table so this cell does not depend on
# the table left over from the previous cell
pd_crosstab = pd.crosstab(index=df['Group'], columns=df['YEAR'])

# Heatmap without a color bar, using the BuGn colormap and thin lines
# between the cells
sns.heatmap(
    data=pd_crosstab,
    cmap='BuGn',
    cbar=False,
    linewidths=0.3,
)

# Keep the y tick labels horizontal and turn the x tick labels vertical
# so they stay readable
plt.yticks(rotation=0)
plt.xticks(rotation=90)
Out[25]:
(array([ 0.5,  1.5,  2.5,  3.5,  4.5,  5.5,  6.5,  7.5,  8.5,  9.5, 10.5,
        11.5, 12.5, 13.5, 14.5, 15.5, 16.5]),
 <a list of 17 Text xticklabel objects>)
댓글