import pandas as pd # Importing
import numpy as np # All Required
import matplotlib.pyplot as plt # Modules
%matplotlib inline
#--------------------------------------------------------------------------------------------------------------------------
# Read the master salary sheet, drop incomplete rows, then tidy the text columns.
fn = pd.read_csv("C:/Users/JoshW/Desktop/Programming/SalGuide/SalMaster.csv")
fn.dropna(inplace=True)
# Keep only the text before the first "-" and then before the first ",".
# str.split(..., expand=True) yields one column per piece, and a multi-column
# frame cannot be stored in the single "Department" column (newer pandas
# raises), so take the first piece of each split explicitly.
fn["Department"] = fn["Department"].str.split("-").str[0]
fn["Department"] = fn["Department"].str.split(",").str[0]
# sort_values returns a new frame; the result has to be bound for the
# sort by last name to take effect.
fn = fn.sort_values(['Last'], ascending=True)
# Reduce first names to their initial.
fn["First"] = fn.First.str[0:1]
#----------------------------------------------------------------------------------------------------------------------------
# Quick orientation: report how many distinct departments and job titles
# appear in the cleaned frame (value_counts skips missing values).
p = len(fn['Department'].value_counts())
print('\nThere are', p, 'different departments')
t = len(fn['Title'].value_counts())
print('\nThere are', t, 'different job titles')
#----------------------------------------------------------------------------------------------------------------------------
# Build the working frame `common_title`:
#   A  bind `common_title` to `fn` (the same object, not a copy, so the new
#      Salary column also appears on `fn`)
#   B  parse the column at position 5 into a float `Salary` column after
#      stripping "$" and ","
#   C  keep only rows whose Salary is above 20000 (the original note says
#      this removes hourly workers)
#   D  sort by Salary, highest first
#   E  discard the old index
#   F  and renumber it from 1 instead of 0
#   G  upper-case Title (the original note says a later function needs this)
common_title = fn  # A
# Raw string: "\$" is not a valid escape in a normal string literal.
common_title['Salary'] = (
    common_title[common_title.columns[5]]
    .replace(r'[\$,]', '', regex=True)
    .astype(float)
)  # B
# Display floats as dollar amounts from here on (pandas-wide setting).
pd.options.display.float_format = '${:,.2f}'.format
common_title = common_title.loc[common_title['Salary'] > 20000]  # C
common_title = common_title.sort_values('Salary', ascending=False)  # D
common_title.reset_index(drop=True, inplace=True)  # E
common_title.index += 1  # F
common_title['Title'] = common_title['Title'].str.upper()  # G
# Bare expression: only displays something when it ends a notebook cell.
common_title.head(50)
# DataFrames are created for each year.
# A five-number summary is produced for each year,
# and the summaries are then plotted as box-and-whisker plots.
def _salary_stats(frame, floor=20000):
    """Return the salary figures used for one box plot.

    Args:
        frame: DataFrame with a ``Salary`` column.
        floor: Only salaries above this value are considered for the minimum.

    Returns:
        A tuple ``(above_floor, mean, low, median, high, lower_mid,
        upper_mid)``. ``above_floor`` is *frame* restricted to salaries above
        *floor* and ``low`` is the minimum of that subset; mean, median and
        high are taken over all of *frame*.
    """
    above_floor = frame.loc[frame['Salary'] > floor]
    salaries = frame['Salary']
    low = above_floor['Salary'].min()
    mid = salaries.median()
    high = salaries.max()
    # NOTE(review): the two quarter points are midpoints between the median
    # and each extreme, not true quartiles of the data.
    return (above_floor, salaries.mean(), low, mid, high,
            (low + mid) / 2, (high + mid) / 2)


# Every slice goes through the same helper so that each year uses the same
# 20000 floor and takes its minimum from its own subset.

# All years together.
(y_all_x, y_all_mean, y_all_min, y_all_med,
 y_all_max, y_all_1q, y_all_3q) = _salary_stats(common_title)

# 2017
year17 = common_title.loc[common_title['Year'] == 2017]
(y17x, y17mean, y17min, y17med,
 y17max, y17_1q, y17_3q) = _salary_stats(year17)

# 2016
year16 = common_title.loc[common_title['Year'] == 2016]
(y16x, y16mean, y16min, y16med,
 y16max, y16_1q, y16_3q) = _salary_stats(year16)

# 2015
year15 = common_title.loc[common_title['Year'] == 2015]
(y15x, y15mean, y15min, y15med,
 y15max, y15_1q, y15_3q) = _salary_stats(year15)

# 2014
year14 = common_title.loc[common_title['Year'] == 2014]
(y14x, y14mean, y14min, y14med,
 y14max, y14_1q, y14_3q) = _salary_stats(year14)

# 2013
year13 = common_title.loc[common_title['Year'] == 2013]
(y13x, y13mean, y13min, y13med,
 y13max, y13_1q, y13_3q) = _salary_stats(year13)
#--------------------------------------------------------------------------------------------------------------------------------
# Bundle the five plot values of each slice in box-plot order:
# minimum, lower midpoint, median, upper midpoint, maximum.
all_summ = (y_all_min, y_all_1q, y_all_med, y_all_3q, y_all_max)
y13_summ = (y13min, y13_1q, y13med, y13_3q, y13max)
y14_summ = (y14min, y14_1q, y14med, y14_3q, y14max)
# Each summary must use its own year's upper midpoint, 2015 included.
y15_summ = (y15min, y15_1q, y15med, y15_3q, y15max)
y16_summ = (y16min, y16_1q, y16med, y16_3q, y16max)
y17_summ = (y17min, y17_1q, y17med, y17_3q, y17max)
#--------------------------------------------------------------------------------------------------------------------------------
# One 2x3 grid: the all-years summary followed by one panel per year.
fig, axs = plt.subplots(2, 3)
_overview_panels = (
    ("All Years", all_summ),
    ("2013", y13_summ),
    ("2014", y14_summ),
    ("2015", y15_summ),
    ("2016", y16_summ),
    ("2017", y17_summ),
)
# axs.flat walks the grid row by row, matching the panel order above.
for _axis, (_heading, _values) in zip(axs.flat, _overview_panels):
    # Positional arguments after the data: notch=0, sym='' (no flier markers).
    _axis.boxplot(_values, 0, '')
    _axis.set_title(_heading)
# Spread the panels out before display.
fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                    hspace=0.4, wspace=3)
# - - - - - - - - - - - - - - - - - - - - - - - - -
# Report the mean salary overall and then for each year. The pieces are
# handed to a single print call, so they are joined by single spaces.
_yearly_average_lines = (
    ('The average salary at UMD for 2013 was', y13mean),
    ('The average salary at UMD for 2014 was', y14mean),
    ('The average salary at UMD for 2015 was', y15mean),
    ('The average salary at UMD for 2016 was', y16mean),
    ('The average salary at UMD for 2017 was', y17mean),
)
_report_parts = ['\n',
                 'The average salary at UMD for all years was',
                 '$%0.2f' % y_all_mean,
                 '\n\n']
for _sentence, _average in _yearly_average_lines:
    _report_parts.extend([_sentence, '$%0.2f' % _average, '\n'])
print(*_report_parts)
def scatter(data):
    """Show a scatter plot of salary (x axis) against department (y axis).

    Args:
        data: DataFrame with ``Salary``, ``Department`` and ``Year`` columns.

    Returns:
        None. A new 8x6 figure is created and displayed with ``plt.show()``.
        Its title lists the distinct years in ``data['Year']`` in ascending
        order.
    """
    plt.figure(figsize=(8, 6))
    plt.scatter(data['Salary'], data['Department'])
    years = sorted(int(value) for value in data['Year'].unique())
    # Format the title as one string; passing a tuple would be rendered as
    # the tuple's repr, parentheses and quotes included.
    plt.title('Salaries vs Department {}'.format(years))
    plt.ylabel('Department')
    plt.xlabel('Salary')
    # No subplots_adjust here: the only `fig` visible from this function is a
    # module-level figure, not the one created above.
    plt.show()
# Ask which year(s) to analyse and build `year`, the frame holding the rows
# for that selection. Accepted entries: "all" / "all years", or a four- or
# two-digit year from 2013 to 2017, separated by commas.
inp = input('Which year or years are interested in?\n(Individual year is reccomended.)\n Please, separate values by comma.\n\n')
inp = inp.lower().split(",")
y = len(inp)  # number of comma-separated entries
# Fallback when no entry is recognised: an empty frame with the expected columns.
year = pd.DataFrame(columns=['Year', 'Last', 'First', 'Department', 'Title', 'Salary'])

_frames_by_entry = {
    '2017': year17, '17': year17,
    '2016': year16, '16': year16,
    '2015': year15, '15': year15,
    '2014': year14, '14': year14,
    '2013': year13, '13': year13,
}
_selected = []
for i in inp:
    # Strip both sides so "2016 , 2017" is accepted as well as "2016, 2017".
    i = str(i).strip()
    if i == 'all' or i == 'all years':
        # "all" replaces whatever has been collected so far.
        _selected = [common_title]
        continue
    if i not in _frames_by_entry:
        print("\n", "Look, nothing personal, but we didn't write this script with a try/except\n",
              "and you cannot follow directions...\n",
              "So this is where this ends, and you need to run this cell again\n\n",
              "PEACE!")
        break
    if y > 1:
        _selected.append(_frames_by_entry[i])
        if i in ('2017', '17'):
            # NOTE(review): overwrites the department count stored in `p`
            # earlier; kept as-is, looks like a leftover -- confirm and remove.
            p = '2017'
    else:
        _selected = [_frames_by_entry[i]]

# Concatenate once at the end instead of growing from the empty all-object
# frame: pandas warns that empty entries will stop being ignored when the
# result dtypes are determined.
if len(_selected) == 1:
    year = _selected[0]
elif _selected:
    year = pd.concat(_selected)
#-----------------------------------------------------------------------------------------------------------------------------------------------------------
# Order by year first so the list of distinct years comes out chronologically,
# then rank the rows by salary (highest first) for plotting.
year = year.sort_values(by=['Year'])
yn = year['Year'].unique()
year = year.sort_values(by=['Salary'], ascending=False)
# Echo the years actually selected so the user can check the input was understood.
print(yn)
scatter(year)
# Functions include whisker_func(data, yn, inp), pg_break(), yr_quick_view(year), and spec_quick_view(data).
#\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/--
#--\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----
#----\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/------
#------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/--------
def whisker_func(data, yn, inp):
    """Draw one box plot of the salaries in *data* on a new figure.

    Args:
        data: DataFrame with ``Year`` and ``Salary`` columns.
        yn: Unused; the years are always re-read from ``data['Year']``. Kept
            so existing calls keep working.
        inp: Passed to ``set_xlabel`` as the x-axis label.

    Returns:
        None.
    """
    data = data.sort_values('Year', ascending=True)
    salaries = data['Salary']
    low, mid, high = salaries.min(), salaries.median(), salaries.max()
    # NOTE(review): the two quarter points are midpoints between the median
    # and each extreme, not true quartiles of the data.
    summ = low, (low + mid) / 2, mid, (high + mid) / 2, high
    years = [int(value) for value in data['Year'].unique()]

    fig, ax = plt.subplots()
    ax.boxplot(summ)
    # Align labels within this figure only, rather than reaching for axes
    # that belong to a module-level figure.
    fig.align_ylabels()
    # One formatted string; a tuple would be shown as its repr.
    ax.set_ylabel('Salary for {}'.format(years))
    ax.set_xlabel(inp)
#\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/--
#--\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----
#----\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/------
#------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/--------
def yr_quick_view(year):
    """Show box plots for the first 10, 25, 50, 100, 250 and 500 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column. The panels are titled
            "Top N Salaries", so the frame is presumably sorted by descending
            salary before the call -- verify against the caller.

    Returns:
        None. A 2x3 grid of box plots is drawn on a new figure.
    """
    panel_sizes = (10, 25, 50, 100, 250, 500)
    summaries = []
    for size in panel_sizes:
        salaries = year.head(size)['Salary']
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel size;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(2, 3)
    # axs.flat walks the grid row by row, smallest panel first.
    for ax, size, summary in zip(axs.flat, panel_sizes, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title("Top {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=3)
#\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/\____________/--
#--\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----\________/----
#----\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/--------\____/------
#------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/------------\/--------
def spec_quick_500(year):
    """Show box plots for the first 10, 25, 50, 100, 250 and 500 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column. The panels are titled
            "Top N Salaries", so the frame is presumably sorted by descending
            salary before the call -- verify against the caller.

    Returns:
        None. A 2x3 grid of box plots is drawn on a new figure.
    """
    panel_sizes = (10, 25, 50, 100, 250, 500)
    summaries = []
    for size in panel_sizes:
        salaries = year.head(size)['Salary']
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel size;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(2, 3)
    # axs.flat walks the grid row by row, smallest panel first.
    for ax, size, summary in zip(axs.flat, panel_sizes, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title("Top {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=3)
#--------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_quick_250(year):
    """Show box plots for the first 10, 25, 50, 100 and 250 rows plus all rows.

    Args:
        year: DataFrame with a ``Salary`` column. The first five panels are
            titled "Top N Salaries", so the frame is presumably sorted by
            descending salary before the call -- verify against the caller.

    Returns:
        None. A 2x3 grid of box plots is drawn on a new figure; the last
        panel ("All Salaries") covers every row of *year*.
    """
    panels = [("Top {} Salaries".format(size), year.head(size)['Salary'])
              for size in (10, 25, 50, 100, 250)]
    panels.append(("All Salaries", year['Salary']))

    summaries = []
    for _, salaries in panels:
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(2, 3)
    # axs.flat walks the grid row by row, in the order the panels were built.
    for ax, (title, _), summary in zip(axs.flat, panels, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title(title)
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=.75)
#--------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_quick_100(year):
    """Show box plots for the first 10, 25, 50 and 100 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column. The panels are titled
            "Top N Salaries", so the frame is presumably sorted by descending
            salary before the call -- verify against the caller.

    Returns:
        None. A single row of four box plots is drawn on a new figure.
    """
    panel_sizes = (10, 25, 50, 100)
    summaries = []
    for size in panel_sizes:
        salaries = year.head(size)['Salary']
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel size;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(1, 4)
    for ax, size, summary in zip(axs, panel_sizes, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title("Top {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=.75)
#--------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_quick_50(year):
    """Show box plots for the first 10, 25 and 50 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column.

    Returns:
        None. A single row of three box plots is drawn on a new figure.
    """
    def five_points(rows):
        # Minimum, lower midpoint, median, upper midpoint, maximum.
        pay = rows['Salary']
        lowest, middle, highest = pay.min(), pay.median(), pay.max()
        return (lowest, (lowest + middle) / 2, middle,
                (highest + middle) / 2, highest)

    panels = (
        ("Top 10 Salaries", five_points(year.head(10))),
        ("Top 25 Salaries", five_points(year.head(25))),
        ("Top 50 Salaries", five_points(year.head(50))),
    )
    figure, axes = plt.subplots(1, 3)
    for axis, (heading, values) in zip(axes, panels):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        axis.boxplot(values, 0, '')
        axis.set_title(heading)
    figure.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                           hspace=0.4, wspace=1)
#-------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_quick_25(year):
    """Show box plots for the first 10 and the first 25 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column.

    Returns:
        None. Two side-by-side box plots are drawn on a new figure.
    """
    def five_points(rows):
        # Minimum, lower midpoint, median, upper midpoint, maximum.
        pay = rows['Salary']
        lowest, middle, highest = pay.min(), pay.median(), pay.max()
        return (lowest, (lowest + middle) / 2, middle,
                (highest + middle) / 2, highest)

    first_ten = five_points(year.head(10))
    first_twenty_five = five_points(year.head(25))

    figure, (left_axis, right_axis) = plt.subplots(1, 2)
    # Positional arguments after the data: notch=0, sym='' (no fliers).
    left_axis.boxplot(first_ten, 0, '')
    left_axis.set_title("Top 10 Salaries")
    right_axis.boxplot(first_twenty_five, 0, '')
    right_axis.set_title("Top 25 Salaries")
    figure.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                           hspace=0.4, wspace=1)
#----------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_quick_10(year):
    """Show one box plot for the first 10 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column.

    Returns:
        None. A single box plot is drawn on a new figure.
    """
    leading_pay = year.head(10)['Salary']
    floor, centre, ceiling = (leading_pay.min(), leading_pay.median(),
                              leading_pay.max())
    # Minimum, lower midpoint, median, upper midpoint, maximum.
    summary = (floor, (floor + centre) / 2, centre,
               (ceiling + centre) / 2, ceiling)

    figure, axis = plt.subplots()
    # Positional arguments after the data: notch=0, sym='' (no fliers).
    axis.boxplot(summary, 0, '')
    axis.set_title("Top 10 Salaries")
    figure.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                           hspace=0.4, wspace=3)
#-------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_500(year):
    """Show box plots for the first 10, 25, 50, 100, 250 and 500 rows of *year*.

    Args:
        year: DataFrame with a ``Salary`` column. The panels are titled
            "Bottom N Salaries" but the rows are taken with ``head``, so the
            frame is presumably sorted by ascending salary before the call --
            verify against the caller.

    Returns:
        None. A 2x3 grid of box plots is drawn on a new figure.
    """
    panel_sizes = (10, 25, 50, 100, 250, 500)
    summaries = []
    for size in panel_sizes:
        salaries = year.head(size)['Salary']
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel size;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(2, 3)
    # axs.flat walks the grid row by row, smallest panel first.
    for ax, size, summary in zip(axs.flat, panel_sizes, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title("Bottom {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=3)
#--------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_250(year):
    """Show box plots for the first 10, 25, 50, 100 and 250 rows plus all rows.

    Args:
        year: DataFrame with a ``Salary`` column. The first five panels are
            titled "Bottom N Salaries" but the rows are taken with ``head``,
            so the frame is presumably sorted by ascending salary before the
            call -- verify against the caller.

    Returns:
        None. A 2x3 grid of box plots is drawn on a new figure; the last
        panel ("All Salaries") covers every row of *year*.
    """
    panels = [("Bottom {} Salaries".format(size), year.head(size)['Salary'])
              for size in (10, 25, 50, 100, 250)]
    panels.append(("All Salaries", year['Salary']))

    summaries = []
    for _, salaries in panels:
        low, mid, high = salaries.min(), salaries.median(), salaries.max()
        # NOTE(review): the quarter points are midpoints between the median
        # and each extreme, not true quartiles.
        # The fourth value must be the upper midpoint for every panel;
        # repeating the lower one puts the five values out of order.
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(2, 3)
    # axs.flat walks the grid row by row, in the order the panels were built.
    for ax, (title, _), summary in zip(axs.flat, panels, summaries):
        # Positional arguments after the data: notch=0, sym='' (no fliers).
        ax.boxplot(summary, 0, '')
        ax.set_title(title)
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=.75)
#--------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_100(year):
    """Draw four side-by-side box plots for the first 10, 25, 50 and 100 rows.

    For each slice ``year.head(n)`` a five-value summary of the ``Salary``
    column is built: minimum, midpoint of minimum and median, median,
    midpoint of maximum and median, maximum.  The two midpoints stand in for
    the quartiles; they are not true quartiles.  Each summary is handed to
    ``boxplot`` with notches off and outlier markers suppressed.

    The panels are titled "Bottom N Salaries"; this presumably relies on the
    caller passing ``year`` sorted by ascending salary -- verify against
    the caller.

    Fix over the previous version: the 100-row summary repeated the lower
    midpoint in place of the upper one, so that box was drawn from the wrong
    five points.

    Parameters:
        year: DataFrame-like object with a ``Salary`` column.
    """
    sizes = (10, 25, 50, 100)

    # Build every summary before creating the figure, as the original did.
    summaries = []
    for size in sizes:
        pay = year.head(size)['Salary']
        low = pay.min()
        mid = pay.median()
        high = pay.max()
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(1, len(sizes))
    for ax, size, summary in zip(axs, sizes, summaries):
        ax.boxplot(summary, 0, '')
        ax.set_title("Bottom {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=.75)
#--------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_50(year):
    """Draw three side-by-side box plots for the first 10, 25 and 50 rows.

    Each panel is fed a five-value summary of ``Salary`` for ``year.head(n)``:
    minimum, midpoint of minimum and median, median, midpoint of maximum and
    median, maximum.  The midpoints are stand-ins for quartiles, not true
    quartiles.

    Parameters:
        year: DataFrame-like object with a ``Salary`` column; the "Bottom"
            titles presumably assume it is sorted by ascending salary --
            verify against the caller.
    """
    panel_titles = ("Bottom 10 Salaries", "Bottom 25 Salaries", "Bottom 50 Salaries")

    # All summaries are computed before the figure is created.
    five_point = []
    for row_count in (10, 25, 50):
        pay = year.head(row_count)['Salary']
        lowest, middle, highest = pay.min(), pay.median(), pay.max()
        five_point.append(
            (lowest, (lowest + middle) / 2, middle, (highest + middle) / 2, highest)
        )

    fig, axs = plt.subplots(1, 3)
    for position, summary in enumerate(five_point):
        axs[position].boxplot(summary, 0, '')
        axs[position].set_title(panel_titles[position])
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=1)
#-------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_25(year):
    """Draw two side-by-side box plots for the first 10 and 25 rows of ``year``.

    Each panel is fed a five-value summary of ``Salary`` for ``year.head(n)``:
    minimum, midpoint of minimum and median, median, midpoint of maximum and
    median, maximum.  The midpoints are stand-ins for quartiles, not true
    quartiles.

    Fix over the previous version: the panels were titled "Top 10/25
    Salaries" although this is the bottom-N helper; they now read "Bottom",
    matching ``spec_bot_50`` and ``spec_bot_100``.

    Parameters:
        year: DataFrame-like object with a ``Salary`` column; presumably
            sorted by ascending salary by the caller -- verify.
    """
    sizes = (10, 25)

    # Build every summary before creating the figure, as the original did.
    summaries = []
    for size in sizes:
        pay = year.head(size)['Salary']
        low = pay.min()
        mid = pay.median()
        high = pay.max()
        summaries.append((low, (low + mid) / 2, mid, (high + mid) / 2, high))

    fig, axs = plt.subplots(1, len(sizes))
    for ax, size, summary in zip(axs, sizes, summaries):
        ax.boxplot(summary, 0, '')
        ax.set_title("Bottom {} Salaries".format(size))
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=1)
#----------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
def spec_bot_10(year):
    """Draw a single box plot for the first 10 rows of ``year``.

    The plot is fed a five-value summary of ``Salary`` for ``year.head(10)``:
    minimum, midpoint of minimum and median, median, midpoint of maximum and
    median, maximum.  The midpoints are stand-ins for quartiles, not true
    quartiles.

    Fix over the previous version: the plot was titled "Top 10 Salaries"
    although this is the bottom-N helper; it now reads "Bottom 10 Salaries",
    matching ``spec_bot_50`` and ``spec_bot_100``.

    Parameters:
        year: DataFrame-like object with a ``Salary`` column; presumably
            sorted by ascending salary by the caller -- verify.
    """
    pay = year.head(10)['Salary']
    low = pay.min()
    mid = pay.median()
    high = pay.max()
    summary = (low, (low + mid) / 2, mid, (high + mid) / 2, high)

    fig, axs = plt.subplots()
    axs.boxplot(summary, 0, '')
    axs.set_title("Bottom 10 Salaries")
    fig.subplots_adjust(left=0.08, right=1.8, bottom=0.05, top=0.9,
                        hspace=0.4, wspace=3)
#-------------------------------------------------------------------------------------------------------------------------------
#--------------------------------------------------------------------------------------------------------------------------------
# This block creates functions for analyzing 'year', which was created in block 3.
# The goal is to write code that lets the user choose what they see, based on
# what they want or need. Once completed, it should ideally crunch the data as
# requested and offer options for the output (flowchart to come).
def _dept_show_charts(spec, label, count, descending):
    """Draw the charts and print the table for a department selection.

    Nothing is drawn when fewer than 10 rows were requested.  Otherwise the
    box-plot helper is chosen from the requested row count (tiers at 10, 25,
    50, 100, 250 and 500), using the ``spec_quick_*`` family for descending
    order and the ``spec_bot_*`` family for ascending order.
    """
    if count < 10:
        return
    # The helper names are resolved here, at call time, because they are
    # defined elsewhere in the file.
    if count > 499:
        tier_plot = spec_quick_500 if descending else spec_bot_500
    elif count > 249:
        tier_plot = spec_quick_250 if descending else spec_bot_250
    elif count > 99:
        tier_plot = spec_quick_100 if descending else spec_bot_100
    elif count > 49:
        tier_plot = spec_quick_50 if descending else spec_bot_50
    elif count > 24:
        tier_plot = spec_quick_25 if descending else spec_bot_25
    else:
        tier_plot = spec_quick_10 if descending else spec_bot_10
    whisker_func(spec, yn, label)
    tier_plot(spec)
    scatter(spec)
    # Requests of 50 rows or more only print the first 20 rows of the table.
    print(spec.head(20) if count > 49 else spec)


def _dept_drill_down(spec, label, noun, sort_prompt):
    """Ask whether to analyze further, then sort, trim, summarize and plot."""
    print('\nWould you like to further analyze', label, 'for', yn, '?\n')
    answer = input('Yes or no?\n').lower()
    if answer in ('no', 'n', 'nah'):
        print('Okay!')
        return
    if answer not in ('yes', 'y', 'yeah'):
        return
    if spec['Title'].count() < 1:
        print('\nSorry.\nThere were no results.\nGoodbye')
        return

    order = input(sort_prompt).lower()
    if order in ('high to low', 'high low', 'h l', 'descending'):
        descending = True
    elif order in ('low to high', 'low high', 'l h', 'ascending'):
        descending = False
    else:
        return

    count = int(input('How many values would you like to assess?\n(Give an integer value)\n'))
    spec = spec.sort_values(['Salary'], ascending=not descending).head(count)
    spec = spec.reset_index(drop=True)
    spec.index += 1  # number the rows from 1 instead of 0

    # The statistics describe the trimmed selection the message refers to.
    spec_avg = spec['Salary'].mean()
    spec_min = spec['Salary'].min()
    spec_max = spec['Salary'].max()
    print('\nFor', yn, 'in the', label, noun + ':\n(Pay sorted', order, 'and top', count, 'values)\n')
    print('The minimum pay was', '$%0.2f' % spec_min)
    print('The maximum pay was', '$%0.2f' % spec_max)
    print('The average pay was', '$%0.2f' % spec_avg, '\n')
    # NOTE(review): the original printed the full table up front only on the
    # ascending path; kept as-is -- confirm whether that is intended.
    if not descending:
        print(spec)
    _dept_show_charts(spec, label, count, descending)


def year_plt_dept(year):
    """Interactively summarize and plot salaries for one or several departments.

    Prompts for a single department or a comma-separated list, prints the
    count and mean salary of rows with ``Salary`` above 20000, and optionally
    sorts, trims, summarizes and charts the selection.  Reads the
    module-level ``yn`` for the messages and calls ``whisker_func``,
    ``scatter`` and the ``spec_quick_*`` / ``spec_bot_*`` plot helpers
    defined elsewhere in the file.

    Changes over the previous version:
      * answers are actually lower-cased (the ``.lower()`` results used to be
        discarded), so "Yes" or "Specific" are now recognized;
      * more than three comma-separated departments used to hang in an
        endless loop; only the first three are used now, as the prompt says;
      * the multi-department path gained the missing low-to-high branch, and
        its min/max/mean now describe the trimmed "top N" rows instead of
        the whole selection.

    Parameters:
        year: DataFrame with ``Department``, ``Title``, ``Salary`` and
            ``Year`` columns.

    Raises:
        ValueError: if the requested row count is not an integer.
    """
    scope = input('Do you want to know about a specific department or various?').lower()
    if scope in ('specific', 'specific department', 'spec', 'specific dept'):
        depts = year['Department'].value_counts()
        print('Different departments with value counts:\n')
        print(depts.head(20))
        label = input('\nWhich department?\n').upper()
        spec = year.loc[year['Department'] == label]
        noun = 'department'
        sort_prompt = '\nSort high to low(descending) or low to high(ascending) values?\n'
    elif scope in ('various', 'multiple'):
        depts = year['Department'].value_counts()
        print('Different departments with value counts:\n', depts.head(20))
        raw = input('Which departments?\n-----Up to 3-----\n(Separate by comma)\n')
        # Only leading blanks are removed, exactly as before, so entries still
        # match department values that carry trailing whitespace.
        label = [name.lstrip() for name in raw.upper().split(",")]
        print(label)
        if len(label) > 3:
            print('Only the first 3 departments are used.')
            label = label[:3]
        spec = pd.concat([year.loc[year['Department'] == name] for name in label])
        noun = 'departments'
        sort_prompt = 'Sort high to low(descending) or low to high values(ascending)?\n'
    else:
        return

    salaried = spec.loc[spec['Salary'] > 20000]
    sal_cnt = salaried['Year'].count()
    sal_avg = salaried['Salary'].mean()
    print("\nFor", yn, "in the", label, "department: \n")
    print("There were", sal_cnt, "salary employees.\nThe average salary was", '$%0.2f' % sal_avg, '\n')
    if isinstance(label, list):
        # The multi-department path also shows the combined rows.
        print(spec)

    _dept_drill_down(spec, label, noun, sort_prompt)
def _title_show_charts(spec, label, count, descending, scatter_too):
    """Draw the charts and print the table for a job-title selection.

    Nothing is drawn when fewer than 10 rows were requested.  Otherwise the
    box-plot helper is chosen from the requested row count (tiers at 10, 25,
    50, 100, 250 and 500), using the ``spec_quick_*`` family for descending
    order and the ``spec_bot_*`` family for ascending order.
    """
    if count < 10:
        return
    # The helper names are resolved here, at call time, because they are
    # defined elsewhere in the file.
    if count > 499:
        tier_plot = spec_quick_500 if descending else spec_bot_500
    elif count > 249:
        tier_plot = spec_quick_250 if descending else spec_bot_250
    elif count > 99:
        tier_plot = spec_quick_100 if descending else spec_bot_100
    elif count > 49:
        tier_plot = spec_quick_50 if descending else spec_bot_50
    elif count > 24:
        tier_plot = spec_quick_25 if descending else spec_bot_25
    else:
        tier_plot = spec_quick_10 if descending else spec_bot_10
    whisker_func(spec, yn, label)
    tier_plot(spec)
    if scatter_too:
        scatter(spec)
    # Requests of 50 rows or more only print the first 20 rows of the table.
    print(spec.head(20) if count > 49 else spec)


def _title_drill_down(spec, label, sort_prompt, farewell, single):
    """Ask whether to analyze further, then sort, trim, summarize and plot."""
    print('\nWould you like to further analyze', label, 'for', yn, '?\n')
    answer = input('Yes or no?\n').lower()
    if answer in ('no', 'n', 'nah'):
        print(farewell)
        return
    if answer not in ('yes', 'y', 'yeah'):
        return
    if spec['Title'].count() < 1:
        print('\nSorry.\nThere were no results.\nGoodbye')
        return

    order = input(sort_prompt).lower()
    if order in ('high to low', 'high low', 'h l', 'descending'):
        descending = True
    elif order in ('low to high', 'low high', 'l h', 'ascending'):
        descending = False
    else:
        return

    count = int(input('How many values would you like to assess?\n(Give an integer value)\n'))
    spec = spec.sort_values(['Salary'], ascending=not descending).head(count)
    spec = spec.reset_index(drop=True)
    spec.index += 1  # number the rows from 1 instead of 0

    spec_avg = spec['Salary'].mean()
    spec_min = spec['Salary'].min()
    spec_max = spec['Salary'].max()
    print('\nFor', yn, 'with the', label, 'title:\n(Pay sorted', order, 'and top', count, 'values)\n')
    print('The minimum pay was', '$%0.2f' % spec_min)
    print('The maximum pay was', '$%0.2f' % spec_max)
    print('The average pay was', '$%0.2f' % spec_avg, '\n')
    print(spec)
    # NOTE(review): the original drew the scatter chart only for a single
    # title sorted descending with 500+ rows requested; kept as-is --
    # confirm whether the other paths should draw it too.
    _title_show_charts(spec, label, count, descending,
                       scatter_too=single and descending and count > 499)


def year_plt_title(year):
    """Interactively summarize and plot salaries for one or several job titles.

    Prompts for a single title or a comma-separated list.  For a single
    title it reports rows with ``Salary`` below 20000 as bonuses and rows
    above 20000 as salaried employees.  It then optionally sorts, trims,
    summarizes and charts the selection.  Reads the module-level ``yn`` for
    the messages and calls ``whisker_func``, ``scatter`` and the
    ``spec_quick_*`` / ``spec_bot_*`` plot helpers defined elsewhere in the
    file.

    Changes over the previous version:
      * answers are actually lower-cased (the ``.lower()`` results used to be
        discarded), so "Yes" or "Specific" are now recognized;
      * more than three comma-separated titles used to hang in an endless
        loop; only the first three are used now, as the prompt says;
      * the summary header said "department" on three of the four paths; it
        now says "title" everywhere;
      * the low-to-high multi-title path now prints the table like the other
        three paths.

    Parameters:
        year: DataFrame with ``Title``, ``Salary`` and ``Year`` columns.

    Raises:
        ValueError: if the requested row count is not an integer.
    """
    scope = input('Do you want to know about a specific title or various?').lower()
    if scope in ('specific', 'specific title', 'spec'):
        titles = year['Title'].value_counts()
        print('Different titles with value counts:\n')
        print(titles.head(20))
        label = input('What title?\n').upper()
        spec = year.loc[year['Title'] == label]

        print("\nFor", label, "in", yn, ": \n")
        bonuses = spec.loc[spec['Salary'] < 20000]
        bon_cnt = bonuses['Year'].count()
        if bon_cnt != 0:
            bon_avg = bonuses['Salary'].mean()
            print("There were", bon_cnt, "%s's" % label, "with bonuses.\nThe average bonus was", "$%0.2f" % bon_avg)
        salaried = spec.loc[spec['Salary'] > 20000]
        sal_cnt = salaried['Year'].count()
        if sal_cnt != 0:
            sal_avg = salaried['Salary'].mean()
            print("There were", sal_cnt, "salary employees with the title", label, ".\nThe average salary was", '$%0.2f' % sal_avg, '\n')

        _title_drill_down(
            spec, label,
            'Sort high to low(descending) or low to high values?(ascending)\n',
            'Okay!', single=True)
    elif scope in ('various', 'multiple'):
        titles = year['Title'].value_counts()
        print('Different titles with value counts:\n', titles.head(20), '\n')
        raw = input('Which titles?\n-----Up to 3-----\n(Separate by comma)\n')
        # Only leading blanks are removed, exactly as before, so entries still
        # match title values that carry trailing whitespace.
        label = [name.lstrip() for name in raw.upper().split(",")]
        print(label)
        if len(label) > 3:
            print('Only the first 3 titles are used.')
            label = label[:3]
        spec = pd.concat([year.loc[year['Title'] == name] for name in label])

        _title_drill_down(
            spec, label,
            'Sort high to low(descending) or low to high values(ascending)?\n',
            'Okay! Bye!', single=False)
# Set pandas' display width to 300 characters for the tables printed below.
pd.set_option('display.width', 300)
def yearplot(data):
    """Ask which field to explore and hand ``data`` to the matching routine.

    "dept", "department" or "departments" runs ``year_plt_dept``; "title",
    "job title" or "job" runs ``year_plt_title``.  The answer is matched
    case-insensitively; any other answer does nothing.

    Fix over the previous version: the ``data`` argument was ignored and the
    module-level ``year`` was passed on instead.

    Parameters:
        data: the table forwarded unchanged to the chosen routine.
    """
    field = input('Which field/s are you interested in?\n(Department, Title)\n').lower()
    if field in ('dept', 'department', 'departments'):
        year_plt_dept(data)
    elif field in ('title', 'job title', 'job'):
        year_plt_title(data)
# Start the interactive prompts for the `year` data built earlier in the file.
yearplot(year)