Loading the data
The BLS has their annual data in Excel spreadheets, which varies in structure year to year.
So it took some custom cleaning for each spreadsheet.
If you want to see the specifics behind that, check out the compiler.R
script. But it essentially creates two dataframes: mega
and titles
.
mega <- readRDS("data/mega.rds")
titles <- read.csv("data/titles_codes_clean.csv")
kable(head(mega[,1:7]))
01 |
AL |
Alabama |
00-0000 |
All Occupations |
1883310 |
0.4 |
01 |
AL |
Alabama |
11-0000 |
Management Occupations |
69100 |
1.0 |
01 |
AL |
Alabama |
11-1011 |
Chief Executives |
1100 |
4.1 |
01 |
AL |
Alabama |
11-1021 |
General and Operations Managers |
27240 |
1.4 |
01 |
AL |
Alabama |
11-1031 |
Legislators |
1430 |
8.7 |
01 |
AL |
Alabama |
11-2011 |
Advertising and Promotions Managers |
50 |
16.0 |
kable(head(titles))
1 |
11-0000 |
Management occupations |
2 |
11-1011 |
Chief executives |
3 |
11-1021 |
General and operations managers |
4 |
11-1031 |
Legislators |
5 |
11-2011 |
Advertising and promotions managers |
6 |
11-2021 |
Marketing managers |
## These are the column names of the combined BLS OES dataframe (also with some additional adjustments and rankings done in the compiler.R script)
colnames(mega)
## [1] "AREA" "ST"
## [3] "STATE" "OCC_CODE"
## [5] "OCC_TITLE" "TOT_EMP"
## [7] "EMP_PRSE" "JOBS_1000"
## [9] "LOC_Q" "H_MEAN"
## [11] "A_MEAN" "MEAN_PRSE"
## [13] "H_PCT10" "H_PCT25"
## [15] "H_MEDIAN" "H_PCT75"
## [17] "H_PCT90" "A_PCT10"
## [19] "A_PCT25" "A_MEDIAN"
## [21] "A_PCT75" "A_PCT90"
## [23] "YEAR" "adj_value"
## [25] "pct_increase" "adjusted_H_MEAN"
## [27] "adjusted_H_MEDIAN" "adjusted_A_MEAN"
## [29] "adjusted_A_MEDIAN" "rank_A_MEAN"
## [31] "rank_adjusted_A_MEAN" "rank_A_MEDIAN"
## [33] "rank_adjusted_A_MEDIAN" "OCC_GROUP"
## And how many years did we end up with?
range(mega$YEAR)
## [1] 1999 2015
## Let's take a look at some of the raw figures for median annual pay
raw_pay <- mega[c("STATE", "OCC_TITLE", "YEAR", "A_MEDIAN")]
## And look at a sampling of 2008 data
raw_tail <- subset(raw_pay, YEAR==2008)
raw_tail <- tail(raw_pay, 10)
kable(raw_tail)
586169 |
Wyoming |
Loading Machine Operators, Underground Mining |
1999 |
40690 |
586170 |
Wyoming |
Industrial Truck and Tractor Operators |
1999 |
30820 |
586171 |
Wyoming |
Cleaners of Vehicles and Equipment |
1999 |
13530 |
586172 |
Wyoming |
Laborers and Freight, Stock, and Material Movers, Hand |
1999 |
17460 |
586173 |
Wyoming |
Packers and Packagers, Hand |
1999 |
13120 |
586174 |
Wyoming |
Gas Compressor and Gas Pumping Station Operators |
1999 |
42620 |
586175 |
Wyoming |
Pump Operators, Except Wellhead Pumpers |
1999 |
35880 |
586176 |
Wyoming |
Wellhead Pumpers |
1999 |
42600 |
586177 |
Wyoming |
Refuse and Recyclable Material Collectors |
1999 |
18100 |
586178 |
Wyoming |
Tank Car, Truck, and Ship Loaders |
1999 |
21820 |
## OK, let's adjust for inflation
## We'll use the blscrapeR package which is a wrapper for the BLS API.
### Here's the documentation https://cran.r-project.org/web/packages/blscrapeR/README.html
## This function pulls annual inflation rates compared to 2015
df <- inflation_adjust(2015)
kable(head(df))
1947-12-01 |
2015 |
0.09 |
-90.59072 |
1948-12-01 |
2015 |
0.10 |
-89.87342 |
1949-12-01 |
2015 |
0.10 |
-89.95781 |
1950-12-01 |
2015 |
0.10 |
-89.83122 |
1951-12-01 |
2015 |
0.11 |
-89.02954 |
1952-12-01 |
2015 |
0.11 |
-88.77637 |
## Adding a column specifically for year
df$year <- year(df$date)
## Narrowing down the dataframe
df <- df[c("adj_value", "pct_increase", "year")]
## Renaming the dataframe
colnames(df) <- c("adj_value", "pct_increase", "YEAR")
## Let's join it to our sample data frame
raw_tail <- left_join(raw_tail, df)
kable(raw_tail)
Wyoming |
Loading Machine Operators, Underground Mining |
1999 |
40690 |
0.7 |
-29.70464 |
Wyoming |
Industrial Truck and Tractor Operators |
1999 |
30820 |
0.7 |
-29.70464 |
Wyoming |
Cleaners of Vehicles and Equipment |
1999 |
13530 |
0.7 |
-29.70464 |
Wyoming |
Laborers and Freight, Stock, and Material Movers, Hand |
1999 |
17460 |
0.7 |
-29.70464 |
Wyoming |
Packers and Packagers, Hand |
1999 |
13120 |
0.7 |
-29.70464 |
Wyoming |
Gas Compressor and Gas Pumping Station Operators |
1999 |
42620 |
0.7 |
-29.70464 |
Wyoming |
Pump Operators, Except Wellhead Pumpers |
1999 |
35880 |
0.7 |
-29.70464 |
Wyoming |
Wellhead Pumpers |
1999 |
42600 |
0.7 |
-29.70464 |
Wyoming |
Refuse and Recyclable Material Collectors |
1999 |
18100 |
0.7 |
-29.70464 |
Wyoming |
Tank Car, Truck, and Ship Loaders |
1999 |
21820 |
0.7 |
-29.70464 |
## What's the median pay adjusted for inflation?
raw_tail$inflation_A_MEDIAN <- raw_tail$A_MEDIAN/raw_tail$adj_value
kable(raw_tail)
Wyoming |
Loading Machine Operators, Underground Mining |
1999 |
40690 |
0.7 |
-29.70464 |
58128.57 |
Wyoming |
Industrial Truck and Tractor Operators |
1999 |
30820 |
0.7 |
-29.70464 |
44028.57 |
Wyoming |
Cleaners of Vehicles and Equipment |
1999 |
13530 |
0.7 |
-29.70464 |
19328.57 |
Wyoming |
Laborers and Freight, Stock, and Material Movers, Hand |
1999 |
17460 |
0.7 |
-29.70464 |
24942.86 |
Wyoming |
Packers and Packagers, Hand |
1999 |
13120 |
0.7 |
-29.70464 |
18742.86 |
Wyoming |
Gas Compressor and Gas Pumping Station Operators |
1999 |
42620 |
0.7 |
-29.70464 |
60885.71 |
Wyoming |
Pump Operators, Except Wellhead Pumpers |
1999 |
35880 |
0.7 |
-29.70464 |
51257.14 |
Wyoming |
Wellhead Pumpers |
1999 |
42600 |
0.7 |
-29.70464 |
60857.14 |
Wyoming |
Refuse and Recyclable Material Collectors |
1999 |
18100 |
0.7 |
-29.70464 |
25857.14 |
Wyoming |
Tank Car, Truck, and Ship Loaders |
1999 |
21820 |
0.7 |
-29.70464 |
31171.43 |
## So we know what the 1999 salaries were in 2015 dollars.
## Next, let's adjust for power of the dollar in Wyoming compared to other states
value100s <- read.csv("data/price_parities.csv", stringsAsFactors=F)
kable(head(value100s, 10))
Alabama |
87.6 |
87.5 |
87.9 |
87.7 |
88.1 |
87.8 |
87.8 |
Alaska |
106.9 |
106.9 |
105.5 |
105.1 |
105.4 |
105.0 |
105.7 |
Arizona |
100.6 |
100.1 |
98.6 |
97.9 |
97.1 |
96.6 |
96.4 |
Arkansas |
86.9 |
86.6 |
87.7 |
87.6 |
87.8 |
87.7 |
87.5 |
California |
113.1 |
112.9 |
113.6 |
113.4 |
112.9 |
112.7 |
112.4 |
Colorado |
100.4 |
101.1 |
100.9 |
101.4 |
101.1 |
101.8 |
102.0 |
Connecticut |
110.7 |
110.4 |
109.4 |
109.0 |
109.2 |
108.7 |
108.8 |
Delaware |
102.0 |
103.2 |
102.8 |
101.8 |
101.2 |
101.0 |
101.9 |
District of Columbia |
115.6 |
116.4 |
118.2 |
117.8 |
117.7 |
117.7 |
118.1 |
Florida |
100.8 |
100.0 |
99.1 |
99.2 |
99.1 |
99.0 |
99.1 |
## This is the regional price parity by state of $100
## Let's translate that to the how much $100 is worth in each state (Only for 2008)
value100s <- value100s[c("STATE", "y2008")]
kable(head(value100s))
Alabama |
87.6 |
Alaska |
106.9 |
Arizona |
100.6 |
Arkansas |
86.9 |
California |
113.1 |
Colorado |
100.4 |
value100s$y2008_value <- 100+((100-value100s$y2008)/value100s$y2008*100)
value100s$y2008_percentchange <- (value100s$y2008_value-100)/100
kable(head(value100s, 10))
Alabama |
87.6 |
114.15525 |
0.1415525 |
Alaska |
106.9 |
93.54537 |
-0.0645463 |
Arizona |
100.6 |
99.40358 |
-0.0059642 |
Arkansas |
86.9 |
115.07480 |
0.1507480 |
California |
113.1 |
88.41733 |
-0.1158267 |
Colorado |
100.4 |
99.60159 |
-0.0039841 |
Connecticut |
110.7 |
90.33424 |
-0.0966576 |
Delaware |
102.0 |
98.03922 |
-0.0196078 |
District of Columbia |
115.6 |
86.50519 |
-0.1349481 |
Florida |
100.8 |
99.20635 |
-0.0079365 |
## Alright, lets join t |
hat to t |
he inflation-a |
djusted dataframe from earlier |
raw_tail <- left_join(raw_tail, value100s)
## Joining, by = "STATE"
kable(head(raw_tail, 10))
Wyoming |
Loading Machine Operators, Underground Mining |
1999 |
40690 |
0.7 |
-29.70464 |
58128.57 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Industrial Truck and Tractor Operators |
1999 |
30820 |
0.7 |
-29.70464 |
44028.57 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Cleaners of Vehicles and Equipment |
1999 |
13530 |
0.7 |
-29.70464 |
19328.57 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Laborers and Freight, Stock, and Material Movers, Hand |
1999 |
17460 |
0.7 |
-29.70464 |
24942.86 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Packers and Packagers, Hand |
1999 |
13120 |
0.7 |
-29.70464 |
18742.86 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Gas Compressor and Gas Pumping Station Operators |
1999 |
42620 |
0.7 |
-29.70464 |
60885.71 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Pump Operators, Except Wellhead Pumpers |
1999 |
35880 |
0.7 |
-29.70464 |
51257.14 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Wellhead Pumpers |
1999 |
42600 |
0.7 |
-29.70464 |
60857.14 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Refuse and Recyclable Material Collectors |
1999 |
18100 |
0.7 |
-29.70464 |
25857.14 |
96.1 |
104.0583 |
0.0405827 |
Wyoming |
Tank Car, Truck, and Ship Loaders |
1999 |
21820 |
0.7 |
-29.70464 |
31171.43 |
96.1 |
104.0583 |
0.0405827 |
## And one more calculation to apply the price parity to Wyoming
raw_tail$adjusted_A_MEDIAN <- raw_tail$inflation_A_MEDIAN*raw_tail$y2008_percentchange+raw_tail$inflation_A_MEDIAN
raw_tail <- raw_tail[c("STATE", "OCC_TITLE", "YEAR", "A_MEDIAN", "inflation_A_MEDIAN", "adjusted_A_MEDIAN")]
kable(head(raw_tail, 10))
Wyoming |
Loading Machine Operators, Underground Mining |
1999 |
40690 |
58128.57 |
60487.59 |
Wyoming |
Industrial Truck and Tractor Operators |
1999 |
30820 |
44028.57 |
45815.37 |
Wyoming |
Cleaners of Vehicles and Equipment |
1999 |
13530 |
19328.57 |
20112.98 |
Wyoming |
Laborers and Freight, Stock, and Material Movers, Hand |
1999 |
17460 |
24942.86 |
25955.11 |
Wyoming |
Packers and Packagers, Hand |
1999 |
13120 |
18742.86 |
19503.49 |
Wyoming |
Gas Compressor and Gas Pumping Station Operators |
1999 |
42620 |
60885.71 |
63356.62 |
Wyoming |
Pump Operators, Except Wellhead Pumpers |
1999 |
35880 |
51257.14 |
53337.30 |
Wyoming |
Wellhead Pumpers |
1999 |
42600 |
60857.14 |
63326.89 |
Wyoming |
Refuse and Recyclable Material Collectors |
1999 |
18100 |
25857.14 |
26906.50 |
Wyoming |
Tank Car, Truck, and Ship Loaders |
1999 |
21820 |
31171.43 |
32436.45 |
## So this is the process we took to determine the *actual* salary adjusting for year and state.