# Lines with a "pound" symbol are "commented" out of existence. R will ignore them. 
# Let's do some simple arithmetic.

2 + 2  # addition

5 * 10  # multiplication

144 / 12  # division

4^2  # exponentiation

log(1)  # logarithm

# R will even follow the order of operations. Sweet!

(2 * 3)^2  # the parenthesised product is computed first, then squared

# Now let's try something more complex. Let's define a vector. 

x1 <- c(0, 1, 2, 3, 4, 5)

# An assignment on its own prints nothing; typing the object's name shows it.

x1

# The same values can be generated as a sequence instead of typed out.

x2 <- seq(from = 0, to = 5)
x2

# Same thing! The `by` argument sets a step other than a whole number.

x3 <- seq(from = 0, to = 5, by = 0.1)
x3

# Cool! But let's go back to the shorter vector for illustrative purposes.

# Arithmetic operators are applied to every element of a vector.

x1_new <- 2 * x1
x1_new

# A second vector with the same number of elements as x1.

x4 <- c(2016, 2015, 2014, 2013, 2012, 2011)
x4

# We've now made 2 vectors of the same length. We can manipulate and compare those.

# Scatterplot of x1 (vertical axis) against the years in x4 (horizontal axis).
plot(x = x4, y = x1)

# Let's give these a little context...

# The horizontal axis is labelled "Year", so it must show the year vector x4.
# (x2 holds the values 0 to 5, the same as x1, and is not a set of years.)
plot(x = x4, y = x1, xlab = "Year", ylab = "Something Else")

# Now let's kill the dots and make it a line.

# type = "l" draws a line through the points instead of plotting symbols.
plot(x = x4, y = x1, xlab = "Year", ylab = "Something Else", type = "l")

# What if we want to create nonnumeric vectors? R can do it!

# A character (text) vector: each element is a quoted string.
x5 <- c(
  "Red", "Orange", "Yellow", "Green",
  "Blue", "Indigo", "Violet"
)
x5

##############################################################################

# Now let's do something a little more sophisticated. Let's define a function.

# rv takes a numeric input x and returns the negative of its square.
rv <- function(x) {
  -x^2
}

# Draw the function over the horizontal range -5 to 5.
plot(rv, xlim = c(-5, 5))

# We can also use R to simulate data. Let's take 20 draws from a normal distribution.

# 20 random draws from a normal distribution with mean 0 and standard deviation 1.
x6 <- rnorm(n = 20, mean = 0, sd = 1)
summary(x6)

# We can verify that "x6" follows a normal distribution graphically

plot(density(x6))
abline(v = 0, col = "red")  # vertical red line at 0, the mean used for the draws

# What happens if we increase the number of draws?

x7 <- rnorm(n = 20000, mean = 0, sd = 1)
summary(x7)
plot(density(x7))
abline(v = 0, col = "red")

#############################################################################

# Turns out we want to do more than just play with silly functions. But we'll need
# to add to R's toolkit to achieve some of these ends.

# Install the "car" package.
# NOTE(review): this line reinstalls the package every time the script is run;
# consider running it once interactively instead.
install.packages("car")

# Let's look at some survey data on individuals' jobs from the car package.

# Deliberate demonstration: the package has been installed but not yet loaded
# with library(), so this first call is expected to fail (see the comment below).
head(Duncan)

# Oh no! What happened? We have to call the package up out of the library.

# Load the installed package for the current session.
library(car)

# The same call as above, repeated now that the package has been loaded.
head(Duncan)

# We could open a spreadsheet to see/fix data (this command only works on PCs) #

# fix() opens Duncan in an interactive editor and stores any edits back in Duncan.
fix(Duncan)

# Let's get some summary statistics on income

summary(Duncan$income)
sd(Duncan$income)

# Let's now create a dichotomous variable that's '1' for above-average prestige, '0' otherwise

# Store the mean instead of copying a rounded value by hand, so the cutoff
# always matches the data actually loaded.
prestige_mean <- mean(Duncan$prestige)
prestige_mean

# The comparison yields TRUE/FALSE for every row; as.numeric() turns that into
# 1 (prestige at or above the mean) and 0 (prestige below the mean).
Duncan$prestigious <- as.numeric(Duncan$prestige >= prestige_mean)

# Sanity check

fix(Duncan)

# Report tabular results

table(Duncan$prestigious)

# Let's look at a 2-dimensional table showing occupation type and prestigious

# Rows are the values of prestigious; columns are the values of type.
table(Duncan$prestigious, Duncan$type)

# Some plot types with options demonstrated #

# Axis labels for the three occupation groups.
# NOTE(review): assumed to be in the same order as the groups of Duncan$type - confirm.
lab1 <- c("Blue Collar", "Professional", "White Collar")

# One box for all incomes, then one box per occupation type.
boxplot(Duncan$income)
boxplot(Duncan$income ~ Duncan$type)

# Same grouped boxplot with the default x-axis suppressed (xaxt = "n") so that
# the labels in lab1 can be drawn in its place.
boxplot(
  Duncan$income ~ Duncan$type,
  xlab = "", ylab = "Income", las = 1, xaxt = "n"
)
axis(1, at = 1:3, labels = lab1)

# Histogram of income: defaults first, then with `breaks` set, then with the
# title removed, an x-axis label, and a fill colour.
hist(Duncan$income)
hist(Duncan$income, breaks = 7)
hist(
  Duncan$income,
  breaks = 7, main = "", xlab = "Income", las = 1, col = "darkblue"
)

# Scatterplot of income against education: defaults first, then with filled
# points (pch = 16), then with axis labels added.
plot(x = Duncan$education, y = Duncan$income)
plot(x = Duncan$education, y = Duncan$income, pch = 16)
plot(
  x = Duncan$education, y = Duncan$income, pch = 16,
  xlab = "Education", ylab = "Income", las = 1
)

# Add the fitted line from a regression of income on education.
# The line type is set with `lty` (2 = dashed); `type` is not a graphical
# parameter for abline() and does not change how the line is drawn.
abline(lm(Duncan$income ~ Duncan$education), col = "red", lty = 2)

# Number of occupations in each type.
counts <- table(Duncan$type)
counts

# barplot() returns the x-coordinates of the bar midpoints. Bars are not
# centred at 1, 2, 3, so the custom labels must be placed at those returned
# midpoints to line up under the bars.
bar_mids <- barplot(
  counts,
  xaxt = "n", las = 1, col = "gray70", border = "black", ylab = "Frequency"
)
axis(1, at = bar_mids, labels = lab1)

############################################################################

# Let's say we want to use our own data, and not some generated by somebody else
# It's a good idea to set the working directory. First, check your current working directory.

# Print the current working directory.
getwd()

# You'll use your own computer's working directory, and this format differs between Macs and PCs. Here's mine on my PC.

setwd("C:/Users/dhughe10/Dropbox/AUM/courses/spring_2022/pols_3970/data_and_code/")

# Next, we'll 'read-in' data saved in that working directory using its file name. This is a sample of 2018 GSS data.

data1 <- read.csv(file = "gss_data.csv")
head(data1)     # first few rows
objects(data1)  # names of the variables stored in data1
fix(data1)      # open the data in an interactive editor

# Let's take a look at some of the data #

# Labels for the two groups: atheist equal to 0 first, then atheist equal to 1.
lab2 <- c("Not Atheist", "Atheist")

# Median age among rows where atheist equals 1, then where it equals 0.
median(data1$age[data1$atheist == 1])
median(data1$age[data1$atheist == 0])

# Age by group: default axes first, then with the default x-axis suppressed
# and replaced by the labels in lab2.
boxplot(data1$age ~ data1$atheist)
boxplot(
  data1$age ~ data1$atheist,
  ylab = "Age", las = 1, xaxt = "n", xlab = ""
)
axis(1, at = 1:2, labels = lab2)

# Install and load the gmodels package, which supplies CrossTable().
install.packages("gmodels")
library(gmodels)

# Cross-tabulation with the default output.
CrossTable(data1$gay_bisexual, data1$atheist)

# The same table with column proportions, table proportions, and the
# chi-square output switched off, printed in the "SPSS" format.
CrossTable(
  data1$gay_bisexual, data1$atheist,
  prop.c = FALSE, prop.t = FALSE, prop.chisq = FALSE,
  chisq = FALSE, format = "SPSS"
)


############################################################################

# Some useful R code...the question mark (?).

# Putting a question mark in front of a function name opens its help page.
?plot


# Finally, let's make sure we quit R correctly after making sure to save our script

# save = "no" ends the session without saving the workspace.
quit(save = "no")
