# 2 Data Visualization with R

This story is the continuation of this article.

# Show the current working directory.
getwd()

# Point the session at the folder that holds the tutorial data.
# NOTE(review): this is a machine-specific absolute path; adjust it locally.
setwd("C:\\Users\\batur\\Desktop\\R Tutorial")

# Read the CSV file and keep it as a data frame.
# String literals must use plain ASCII quotes; typographic quotes do not parse.
bankChurnersData <- read.csv(file = "BankChurners.csv")

# Drop columns 22 and 23 (selected by position).
df <- bankChurnersData[-c(22:23)]

# Encode Attrition_Flag as a factor (binary variable).
df$Attrition_Flag <- factor(
  df$Attrition_Flag,
  levels = c("Attrited Customer", "Existing Customer")
)

# Encode Gender as a factor (binary variable).
df$Gender <- factor(df$Gender, levels = c("M", "F"))

# Encode Education_Level as an ordered factor (ordinal variable).
df$Education_Level <- factor(
  df$Education_Level,
  ordered = TRUE,
  levels = c(
    "Unknown", "Uneducated", "High School", "College",
    "Graduate", "Post-Graduate", "Doctorate"
  )
)

# Encode Marital_Status as a factor (nominal variable).
df$Marital_Status <- factor(
  df$Marital_Status,
  levels = c("Married", "Single", "Unknown", "Divorced")
)

# Encode Income_Category as an ordered factor (ordinal variable).
# Values that match none of the levels become NA, so the labels must equal the
# CSV text exactly. The ranges are written with a plain hyphen here.
# NOTE(review): assumes the CSV uses " - " between the bounds; confirm with
# unique(bankChurnersData$Income_Category).
df$Income_Category <- factor(
  df$Income_Category,
  ordered = TRUE,
  levels = c(
    "Unknown", "Less than $40K", "$40K - $60K",
    "$60K - $80K", "$80K - $120K", "$120K +"
  )
)

# Encode Card_Category as an ordered factor (ordinal variable).
df$Card_Category <- factor(
  df$Card_Category,
  ordered = TRUE,
  levels = c("Blue", "Silver", "Gold", "Platinum")
)

# Split the graphics device into a 2 x 2 grid so that the following plots
# are drawn side by side in one window.
par(mfrow = rep(2, 2))

**1.PLOT**

# Plot Months_on_book against Education_Level.
# Labels and title use plain ASCII quotes so that the call parses.
plot(
  df$Education_Level, df$Months_on_book,
  xlab = "Education Level",
  ylab = "Months on Book",
  main = "Edu. Lev. vs Book"
)

# Add a vertical reference line at x = 3 and a horizontal one at y = 31.
abline(v = 3)
abline(h = 31)

**2.BOXPLOT**

# Box plot of Customer_Age for each Marital_Status group.
# varwidth = TRUE scales each box's width with the size of its group.
boxplot(
  Customer_Age ~ Marital_Status,
  data = df,
  xlab = "Marital Status",
  ylab = "Customer Age",
  main = "Marital Data Box Plot",
  varwidth = TRUE,
  col = c("green", "yellow", "purple", "red")
)

**3.DATA DISTRIBUTION**

# Normal Q-Q plot of Months_on_book, with the reference line drawn on top,
# to compare its distribution against a normal distribution.
with(df, {
  qqnorm(Months_on_book)
  qqline(Months_on_book)
})

**4.HISTOGRAM**

# Density-scaled histogram of Dependent_count (bar areas sum to 1).
# `probability = TRUE` is spelled out in full instead of the partially
# matched `prob = T`; `T` is an ordinary variable that can be reassigned.
hist(
  df$Dependent_count,
  probability = TRUE,
  breaks = 5,
  col = "red",
  xlab = "Dependent Count",
  main = "Dependent Count Histogram Graph"
)

# Frequency histogram of Dependent_count (bar heights are raw counts).
hist(
  df$Dependent_count,
  breaks = 5,
  col = "darkmagenta",
  xlab = "Dependent Count",
  main = "Dependent Count Histogram Graph"
)

# Density-scaled histogram of Customer_Age with a kernel density curve on top.
# The histogram must be on the density scale so the curve shares its y axis.
hist(
  df$Customer_Age,
  breaks = 15,
  probability = TRUE,
  col = "darkmagenta",
  xlab = "Customer Age",
  main = "Customer Age Histogram Graph"
)

# Estimate the density once, show its summary, then overlay it on the plot.
age_density <- density(df$Customer_Age)
print(age_density)
lines(age_density)

**5.PIE CHART**

# Pie chart of Marital_Status.
# Count the customers in each marital status and use the level names as
# slice labels.
result <- table(df$Marital_Status)
marital_status_names <- names(result)

pie(result, labels = marital_status_names, main = "Marital Status Pie Chart")

**6.BARPLOT**

# Collect the two grouping columns and the value column in one data frame.
df_2 <- data.frame(
  Education_Level = df$Education_Level,
  Income_Category = df$Income_Category,
  Customer_Age = df$Customer_Age
)

# Mean Customer_Age for every Education_Level / Income_Category pair.
# Only the numeric column is aggregated: applying mean() to the factor
# columns as well only produces NA columns and a warning per group.
# Group.1 holds the education level, Group.2 the income category.
agg <- aggregate(
  df_2["Customer_Age"],
  by = list(df_2$Education_Level, df_2$Income_Category),
  FUN = mean
)

# Row (income) and column (education) labels, in the order they occur in agg.
# The names avoid shadowing base::rownames() and base::colnames().
income_levels <- as.character(unique(agg$Group.2))
education_levels <- as.character(unique(agg$Group.1))

# Income-by-education matrix of mean ages, initialised to NA.
matrix_1 <- matrix(
  NA_real_,
  nrow = length(income_levels),
  ncol = length(education_levels),
  dimnames = list(income_levels, education_levels)
)

# Fill every cell in one step with a two-column (row name, column name) index.
# A combination that is absent from agg simply stays NA instead of raising
# a "replacement has length zero" error.
cell_index <- cbind(as.character(agg$Group.2), as.character(agg$Group.1))
matrix_1[cell_index] <- agg$Customer_Age

print(matrix_1)

# Bar plot of the mean ages, one group of bars per education level.
# beside = TRUE keeps each mean readable; stacked bars would add means
# together. The y axis shows mean age, not a frequency.
barplot(
  matrix_1,
  beside = TRUE,
  legend.text = rownames(matrix_1),
  las = 1,
  xlab = "Education Level",
  ylab = "Mean Customer Age",
  main = c("Mean of Customer Age", "Bar Plot")
)

**7.DOTCHART**

# Collect Card_Category and Credit_Limit in one data frame.
df_3 <- data.frame(
  Card_Category = df$Card_Category,
  Credit_Limit = df$Credit_Limit
)

# Maximum Credit_Limit per Card_Category; Group.1 holds the card category.
agg_2 <- aggregate(
  df_3,
  by = list(df_3$Card_Category),
  FUN = max
)

# One row label per card category, in the order they occur in agg_2.
rownames_2 <- as.character(unique(agg_2$Group.1))

# Single-column matrix of the maxima, built directly from agg_2 instead of
# filling an empty matrix cell by cell.
matrix_2 <- matrix(
  agg_2$Credit_Limit[match(rownames_2, as.character(agg_2$Group.1))],
  ncol = 1,
  dimnames = list(rownames_2, NULL)
)

print(matrix_2)

# Dot chart of the maximum credit limit by card category.
# The two colours are recycled across the points.
dotchart(
  matrix_2,
  labels = row.names(matrix_2),
  cex = .6,
  main = "Maximum Credit Limit By Card Category",
  xlab = "Maximum Credit Limit ",
  ylab = "Card Category",
  pch = 19,
  col = c("red", "blue"),
  lcolor = "gray90",
  cex.main = 2,
  cex.lab = 1.5
)