Analyzing the garbage collection log using ‘R’
November 12, 2013 Leave a comment
I know that I am publishing ‘R’ code like this in a hurry. But I plan to add more explanations later on. The comments in the code are missing.
I obtain garbage collection log from a production JVM and isolate the ‘Full GC’ lines. The goal is to draw graphs of utilization and find the mean and recommend a size for the memory pools. I refer to Charlie Hunt’s book on Java Performance.
2013-10-04T19:36:16.497+0530: 2.152: [GC [PSYoungGen: 65537K->3456K(382272K)] 65537K->3456K(1256128K), 0.0090610 secs] [Times: user=0.04 sys=0.00, real=0.00 secs]
2013-10-04T19:36:16.506+0530: 2.161: [Full GC (System) [PSYoungGen: 3456K->0K(382272K)] [PSOldGen: 0K->3285K(873856K)] 3456K->3285K(1256128K) [PSPermGen: 12862K->12862K(25984K)], 0.0512630 secs] [Times: user=0.05 sys=0.00, real=0.06 secs]
2013-10-04T19:36:25.830+0530: 11.485: [GC [PSYoungGen: 327680K->22711K(382272K)] 330965K->25997K(1256128K), 0.0234720 secs] [Times: user=0.15 sys=0.02, real=0.02 secs]
2013-10-04T19:36:26.910+0530: 12.565: [GC [PSYoungGen: 84665K->28038K(382272K)] 87950K->31324K(1256128K), 0.0214230 secs] [Times: user=0.20 sys=0.03, real=0.02 secs]
2013-10-04T19:36:26.931+0530: 12.586: [Full GC (System) [PSYoungGen: 28038K->0K(382272K)] [PSOldGen: 3285K->30437K(873856K)] 31324K->30437K(1256128K) [PSPermGen: 39212K->39212K(69504K)], 0.2616280 secs] [Times: user=0.25 sys=0.01, real=0.26 secs]
2013-10-04T19:36:39.614+0530: 25.269: [GC [PSYoungGen: 327680K->15462K(382272K)] 358117K->45900K(1256128K), 0.0361220 secs] [Times: user=0.34 sys=0.00, real=0.03 secs]
2013-10-04T19:36:45.141+0530: 30.796: [GC [PSYoungGen: 343142K->30879K(382272K)] 373580K->61317K(1256128K), 0.1013610 secs] [Times: user=1.30 sys=0.00, real=0.10 secs]
library(stringr)
this.dir <- dirname(parent.frame(2)$ofile)
setwd(this.dir)
data <- read.table("D:\\GC Analysis\\gc.log-node1",sep="\n")
#print(data)
parse.mean <- function(){
fullgc.timestamp( fullgc.read() )
}
# Grep Full GC lines
fullgc.read <- function(){
return (data$V1[ grep ("(.*)Full GC(.*)", data[,1])])
}
fullgc.data <- function(timedata,memorydata){
memorydata["Time"] <- "NA"
memorydata$Time <- unlist(timedata)
return (memorydata)
}
fullgc.timestamp <- function(input){
time <- str_extract(input,"T[^.]*")
timeframex<-data.frame(time)
timeframey<-subset(timeframex, timeframex[1] != "NA")
timeframey<-substring(timeframey$time,2,9)
timeframey <- data.frame(timeframey)
colnames( timeframey ) <- c("Time")
return (timeframey)
}
fullgc.oldgen.mean <- function(input){
data <- str_extract(input,"PSOld.*[/)]")
data <- str_extract_all(data,"[0-9]+")
data <- data.frame(matrix(unlist(data),ncol=9,byrow=T))
colnames(data)[5] <- c("OldGenAfterFullGC")
colnames(data)[8] <- c("PermGenAfterFullGC")
return (data)
}
subset.removezerohours <- function( input ){
return( subset(input,as.POSIXlt(strptime(input$Time,"%H:%M:%S"))$hour != 0))
}
fullgc.permgen.graph <- function(filtereddata){
png(
"permgenrecommendation.png",
width =1720, height = 490)
filtereddata$Time1<-strptime(filtereddata$Time,"%H:%M:%S")
print(filtereddata)
offset<-86400
lasttime<-filtereddata$Time1[1]
filtereddata$Time1[1]<-filtereddata$Time1[1]+offset
lasttime<-filtereddata$Time1[1]
for(timedate in 2:length(filtereddata$Time1)) {
if(as.numeric(filtereddata$Time1[timedate]) < lasttime){
offset<- offset + 86400
}
filtereddata$Time1[timedate]<-filtereddata$Time1[timedate]+offset
lasttime<-filtereddata$Time1[timedate]
}
plot( filtereddata$Time1,
levels(filtereddata$PermGenAfterFullGC)[filtereddata$PermGenAfterFullGC],
col="darkblue",type="l",
ylab="MB",
xlab="",
las=2,
lwd=5.5,
cex.lab=1,
cex.axis=1,
xaxt="n")
axis(1, at = as.numeric(filtereddata$Time1), labels = filtereddata$Time, las = 2,cex.axis=1.3)
mtext("Time", side=1, line=-1, cex=1.3)
title("Permgen mean and recommendation",cex.main=1.5)
dev.off()
}
