Analyzing the garbage collection log using ‘R’
November 12, 2013 Leave a comment
I know that I am publishing ‘R’ code like this in a hurry. But I plan to add more explanations later on. The comments in the code are missing.
I obtain garbage collection log from a production JVM and isolate the ‘Full GC’ lines. The goal is to draw graphs of utilization and find the mean and recommend a size for the memory pools. I refer to Charlie Hunt’s book on Java Performance.
2013-10-04T19:36:16.497+0530: 2.152: [GC [PSYoungGen: 65537K->3456K(382272K)] 65537K->3456K(1256128K), 0.0090610 secs] [Times: user=0.04 sys=0.00, real=0.00 secs]
2013-10-04T19:36:16.506+0530: 2.161: [Full GC (System) [PSYoungGen: 3456K->0K(382272K)] [PSOldGen: 0K->3285K(873856K)] 3456K->3285K(1256128K) [PSPermGen: 12862K->12862K(25984K)], 0.0512630 secs] [Times: user=0.05 sys=0.00, real=0.06 secs]
2013-10-04T19:36:25.830+0530: 11.485: [GC [PSYoungGen: 327680K->22711K(382272K)] 330965K->25997K(1256128K), 0.0234720 secs] [Times: user=0.15 sys=0.02, real=0.02 secs]
2013-10-04T19:36:26.910+0530: 12.565: [GC [PSYoungGen: 84665K->28038K(382272K)] 87950K->31324K(1256128K), 0.0214230 secs] [Times: user=0.20 sys=0.03, real=0.02 secs]
2013-10-04T19:36:26.931+0530: 12.586: [Full GC (System) [PSYoungGen: 28038K->0K(382272K)] [PSOldGen: 3285K->30437K(873856K)] 31324K->30437K(1256128K) [PSPermGen: 39212K->39212K(69504K)], 0.2616280 secs] [Times: user=0.25 sys=0.01, real=0.26 secs]
2013-10-04T19:36:39.614+0530: 25.269: [GC [PSYoungGen: 327680K->15462K(382272K)] 358117K->45900K(1256128K), 0.0361220 secs] [Times: user=0.34 sys=0.00, real=0.03 secs]
2013-10-04T19:36:45.141+0530: 30.796: [GC [PSYoungGen: 343142K->30879K(382272K)] 373580K->61317K(1256128K), 0.1013610 secs] [Times: user=1.30 sys=0.00, real=0.10 secs]
library(stringr) this.dir <- dirname(parent.frame(2)$ofile) setwd(this.dir) data <- read.table("D:\\GC Analysis\\gc.log-node1",sep="\n") #print(data) parse.mean <- function(){ fullgc.timestamp( fullgc.read() ) } # Grep Full GC lines fullgc.read <- function(){ return (data$V1[ grep ("(.*)Full GC(.*)", data[,1])]) } fullgc.data <- function(timedata,memorydata){ memorydata["Time"] <- "NA" memorydata$Time <- unlist(timedata) return (memorydata) } fullgc.timestamp <- function(input){ time <- str_extract(input,"T[^.]*") timeframex<-data.frame(time) timeframey<-subset(timeframex, timeframex[1] != "NA") timeframey<-substring(timeframey$time,2,9) timeframey <- data.frame(timeframey) colnames( timeframey ) <- c("Time") return (timeframey) } fullgc.oldgen.mean <- function(input){ data <- str_extract(input,"PSOld.*[/)]") data <- str_extract_all(data,"[0-9]+") data <- data.frame(matrix(unlist(data),ncol=9,byrow=T)) colnames(data)[5] <- c("OldGenAfterFullGC") colnames(data)[8] <- c("PermGenAfterFullGC") return (data) } subset.removezerohours <- function( input ){ return( subset(input,as.POSIXlt(strptime(input$Time,"%H:%M:%S"))$hour != 0)) } fullgc.permgen.graph <- function(filtereddata){ png( "permgenrecommendation.png", width =1720, height = 490) filtereddata$Time1<-strptime(filtereddata$Time,"%H:%M:%S") print(filtereddata) offset<-86400 lasttime<-filtereddata$Time1[1] filtereddata$Time1[1]<-filtereddata$Time1[1]+offset lasttime<-filtereddata$Time1[1] for(timedate in 2:length(filtereddata$Time1)) { if(as.numeric(filtereddata$Time1[timedate]) < lasttime){ offset<- offset + 86400 } filtereddata$Time1[timedate]<-filtereddata$Time1[timedate]+offset lasttime<-filtereddata$Time1[timedate] } plot( filtereddata$Time1, levels(filtereddata$PermGenAfterFullGC)[filtereddata$PermGenAfterFullGC], col="darkblue",type="l", ylab="MB", xlab="", las=2, lwd=5.5, cex.lab=1, cex.axis=1, xaxt="n") axis(1, at = as.numeric(filtereddata$Time1), labels = filtereddata$Time, las = 2,cex.axis=1.3) mtext("Time", side=1, line=-1, cex=1.3) title("Permgen mean and recommendation",cex.main=1.5) dev.off() }