## Pied Cuckoo first sightings versus monsoon arrival, 2009 and 2010.
##
## Inputs (read from the working directory):
##   all_pc_records_30April2011.csv  - all Pied Cuckoo sighting records
##   monsoon_arrival_0910.csv        - monsoon arrival date per City and Year
## Outputs (written to the working directory):
##   PCblog-May2011-plot1.svg        - sighting vs monsoon date, one panel/year
##   PCblog-May2011-plot2.svg        - cities with data in both years
##   PCblog-May2011-Plot1Data.csv    - data behind plot 1
##   PCblog-May2011-Plot2Data.csv    - data behind plot 2

## NOTE: the working directory is hard-coded to the original author's machine;
## change this path before running the script anywhere else.
setwd("/home/suhel/Documents/MigrantWatch/AnalysisScripts/PiedCuckoo/Analysis2011")

## Helpers shared by the plots ----

## Axis tick labels; the x axis of plot 1 uses only the first four.
day.labels <- c("15 May", "1 June", "15 June", "1 July", "15 July")

## Dates of the axis ticks above, in the given year.
tick.dates <- function(year) {
  as.Date(paste(year, c("05-15", "06-01", "06-15", "07-01", "07-15"),
                sep = "-"))
}

## Move a Date vector into 2011 (keeping month and day) so that both years
## can be drawn on one common axis.
common.year <- function(d) {
  as.Date(paste("2011", format(d, "%m-%d"), sep = "-"))
}

## Read and filter the sighting records ----

## first explore all PC data and filter those for 2009 and 2010
## stringsAsFactors = FALSE keeps City as character, so the corrections below
## cannot silently become NA when the corrected name is not already a factor
## level (read.csv produced factors by default before R 4.0).
dat <- read.csv("all_pc_records_30April2011.csv", stringsAsFactors = FALSE)

## correct some city names
dat$City[dat$City %in% "Dehra Dun"] <- "Dehradun"
dat$City[dat$City %in% "Kanpur City"] <- "Kanpur"

## add a serial number (seq_len is safe even if the file has no rows)
dat$Sno <- seq_len(nrow(dat))
dat$Date <- as.Date(dat$Date)

## remove places south of 15 degrees North
dat <- subset(dat, Latitude >= 15)

## Could also remove first visits, but not doing so for now
# dat <- subset(dat, Observation.frequency!="First visit")

## choose appropriate date range
dat.2009 <- subset(dat, Date >= as.Date("2009-05-01") &
                          Date <= as.Date("2009-07-15"))
dat.2010 <- subset(dat, Date >= as.Date("2010-05-01") &
                          Date <= as.Date("2010-07-15"))

## keep only the City names present in each year
dat.2009$City <- factor(dat.2009$City)
dat.2010$City <- factor(dat.2010$City)

## order sightings in ascending order within City
dat.2009 <- dat.2009[order(dat.2009$City, dat.2009$Date), ]
dat.2010 <- dat.2010[order(dat.2010$City, dat.2010$Date), ]

## take earliest sighting for each City (first row per City after sorting)
dat.2009 <- dat.2009[!duplicated(dat.2009$City), ]
dat.2010 <- dat.2010[!duplicated(dat.2010$City), ]

## Attach monsoon arrival dates ----

## read monsoon arrival data
mon <- read.csv("monsoon_arrival_0910.csv", stringsAsFactors = FALSE)
mon$Monsoon_arrival <- as.Date(mon$Monsoon_arrival)
mon.2009 <- subset(mon, Year == 2009)
mon.2010 <- subset(mon, Year == 2010)

## take only unique cities (first entry per City)
mon.2009 <- mon.2009[!duplicated(mon.2009$City), ]
mon.2010 <- mon.2010[!duplicated(mon.2010$City), ]

## match cities to fill in monsoon dates
dat.2009$mon.arr <- mon.2009[match(dat.2009$City, mon.2009$City),
                             "Monsoon_arrival"]
dat.2010$mon.arr <- mon.2010[match(dat.2010$City, mon.2010$City),
                             "Monsoon_arrival"]

## check arrival dates against monsoon master file.
dat.2009[, c("City", "Date", "mon.arr")]
dat.2010[, c("City", "Date", "mon.arr")]

## missing monsoon data
subset(dat.2009, is.na(mon.arr), select = c("City", "State"))
subset(dat.2010, is.na(mon.arr), select = c("City", "State"))
## no missing data

## Cities with data from both years ----

## which locations have data from both years
bothyrs <- intersect(dat.2009$City, dat.2010$City)
if (length(bothyrs) == 0) {
  ## Fail with a clear message instead of an obscure data.frame() error.
  stop("No city has a first sighting in both 2009 and 2010", call. = FALSE)
}
## No is the label drawn next to each of these cities in the plots
bothyrs <- data.frame(City = bothyrs, No = seq_along(bothyrs))
bothyrs.2009 <- data.frame(
  bothyrs,
  dat.2009[match(bothyrs$City, dat.2009$City), c("Date", "mon.arr")]
)
bothyrs.2010 <- data.frame(
  bothyrs,
  dat.2010[match(bothyrs$City, dat.2010$City), c("Date", "mon.arr")]
)
names(bothyrs.2009)[3:4] <- c("cuck09", "mon09")
names(bothyrs.2010)[3:4] <- c("cuck10", "mon10")

## Plot 1 ----

## Now plot pied cuckoo date versus monsoon arrival date separately for
## 2009 and 2010
svg("PCblog-May2011-plot1.svg", width = 12, height = 5) ## if file is to be printed
par(mfrow = c(1, 2), mar = c(4, 5, 2, 2), las = 1)

## 2009 panel
plot(Date ~ mon.arr, data = dat.2009, pch = 16,
     xlim = as.Date(c("2009-05-15", "2009-07-05")),
     ylim = as.Date(c("2009-05-15", "2009-07-15")),
     xaxt = "n", yaxt = "n",
     xlab = "Monsoon arrival", ylab = "First sighting")
## number the cities that also have 2010 data, offset so the point stays visible
text(bothyrs.2009$mon09 - 1, bothyrs.2009$cuck09 + 1, bothyrs.2009$No)
axis(side = 1, at = tick.dates(2009)[1:4], labels = day.labels[1:4])
axis(side = 2, at = tick.dates(2009), labels = day.labels)
## reference lines: sighting on, 5 days before and 30 days before monsoon
abline(a = 0, b = 1)
abline(a = -5, b = 1, lty = 2)
abline(a = -30, b = 1, lty = 3)

## 2010 panel
plot(Date ~ mon.arr, data = dat.2010, pch = 16,
     xlim = as.Date(c("2010-05-15", "2010-07-05")),
     ylim = as.Date(c("2010-05-15", "2010-07-15")),
     xaxt = "n", yaxt = "n",
     xlab = "Monsoon arrival", ylab = "First sighting")
text(bothyrs.2010$mon10 - 1, bothyrs.2010$cuck10 + 1, bothyrs.2010$No)
axis(side = 1, at = tick.dates(2010)[1:4], labels = day.labels[1:4])
axis(side = 2, at = tick.dates(2010), labels = day.labels)
abline(a = 0, b = 1)
abline(a = -5, b = 1, lty = 2)
abline(a = -30, b = 1, lty = 3)
legend("topleft",
       legend = c("Same day as monsoon arrival",
                  "5 days before monsoon arrival",
                  "30 days before monsoon arrival"),
       lty = 1:3, bty = "n")
dev.off() ## if file is to be printed

## Plot 2 ----

## plot the common locations separately
both <- data.frame(bothyrs.2009, bothyrs.2010[, c("cuck10", "mon10")])

## assign a common year, for the purpose of plotting
both$cuck09.com <- common.year(both$cuck09)
both$cuck10.com <- common.year(both$cuck10)
both$mon09.com <- common.year(both$mon09)
both$mon10.com <- common.year(both$mon10)

## Plot monsoon on x, sighting on y
svg("PCblog-May2011-plot2.svg", width = 5, height = 5) ## if plot to be printed to file
par(mfrow = c(1, 1), las = 1)
with(both, plot(mon09.com, cuck09.com,
                xlim = as.Date(c("2011-05-15", "2011-07-15")),
                ylim = as.Date(c("2011-05-15", "2011-07-15")),
                pch = 16, xlab = "Monsoon arrival", ylab = "First sighting",
                xaxt = "n", yaxt = "n"))
axis(side = 1, at = tick.dates(2011), labels = day.labels)
axis(side = 2, at = tick.dates(2011), labels = day.labels)
with(both, points(mon10.com, cuck10.com, pch = 16, col = "red"))
with(both, text(mon09.com - 1, cuck09.com + 1, No))
abline(a = 0, b = 1, lty = 2)
## join each city's 2009 point to its 2010 point
for (i in seq_len(nrow(both))) {
  with(both[i, ], lines(c(mon09.com, mon10.com), c(cuck09.com, cuck10.com)))
}
legend("bottomright", legend = c(2009, 2010), pch = 16,
       col = c("black", "red"), bty = "n")

## Add hypothetical location where change in cuckoo arrival matches change
## in monsoon
example.mon <- as.Date(c("2011-06-01", "2011-06-11"))
example.cuck <- as.Date(c("2011-05-20", "2011-05-30"))
points(example.mon, example.cuck, pch = "*", cex = 3, col = 1:2)
lines(example.mon, example.cuck, lty = 2, lwd = 1.5)
text(as.Date("2011-05-31"), as.Date("2011-05-18"), labels = "Example",
     cex = 0.8)
dev.off() ## end print

## Output processed text files for others to use ----

## Year is added to the per-year tables but is not among the exported columns;
## the year remains available through Sighting.date.
dat.2009$Year <- 2009
dat.2010$Year <- 2010
dat.complete <- rbind(dat.2009, dat.2010)
dat.complete <- subset(dat.complete,
                       select = c(Species, Location.name, City, State,
                                  Reporter, Date, Sighting.type, On.behalf.of,
                                  Latitude, Longitude, mon.arr))
names(dat.complete)[names(dat.complete) == "Date"] <- "Sighting.date"
names(dat.complete)[names(dat.complete) == "mon.arr"] <- "Monsoon.arrival"

## columns 1-6 are City, No, cuck09, mon09, cuck10, mon10
both.years <- both[, 1:6]
names(both.years)[-(1:2)] <- c("Sighting.date.09", "Monsoon.09",
                               "Sighting.date.10", "Monsoon.10")

## Output these data.frames
write.csv(dat.complete, file = "PCblog-May2011-Plot1Data.csv",
          row.names = FALSE)
## File name fixed from the misspelled "PCblog-Mat2011-Plot2Data.csv" so it
## matches the other PCblog-May2011-* outputs.
write.csv(both.years, file = "PCblog-May2011-Plot2Data.csv",
          row.names = FALSE)