####################################################################################################
####################################################################################################
####################################################################################################
#
# pgDataLoader.R
#
# © 2009-2014, Michael Baudis (www.progenetix.org)
# pls. use code with attribution
#
####################################################################################################
####################################################################################################
####################################################################################################

# TODO(mbaudis): will just return an error due to "read.table" not getting data, if no search parameter is provided
# TODO(mbaudis): documentation about the parameters & usage (see also the API documentation on the website
# TODO(mbaudis): ? implement preferred data loading through Rcurl, with readTable as fallback
#
# see: wiki.progenetix.org
#
# example: 
#
# source("~/lib/Rlibs/pgDataLoader.R")
# amtab <- pgDataLoader(pg.server="127.0.0.1", icdm_m=c("9500", "947"), db="arraymap", genes_m=c("MYCN", "TP53$"), qcveto=1)
#

pgDataLoader <- function(

  pg.server    = "www.progenetix.org",
  db           = 'progenetix',
  project      = "",
  icdm_m       = c(""),  # e.g. c("8140/3", "8144")
  icdt_m       = c(""),  # e.g. c("C04", "C06")
  pmid_m       = c(""),
  text_m       = c(""),  # e.g. "metast"
  groups_m     = c(""),  # e.g. "carcinomas"
  series_m     = c(""),
  uid_m        = c(""),
  genes_m      = c(""),  # used for adding gene specific status columns to the table/matrix
  locus_m      = "",     # e.g. "17:7512445-7531642:-1" for a TP53 deletion
  randno       = 0,      # optional number of randomized samples
  followup     = "FALSE",  # if "TRUE", only samples with survival (death & followup) will be returned
  qcveto       = 1,      # for arraymap; "1" limits selection to "good quality"
                         # arrays; otherwise use "-1"
  api_out      = "samples",  # options: one of "matrix", "valuematrix", "segments", "samples"
  api_doctype  = "tab",  # fixed; there would be "json" and "svg", "png" ..., too, but this doesn't make sense here
  ...

) {

  if (project=='arraymap') { db <- 'arraymap' }
  if (db=='arraymap' && project=='progenetix') { project  <- 'arraymap' }
  
  cat("\naccessing project", project, "through db", db, "from server", pg.server, "\n")

  pg.url <- paste(
			  "http://",
			  pg.server,
			  "/api/?",
			  "db=",
			  db,
			  "&project=",
			  project,
			  "&api_out=",
			  api_out,
			  "&api_doctype=",
			  api_doctype,
           sep="")
                
  query.tags    <- c("icdm_m", "icdt_m", "pmid_m", "series_m", "uid_m", "text_m", "genes_m", "groups_m")
  query.count   = 0
  
  for (query.tag in query.tags) {
    for (codevalues in get(query.tag)) {
      for (code in codevalues) {
        if (nchar(code) > 0) {
		  pg.url      <- paste(pg.url, '&', query.tag, '=', code, sep="")
		  query.count =  query.count+1
  }}}}
  
  if (nchar(locus_m) > 5)  pg.url <- paste(pg.url, '&locus_m=', locus_m, sep="")
  if (randno > 0)          pg.url <- paste(pg.url, '&randno=', randno, sep="")
  if (followup=="TRUE")    pg.url <- paste(pg.url, '&followup=yes', sep="")
  if (db=='arraymap')      pg.url <- paste(pg.url, '&qcveto=', qcveto, sep="")
  
  # / of URL generation
  
  # feedback
  
  cat("\ntrying URL", pg.url, "\n")

  if (query.count < 1) {
    stop(cat("\n", "at least one of those parameters has to be provided:\n\n", query.tags, "\n\n"))
  }

  pg.con   <-  url(
                 description=pg.url,
                 open = "r",
                 blocking = TRUE,
                 encoding = getOption("encoding")
               ) 
  pg.data  <- read.table(pg.con, header=T, sep="\t", na="NA")
  close(pg.con)
    
  if (api_out=='matrix') {
    cat("\nretrieved", nrow(pg.data), "samples with 1Mb gain/loss status matrix\n\n")
  }
  if (api_out=='samples') {
    cat("\nretrieved", nrow(pg.data), "samples as tab-delimited data table\n\n")
  }   
  if (api_out=='segments') {
    cat("\nretrieved",
      length(levels(pg.data[[1]])),
      "samples in segment annotation format (this only reflects samples with CNA gain/loss segments)",     
      "\n\n"
    )
  }
  
  return(pg.data)
  
}

####################################################################################################
####################################################################################################
####################################################################################################

Topic revision: r1 - 10 Dec 2015, MichaelBaudis
This site is powered by FoswikiCopyright © by the contributing authors. All material on this collaboration platform is the property of the contributing authors.
Ideas, requests, problems regarding Progenetixwiki? Send feedback