Skip to content

Searching for Images

Luis J. Villanueva, Ph.D edited this page Dec 4, 2023 · 3 revisions

OCIO's Digitization Program Office (DPO) completed a Mass Digitization project of the Living Orchid Collection of Smithsonian Gardens. As part of a new effort to showcase our projects, we used EDANr to search for the images taken during this project. When a plant had flowers, a close-up photograph of the flower was taken. These close-ups were downloaded using EDANr for display on 4K-resolution screens at the DPO office. This vignette summarizes the process:

  1. Query EDAN for the specimens in the collection
  2. Look for images taken by the contractor in the project
  3. Download the close-up images
  4. Resize the images to 4k and add text with general information to the images

Article in Smithsonian Insider about the digitization project.

Query EDAN for the specimens in the collection

Add the AppID and AppKey, load packages, other preliminary steps:

# EDAN API credentials (placeholder values shown here)
AppID <- "APP_ID"
AppKey <- "verylong_key"

#Required packages
library("EDANr")
library("magick")
library("stringr")

# Create the output folders: "images_h" holds horizontal images and
# "images_v" holds vertical images. Warnings (e.g. the folder already
# exists) are suppressed.
for (image_dir in c("images_h", "images_v")) {
  dir.create(image_dir, showWarnings = FALSE)
}

Run the query once with a single row, just to get the total number of results EDAN has:

# EDAN query for orchids of Smithsonian Gardens with images
# NOTE(review): the image filter appears both inside the query string and in
# the separate fqs argument -- confirm whether both are needed.
orchids_query <- "orchid smithsonian gardens&fq=online_media_type:\"Images\""
orchids_fqs <- "online_media_type:\"Images\""


# Ask for a single row: only the total number of matches is needed here
results <- EDANr::edan_metadata_search(
  query = orchids_query,
  fqs = orchids_fqs,
  AppID = AppID,
  AppKey = AppKey,
  rows = 1,
  start = 0
)
results_count <- results$rowCount

# Fail early with a clear message when the response carries no usable count,
# instead of failing later while building the loop sequence
if (length(results_count) != 1 || is.na(results_count)) {
  stop("EDAN did not return a result count for the query", call. = FALSE)
}

# Index of the last step (zero-based) when fetching 100 rows per request,
# the maximum the API returns. Using ceiling() - 1 avoids an extra, empty
# request when the count is an exact multiple of 100; max() keeps a single
# step when there are no results at all.
steps <- max(ceiling(results_count / 100) - 1, 0)

Then, loop over the results in steps of 100.

# Loop over each step, getting all the images
for (i in seq(0, steps)) {
  # Query to get the next 100 results
  results <- EDANr::edan_metadata_search(
    query = orchids_query,
    fqs = orchids_fqs,
    AppID = AppID,
    AppKey = AppKey,
    rows = 100,
    start = i * 100
  )

  # Skip this step when the response holds no table of rows
  if (is.null(dim(results$rows))) {
    next
  }

  # Loop over each result in this step. seq_len() gives an empty sequence
  # for a zero-row table, whereas seq(1, 0) would iterate over 1 and 0.
  for (j in seq_len(nrow(results$rows))) {
    #
    # See code below
    #
  }
}

Look for images taken by the contractor

Get the images of the j-th row:

# Get images, available from IDS, for the j-th result
ids_images <- results$rows$content$descriptiveNonRepeating$online_media$media[j][[1]]

# Records without any media cannot be processed; go to the next result
if (is.null(ids_images) || NROW(ids_images) == 0) {
  next
}

# Filter for images that were taken by the contractor of the project
which_image <- ids_images[stringr::str_which(ids_images$caption, "Creekside Digital"), ]

# Select the close-up image of the flowers of the plant, coded with '102' in
# the filename. The match must be computed on the filtered table itself:
# row positions taken from the unfiltered ids_images would point at the
# wrong (or non-existent) rows of which_image.
which_image_detail <- which_image[stringr::str_which(which_image$idsId, "102"), ]

# No close-up from the contractor for this plant; go to the next result
if (nrow(which_image_detail) == 0) {
  next
}

Download the close-up images

Get the URL of the images and download:

# Get image URL
image <- which_image_detail$content[1]

# Get image ID
idsID <- which_image_detail$idsId[1]

# Download the image; try() keeps a failed download from stopping the loop
localimage <- paste0(idsID, ".jpg")
dlfile <- try(
  download.file(
    url = image,
    destfile = localimage,
    mode = "wb"
  ),
  silent = TRUE
)

# If the download raised an error or reported a non-zero status, remove any
# partial file, skip and go to the next one in the loop
if (inherits(dlfile, "try-error") || dlfile != 0) {
  if (file.exists(localimage)) {
    file.remove(localimage)
  }
  next
}

Resize and add text with general information to the images

Resize the image for 4k display, checking if horizontal or vertical first:

# Load the downloaded file and look up its dimensions
img <- image_read(localimage)
img_info <- image_info(img)

# Wider-than-tall images are scaled with the horizontal 4k geometry;
# everything else is scaled with the vertical one
target_size <- if (img_info$width > img_info$height) "3840x2160" else "2160x3840"
img_scaled <- image_scale(img, target_size)

Get specimen info to write on the image:

# Get image title, which contains the name of the specimen
img_title <- results$rows$title[j]

# If title is too long, break into two lines at the middle word.
# A title made of a single long word is left alone, since splitting it
# would repeat the same word on both lines.
title_words <- strsplit(img_title, " ")[[1]]
if (nchar(img_title) > 30 && length(title_words) > 1) {
  spl_no <- floor(length(title_words) / 2)

  img_title_1 <- paste(title_words[seq_len(spl_no)], collapse = " ")
  img_title_2 <- paste(title_words[-seq_len(spl_no)], collapse = " ")
  img_title <- paste0(img_title_1, "\n   ", img_title_2)
}

# Get common name of plant, if available (only the first one is used)
common_names <- unlist(results$rows$content$indexedStructured$common_name[j])
common_name <- if (length(common_names) > 0) common_names[[1]] else NULL

# Always assign title: without a common name the caption starts with the
# specimen title, rather than reusing a value left over from a previous
# iteration
if (!is.null(common_name) && !is.na(common_name)) {
  title <- paste0(common_name, "\n", img_title)
} else {
  title <- img_title
}

# Add the accession number and identify the unit.
# NOTE(review): identifiers are collapsed so the caption stays a single
# string if a record carries more than one -- confirm against the EDAN schema.
accession <- paste(
  results$rows$content$freetext$identifier[j][[1]]$content,
  collapse = ", "
)
title <- paste0(
  title,
  "\nAccession Number: ",
  accession,
  "\nSmithsonian Gardens"
)

# Stamp the caption text onto the scaled image
img_ready <- image_annotate(
  img_scaled,
  title,
  font = "Arial",
  size = 60,
  color = "white",
  gravity = "SouthWest"
)

# Pick the destination folder: wide images go to the horizontal folder,
# all others to the vertical folder
if (img_info$width > img_info$height) {
  out_folder <- "images_h/"
} else {
  out_folder <- "images_v/"
}
image_write(
  img_ready,
  path = paste0(out_folder, idsID, ".jpg"),
  format = "jpg"
)

# The downloaded original is no longer needed
file.remove(localimage)