Skip to content

Commit

Permalink
Work on #463.
Browse files Browse the repository at this point in the history
  • Loading branch information
mjordan committed Apr 12, 2018
1 parent e7d7ece commit 53901a3
Show file tree
Hide file tree
Showing 4 changed files with 300 additions and 1 deletion.
2 changes: 1 addition & 1 deletion mik
Original file line number Diff line number Diff line change
Expand Up @@ -247,7 +247,7 @@ foreach ($records as $record) {
// Create metadata to return an XML file that can be passed
// on to the writer.
try {
$metadata = $parser->metadata($record_key) . "\n";
$metadata = $parser->metadata($record_key);
} catch (Exception $exception) {
$log->addError(
'ErrorException',
Expand Down
28 changes: 28 additions & 0 deletions src/metadataparsers/csv/Csv.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
<?php
// src/metadataparsers/csv/Csv.php

namespace mik\metadataparsers\csv;

use mik\metadataparsers\MetadataParser;

/**
* Templated metadata parser - Generates CSV metadata.
*/
abstract class Csv extends MetadataParser
{
    /**
     * Sets up the fetcher, the output file path and the optional
     * metadata manipulator list for CSV-producing parsers.
     *
     * @param array $settings
     *   Configuration settings. Reads FETCHER.class, WRITER.output_file and,
     *   when present, MANIPULATORS.metadatamanipulators.
     */
    public function __construct($settings)
    {
        parent::__construct($settings);

        // The fetcher class is named in the configuration and resolved
        // inside the mik\fetchers namespace.
        $fetcherClassName = 'mik\\fetchers\\' . $settings['FETCHER']['class'];
        $this->fetcher = new $fetcherClassName($settings);

        $this->outputFile = $this->settings['WRITER']['output_file'];

        // Manipulators are optional; null means none are configured.
        $this->metadatamanipulators = isset($this->settings['MANIPULATORS']['metadatamanipulators'])
            ? $this->settings['MANIPULATORS']['metadatamanipulators']
            : null;
    }
}
136 changes: 136 additions & 0 deletions src/metadataparsers/csv/DcToCsv.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
<?php
// src/metadataparsers/csv/DcToCsv.php

/**
* Metadata parser class that writes metadata out to a CSV file.
*/

namespace mik\metadataparsers\csv;

// use mik\metadataparsers\MetadataParser;
use League\Csv\Writer;
use Monolog\Logger;

class DcToCsv extends Csv
{
    /**
     * Create a new metadata parser instance and write the CSV heading row.
     *
     * @param array $settings
     *   Configuration settings. Reads METADATA_PARSER.dc_elements,
     *   METADATA_PARSER.record_key and WRITER.output_file.
     */
    public function __construct($settings)
    {
        // Call the Csv parent constructor.
        parent::__construct($settings);

        // Heading row: the record key column followed by one column per
        // configured DC element. The CSV writer that writes out the object
        // metadata rows themselves is instantiated in the writer class.
        $headings = $this->settings['METADATA_PARSER']['dc_elements'];
        array_unshift($headings, $this->settings['METADATA_PARSER']['record_key']);
        $output_file_path = $this->settings['WRITER']['output_file'];
        // NOTE(review): the file is opened in append mode, so a heading row
        // is added every time this parser is constructed against an existing
        // output file - confirm that is intended.
        $output_csv_writer = Writer::createFromPath($output_file_path, 'a');
        $output_csv_writer->insertOne($headings);
    }

    /**
     * {@inheritdoc}
     *
     * Fetches the item's metadata and returns it as a CSV row.
     *
     * @param string $record_key
     *   The current item's record_key.
     *
     * @return array
     *   The row, as produced by populateRow().
     */
    public function metadata($record_key)
    {
        $objectInfo = $this->fetcher->getItemInfo($record_key);
        return $this->populateRow($record_key, $objectInfo);
    }

    /**
     * Converts the item's metadata into an array.
     *
     * The row always contains the record key followed by exactly one cell per
     * configured DC element, so every row lines up with the heading row.
     *
     * @param string $record_key
     *   The current item's record_key.
     * @param string $objectInfo
     *   The current item's metadata as generated by the fetcher; it is passed
     *   to getDcValues() and parsed as a DC XML document.
     *
     * @return array
     *   The row, as an array.
     */
    public function populateRow($record_key, $objectInfo)
    {
        $record = $this->getDcValues($objectInfo);
        $row = array();

        // Field will be named in metadata parser's 'record_key' config setting.
        $row[] = $record_key;

        foreach ($this->settings['METADATA_PARSER']['dc_elements'] as $element) {
            // @todo: parse out multiple values and add them to the CSV separated
            // by a character; what does Migrate Plus want?
            //
            // Elements missing from the record still get an empty cell;
            // otherwise the following columns would shift left and no longer
            // match the headings.
            $row[] = isset($record[$element][0]) ? $record[$element][0] : '';
        }

        if (isset($this->metadatamanipulators)) {
            $row = $this->applyMetadatamanipulators($record_key, $row);
        }

        return $row;
    }

    /**
     * Applies metadatamanipulators listed in the config to the provided row.
     *
     * Manipulators are applied in configuration order; each one receives the
     * output of the previous one.
     *
     * @param string $record_key
     *   The current item's record_key.
     * @param array $row
     *   An array containing the object's metadata.
     *
     * @return array
     *   The modified array containing the object's metadata.
     */
    public function applyMetadatamanipulators($record_key, $row)
    {
        foreach ($this->metadatamanipulators as $metadatamanipulator) {
            // Config entries have the form "ClassName|param1|param2|...".
            $manipulatorParams = explode('|', $metadatamanipulator);
            $manipulatorClassName = array_shift($manipulatorParams);
            $manipulatorClass = 'mik\\metadatamanipulators\\' . $manipulatorClassName;
            $manipulator = new $manipulatorClass($this->settings, $manipulatorParams, $record_key);
            $row = $manipulator->manipulate($row);
        }

        return $row;
    }

    /**
     * Parses a DC XML document into an array.
     *
     * @param string $xml
     *   The DC XML document.
     *
     * @return array
     *   An associative array containing element name => element values.
     *   Empty when $xml is not a non-empty string.
     */
    public function getDcValues($xml)
    {
        $dc_values = array();

        // DOMDocument::loadXML() rejects an empty source (ValueError on
        // PHP 8), so there is nothing to parse in that case.
        if (!is_string($xml) || $xml === '') {
            return $dc_values;
        }

        $dom = new \DomDocument();
        $dom->loadXML($xml);
        $elements = $dom->getElementsByTagNameNS('http://purl.org/dc/elements/1.1/', '*');
        foreach ($elements as $e) {
            // Repeated elements accumulate under the same local name.
            if (!array_key_exists($e->localName, $dc_values)) {
                $dc_values[$e->localName] = array();
            }
            $dc_values[$e->localName][] = $e->nodeValue;
        }

        return $dc_values;
    }
}
135 changes: 135 additions & 0 deletions src/writers/OaipmhCsv.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
<?php

namespace mik\writers;

use GuzzleHttp\Client;
use mik\exceptions\MikErrorException;
use Monolog\Logger;
use League\Csv\Writer;

class OaipmhCsv extends \mik\writers\Writer
{
    /**
     * @var array $settings - configuration settings from configuration class.
     */
    public $settings;

    /**
     * @var object $fetcher - Fetcher registered in .ini file.
     */
    private $fetcher;

    /**
     * @var object File getter registered in .ini file.
     */
    private $fileGetter;

    /**
     * @var string Value of the WRITER.output_directory setting.
     */
    public $output_directory;

    /**
     * @var string Path to the CSV file, from the WRITER.output_file setting.
     */
    public $output_file_path;

    /**
     * @var object CSV writer opened on $output_file_path in append mode.
     */
    public $output_csv_writer;

    /**
     * @var int HTTP timeout in seconds (WRITER.http_timeout, default 60).
     */
    public $httpTimeout;

    /**
     * @var bool Whether to skip retrieving content files
     *   (WRITER.metadata_only, default false).
     */
    public $metadata_only;

    /**
     * @var bool Whether Guzzle verifies the server's CA certificate
     *   (SYSTEM.verify_ca, default true).
     */
    public $verifyCA;

    /**
     * Create a new OAI-PMH CSV writer instance.
     *
     * @param array $settings configuration settings.
     */
    public function __construct($settings)
    {
        parent::__construct($settings);
        $this->fetcher = new \mik\fetchers\Oaipmh($settings);
        $fileGetterClass = 'mik\\filegetters\\' . $settings['FILE_GETTER']['class'];
        $this->fileGetter = new $fileGetterClass($settings);
        // NOTE(review): writePackages() reads $this->outputDirectory, not this
        // property; presumably the parent constructor sets that one - confirm.
        $this->output_directory = $settings['WRITER']['output_directory'];

        $this->output_file_path = $this->settings['WRITER']['output_file'];
        $this->output_csv_writer = Writer::createFromPath($this->output_file_path, 'a');

        if (isset($this->settings['WRITER']['http_timeout'])) {
            // Seconds.
            $this->httpTimeout = $this->settings['WRITER']['http_timeout'];
        } else {
            $this->httpTimeout = 60;
        }

        if (isset($this->settings['WRITER']['metadata_only'])) {
            // When true, only the CSV row is written and no content file
            // is retrieved.
            $this->metadata_only = $this->settings['WRITER']['metadata_only'];
        } else {
            $this->metadata_only = false;
        }

        // Default Mac PHP setups may use Apple's Secure Transport
        // rather than OpenSSL, causing issues with CA verification.
        // Allow configuration override of CA verification at users own risk.
        // Verification stays on unless the setting is present and false-like;
        // the loose comparison is kept so that string values such as "" or
        // "0" also disable it.
        $this->verifyCA = true;
        if (isset($this->settings['SYSTEM']['verify_ca'])
            && $this->settings['SYSTEM']['verify_ca'] == false
        ) {
            $this->verifyCA = false;
        }
    }

    /**
     * Write the metadata row and, unless metadata_only is set, the content file.
     *
     * @param array $metadata
     *   The CSV row for this record.
     * @param mixed $pages
     *   Not used by this writer.
     * @param string $record_id
     *   The record identifier; also used (normalized) as the content
     *   file's base name.
     */
    public function writePackages($metadata, $pages, $record_id)
    {
        // Create root output folder
        $this->createOutputDirectory();
        $output_path = $this->outputDirectory . DIRECTORY_SEPARATOR;

        $normalized_record_id = $this->normalizeFilename($record_id);
        $this->writeMetadataFile($metadata);

        if ($this->metadata_only) {
            return;
        }

        // Retrieve the file associated with the document and write it to the output
        // folder using the filename or record_id identifier
        $source_file_url = $this->fileGetter->getFilePath($record_id);
        // Retrieve the PDF, etc. using Guzzle.
        if ($source_file_url) {
            $client = new Client();
            $response = $client->get(
                $source_file_url,
                ['stream' => true,
                'timeout' => $this->httpTimeout,
                'connect_timeout' => $this->httpTimeout,
                'verify' => $this->verifyCA]
            );

            // Lazy MimeType => extension mapping: use the last part of the MimeType.
            $content_types = $response->getHeader('Content-Type');
            $mime_parts = explode('/', $content_types[0]);
            // Drop any parameters such as "; charset=utf-8".
            $extension = preg_replace('/;.*$/', '', $mime_parts[1]);

            $content_file_path = $output_path . $normalized_record_id . '.' . $extension;

            // Stream the body to disk in 2 KB chunks.
            // NOTE(review): FILE_APPEND also appends to a file left over from
            // an earlier run - confirm whether it should be truncated first.
            $body = $response->getBody();
            while (!$body->eof()) {
                file_put_contents($content_file_path, $body->read(2048), FILE_APPEND);
            }
        } else {
            $this->log->addWarning(
                "No content file found in OAI-PMH record",
                array('record' => $record_id)
            );
        }
    }

    /**
     * Adds a row to the CSV file (unlike other Writers' writeMetadataFile(),
     * which writes out an entire metadata XML file).
     *
     * @param array $metadata
     *   The row to append.
     * @param string $output_file_path
     *   Not used; the row goes to the CSV writer opened in the constructor.
     */
    public function writeMetadataFile($metadata, $output_file_path = '')
    {
        $this->output_csv_writer->insertOne($metadata);
    }

    /**
     * URL-decodes the string, then converts colons (e.g. from %3A) into
     * underscores (_).
     *
     * @param string $string
     *   The raw identifier.
     *
     * @return string
     *   The decoded string with every ':' replaced by '_'.
     */
    public function normalizeFilename($string)
    {
        $string = urldecode($string);
        $string = preg_replace('/:/', '_', $string);
        return $string;
    }
}

0 comments on commit 53901a3

Please sign in to comment.