-
Notifications
You must be signed in to change notification settings - Fork 10
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
300 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
<?php | ||
// src/metadataparsers/csv/Csv.php | ||
|
||
namespace mik\metadataparsers\csv; | ||
|
||
use mik\metadataparsers\MetadataParser; | ||
|
||
/**
 * Base class for metadata parsers that produce CSV output.
 *
 * Concrete subclasses inherit the fetcher, the output file path and the
 * list of configured metadata manipulators set up by the constructor.
 */
abstract class Csv extends MetadataParser
{
    /**
     * Sets up the fetcher, output file path and metadata manipulators.
     *
     * @param array $settings
     *    Configuration settings. Reads $settings['FETCHER']['class'] and,
     *    through $this->settings (presumably populated by the parent
     *    constructor - confirm), ['WRITER']['output_file'] and the optional
     *    ['MANIPULATORS']['metadatamanipulators'].
     */
    public function __construct($settings)
    {
        parent::__construct($settings);

        // The fetcher class is looked up by name in the mik\fetchers namespace.
        $fetcherClassName = 'mik\\fetchers\\' . $settings['FETCHER']['class'];
        $this->fetcher = new $fetcherClassName($settings);

        $this->outputFile = $this->settings['WRITER']['output_file'];

        // Manipulators are optional; null marks "none configured".
        $hasManipulators = isset($this->settings['MANIPULATORS']['metadatamanipulators']);
        $this->metadatamanipulators = $hasManipulators
            ? $this->settings['MANIPULATORS']['metadatamanipulators']
            : null;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
<?php | ||
// src/metadataparsers/csv/DcToCsv.php | ||
|
||
/** | ||
* Metadata parser class that writes metadata out to a CSV file. | ||
*/ | ||
|
||
namespace mik\metadataparsers\csv; | ||
|
||
// use mik\metadataparsers\MetadataParser; | ||
use League\Csv\Writer; | ||
use Monolog\Logger; | ||
|
||
class DcToCsv extends Csv
{
    /**
     * Creates a new metadata parser instance and appends the heading row
     * to the output CSV file.
     *
     * The heading row is the configured 'record_key' followed by the
     * configured 'dc_elements' (both from the [METADATA_PARSER] settings).
     *
     * @param array $settings
     *    Configuration settings.
     */
    public function __construct($settings)
    {
        // The parent (Csv) constructor sets up the fetcher and manipulators.
        parent::__construct($settings);

        // Only the heading row is written here. The CSV writer that writes
        // out object metadata is instantiated in the writer.
        $headings = $this->settings['METADATA_PARSER']['dc_elements'];
        array_unshift($headings, $this->settings['METADATA_PARSER']['record_key']);
        $output_file_path = $this->settings['WRITER']['output_file'];
        // Opened in append mode: an existing file is kept and the heading
        // row is added after whatever it already contains.
        $output_csv_writer = Writer::createFromPath($output_file_path, 'a');
        $output_csv_writer->insertOne($headings);
    }

    /**
     * {@inheritdoc}
     *
     * Fetches the item's metadata and returns it as a CSV row.
     *
     * @param string $record_key
     *    The current item's record_key.
     *
     * @return array
     *    The row, as an array.
     */
    public function metadata($record_key)
    {
        $objectInfo = $this->fetcher->getItemInfo($record_key);
        return $this->populateRow($record_key, $objectInfo);
    }

    /**
     * Converts the item's metadata into an array.
     *
     * The row always has one cell for the record key plus one cell per
     * configured DC element, in configuration order, so that it lines up
     * with the heading row written by the constructor. Elements that are
     * absent from the record produce an empty string.
     *
     * @param string $record_key
     *    The current item's record_key.
     * @param string $objectInfo
     *    The current item's metadata as generated by the fetcher; it is
     *    parsed as a DC XML document by getDcValues().
     *
     * @return array
     *    The row, as an array.
     */
    public function populateRow($record_key, $objectInfo)
    {
        $record = $this->getDcValues($objectInfo);

        // Field will be named in metadata parser's 'record_key' config setting.
        $row = array($record_key);

        foreach ($this->settings['METADATA_PARSER']['dc_elements'] as $element) {
            // @todo: parse out multiple values and add them to the CSV separated
            // by a character; what does Migrate Plus want?
            // Until then only the first value of each element is used.
            $row[] = isset($record[$element][0]) ? $record[$element][0] : '';
        }

        if (isset($this->metadatamanipulators)) {
            $row = $this->applyMetadatamanipulators($record_key, $row);
        }

        return $row;
    }

    /**
     * Applies metadatamanipulators listed in the config to the provided row.
     *
     * Each configured entry has the form 'ClassName|param1|param2...'. The
     * class is looked up in the mik\metadatamanipulators namespace and the
     * manipulators are applied in order, each receiving the previous one's
     * output.
     *
     * @param string $record_key
     *    The current item's record_key.
     * @param array $row
     *    An array containing the object's metadata.
     *
     * @return array
     *    The modified array containing the object's metadata.
     */
    public function applyMetadatamanipulators($record_key, $row)
    {
        foreach ($this->metadatamanipulators as $manipulatorConfig) {
            $manipulatorParams = explode('|', $manipulatorConfig);
            // First segment is the class name; the rest are its parameters.
            $manipulatorClassName = array_shift($manipulatorParams);
            $manipulatorClass = 'mik\\metadatamanipulators\\' . $manipulatorClassName;
            $manipulator = new $manipulatorClass($this->settings, $manipulatorParams, $record_key);
            // NOTE(review): assumes manipulate() returns the modified input - confirm.
            $row = $manipulator->manipulate($row);
        }

        return $row;
    }

    /**
     * Parses a DC XML document into an array.
     *
     * Only elements in the http://purl.org/dc/elements/1.1/ namespace are
     * collected. Empty, missing or unparseable input yields an empty array.
     *
     * @param string $xml
     *    The DC XML document.
     *
     * @return array
     *    An associative array containing element name => element values.
     */
    public function getDcValues($xml)
    {
        $dc_values = array();

        // Nothing to parse; loadXML() rejects an empty source string.
        if ($xml === null || $xml === false || $xml === '') {
            return $dc_values;
        }

        $dom = new \DOMDocument();
        if (!$dom->loadXML($xml)) {
            return $dc_values;
        }

        $elements = $dom->getElementsByTagNameNS('http://purl.org/dc/elements/1.1/', '*');
        foreach ($elements as $e) {
            if (!array_key_exists($e->localName, $dc_values)) {
                $dc_values[$e->localName] = array();
            }
            $dc_values[$e->localName][] = $e->nodeValue;
        }

        return $dc_values;
    }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,135 @@ | ||
<?php | ||
|
||
namespace mik\writers; | ||
|
||
use GuzzleHttp\Client; | ||
use mik\exceptions\MikErrorException; | ||
use Monolog\Logger; | ||
use League\Csv\Writer; | ||
|
||
class OaipmhCsv extends \mik\writers\Writer
{
    /**
     * @var array $settings - configuration settings from configuration class.
     */
    public $settings;

    /**
     * @var object $fetcher - Fetcher registered in .ini file.
     */
    private $fetcher;

    /**
     * @var object File getter registered in .ini file.
     */
    private $fileGetter;

    /**
     * @var string Value of the [WRITER] output_directory setting.
     */
    public $output_directory;

    /**
     * @var string Path to the output CSV file ([WRITER] output_file).
     */
    public $output_file_path;

    /**
     * @var object League CSV writer opened on $output_file_path.
     */
    public $output_csv_writer;

    /**
     * @var int HTTP timeout, in seconds, used when retrieving content files.
     */
    public $httpTimeout;

    /**
     * @var bool If true, only the CSV row is written; no content file is fetched.
     */
    public $metadata_only;

    /**
     * @var bool Whether to verify the CA when retrieving content files.
     */
    public $verifyCA;

    /**
     * Create a new OAI-PMH writer Instance
     * @param array $settings configuration settings.
     */
    public function __construct($settings)
    {
        parent::__construct($settings);
        $this->fetcher = new \mik\fetchers\Oaipmh($settings);
        $fileGetterClass = 'mik\\filegetters\\' . $settings['FILE_GETTER']['class'];
        $this->fileGetter = new $fileGetterClass($settings);
        $this->output_directory = $settings['WRITER']['output_directory'];

        // Rows are appended to the CSV file, so an existing file is kept.
        $this->output_file_path = $this->settings['WRITER']['output_file'];
        $this->output_csv_writer = Writer::createFromPath($this->output_file_path, 'a');

        // Seconds.
        $this->httpTimeout = isset($this->settings['WRITER']['http_timeout'])
            ? $this->settings['WRITER']['http_timeout']
            : 60;

        $this->metadata_only = isset($this->settings['WRITER']['metadata_only'])
            ? $this->settings['WRITER']['metadata_only']
            : false;

        // Default Mac PHP setups may use Apple's Secure Transport
        // rather than OpenSSL, causing issues with CA verification.
        // Allow configuration override of CA verification at users own risk.
        // Verification stays on unless the setting is present and falsy; the
        // loose comparison is deliberate because .ini values arrive as strings.
        $this->verifyCA = true;
        if (isset($this->settings['SYSTEM']['verify_ca'])
            && $this->settings['SYSTEM']['verify_ca'] == false
        ) {
            $this->verifyCA = false;
        }
    }

    /**
     * Write folders and files.
     *
     * Appends the record's metadata row to the CSV file and, unless the
     * metadata_only setting is on, downloads the record's content file into
     * the output directory, named after the normalized record id.
     *
     * @param array $metadata
     *    The CSV row for the record.
     * @param mixed $pages
     *    Not used by this writer.
     * @param string $record_id
     *    The record's identifier.
     */
    public function writePackages($metadata, $pages, $record_id)
    {
        // Create root output folder
        $this->createOutputDirectory();
        // NOTE(review): $this->outputDirectory is not assigned in this class
        // (the constructor sets $this->output_directory); presumably the
        // parent Writer sets it - confirm.
        $output_path = $this->outputDirectory . DIRECTORY_SEPARATOR;

        $normalized_record_id = $this->normalizeFilename($record_id);
        $this->writeMetadataFile($metadata);

        if ($this->metadata_only) {
            return;
        }

        // Retrieve the file associated with the document and write it to the output
        // folder using the filename or record_id identifier
        $source_file_url = $this->fileGetter->getFilePath($record_id);
        if (!$source_file_url) {
            $this->log->addWarning(
                "No content file found in OAI-PMH record",
                array('record' => $record_id)
            );
            return;
        }

        // Retrieve the PDF, etc. using Guzzle.
        $client = new Client();
        $response = $client->get(
            $source_file_url,
            ['stream' => true,
            'timeout' => $this->httpTimeout,
            'connect_timeout' => $this->httpTimeout,
            'verify' => $this->verifyCA]
        );

        $extension = $this->extensionFromContentType($response->getHeader('Content-Type'));
        $content_file_path = $output_path . $normalized_record_id . '.' . $extension;

        // Open once in write mode so that a file left over from a previous
        // run is replaced rather than appended to.
        $handle = fopen($content_file_path, 'wb');
        if ($handle === false) {
            $this->log->addWarning(
                "Could not open content file for writing",
                array('record' => $record_id, 'path' => $content_file_path)
            );
            return;
        }

        try {
            $body = $response->getBody();
            while (!$body->eof()) {
                fwrite($handle, $body->read(2048));
            }
        } finally {
            fclose($handle);
        }
    }

    /**
     * Derives a file extension from a response's Content-Type header values.
     *
     * Lazy MimeType => extension mapping: uses the part after the first '/'
     * of the first header value, with any ';...' parameters stripped.
     *
     * @param array $content_types
     *    The values of the Content-Type header.
     *
     * @return string
     *    The extension, or 'bin' if the header is missing or has no subtype.
     */
    private function extensionFromContentType($content_types)
    {
        $fallback = 'bin';
        if (empty($content_types[0])) {
            return $fallback;
        }

        $parts = explode('/', $content_types[0]);
        if (!isset($parts[1])) {
            return $fallback;
        }

        $extension = trim(preg_replace('/;.*$/', '', $parts[1]));
        return $extension === '' ? $fallback : $extension;
    }

    /**
     * Adds a row to CSV file (unlike other Writers' writeMetadataFile(),
     * which writes out an entire metadata XML file).
     *
     * @param array $metadata
     *    The row to append.
     * @param string $output_file_path
     *    Not used; the row always goes to the writer opened in the constructor.
     */
    public function writeMetadataFile($metadata, $output_file_path = '')
    {
        $this->output_csv_writer->insertOne($metadata);
    }

    /**
     * Convert %3A (:) in filenames into underscores (_).
     *
     * The string is URL-decoded first, so both literal and encoded colons
     * are replaced.
     *
     * @param string $string
     *    The raw identifier.
     *
     * @return string
     *    The decoded identifier with every ':' replaced by '_'.
     */
    public function normalizeFilename($string)
    {
        return str_replace(':', '_', urldecode($string));
    }
}