Skip to content

Commit

Permalink
Merge pull request #84 from talis/truncate-large-fields
Browse files Browse the repository at this point in the history
90% Handle Mongo 2.6's stricter enforcement of index key sizes.
  • Loading branch information
rsinger committed Jul 23, 2015
2 parents 89044e8 + 2b1b19c commit e96dfdb
Show file tree
Hide file tree
Showing 7 changed files with 308 additions and 14 deletions.
96 changes: 95 additions & 1 deletion src/mongo/delegates/Tables.class.php
Original file line number Diff line number Diff line change
Expand Up @@ -589,7 +589,7 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue
// Remove temp fields from document

$generatedRow['value'] = array_diff_key($value, array_flip($this->temporaryFields));
$collection->save($generatedRow);
$this->truncatingSave($collection, $generatedRow);
}
}

Expand All @@ -602,6 +602,100 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue
$this->getStat()->timer(MONGO_CREATE_TABLE.".$tableType",$t->result());
}

/**
 * Persist a generated table row, retrying once with truncated fields if needed.
 *
 * The row is saved as-is first. If that save throws an exception whose message
 * reports that a key is too large to index, the row's indexed fields are
 * truncated via truncateFields() and the save is attempted a second time.
 * Any other exception, and any exception raised by the second save, propagates
 * to the caller.
 *
 * @param \MongoCollection $collection The collection to save into.
 * @param array $generatedRow The row document to save.
 * @throws \Exception When the save fails for any reason other than an oversized index key.
 */
protected function truncatingSave(\MongoCollection $collection, array $generatedRow)
{
    try
    {
        $collection->save($generatedRow);
    }
    catch (\Exception $saveError)
    {
        // Only the "key too large" failure is recoverable; everything else is re-thrown untouched.
        $isKeyTooLarge = strpos($saveError->getMessage(), "Btree::insert: key too large to index") !== FALSE;
        if (!$isKeyTooLarge)
        {
            throw $saveError;
        }

        // Shrink the offending indexed fields in place, then retry exactly once.
        $this->truncateFields($collection, $generatedRow);
        $collection->save($generatedRow);
    }
}

/**
 * Truncate any indexed string fields of the generated row which are too large to index.
 *
 * Index definitions for the collection are read from the config; every index key of
 * the form "value.<field>" names a candidate field. For each candidate that is present
 * in $generatedRow['value'] as a string longer than the allowed key size, the string is
 * cut down to that size. Only the 'value' sub-document is touched - other top-level
 * entries of the row are never modified.
 *
 * Truncation is byte based, but a multi-byte UTF-8 character is never split: if the cut
 * lands inside a character, the partial character is dropped as well.
 *
 * NOTE(review): only direct children of 'value' are handled; an index on a nested path
 * such as "value.a.b" will not match any key here - confirm whether that is required.
 *
 * @param \MongoCollection $collection Collection whose configured indexes are consulted
 * @param array $generatedRow - Pass by reference so that the contents is truncated
 */
protected function truncateFields(\MongoCollection $collection, array &$generatedRow)
{
    // Find the name of any indexed fields that live under "value."
    $indexedFields = array();
    $indexesGroupedByCollection = $this->config->getIndexesGroupedByCollection($this->storeName);
    $collectionName = $collection->getName();
    if (isset($indexesGroupedByCollection[$collectionName]))
    {
        foreach ($indexesGroupedByCollection[$collectionName] as $repset)
        {
            foreach ($repset as $index)
            {
                foreach ($index as $indexedFieldname => $v)
                {
                    if (strpos($indexedFieldname, "value.") === 0)
                    {
                        $indexedFields[] = substr($indexedFieldname, strlen('value.'));
                    }
                }
            }
        }
    }

    // The same field can take part in several indexes; it only needs processing once.
    $indexedFields = array_unique($indexedFields);

    if (count($indexedFields) === 0 || !isset($generatedRow['value']) || !is_array($generatedRow['value']))
    {
        return;
    }

    foreach ($indexedFields as $indexedFieldname)
    {
        if (!array_key_exists($indexedFieldname, $generatedRow['value']))
        {
            continue;
        }

        $fieldValue = $generatedRow['value'][$indexedFieldname];
        if (!is_string($fieldValue))
        {
            continue;
        }

        // The key will have the index name in the following format added to it.
        // Adjust the max key size allowed to take it into account.
        $maxKeySize = 1024 - strlen("value_".$indexedFieldname."_1");

        // strlen() counts bytes, not characters, which is what matters for the key size:
        // UTF-8 characters can be between 1 and 4 bytes.
        if (strlen($fieldValue) <= $maxKeySize)
        {
            continue;
        }

        $truncated = substr($fieldValue, 0, $maxKeySize);

        // substr() cuts on a byte boundary and may leave a partial multi-byte character
        // at the end. If the original was valid UTF-8, back off (at most 3 bytes, the
        // longest possible partial sequence) until the result is valid again.
        // preg_match() with the /u modifier returns 1 only for valid UTF-8 subjects.
        if (preg_match('//u', $fieldValue) === 1)
        {
            for ($dropped = 0; $dropped < 3 && preg_match('//u', $truncated) !== 1; $dropped++)
            {
                $truncated = substr($truncated, 0, -1);
            }
        }

        $generatedRow['value'][$indexedFieldname] = $truncated;
    }
}

/**
* @param array $spec The table spec
* @param array $dest The table row document to save
Expand Down
5 changes: 3 additions & 2 deletions test/unit/mongo/MongoTripodConfigTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -969,9 +969,10 @@ public function testSearchConfigNotPresent()
public function testGetAllTypesInSpecifications()
{
$types = $this->tripodConfig->getAllTypesInSpecifications("tripod_php_testing");
$this->assertEquals(10, count($types), "There should be 10 types based on the configured view, table and search specifications in config.json");
$this->assertEquals(11, count($types), "There should be 11 types based on the configured view, table and search specifications in config.json");
$expectedValues = array(
"acorn:Resource",
"acorn:ResourceForTruncating",
"acorn:Work",
"http://talisaspire.com/schema#Work2",
"acorn:Work2",
Expand Down Expand Up @@ -1326,7 +1327,7 @@ public function testTransactionLogIsWrittenToCorrectDBAndCollection()
$transactionColletion = $transactionMongo->selectCollection($newConfig['transaction_log']['database'], $newConfig['transaction_log']['collection']);
$transactionCount = $transactionColletion->count();
$transactionExampleDocument = $transactionColletion->findOne();
$this->assertEquals(19, $transactionCount);
$this->assertEquals(20, $transactionCount);
$this->assertContains('transaction_', $transactionExampleDocument["_id"]);
}

Expand Down
12 changes: 6 additions & 6 deletions test/unit/mongo/MongoTripodDriverTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -218,7 +218,7 @@ public function testDescribeResources()
public function testGetCount()
{
$count = $this->tripod->getCount(array("rdf:type.".VALUE_URI=>"bibo:Book"));
$this->assertEquals(5,$count);
$this->assertEquals(6,$count);
}

public function testTripodSaveChangesRemovesLiteralTriple()
Expand Down Expand Up @@ -1670,14 +1670,14 @@ public function testGetDistinctTableValues()
$table = 't_distinct';
$this->tripod->generateTableRows($table);
$rows = $this->tripod->getTableRows($table, array(), array(), 0, 0);
$this->assertEquals(7, $rows['head']['count']);
$this->assertEquals(8, $rows['head']['count']);
$results = $this->tripod->getDistinctTableColumnValues($table, "value.title");

$this->assertArrayHasKey('head', $results);
$this->assertArrayHasKey('count', $results['head']);
$this->assertEquals(3, $results['head']['count']);
$this->assertEquals(4, $results['head']['count']);
$this->assertArrayHasKey('results', $results);
$this->assertEquals(3, count($results['results']));
$this->assertEquals(4, count($results['results']));
$this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']);
$this->assertContains('A document title', $results['results']);
$this->assertContains('Another document title', $results['results']);
Expand All @@ -1696,9 +1696,9 @@ public function testGetDistinctTableValues()
$results = $this->tripod->getDistinctTableColumnValues($table, "value.type");
$this->assertArrayHasKey('head', $results);
$this->assertArrayHasKey('count', $results['head']);
$this->assertEquals(4, $results['head']['count']);
$this->assertEquals(5, $results['head']['count']);
$this->assertArrayHasKey('results', $results);
$this->assertEquals(4, count($results['results']));
$this->assertEquals(5, count($results['results']));
$this->assertContains('acorn:Resource', $results['results']);
$this->assertContains('acorn:Work', $results['results']);
$this->assertContains('bibo:Book', $results['results']);
Expand Down
26 changes: 26 additions & 0 deletions test/unit/mongo/MongoTripodNQuadSerializerTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,32 @@ public function testSerializerWithMultipleSubjects()
<http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA-2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/ontology/bibo/Book> <http://talisaspire.com/> .
<http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA-2> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://talisaspire.com/schema#Resource> <http://talisaspire.com/> .
<http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA-2> <http://www.w3.org/2002/07/owl#sameAs> <http://talisaspire.com/isbn/9780393929690> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://purl.org/dc/terms/isVersionOf> <http://talisaspire.com/works/4d101f63c10a6> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://purl.org/dc/terms/source> <http://life.ac.uk/resources/836E7CAD-63D2-63A0-B1CB-AA6A7E54A5C9> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://purl.org/dc/terms/source> <http://life.ac.uk/resources/BFBC6A06-A8B0-DED8-53AA-8E80DB44CC53> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://purl.org/dc/terms/subject> <http://talisaspire.com/disciplines/physics> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://purl.org/ontology/bibo/isbn13> \"1234567890123\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#bookmarkReferences> <http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/bookmarks> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#foo> \"wibble\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#jacsUri> <http://jacs3.dataincubator.org/f300> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#jacsUri> <http://jacs3.dataincubator.org/f340> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#listReferences> <http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/lists> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#openLibraryUri> <http://openlibrary.org/books/OL10157958M> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/schema#preferredMetadata> <http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/metadata> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#author> \"Ohanian\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#discipline> \"physics\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#isbn> \"1234567890\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#openLibrarySubject> \"Engineering: general\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#openLibrarySubject> \"PHYSICS\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#openLibrarySubject> \"Science\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#title> \"Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &amp;c., &amp;c. Mahommah&#039;s Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the &quot;North of England Shipping Gazette,&quot; Author of Several Popular Works, and Editor of Sundry Reform Papers.\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#topic> \"engineering: general\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#topic> \"physics\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#topic> \"science\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://talisaspire.com/searchTerms/schema#usedAt> \"0071\" <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://purl.org/ontology/bibo/Book> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://talisaspire.com/schema#ResourceForTruncating> <http://talisaspire.com/> .
<http://talisaspire.com/resources/indexKeyTooLarge> <http://www.w3.org/2002/07/owl#sameAs> <http://talisaspire.com/isbn/9780393929690> <http://talisaspire.com/> .
<http://jacs3.dataincubator.org/f300> <http://purl.org/dc/terms/title> \"First title\" <http://talisaspire.com/> .
<http://jacs3.dataincubator.org/f300> <http://purl.org/dc/terms/title> \"Second title\" <http://talisaspire.com/> .
<http://jacs3.dataincubator.org/f340> <http://purl.org/dc/terms/title> \"First title\" <http://talisaspire.com/> .
Expand Down
35 changes: 30 additions & 5 deletions test/unit/mongo/MongoTripodTablesTest.php
Original file line number Diff line number Diff line change
Expand Up @@ -640,6 +640,31 @@ public function testGenerateTableRowsForUsersWithModifiersLowercaseDate()
$this->assertEquals($rows['results'][0]['mongoDate']->__toString(), $rows['results'][0]['lowercaseDate']);
}

/**
 * Table row fields that are too large to index should be truncated.
 *
 * Generates the "t_truncation" table and checks the title of the first result
 * against both the full and the truncated form of a very long title.
 *
 * @access public
 * @return void
 */
public function testGenerateTableRowsTruncatesFieldsTooLargeToIndex()
{
    $fullTitle = "Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &amp;c., &amp;c. Mahommah&#039;s Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the &quot;North of England Shipping Gazette,&quot; Author of Several Popular Works, and Editor of Sundry Reform Papers.";

    // 1024 minus the length of the index name "value_title_1" gives 1011 bytes.
    $expectedPrefix = substr($fullTitle, 0, 1024 - strlen("value_title_1"));
    $acceptableLengths = array(strlen($fullTitle), strlen($expectedPrefix));

    $rows = $this->generateTableRows("t_truncation");
    $actualTitle = $rows['results'][0]['title'];

    // When using Mongo 2.4 and below, the string will not have been truncated.
    // Due to stricter index key enforcement in Mongo 2.6 and above, it will have been.
    // Either length is accepted so the test passes on both versions.
    $this->assertTrue(
        in_array(strlen($actualTitle), $acceptableLengths, true),
        "Title is an unexpected length"
    );

    // Truncated or not, the stored title must begin with the truncated form.
    $this->assertTrue(
        strncmp($actualTitle, $expectedPrefix, strlen($expectedPrefix)) === 0,
        "Unexpected title"
    );
}

/**
* Test that link modifier is derived from the joined resource id, rather than base
* @access public
Expand Down Expand Up @@ -733,14 +758,14 @@ public function testDistinct()
$table = 't_distinct';
$this->generateTableRows($table);
$rows = $this->tripodTables->getTableRows($table, array(), array(), 0, 0);
$this->assertEquals(7, $rows['head']['count']);
$this->assertEquals(8, $rows['head']['count']);
$results = $this->tripodTables->distinct($table, "value.title");

$this->assertArrayHasKey('head', $results);
$this->assertArrayHasKey('count', $results['head']);
$this->assertEquals(3, $results['head']['count']);
$this->assertEquals(4, $results['head']['count']);
$this->assertArrayHasKey('results', $results);
$this->assertEquals(3, count($results['results']));
$this->assertEquals(4, count($results['results']));
$this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']);
$this->assertContains('A document title', $results['results']);
$this->assertContains('Another document title', $results['results']);
Expand All @@ -759,9 +784,9 @@ public function testDistinct()
$results = $this->tripodTables->distinct($table, "value.type");
$this->assertArrayHasKey('head', $results);
$this->assertArrayHasKey('count', $results['head']);
$this->assertEquals(4, $results['head']['count']);
$this->assertEquals(5, $results['head']['count']);
$this->assertArrayHasKey('results', $results);
$this->assertEquals(4, count($results['results']));
$this->assertEquals(5, count($results['results']));
$this->assertContains('acorn:Resource', $results['results']);
$this->assertContains('acorn:Work', $results['results']);
$this->assertContains('bibo:Book', $results['results']);
Expand Down
Loading

0 comments on commit e96dfdb

Please sign in to comment.