From de054583f472c7314c9c69873bb63f627e0d45ff Mon Sep 17 00:00:00 2001 From: MalcyL Date: Thu, 23 Jul 2015 12:04:56 +0100 Subject: [PATCH 1/4] Handle Mongo 2.6's stricter enforcement of index key sizes. Mongo 2.6 has introduced stricter enforcement of the maximum index key size. By default an index key > 1024 bytes now throws an exception. This pull requests changes Table Row generation to capture this error when thrown. The key is then truncated and the save retried. --- src/mongo/delegates/Tables.class.php | 96 ++++++++++++++++++- test/unit/mongo/MongoTripodTablesTest.php | 25 +++++ test/unit/mongo/data/config.json | 40 ++++++++ test/unit/mongo/data/resources.json | 111 ++++++++++++++++++++++ 4 files changed, 271 insertions(+), 1 deletion(-) diff --git a/src/mongo/delegates/Tables.class.php b/src/mongo/delegates/Tables.class.php index aac0f43a..0f75e012 100644 --- a/src/mongo/delegates/Tables.class.php +++ b/src/mongo/delegates/Tables.class.php @@ -589,7 +589,7 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue // Remove temp fields from document $generatedRow['value'] = array_diff_key($value, array_flip($this->temporaryFields)); - $collection->save($generatedRow); + $this->truncatingSave($collection, $generatedRow); } } @@ -602,6 +602,100 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue $this->getStat()->timer(MONGO_CREATE_TABLE.".$tableType",$t->result()); } + /** + * Save the generated rows to the given collection. + * + * If an exception in thrown because a field is too large to index, the field is + * truncated and the save is retried. + * + * @param \MongoCollection $collection + * @param array $generatedRow The rows to save. + */ + protected function truncatingSave(\MongoCollection $collection, array $generatedRow) + { + try + { + $collection->save($generatedRow); + } catch (\Exception $e) { + // We only trunacte and retry the save if the \MongoCursorException contains this text. + if (strpos($e->getMessage(),"Btree::insert: key too large to index") !== FALSE) + { + $this->truncateFields($collection, $generatedRow); + $collection->save($generatedRow); + } + else + { + throw $e; + } + } + } + + /** + * Truncate any indexed fields in the generated rows which are too large to index + * + * @param \MongoCollection $collection + * @param array $generatedRow - Pass by reference so that the contents is truncated + */ + protected function truncateFields(\MongoCollection $collection, array &$generatedRow) + { + // Find the name of any indexed fields + $indexedFields = array(); + $indexesGroupedByCollection = $this->config->getIndexesGroupedByCollection($this->storeName); + if (isset($indexesGroupedByCollection) && isset($indexesGroupedByCollection[$collection->getName()])) + { + $indexes = $indexesGroupedByCollection[$collection->getName()]; + if (isset($indexes)) + { + foreach($indexes as $repset) + { + foreach ($repset as $index) + { + foreach ($index as $indexedFieldname => $v) + { + if (strpos($indexedFieldname, "value.") === 0) + { + $indexedFields[] = substr($indexedFieldname, strlen('value.')); + } + } + } + } + } + } + + if (count($indexedFields) > 0 && isset($generatedRow['value']) && is_array($generatedRow['value'])) + { + // Iterate over generated rows BY REFERENCE (&) - we are going to modify the contents of $field + foreach($generatedRow as &$field) + { + foreach($indexedFields as $indexedFieldname) + { + // The key will have the index name in the following format added to it. + // Adjust the max key size allowed to take it into account. + $maxKeySize = 1024 - strlen("value_".$indexedFieldname."_1"); + + if (array_key_exists($indexedFieldname, $field)) + { + // It's important that we count the number of bytes + // in the field - not just the number of characters. + // UTF-8 characters can be between 1 and 4 bytes. + // + // From the strlen documentation: + // Attention with utf8: + // $foo = "bär"; + // strlen($foo) will return 4 and not 3 as expected.. + // + // So strlen does count the bytes - not the characters. + + if (is_string($field[$indexedFieldname]) && strlen($field[$indexedFieldname]) > $maxKeySize) + { + $field[$indexedFieldname] = substr($field[$indexedFieldname],0, $maxKeySize); + } + } + } + } + } + } + /** * @param array $spec The table spec * @param array $dest The table row document to save diff --git a/test/unit/mongo/MongoTripodTablesTest.php b/test/unit/mongo/MongoTripodTablesTest.php index eb87a311..702e9496 100644 --- a/test/unit/mongo/MongoTripodTablesTest.php +++ b/test/unit/mongo/MongoTripodTablesTest.php @@ -640,6 +640,31 @@ public function testGenerateTableRowsForUsersWithModifiersLowercaseDate() $this->assertEquals($rows['results'][0]['mongoDate']->__toString(), $rows['results'][0]['lowercaseDate']); } + /** + * Test table rows are tuncated if they are too large to index + * @access public + * @return void + */ + public function testGenerateTableRowsTruncatesFieldsTooLargeToIndex() + { + $fullTitle = "Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers."; + $truncatedTitle = substr($fullTitle,0,1011); // 1011 = 1024 - index name "value_title_1" + $fullTitleLength = strlen($fullTitle); + $truncatedTitleLength = strLen($truncatedTitle); + + $rows = $this->generateTableRows("t_truncation"); + + // When using Mongo 2.4 and below, the string will not have been truncated. + // Due to stricter index key enforcement in Mongo 2.6 and above, the string will have been truncated. + // Allow the test to pass for either version of Mongo + $actualLength = strlen($rows['results'][0]['title']); + $this->assertTrue($actualLength === $fullTitleLength || $actualLength === $truncatedTitleLength, "Title is an unexpected length"); + + // Assert that the title starts with the truncated title. + // This will be the case for both Mongo 2.4 and Mongo 2.6 + $this->assertTrue(strpos($rows['results'][0]['title'], $truncatedTitle) === 0, "Unexpected title"); + } + /** * Test that link modifier is derived from the joined resource id, rather than base * @access public diff --git a/test/unit/mongo/data/config.json b/test/unit/mongo/data/config.json index 801b5d7e..fef66a6b 100644 --- a/test/unit/mongo/data/config.json +++ b/test/unit/mongo/data/config.json @@ -336,6 +336,9 @@ "ensureIndexes": [ { "value.isbn": 1 + }, + { + "value.title": 1 } ], "fields": [ @@ -359,6 +362,43 @@ } } }, + { + "_id": "t_truncation", + "type": "acorn:ResourceForTruncating", + "from": "CBD_testing", + "ensureIndexes": [ + { + "value.isbn": 1 + }, + { + "value.title": 1 + } + ], + "fields": [ + { + "fieldName": "type", + "predicates": ["rdf:type"] + }, + { + "fieldName": "isbn", + "predicates": ["bibo:isbn13"] + }, + { + "fieldName": "title", + "predicates": ["searchterms:title"] + } + ], + "joins": { + "dct:isVersionOf": { + "fields": [ + { + "fieldName": "isbn13", + "predicates": ["bibo:isbn13"] + } + ] + } + } + }, { "_id": "t_source_count", "type": "acorn:Resource", diff --git a/test/unit/mongo/data/resources.json b/test/unit/mongo/data/resources.json index db72b423..3097b446 100644 --- a/test/unit/mongo/data/resources.json +++ b/test/unit/mongo/data/resources.json @@ -245,6 +245,117 @@ "u":"http://talisaspire.com/isbn/9780393929690" } }, + { + "_id": { + "r":"http://talisaspire.com/resources/indexKeyTooLarge", + "c":"http://talisaspire.com/" + }, + "_version": 0, + "dct:isVersionOf": + { + "u":"http://talisaspire.com/works/4d101f63c10a6" + }, + "dct:source":[ + { + "u":"http://life.ac.uk/resources/836E7CAD-63D2-63A0-B1CB-AA6A7E54A5C9" + }, + { + "u":"http://life.ac.uk/resources/BFBC6A06-A8B0-DED8-53AA-8E80DB44CC53" + } + ], + "dct:subject": + { + "u":"http://talisaspire.com/disciplines/physics" + }, + "bibo:isbn13": + { + "l":"1234567890123" + }, + "acorn:bookmarkReferences": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/bookmarks" + }, + "acorn:foo": + { + "l":"wibble" + }, + "acorn:jacsUri":[ + { + "u":"http://jacs3.dataincubator.org/f300" + }, + { + "u":"http://jacs3.dataincubator.org/f340" + } + ], + "acorn:listReferences": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/lists" + }, + "acorn:openLibraryUri": + { + "u":"http://openlibrary.org/books/OL10157958M" + }, + "acorn:preferredMetadata": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/metadata" + }, + "searchterms:author": + { + "l":"Ohanian" + }, + "searchterms:discipline": + { + "l":"physics" + }, + "searchterms:isbn": + { + "l":"1234567890" + }, + "searchterms:openLibrarySubject":[ + { + "l":"Engineering: general" + }, + { + "l":"PHYSICS" + }, + { + "l":"Science" + } + ], + "searchterms:title":[ + + { + "l":"Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers." + } + ], + "searchterms:topic":[ + { + "l":"engineering: general" + }, + { + "l":"physics" + }, + { + "l":"science" + } + ], + "searchterms:usedAt": + { + "l":"0071" + }, + "rdf:type":[ + { + "u":"http://purl.org/ontology/bibo/Book" + }, + { + "u":"http://talisaspire.com/schema#ResourceForTruncating" + } + ], + "owl:sameAs": + { + "u":"http://talisaspire.com/isbn/9780393929690" + } + }, { "_id": { "r" : "http://jacs3.dataincubator.org/f300", From 0f04827ec82059ca25fbebb9fc01f758fa72d660 Mon Sep 17 00:00:00 2001 From: MalcyL Date: Thu, 23 Jul 2015 13:46:56 +0100 Subject: [PATCH 2/4] Fixes to tests --- test/unit/mongo/MongoTripodConfigTest.php | 5 ++-- test/unit/mongo/MongoTripodDriverTest.php | 12 ++++----- .../mongo/MongoTripodNQuadSerializerTest.php | 26 +++++++++++++++++++ test/unit/mongo/MongoTripodTablesTest.php | 10 +++---- test/unit/mongo/data/config.json | 3 --- 5 files changed, 40 insertions(+), 16 deletions(-) diff --git a/test/unit/mongo/MongoTripodConfigTest.php b/test/unit/mongo/MongoTripodConfigTest.php index bb208731..ffd54126 100644 --- a/test/unit/mongo/MongoTripodConfigTest.php +++ b/test/unit/mongo/MongoTripodConfigTest.php @@ -969,9 +969,10 @@ public function testSearchConfigNotPresent() public function testGetAllTypesInSpecifications() { $types = $this->tripodConfig->getAllTypesInSpecifications("tripod_php_testing"); - $this->assertEquals(10, count($types), "There should be 10 types based on the configured view, table and search specifications in config.json"); + $this->assertEquals(11, count($types), "There should be 10 types based on the configured view, table and search specifications in config.json"); $expectedValues = array( "acorn:Resource", + "acorn:ResourceForTruncating", "acorn:Work", "http://talisaspire.com/schema#Work2", "acorn:Work2", @@ -1326,7 +1327,7 @@ public function testTransactionLogIsWrittenToCorrectDBAndCollection() $transactionColletion = $transactionMongo->selectCollection($newConfig['transaction_log']['database'], $newConfig['transaction_log']['collection']); $transactionCount = $transactionColletion->count(); $transactionExampleDocument = $transactionColletion->findOne(); - $this->assertEquals(19, $transactionCount); + $this->assertEquals(20, $transactionCount); $this->assertContains('transaction_', $transactionExampleDocument["_id"]); } diff --git a/test/unit/mongo/MongoTripodDriverTest.php b/test/unit/mongo/MongoTripodDriverTest.php index aca37c63..9ea1ebf0 100755 --- a/test/unit/mongo/MongoTripodDriverTest.php +++ b/test/unit/mongo/MongoTripodDriverTest.php @@ -218,7 +218,7 @@ public function testDescribeResources() public function testGetCount() { $count = $this->tripod->getCount(array("rdf:type.".VALUE_URI=>"bibo:Book")); - $this->assertEquals(5,$count); + $this->assertEquals(6,$count); } public function testTripodSaveChangesRemovesLiteralTriple() @@ -1670,14 +1670,14 @@ public function testGetDistinctTableValues() $table = 't_distinct'; $this->tripod->generateTableRows($table); $rows = $this->tripod->getTableRows($table, array(), array(), 0, 0); - $this->assertEquals(7, $rows['head']['count']); + $this->assertEquals(8, $rows['head']['count']); $results = $this->tripod->getDistinctTableColumnValues($table, "value.title"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(3, $results['head']['count']); + $this->assertEquals(4, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(3, count($results['results'])); + $this->assertEquals(4, count($results['results'])); $this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']); $this->assertContains('A document title', $results['results']); $this->assertContains('Another document title', $results['results']); @@ -1696,9 +1696,9 @@ public function testGetDistinctTableValues() $results = $this->tripod->getDistinctTableColumnValues($table, "value.type"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(4, $results['head']['count']); + $this->assertEquals(5, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(4, count($results['results'])); + $this->assertEquals(5, count($results['results'])); $this->assertContains('acorn:Resource', $results['results']); $this->assertContains('acorn:Work', $results['results']); $this->assertContains('bibo:Book', $results['results']); diff --git a/test/unit/mongo/MongoTripodNQuadSerializerTest.php b/test/unit/mongo/MongoTripodNQuadSerializerTest.php index 0e01c169..b85111e4 100644 --- a/test/unit/mongo/MongoTripodNQuadSerializerTest.php +++ b/test/unit/mongo/MongoTripodNQuadSerializerTest.php @@ -93,6 +93,32 @@ public function testSerializerWithMultipleSubjects() . . . + . + . + . + . + \"1234567890123\" . + . + \"wibble\" . + . + . + . + . + . + \"Ohanian\" . + \"physics\" . + \"1234567890\" . + \"Engineering: general\" . + \"PHYSICS\" . + \"Science\" . + \"Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers.\" . + \"engineering: general\" . + \"physics\" . + \"science\" . + \"0071\" . + . + . + . \"First title\" . \"Second title\" . \"First title\" . diff --git a/test/unit/mongo/MongoTripodTablesTest.php b/test/unit/mongo/MongoTripodTablesTest.php index 702e9496..4f2abeb4 100644 --- a/test/unit/mongo/MongoTripodTablesTest.php +++ b/test/unit/mongo/MongoTripodTablesTest.php @@ -758,14 +758,14 @@ public function testDistinct() $table = 't_distinct'; $this->generateTableRows($table); $rows = $this->tripodTables->getTableRows($table, array(), array(), 0, 0); - $this->assertEquals(7, $rows['head']['count']); + $this->assertEquals(8, $rows['head']['count']); $results = $this->tripodTables->distinct($table, "value.title"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(3, $results['head']['count']); + $this->assertEquals(4, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(3, count($results['results'])); + $this->assertEquals(4, count($results['results'])); $this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']); $this->assertContains('A document title', $results['results']); $this->assertContains('Another document title', $results['results']); @@ -784,9 +784,9 @@ public function testDistinct() $results = $this->tripodTables->distinct($table, "value.type"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(4, $results['head']['count']); + $this->assertEquals(5, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(4, count($results['results'])); + $this->assertEquals(5, count($results['results'])); $this->assertContains('acorn:Resource', $results['results']); $this->assertContains('acorn:Work', $results['results']); $this->assertContains('bibo:Book', $results['results']); diff --git a/test/unit/mongo/data/config.json b/test/unit/mongo/data/config.json index fef66a6b..d904a8cb 100644 --- a/test/unit/mongo/data/config.json +++ b/test/unit/mongo/data/config.json @@ -336,9 +336,6 @@ "ensureIndexes": [ { "value.isbn": 1 - }, - { - "value.title": 1 } ], "fields": [ From 392effc45c87e1fb7580dac8055de4231e434b31 Mon Sep 17 00:00:00 2001 From: MalcyL Date: Thu, 23 Jul 2015 16:56:36 +0100 Subject: [PATCH 3/4] Fix typo in comment. --- src/mongo/delegates/Tables.class.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/mongo/delegates/Tables.class.php b/src/mongo/delegates/Tables.class.php index 0f75e012..99b55ddf 100644 --- a/src/mongo/delegates/Tables.class.php +++ b/src/mongo/delegates/Tables.class.php @@ -617,7 +617,7 @@ protected function truncatingSave(\MongoCollection $collection, array $generated { $collection->save($generatedRow); } catch (\Exception $e) { - // We only trunacte and retry the save if the \MongoCursorException contains this text. + // We only truncate and retry the save if the \Exception contains this text. if (strpos($e->getMessage(),"Btree::insert: key too large to index") !== FALSE) { $this->truncateFields($collection, $generatedRow); From 2b1b19c0a019fafde268bb0e278d6f62e1d06f18 Mon Sep 17 00:00:00 2001 From: MalcyL Date: Thu, 23 Jul 2015 17:01:13 +0100 Subject: [PATCH 4/4] Fix text in assertion message. --- test/unit/mongo/MongoTripodConfigTest.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/unit/mongo/MongoTripodConfigTest.php b/test/unit/mongo/MongoTripodConfigTest.php index ffd54126..8d9b5380 100644 --- a/test/unit/mongo/MongoTripodConfigTest.php +++ b/test/unit/mongo/MongoTripodConfigTest.php @@ -969,7 +969,7 @@ public function testSearchConfigNotPresent() public function testGetAllTypesInSpecifications() { $types = $this->tripodConfig->getAllTypesInSpecifications("tripod_php_testing"); - $this->assertEquals(11, count($types), "There should be 10 types based on the configured view, table and search specifications in config.json"); + $this->assertEquals(11, count($types), "There should be 11 types based on the configured view, table and search specifications in config.json"); $expectedValues = array( "acorn:Resource", "acorn:ResourceForTruncating",