diff --git a/src/mongo/delegates/Tables.class.php b/src/mongo/delegates/Tables.class.php index aac0f43a..99b55ddf 100644 --- a/src/mongo/delegates/Tables.class.php +++ b/src/mongo/delegates/Tables.class.php @@ -589,7 +589,7 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue // Remove temp fields from document $generatedRow['value'] = array_diff_key($value, array_flip($this->temporaryFields)); - $collection->save($generatedRow); + $this->truncatingSave($collection, $generatedRow); } } @@ -602,6 +602,100 @@ public function generateTableRows($tableType,$resource=null,$context=null,$queue $this->getStat()->timer(MONGO_CREATE_TABLE.".$tableType",$t->result()); } + /** + * Save the generated rows to the given collection. + * + * If an exception in thrown because a field is too large to index, the field is + * truncated and the save is retried. + * + * @param \MongoCollection $collection + * @param array $generatedRow The rows to save. + */ + protected function truncatingSave(\MongoCollection $collection, array $generatedRow) + { + try + { + $collection->save($generatedRow); + } catch (\Exception $e) { + // We only truncate and retry the save if the \Exception contains this text. + if (strpos($e->getMessage(),"Btree::insert: key too large to index") !== FALSE) + { + $this->truncateFields($collection, $generatedRow); + $collection->save($generatedRow); + } + else + { + throw $e; + } + } + } + + /** + * Truncate any indexed fields in the generated rows which are too large to index + * + * @param \MongoCollection $collection + * @param array $generatedRow - Pass by reference so that the contents is truncated + */ + protected function truncateFields(\MongoCollection $collection, array &$generatedRow) + { + // Find the name of any indexed fields + $indexedFields = array(); + $indexesGroupedByCollection = $this->config->getIndexesGroupedByCollection($this->storeName); + if (isset($indexesGroupedByCollection) && isset($indexesGroupedByCollection[$collection->getName()])) + { + $indexes = $indexesGroupedByCollection[$collection->getName()]; + if (isset($indexes)) + { + foreach($indexes as $repset) + { + foreach ($repset as $index) + { + foreach ($index as $indexedFieldname => $v) + { + if (strpos($indexedFieldname, "value.") === 0) + { + $indexedFields[] = substr($indexedFieldname, strlen('value.')); + } + } + } + } + } + } + + if (count($indexedFields) > 0 && isset($generatedRow['value']) && is_array($generatedRow['value'])) + { + // Iterate over generated rows BY REFERENCE (&) - we are going to modify the contents of $field + foreach($generatedRow as &$field) + { + foreach($indexedFields as $indexedFieldname) + { + // The key will have the index name in the following format added to it. + // Adjust the max key size allowed to take it into account. + $maxKeySize = 1024 - strlen("value_".$indexedFieldname."_1"); + + if (array_key_exists($indexedFieldname, $field)) + { + // It's important that we count the number of bytes + // in the field - not just the number of characters. + // UTF-8 characters can be between 1 and 4 bytes. + // + // From the strlen documentation: + // Attention with utf8: + // $foo = "bär"; + // strlen($foo) will return 4 and not 3 as expected.. + // + // So strlen does count the bytes - not the characters. + + if (is_string($field[$indexedFieldname]) && strlen($field[$indexedFieldname]) > $maxKeySize) + { + $field[$indexedFieldname] = substr($field[$indexedFieldname],0, $maxKeySize); + } + } + } + } + } + } + /** * @param array $spec The table spec * @param array $dest The table row document to save diff --git a/test/unit/mongo/MongoTripodConfigTest.php b/test/unit/mongo/MongoTripodConfigTest.php index bb208731..8d9b5380 100644 --- a/test/unit/mongo/MongoTripodConfigTest.php +++ b/test/unit/mongo/MongoTripodConfigTest.php @@ -969,9 +969,10 @@ public function testSearchConfigNotPresent() public function testGetAllTypesInSpecifications() { $types = $this->tripodConfig->getAllTypesInSpecifications("tripod_php_testing"); - $this->assertEquals(10, count($types), "There should be 10 types based on the configured view, table and search specifications in config.json"); + $this->assertEquals(11, count($types), "There should be 11 types based on the configured view, table and search specifications in config.json"); $expectedValues = array( "acorn:Resource", + "acorn:ResourceForTruncating", "acorn:Work", "http://talisaspire.com/schema#Work2", "acorn:Work2", @@ -1326,7 +1327,7 @@ public function testTransactionLogIsWrittenToCorrectDBAndCollection() $transactionColletion = $transactionMongo->selectCollection($newConfig['transaction_log']['database'], $newConfig['transaction_log']['collection']); $transactionCount = $transactionColletion->count(); $transactionExampleDocument = $transactionColletion->findOne(); - $this->assertEquals(19, $transactionCount); + $this->assertEquals(20, $transactionCount); $this->assertContains('transaction_', $transactionExampleDocument["_id"]); } diff --git a/test/unit/mongo/MongoTripodDriverTest.php b/test/unit/mongo/MongoTripodDriverTest.php index aca37c63..9ea1ebf0 100755 --- a/test/unit/mongo/MongoTripodDriverTest.php +++ b/test/unit/mongo/MongoTripodDriverTest.php @@ -218,7 +218,7 @@ public function testDescribeResources() public function testGetCount() { $count = $this->tripod->getCount(array("rdf:type.".VALUE_URI=>"bibo:Book")); - $this->assertEquals(5,$count); + $this->assertEquals(6,$count); } public function testTripodSaveChangesRemovesLiteralTriple() @@ -1670,14 +1670,14 @@ public function testGetDistinctTableValues() $table = 't_distinct'; $this->tripod->generateTableRows($table); $rows = $this->tripod->getTableRows($table, array(), array(), 0, 0); - $this->assertEquals(7, $rows['head']['count']); + $this->assertEquals(8, $rows['head']['count']); $results = $this->tripod->getDistinctTableColumnValues($table, "value.title"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(3, $results['head']['count']); + $this->assertEquals(4, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(3, count($results['results'])); + $this->assertEquals(4, count($results['results'])); $this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']); $this->assertContains('A document title', $results['results']); $this->assertContains('Another document title', $results['results']); @@ -1696,9 +1696,9 @@ public function testGetDistinctTableValues() $results = $this->tripod->getDistinctTableColumnValues($table, "value.type"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(4, $results['head']['count']); + $this->assertEquals(5, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(4, count($results['results'])); + $this->assertEquals(5, count($results['results'])); $this->assertContains('acorn:Resource', $results['results']); $this->assertContains('acorn:Work', $results['results']); $this->assertContains('bibo:Book', $results['results']); diff --git a/test/unit/mongo/MongoTripodNQuadSerializerTest.php b/test/unit/mongo/MongoTripodNQuadSerializerTest.php index 0e01c169..b85111e4 100644 --- a/test/unit/mongo/MongoTripodNQuadSerializerTest.php +++ b/test/unit/mongo/MongoTripodNQuadSerializerTest.php @@ -93,6 +93,32 @@ public function testSerializerWithMultipleSubjects() . . . + . + . + . + . + \"1234567890123\" . + . + \"wibble\" . + . + . + . + . + . + \"Ohanian\" . + \"physics\" . + \"1234567890\" . + \"Engineering: general\" . + \"PHYSICS\" . + \"Science\" . + \"Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers.\" . + \"engineering: general\" . + \"physics\" . + \"science\" . + \"0071\" . + . + . + . \"First title\" . \"Second title\" . \"First title\" . diff --git a/test/unit/mongo/MongoTripodTablesTest.php b/test/unit/mongo/MongoTripodTablesTest.php index eb87a311..4f2abeb4 100644 --- a/test/unit/mongo/MongoTripodTablesTest.php +++ b/test/unit/mongo/MongoTripodTablesTest.php @@ -640,6 +640,31 @@ public function testGenerateTableRowsForUsersWithModifiersLowercaseDate() $this->assertEquals($rows['results'][0]['mongoDate']->__toString(), $rows['results'][0]['lowercaseDate']); } + /** + * Test table rows are tuncated if they are too large to index + * @access public + * @return void + */ + public function testGenerateTableRowsTruncatesFieldsTooLargeToIndex() + { + $fullTitle = "Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers."; + $truncatedTitle = substr($fullTitle,0,1011); // 1011 = 1024 - index name "value_title_1" + $fullTitleLength = strlen($fullTitle); + $truncatedTitleLength = strLen($truncatedTitle); + + $rows = $this->generateTableRows("t_truncation"); + + // When using Mongo 2.4 and below, the string will not have been truncated. + // Due to stricter index key enforcement in Mongo 2.6 and above, the string will have been truncated. + // Allow the test to pass for either version of Mongo + $actualLength = strlen($rows['results'][0]['title']); + $this->assertTrue($actualLength === $fullTitleLength || $actualLength === $truncatedTitleLength, "Title is an unexpected length"); + + // Assert that the title starts with the truncated title. + // This will be the case for both Mongo 2.4 and Mongo 2.6 + $this->assertTrue(strpos($rows['results'][0]['title'], $truncatedTitle) === 0, "Unexpected title"); + } + /** * Test that link modifier is derived from the joined resource id, rather than base * @access public @@ -733,14 +758,14 @@ public function testDistinct() $table = 't_distinct'; $this->generateTableRows($table); $rows = $this->tripodTables->getTableRows($table, array(), array(), 0, 0); - $this->assertEquals(7, $rows['head']['count']); + $this->assertEquals(8, $rows['head']['count']); $results = $this->tripodTables->distinct($table, "value.title"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(3, $results['head']['count']); + $this->assertEquals(4, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(3, count($results['results'])); + $this->assertEquals(4, count($results['results'])); $this->assertContains('Physics 3rd Edition: Physics for Engineers and Scientists', $results['results']); $this->assertContains('A document title', $results['results']); $this->assertContains('Another document title', $results['results']); @@ -759,9 +784,9 @@ public function testDistinct() $results = $this->tripodTables->distinct($table, "value.type"); $this->assertArrayHasKey('head', $results); $this->assertArrayHasKey('count', $results['head']); - $this->assertEquals(4, $results['head']['count']); + $this->assertEquals(5, $results['head']['count']); $this->assertArrayHasKey('results', $results); - $this->assertEquals(4, count($results['results'])); + $this->assertEquals(5, count($results['results'])); $this->assertContains('acorn:Resource', $results['results']); $this->assertContains('acorn:Work', $results['results']); $this->assertContains('bibo:Book', $results['results']); diff --git a/test/unit/mongo/data/config.json b/test/unit/mongo/data/config.json index 801b5d7e..d904a8cb 100644 --- a/test/unit/mongo/data/config.json +++ b/test/unit/mongo/data/config.json @@ -359,6 +359,43 @@ } } }, + { + "_id": "t_truncation", + "type": "acorn:ResourceForTruncating", + "from": "CBD_testing", + "ensureIndexes": [ + { + "value.isbn": 1 + }, + { + "value.title": 1 + } + ], + "fields": [ + { + "fieldName": "type", + "predicates": ["rdf:type"] + }, + { + "fieldName": "isbn", + "predicates": ["bibo:isbn13"] + }, + { + "fieldName": "title", + "predicates": ["searchterms:title"] + } + ], + "joins": { + "dct:isVersionOf": { + "fields": [ + { + "fieldName": "isbn13", + "predicates": ["bibo:isbn13"] + } + ] + } + } + }, { "_id": "t_source_count", "type": "acorn:Resource", diff --git a/test/unit/mongo/data/resources.json b/test/unit/mongo/data/resources.json index db72b423..3097b446 100644 --- a/test/unit/mongo/data/resources.json +++ b/test/unit/mongo/data/resources.json @@ -245,6 +245,117 @@ "u":"http://talisaspire.com/isbn/9780393929690" } }, + { + "_id": { + "r":"http://talisaspire.com/resources/indexKeyTooLarge", + "c":"http://talisaspire.com/" + }, + "_version": 0, + "dct:isVersionOf": + { + "u":"http://talisaspire.com/works/4d101f63c10a6" + }, + "dct:source":[ + { + "u":"http://life.ac.uk/resources/836E7CAD-63D2-63A0-B1CB-AA6A7E54A5C9" + }, + { + "u":"http://life.ac.uk/resources/BFBC6A06-A8B0-DED8-53AA-8E80DB44CC53" + } + ], + "dct:subject": + { + "u":"http://talisaspire.com/disciplines/physics" + }, + "bibo:isbn13": + { + "l":"1234567890123" + }, + "acorn:bookmarkReferences": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/bookmarks" + }, + "acorn:foo": + { + "l":"wibble" + }, + "acorn:jacsUri":[ + { + "u":"http://jacs3.dataincubator.org/f300" + }, + { + "u":"http://jacs3.dataincubator.org/f340" + } + ], + "acorn:listReferences": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/lists" + }, + "acorn:openLibraryUri": + { + "u":"http://openlibrary.org/books/OL10157958M" + }, + "acorn:preferredMetadata": + { + "u":"http://talisaspire.com/resources/3SplCtWGPqEyXcDiyhHQpA/metadata" + }, + "searchterms:author": + { + "l":"Ohanian" + }, + "searchterms:discipline": + { + "l":"physics" + }, + "searchterms:isbn": + { + "l":"1234567890" + }, + "searchterms:openLibrarySubject":[ + { + "l":"Engineering: general" + }, + { + "l":"PHYSICS" + }, + { + "l":"Science" + } + ], + "searchterms:title":[ + + { + "l":"Mahommah Gardo Baquaqua. Biography of Mahommah G. Baquaqua, a Native of Zoogoo, in the Interior of Africa. (A Convert to Christianity,) With a Description of That Part of the World; Including the Manners and Customs of the Inhabitants, Their Religious Notions, Form of Government, Laws, Appearance of the Country, Buildings, Agriculture, Manufactures, Shepherds and Herdsmen, Domestic Animals, Marriage Ceremonials, Funeral Services, Styles of Dress, Trade and Commerce, Modes of Warfare, System of Slavery, &c., &c. Mahommah's Early Life, His Education, His Capture and Slavery in Western Africa and Brazil, His Escape to the United States, from Thence to Hayti, (the City of Port Au Prince,) His Reception by the Baptist Missionary There, The Rev. W. L. Judd; His Conversion to Christianity, Baptism, and Return to This Country, His Views, Objects and Aim. Written and Revised from His Own Words, by Samuel Moore, Esq., Late Publisher of the "North of England Shipping Gazette," Author of Several Popular Works, and Editor of Sundry Reform Papers." + } + ], + "searchterms:topic":[ + { + "l":"engineering: general" + }, + { + "l":"physics" + }, + { + "l":"science" + } + ], + "searchterms:usedAt": + { + "l":"0071" + }, + "rdf:type":[ + { + "u":"http://purl.org/ontology/bibo/Book" + }, + { + "u":"http://talisaspire.com/schema#ResourceForTruncating" + } + ], + "owl:sameAs": + { + "u":"http://talisaspire.com/isbn/9780393929690" + } + }, { "_id": { "r" : "http://jacs3.dataincubator.org/f300",