|
| 1 | +// Copyright 2019 The TensorFlow Authors. All Rights Reserved. |
| 2 | +// |
| 3 | +// Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +// you may not use this file except in compliance with the License. |
| 5 | +// You may obtain a copy of the License at |
| 6 | +// |
| 7 | +// http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +// |
| 9 | +// Unless required by applicable law or agreed to in writing, software |
| 10 | +// distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +// See the License for the specific language governing permissions and |
| 13 | +// limitations under the License. |
| 14 | +// Original source: |
| 15 | +// "The MNIST database of handwritten digits" |
| 16 | +// Yann LeCun, Corinna Cortes, and Christopher J.C. Burges |
| 17 | +// http://yann.lecun.com/exdb/mnist/ |
| 18 | +import Foundation |
| 19 | +import TensorFlow |
| 20 | + |
/// The MNIST database of handwritten digits, exposed as a training split and a test split.
public struct MNIST: ImageClassificationDataset {
    /// Examples loaded from the `train-images-idx3-ubyte` / `train-labels-idx1-ubyte` resources.
    public let trainingDataset: Dataset<LabeledExample>
    /// Examples loaded from the `t10k-images-idx3-ubyte` / `t10k-labels-idx1-ubyte` resources.
    public let testDataset: Dataset<LabeledExample>
    /// Declared number of examples in the training split.
    public let trainingExampleCount = 60000
    /// Declared number of examples in the test split.
    public let testExampleCount = 10000

    /// Creates the dataset with `flattening` and `normalizing` both disabled and the default
    /// local storage directory.
    public init() {
        self.init(flattening: false, normalizing: false)
    }

    /// Creates the dataset, loading both splits eagerly.
    ///
    /// - Parameters:
    ///   - flattening: Forwarded to the file-private `fetchDataset` loader for both splits.
    ///   - normalizing: Forwarded to the file-private `fetchDataset` loader for both splits.
    ///   - localStorageDirectory: Directory handed to the loader for local storage of the
    ///     resource files; defaults to an `MNIST` folder inside the temporary directory.
    public init(
        flattening: Bool = false, normalizing: Bool = false,
        localStorageDirectory: URL = FileManager.default.temporaryDirectory.appendingPathComponent(
            "MNIST")
    ) {
        // Both splits are loaded the same way and differ only in the resource file names.
        func loadSplit(images: String, labels: String) -> Dataset<LabeledExample> {
            let examples = fetchDataset(
                localStorageDirectory: localStorageDirectory,
                imagesFilename: images,
                labelsFilename: labels,
                flattening: flattening,
                normalizing: normalizing)
            return Dataset<LabeledExample>(elements: examples)
        }

        trainingDataset = loadSplit(
            images: "train-images-idx3-ubyte",
            labels: "train-labels-idx1-ubyte")
        testDataset = loadSplit(
            images: "t10k-images-idx3-ubyte",
            labels: "t10k-labels-idx1-ubyte")
    }
}
| 53 | + |
/// Loads one MNIST split (an images file plus its labels file) into a single `LabeledExample`.
///
/// The two resources are obtained through `DatasetUtilities.fetchResource` with the `gz`
/// file extension; the returned bytes are treated as raw IDX file contents (presumably already
/// decompressed by the helper — confirm against `DatasetUtilities`).
///
/// - Parameters:
///   - localStorageDirectory: Directory passed to `DatasetUtilities.fetchResource` for local
///     storage of the resource files.
///   - imagesFilename: Name of the images resource, without the `gz` extension.
///   - labelsFilename: Name of the labels resource, without the `gz` extension.
///   - flattening: When `true`, images have shape `[count, 784]`; otherwise they have the
///     NHWC shape `[count, 28, 28, 1]`.
///   - normalizing: When `true`, pixel values are rescaled from `[0, 1]` to `[-1, 1]`. This is
///     applied for both layouts.
/// - Returns: A `LabeledExample` whose `label` holds the `Int32` labels and whose `data` holds
///   the `Float` pixel values divided by 255.
fileprivate func fetchDataset(
    localStorageDirectory: URL,
    imagesFilename: String,
    labelsFilename: String,
    flattening: Bool,
    normalizing: Bool
) -> LabeledExample {
    guard let remoteRoot = URL(string: "https://storage.googleapis.com/cvdf-datasets/mnist") else {
        fatalError("Failed to create MNIST root url: https://storage.googleapis.com/cvdf-datasets/mnist")
    }

    let imagesData = DatasetUtilities.fetchResource(
        filename: imagesFilename,
        fileExtension: "gz",
        remoteRoot: remoteRoot,
        localStorageDirectory: localStorageDirectory)
    let labelsData = DatasetUtilities.fetchResource(
        filename: labelsFilename,
        fileExtension: "gz",
        remoteRoot: remoteRoot,
        localStorageDirectory: localStorageDirectory)

    // Skip the fixed-size file headers: 16 bytes for the images file, 8 for the labels file.
    let images = [UInt8](imagesData).dropFirst(16).map(Float.init)
    let labels = [UInt8](labelsData).dropFirst(8).map(Int32.init)

    let rowCount = labels.count
    let (imageWidth, imageHeight) = (28, 28)

    // Fail with a descriptive message instead of an opaque shape mismatch inside `Tensor`.
    precondition(
        images.count == rowCount * imageHeight * imageWidth,
        "MNIST image data has \(images.count) pixels, expected "
            + "\(rowCount * imageHeight * imageWidth) for \(rowCount) labels")

    var imageTensor: Tensor<Float>
    if flattening {
        imageTensor = Tensor(shape: [rowCount, imageHeight * imageWidth], scalars: images) / 255.0
    } else {
        // Single-channel data: NHWC with one channel has the same element order as NCHW,
        // so the scalars can be laid out directly without a transpose.
        imageTensor =
            Tensor(shape: [rowCount, imageHeight, imageWidth, 1], scalars: images) / 255.0
    }

    // Apply normalization regardless of layout; previously it was skipped when not flattening.
    if normalizing {
        imageTensor = imageTensor * 2.0 - 1.0
    }

    return LabeledExample(label: Tensor(labels), data: imageTensor)
}
0 commit comments