Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions models/claim_breadth/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ files
```
export GCP_PROJECT=`gcloud config get-value project`
export BUCKET=gs://[YOUR BUCKET NAME]
gsutil mb $BUCKET
gcloud storage buckets create $BUCKET
```

#### Enable relevant APIs in the GCP console.
Expand Down Expand Up @@ -103,7 +103,7 @@ bq --project=$GCP_PROJECT query --max_rows=100000 --format=csv "$(cat generate_e
sed -i '2 d' cpc_embedding_vocab.txt
sed -i '/^\s*$/d' cpc_embedding_vocab.txt
# Copy to GCS for use in training and remove local copy.
gsutil cp ./cpc_embedding_vocab.txt $BUCKET
gcloud storage cp ./cpc_embedding_vocab.txt $BUCKET
rm ./cpc_embedding_vocab.txt
```

Expand Down Expand Up @@ -200,7 +200,7 @@ trained model files from GCP and set up a model version on cloud ML:
```
export MODEL_NAME=patent_claims
export VERSION='v1'
export SAVED_MODEL=`gsutil ls -d "$GCS_JOB_DIR/export/model/[0-9]*/"`
export SAVED_MODEL=`gcloud storage ls "$GCS_JOB_DIR/export/model/[0-9]*/"`
gcloud ml-engine models create $MODEL_NAME
gcloud ml-engine versions create $VERSION --model $MODEL_NAME --origin $SAVED_MODEL --runtime-version=1.4
export MODEL_VERSION_STR="$MODEL_NAME/versions/$VERSION"
Expand Down
16 changes: 8 additions & 8 deletions tools/csv_upload.pysh
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ import csv
parser = argparse.ArgumentParser(description="Upload a CSV file to a BigQuery table")
parser.add_argument("--dry_run", default=False, action="store_true", help="Do not upload.")
parser.add_argument("--bq_bin", default="bq", help="Path to the BigQuery CLI")
parser.add_argument("--gsutil_bin", default="gsutil", help="Path to the GSUtil CLI")
parser.add_argument("--gcloud_bin", default="gcloud", help="Path to the gcloud CLI")
parser.add_argument("--project_id", default="", help="Google Cloud Project ID to store temporary Google Cloud Storage files in. If empty, uses the project from the table name.")
parser.add_argument("--storage_bucket", default="", help="Google Cloud Storage bucket name. This bucket must be in the same region as --location. If empty, creates a new bucket under this project_id.")
parser.add_argument("--overwrite", default=False, action="store_true", help="Overwrite the table if it exists.")
Expand Down Expand Up @@ -94,7 +94,7 @@ if args.header and len(table_files) > 1:

# Upload to bucket.
# Clear bucket space
gsutil = sh.Command(args.gsutil_bin)
gcloud = sh.Command(args.gcloud_bin)

project_id = args.project_id
if not project_id:
Expand All @@ -112,18 +112,18 @@ if not bucket:
bucket = "gs://" + bucket

try:
gsutil("ls", bucket)
gcloud("storage", "ls", bucket)
print("Bucket %s exists" % bucket)
except:
if args.location == "EU":
bucket_location = "europe-west1"
else:
bucket_location = "us-east1"

mb_args = ["mb", "-c", "regional", "-l", bucket_location, "-p", project_id, bucket]
print("gsutil %s" % mb_args)
mb_args = ["storage", "buckets", "create", bucket, "--default-storage-class", "regional", "--location", bucket_location, "--project", project_id]
print("gcloud %s" % " ".join(mb_args))
if not args.dry_run:
gsutil(*mb_args)
gcloud(*mb_args)
print("Created new bucket")

# Split into 4 GB chunks, gzip, and upload the CSV files. Skip the header lines.
Expand Down Expand Up @@ -159,7 +159,7 @@ class Splitter:
self.upload_paths.append(path_split)
self.parts += 1
print("Uploading %s" % path_split)
self.upload_proc = gsutil(gzip_pipe, "cp", "-", path_split, _in_bufsize=buf, _bg=True, _internal_bufsize=16 * 2 ** 20)
self.upload_proc = gcloud(gzip_pipe, "storage", "cp", "-", path_split, _in_bufsize=buf, _bg=True, _internal_bufsize=16 * 2 ** 20)
print("Upload proc: %s" % self.upload_proc.pid)

self.size += chunk_size
Expand Down Expand Up @@ -277,5 +277,5 @@ for table in sorted(table_files.keys()):
if not args.dry_run:
bq(*bq_args)
print("Removing uploaded files %s" % uploaded_paths)
gsutil("rm", *uploaded_paths)
gcloud("storage", "rm", *uploaded_paths)
print("Done creating %s" % table)