mirror of
https://github.com/dutchcoders/transfer.sh.git
synced 2025-01-12 20:00:17 +01:00
298 lines
7.8 KiB
Go
298 lines
7.8 KiB
Go
// Copyright 2015 Google LLC
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package bigquery
|
|
|
|
import (
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
|
|
"cloud.google.com/go/internal/testutil"
|
|
"github.com/google/go-cmp/cmp"
|
|
"github.com/google/go-cmp/cmp/cmpopts"
|
|
bq "google.golang.org/api/bigquery/v2"
|
|
)
|
|
|
|
func defaultLoadJob() *bq.Job {
|
|
return &bq.Job{
|
|
JobReference: &bq.JobReference{JobId: "RANDOM", ProjectId: "client-project-id"},
|
|
Configuration: &bq.JobConfiguration{
|
|
Load: &bq.JobConfigurationLoad{
|
|
DestinationTable: &bq.TableReference{
|
|
ProjectId: "client-project-id",
|
|
DatasetId: "dataset-id",
|
|
TableId: "table-id",
|
|
},
|
|
SourceUris: []string{"uri"},
|
|
},
|
|
},
|
|
}
|
|
}
|
|
|
|
func stringFieldSchema() *FieldSchema {
|
|
return &FieldSchema{Name: "fieldname", Type: StringFieldType}
|
|
}
|
|
|
|
func nestedFieldSchema() *FieldSchema {
|
|
return &FieldSchema{
|
|
Name: "nested",
|
|
Type: RecordFieldType,
|
|
Schema: Schema{stringFieldSchema()},
|
|
}
|
|
}
|
|
|
|
func bqStringFieldSchema() *bq.TableFieldSchema {
|
|
return &bq.TableFieldSchema{
|
|
Name: "fieldname",
|
|
Type: "STRING",
|
|
}
|
|
}
|
|
|
|
func bqNestedFieldSchema() *bq.TableFieldSchema {
|
|
return &bq.TableFieldSchema{
|
|
Name: "nested",
|
|
Type: "RECORD",
|
|
Fields: []*bq.TableFieldSchema{bqStringFieldSchema()},
|
|
}
|
|
}
|
|
|
|
func TestLoad(t *testing.T) {
|
|
defer fixRandomID("RANDOM")()
|
|
c := &Client{projectID: "client-project-id"}
|
|
|
|
testCases := []struct {
|
|
dst *Table
|
|
src LoadSource
|
|
jobID string
|
|
location string
|
|
config LoadConfig
|
|
want *bq.Job
|
|
}{
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: NewGCSReference("uri"),
|
|
want: defaultLoadJob(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: NewGCSReference("uri"),
|
|
location: "loc",
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.JobReference.Location = "loc"
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
jobID: "ajob",
|
|
config: LoadConfig{
|
|
CreateDisposition: CreateNever,
|
|
WriteDisposition: WriteTruncate,
|
|
Labels: map[string]string{"a": "b"},
|
|
TimePartitioning: &TimePartitioning{Expiration: 1234 * time.Millisecond},
|
|
Clustering: &Clustering{Fields: []string{"cfield1"}},
|
|
DestinationEncryptionConfig: &EncryptionConfig{KMSKeyName: "keyName"},
|
|
SchemaUpdateOptions: []string{"ALLOW_FIELD_ADDITION"},
|
|
},
|
|
src: NewGCSReference("uri"),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Labels = map[string]string{"a": "b"}
|
|
j.Configuration.Load.CreateDisposition = "CREATE_NEVER"
|
|
j.Configuration.Load.WriteDisposition = "WRITE_TRUNCATE"
|
|
j.Configuration.Load.TimePartitioning = &bq.TimePartitioning{
|
|
Type: "DAY",
|
|
ExpirationMs: 1234,
|
|
}
|
|
j.Configuration.Load.Clustering = &bq.Clustering{
|
|
Fields: []string{"cfield1"},
|
|
}
|
|
j.Configuration.Load.DestinationEncryptionConfiguration = &bq.EncryptionConfiguration{KmsKeyName: "keyName"}
|
|
j.JobReference = &bq.JobReference{
|
|
JobId: "ajob",
|
|
ProjectId: "client-project-id",
|
|
}
|
|
j.Configuration.Load.SchemaUpdateOptions = []string{"ALLOW_FIELD_ADDITION"}
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *GCSReference {
|
|
g := NewGCSReference("uri")
|
|
g.MaxBadRecords = 1
|
|
g.AllowJaggedRows = true
|
|
g.AllowQuotedNewlines = true
|
|
g.IgnoreUnknownValues = true
|
|
return g
|
|
}(),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.MaxBadRecords = 1
|
|
j.Configuration.Load.AllowJaggedRows = true
|
|
j.Configuration.Load.AllowQuotedNewlines = true
|
|
j.Configuration.Load.IgnoreUnknownValues = true
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *GCSReference {
|
|
g := NewGCSReference("uri")
|
|
g.Schema = Schema{
|
|
stringFieldSchema(),
|
|
nestedFieldSchema(),
|
|
}
|
|
return g
|
|
}(),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.Schema = &bq.TableSchema{
|
|
Fields: []*bq.TableFieldSchema{
|
|
bqStringFieldSchema(),
|
|
bqNestedFieldSchema(),
|
|
}}
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *GCSReference {
|
|
g := NewGCSReference("uri")
|
|
g.SkipLeadingRows = 1
|
|
g.SourceFormat = JSON
|
|
g.Encoding = UTF_8
|
|
g.FieldDelimiter = "\t"
|
|
g.Quote = "-"
|
|
return g
|
|
}(),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.SkipLeadingRows = 1
|
|
j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
|
|
j.Configuration.Load.Encoding = "UTF-8"
|
|
j.Configuration.Load.FieldDelimiter = "\t"
|
|
hyphen := "-"
|
|
j.Configuration.Load.Quote = &hyphen
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: NewGCSReference("uri"),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
// Quote is left unset in GCSReference, so should be nil here.
|
|
j.Configuration.Load.Quote = nil
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *GCSReference {
|
|
g := NewGCSReference("uri")
|
|
g.ForceZeroQuote = true
|
|
return g
|
|
}(),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
empty := ""
|
|
j.Configuration.Load.Quote = &empty
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *ReaderSource {
|
|
r := NewReaderSource(strings.NewReader("foo"))
|
|
r.SkipLeadingRows = 1
|
|
r.SourceFormat = JSON
|
|
r.Encoding = UTF_8
|
|
r.FieldDelimiter = "\t"
|
|
r.Quote = "-"
|
|
return r
|
|
}(),
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.SourceUris = nil
|
|
j.Configuration.Load.SkipLeadingRows = 1
|
|
j.Configuration.Load.SourceFormat = "NEWLINE_DELIMITED_JSON"
|
|
j.Configuration.Load.Encoding = "UTF-8"
|
|
j.Configuration.Load.FieldDelimiter = "\t"
|
|
hyphen := "-"
|
|
j.Configuration.Load.Quote = &hyphen
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *GCSReference {
|
|
g := NewGCSReference("uri")
|
|
g.SourceFormat = Avro
|
|
return g
|
|
}(),
|
|
config: LoadConfig{
|
|
UseAvroLogicalTypes: true,
|
|
},
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.SourceFormat = "AVRO"
|
|
j.Configuration.Load.UseAvroLogicalTypes = true
|
|
return j
|
|
}(),
|
|
},
|
|
{
|
|
dst: c.Dataset("dataset-id").Table("table-id"),
|
|
src: func() *ReaderSource {
|
|
r := NewReaderSource(strings.NewReader("foo"))
|
|
r.SourceFormat = Avro
|
|
return r
|
|
}(),
|
|
config: LoadConfig{
|
|
UseAvroLogicalTypes: true,
|
|
},
|
|
want: func() *bq.Job {
|
|
j := defaultLoadJob()
|
|
j.Configuration.Load.SourceUris = nil
|
|
j.Configuration.Load.SourceFormat = "AVRO"
|
|
j.Configuration.Load.UseAvroLogicalTypes = true
|
|
return j
|
|
}(),
|
|
},
|
|
}
|
|
|
|
for i, tc := range testCases {
|
|
loader := tc.dst.LoaderFrom(tc.src)
|
|
loader.JobID = tc.jobID
|
|
loader.Location = tc.location
|
|
tc.config.Src = tc.src
|
|
tc.config.Dst = tc.dst
|
|
loader.LoadConfig = tc.config
|
|
got, _ := loader.newJob()
|
|
checkJob(t, i, got, tc.want)
|
|
|
|
jc, err := bqToJobConfig(got.Configuration, c)
|
|
if err != nil {
|
|
t.Fatalf("#%d: %v", i, err)
|
|
}
|
|
diff := testutil.Diff(jc.(*LoadConfig), &loader.LoadConfig,
|
|
cmp.AllowUnexported(Table{}, Client{}),
|
|
cmpopts.IgnoreUnexported(ReaderSource{}))
|
|
if diff != "" {
|
|
t.Errorf("#%d: (got=-, want=+:\n%s", i, diff)
|
|
}
|
|
}
|
|
}
|