2018-06-19 15:30:26 +02:00
|
|
|
// Copyright 2015 Google LLC
|
|
|
|
//
|
|
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
// you may not use this file except in compliance with the License.
|
|
|
|
// You may obtain a copy of the License at
|
|
|
|
//
|
|
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
//
|
|
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
// See the License for the specific language governing permissions and
|
|
|
|
// limitations under the License.
|
|
|
|
|
|
|
|
package bigquery
|
|
|
|
|
|
|
|
import (
|
2019-03-17 20:19:56 +01:00
|
|
|
"context"
|
2018-06-19 15:30:26 +02:00
|
|
|
"errors"
|
|
|
|
|
|
|
|
"cloud.google.com/go/internal/trace"
|
|
|
|
bq "google.golang.org/api/bigquery/v2"
|
|
|
|
)
|
|
|
|
|
|
|
|
// QueryConfig holds the configuration for a query job.
|
|
|
|
type QueryConfig struct {
|
|
|
|
// Dst is the table into which the results of the query will be written.
|
|
|
|
// If this field is nil, a temporary table will be created.
|
|
|
|
Dst *Table
|
|
|
|
|
|
|
|
// The query to execute. See https://cloud.google.com/bigquery/query-reference for details.
|
|
|
|
Q string
|
|
|
|
|
|
|
|
// DefaultProjectID and DefaultDatasetID specify the dataset to use for unqualified table names in the query.
|
|
|
|
// If DefaultProjectID is set, DefaultDatasetID must also be set.
|
|
|
|
DefaultProjectID string
|
|
|
|
DefaultDatasetID string
|
|
|
|
|
|
|
|
// TableDefinitions describes data sources outside of BigQuery.
|
|
|
|
// The map keys may be used as table names in the query string.
|
|
|
|
//
|
|
|
|
// When a QueryConfig is returned from Job.Config, the map values
|
|
|
|
// are always of type *ExternalDataConfig.
|
|
|
|
TableDefinitions map[string]ExternalData
|
|
|
|
|
|
|
|
// CreateDisposition specifies the circumstances under which the destination table will be created.
|
|
|
|
// The default is CreateIfNeeded.
|
|
|
|
CreateDisposition TableCreateDisposition
|
|
|
|
|
|
|
|
// WriteDisposition specifies how existing data in the destination table is treated.
|
|
|
|
// The default is WriteEmpty.
|
|
|
|
WriteDisposition TableWriteDisposition
|
|
|
|
|
|
|
|
// DisableQueryCache prevents results being fetched from the query cache.
|
|
|
|
// If this field is false, results are fetched from the cache if they are available.
|
|
|
|
// The query cache is a best-effort cache that is flushed whenever tables in the query are modified.
|
|
|
|
// Cached results are only available when TableID is unspecified in the query's destination Table.
|
|
|
|
// For more information, see https://cloud.google.com/bigquery/querying-data#querycaching
|
|
|
|
DisableQueryCache bool
|
|
|
|
|
|
|
|
// DisableFlattenedResults prevents results being flattened.
|
|
|
|
// If this field is false, results from nested and repeated fields are flattened.
|
|
|
|
// DisableFlattenedResults implies AllowLargeResults
|
|
|
|
// For more information, see https://cloud.google.com/bigquery/docs/data#nested
|
|
|
|
DisableFlattenedResults bool
|
|
|
|
|
|
|
|
// AllowLargeResults allows the query to produce arbitrarily large result tables.
|
|
|
|
// The destination must be a table.
|
|
|
|
// When using this option, queries will take longer to execute, even if the result set is small.
|
|
|
|
// For additional limitations, see https://cloud.google.com/bigquery/querying-data#largequeryresults
|
|
|
|
AllowLargeResults bool
|
|
|
|
|
|
|
|
// Priority specifies the priority with which to schedule the query.
|
|
|
|
// The default priority is InteractivePriority.
|
|
|
|
// For more information, see https://cloud.google.com/bigquery/querying-data#batchqueries
|
|
|
|
Priority QueryPriority
|
|
|
|
|
|
|
|
// MaxBillingTier sets the maximum billing tier for a Query.
|
|
|
|
// Queries that have resource usage beyond this tier will fail (without
|
|
|
|
// incurring a charge). If this field is zero, the project default will be used.
|
|
|
|
MaxBillingTier int
|
|
|
|
|
|
|
|
// MaxBytesBilled limits the number of bytes billed for
|
|
|
|
// this job. Queries that would exceed this limit will fail (without incurring
|
|
|
|
// a charge).
|
|
|
|
// If this field is less than 1, the project default will be
|
|
|
|
// used.
|
|
|
|
MaxBytesBilled int64
|
|
|
|
|
|
|
|
// UseStandardSQL causes the query to use standard SQL. The default.
|
|
|
|
// Deprecated: use UseLegacySQL.
|
|
|
|
UseStandardSQL bool
|
|
|
|
|
|
|
|
// UseLegacySQL causes the query to use legacy SQL.
|
|
|
|
UseLegacySQL bool
|
|
|
|
|
|
|
|
// Parameters is a list of query parameters. The presence of parameters
|
|
|
|
// implies the use of standard SQL.
|
|
|
|
// If the query uses positional syntax ("?"), then no parameter may have a name.
|
|
|
|
// If the query uses named syntax ("@p"), then all parameters must have names.
|
|
|
|
// It is illegal to mix positional and named syntax.
|
|
|
|
Parameters []QueryParameter
|
|
|
|
|
|
|
|
// TimePartitioning specifies time-based partitioning
|
|
|
|
// for the destination table.
|
|
|
|
TimePartitioning *TimePartitioning
|
|
|
|
|
2019-03-17 20:19:56 +01:00
|
|
|
// Clustering specifies the data clustering configuration for the destination table.
|
|
|
|
Clustering *Clustering
|
|
|
|
|
2018-06-19 15:30:26 +02:00
|
|
|
// The labels associated with this job.
|
|
|
|
Labels map[string]string
|
|
|
|
|
|
|
|
// If true, don't actually run this job. A valid query will return a mostly
|
|
|
|
// empty response with some processing statistics, while an invalid query will
|
|
|
|
// return the same error it would if it wasn't a dry run.
|
|
|
|
//
|
|
|
|
// Query.Read will fail with dry-run queries. Call Query.Run instead, and then
|
|
|
|
// call LastStatus on the returned job to get statistics. Calling Status on a
|
|
|
|
// dry-run job will fail.
|
|
|
|
DryRun bool
|
|
|
|
|
|
|
|
// Custom encryption configuration (e.g., Cloud KMS keys).
|
|
|
|
DestinationEncryptionConfig *EncryptionConfig
|
|
|
|
|
|
|
|
// Allows the schema of the destination table to be updated as a side effect of
|
|
|
|
// the query job.
|
|
|
|
SchemaUpdateOptions []string
|
|
|
|
}
|
|
|
|
|
|
|
|
func (qc *QueryConfig) toBQ() (*bq.JobConfiguration, error) {
|
|
|
|
qconf := &bq.JobConfigurationQuery{
|
|
|
|
Query: qc.Q,
|
|
|
|
CreateDisposition: string(qc.CreateDisposition),
|
|
|
|
WriteDisposition: string(qc.WriteDisposition),
|
|
|
|
AllowLargeResults: qc.AllowLargeResults,
|
|
|
|
Priority: string(qc.Priority),
|
|
|
|
MaximumBytesBilled: qc.MaxBytesBilled,
|
|
|
|
TimePartitioning: qc.TimePartitioning.toBQ(),
|
2019-03-17 20:19:56 +01:00
|
|
|
Clustering: qc.Clustering.toBQ(),
|
2018-06-19 15:30:26 +02:00
|
|
|
DestinationEncryptionConfiguration: qc.DestinationEncryptionConfig.toBQ(),
|
|
|
|
SchemaUpdateOptions: qc.SchemaUpdateOptions,
|
|
|
|
}
|
|
|
|
if len(qc.TableDefinitions) > 0 {
|
|
|
|
qconf.TableDefinitions = make(map[string]bq.ExternalDataConfiguration)
|
|
|
|
}
|
|
|
|
for name, data := range qc.TableDefinitions {
|
|
|
|
qconf.TableDefinitions[name] = data.toBQ()
|
|
|
|
}
|
|
|
|
if qc.DefaultProjectID != "" || qc.DefaultDatasetID != "" {
|
|
|
|
qconf.DefaultDataset = &bq.DatasetReference{
|
|
|
|
DatasetId: qc.DefaultDatasetID,
|
|
|
|
ProjectId: qc.DefaultProjectID,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if tier := int64(qc.MaxBillingTier); tier > 0 {
|
|
|
|
qconf.MaximumBillingTier = &tier
|
|
|
|
}
|
|
|
|
f := false
|
|
|
|
if qc.DisableQueryCache {
|
|
|
|
qconf.UseQueryCache = &f
|
|
|
|
}
|
|
|
|
if qc.DisableFlattenedResults {
|
|
|
|
qconf.FlattenResults = &f
|
|
|
|
// DisableFlattenResults implies AllowLargeResults.
|
|
|
|
qconf.AllowLargeResults = true
|
|
|
|
}
|
|
|
|
if qc.UseStandardSQL && qc.UseLegacySQL {
|
|
|
|
return nil, errors.New("bigquery: cannot provide both UseStandardSQL and UseLegacySQL")
|
|
|
|
}
|
|
|
|
if len(qc.Parameters) > 0 && qc.UseLegacySQL {
|
|
|
|
return nil, errors.New("bigquery: cannot provide both Parameters (implying standard SQL) and UseLegacySQL")
|
|
|
|
}
|
|
|
|
ptrue := true
|
|
|
|
pfalse := false
|
|
|
|
if qc.UseLegacySQL {
|
|
|
|
qconf.UseLegacySql = &ptrue
|
|
|
|
} else {
|
|
|
|
qconf.UseLegacySql = &pfalse
|
|
|
|
}
|
|
|
|
if qc.Dst != nil && !qc.Dst.implicitTable() {
|
|
|
|
qconf.DestinationTable = qc.Dst.toBQ()
|
|
|
|
}
|
|
|
|
for _, p := range qc.Parameters {
|
|
|
|
qp, err := p.toBQ()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
qconf.QueryParameters = append(qconf.QueryParameters, qp)
|
|
|
|
}
|
|
|
|
return &bq.JobConfiguration{
|
|
|
|
Labels: qc.Labels,
|
|
|
|
DryRun: qc.DryRun,
|
|
|
|
Query: qconf,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func bqToQueryConfig(q *bq.JobConfiguration, c *Client) (*QueryConfig, error) {
|
|
|
|
qq := q.Query
|
|
|
|
qc := &QueryConfig{
|
|
|
|
Labels: q.Labels,
|
|
|
|
DryRun: q.DryRun,
|
|
|
|
Q: qq.Query,
|
|
|
|
CreateDisposition: TableCreateDisposition(qq.CreateDisposition),
|
|
|
|
WriteDisposition: TableWriteDisposition(qq.WriteDisposition),
|
|
|
|
AllowLargeResults: qq.AllowLargeResults,
|
|
|
|
Priority: QueryPriority(qq.Priority),
|
|
|
|
MaxBytesBilled: qq.MaximumBytesBilled,
|
|
|
|
UseLegacySQL: qq.UseLegacySql == nil || *qq.UseLegacySql,
|
|
|
|
TimePartitioning: bqToTimePartitioning(qq.TimePartitioning),
|
2019-03-17 20:19:56 +01:00
|
|
|
Clustering: bqToClustering(qq.Clustering),
|
2018-06-19 15:30:26 +02:00
|
|
|
DestinationEncryptionConfig: bqToEncryptionConfig(qq.DestinationEncryptionConfiguration),
|
|
|
|
SchemaUpdateOptions: qq.SchemaUpdateOptions,
|
|
|
|
}
|
|
|
|
qc.UseStandardSQL = !qc.UseLegacySQL
|
|
|
|
|
|
|
|
if len(qq.TableDefinitions) > 0 {
|
|
|
|
qc.TableDefinitions = make(map[string]ExternalData)
|
|
|
|
}
|
|
|
|
for name, qedc := range qq.TableDefinitions {
|
|
|
|
edc, err := bqToExternalDataConfig(&qedc)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
qc.TableDefinitions[name] = edc
|
|
|
|
}
|
|
|
|
if qq.DefaultDataset != nil {
|
|
|
|
qc.DefaultProjectID = qq.DefaultDataset.ProjectId
|
|
|
|
qc.DefaultDatasetID = qq.DefaultDataset.DatasetId
|
|
|
|
}
|
|
|
|
if qq.MaximumBillingTier != nil {
|
|
|
|
qc.MaxBillingTier = int(*qq.MaximumBillingTier)
|
|
|
|
}
|
|
|
|
if qq.UseQueryCache != nil && !*qq.UseQueryCache {
|
|
|
|
qc.DisableQueryCache = true
|
|
|
|
}
|
|
|
|
if qq.FlattenResults != nil && !*qq.FlattenResults {
|
|
|
|
qc.DisableFlattenedResults = true
|
|
|
|
}
|
|
|
|
if qq.DestinationTable != nil {
|
|
|
|
qc.Dst = bqToTable(qq.DestinationTable, c)
|
|
|
|
}
|
|
|
|
for _, qp := range qq.QueryParameters {
|
|
|
|
p, err := bqToQueryParameter(qp)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
qc.Parameters = append(qc.Parameters, p)
|
|
|
|
}
|
|
|
|
return qc, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// QueryPriority is the priority with which a query is executed.
type QueryPriority string

const (
	// BatchPriority schedules the query with batch priority. BigQuery queues
	// each batch query on your behalf and starts it as soon as idle resources
	// are available, usually within a few minutes. If the query has not been
	// started within 24 hours, BigQuery changes the job priority to
	// interactive. Batch queries don't count towards your concurrent rate
	// limit, which can make it easier to start many queries at once.
	//
	// More information can be found at https://cloud.google.com/bigquery/docs/running-queries#batchqueries.
	BatchPriority QueryPriority = "BATCH"

	// InteractivePriority schedules the query with interactive priority,
	// meaning that it is executed as soon as possible. Interactive queries
	// count towards your concurrent rate limit and your daily limit. This is
	// the default priority with which queries get executed.
	//
	// More information can be found at https://cloud.google.com/bigquery/docs/running-queries#queries.
	InteractivePriority QueryPriority = "INTERACTIVE"
)
|
|
|
|
|
|
|
|
// A Query queries data from a BigQuery table. Use Client.Query to create a Query.
|
|
|
|
type Query struct {
|
|
|
|
JobIDConfig
|
|
|
|
QueryConfig
|
|
|
|
client *Client
|
|
|
|
}
|
|
|
|
|
|
|
|
// Query creates a query with string q.
|
|
|
|
// The returned Query may optionally be further configured before its Run method is called.
|
|
|
|
func (c *Client) Query(q string) *Query {
|
|
|
|
return &Query{
|
|
|
|
client: c,
|
|
|
|
QueryConfig: QueryConfig{Q: q},
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Run initiates a query job.
|
|
|
|
func (q *Query) Run(ctx context.Context) (j *Job, err error) {
|
|
|
|
ctx = trace.StartSpan(ctx, "cloud.google.com/go/bigquery.Query.Run")
|
|
|
|
defer func() { trace.EndSpan(ctx, err) }()
|
|
|
|
|
|
|
|
job, err := q.newJob()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
j, err = q.client.insertJob(ctx, job, nil)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return j, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
func (q *Query) newJob() (*bq.Job, error) {
|
|
|
|
config, err := q.QueryConfig.toBQ()
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return &bq.Job{
|
|
|
|
JobReference: q.JobIDConfig.createJobRef(q.client),
|
|
|
|
Configuration: config,
|
|
|
|
}, nil
|
|
|
|
}
|
|
|
|
|
|
|
|
// Read submits a query for execution and returns the results via a RowIterator.
|
|
|
|
// It is a shorthand for Query.Run followed by Job.Read.
|
|
|
|
func (q *Query) Read(ctx context.Context) (*RowIterator, error) {
|
|
|
|
job, err := q.Run(ctx)
|
|
|
|
if err != nil {
|
|
|
|
return nil, err
|
|
|
|
}
|
|
|
|
return job.Read(ctx)
|
|
|
|
}
|