158 lines
4.0 KiB
Go
158 lines
4.0 KiB
Go
// Package mbigquery connects to Google's BigQuery service and simplifies a
// number of interactions with it.
|
|
package mbigquery
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mediocregopher/mediocre-go-lib/m"
|
|
"github.com/mediocregopher/mediocre-go-lib/mcfg"
|
|
"github.com/mediocregopher/mediocre-go-lib/mdb"
|
|
"github.com/mediocregopher/mediocre-go-lib/mlog"
|
|
|
|
"cloud.google.com/go/bigquery"
|
|
"google.golang.org/api/googleapi"
|
|
)
|
|
|
|
// TODO this file needs tests
|
|
|
|
func isErrAlreadyExists(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
if gerr, ok := err.(*googleapi.Error); ok && gerr.Code == 409 {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// BigQuery is a wrapper around a bigquery client providing more functionality.
|
|
type BigQuery struct {
|
|
*bigquery.Client
|
|
gce *mdb.GCE
|
|
log *mlog.Logger
|
|
|
|
// key is dataset/tableName
|
|
tablesL sync.Mutex
|
|
tables map[[2]string]*bigquery.Table
|
|
tableUploaders map[[2]string]*bigquery.Uploader
|
|
}
|
|
|
|
// Cfg configures and returns a BigQuery instance which will be usable once Run
|
|
// is called on the passed in Cfg instance.
|
|
func Cfg(cfg *mcfg.Cfg) *BigQuery {
|
|
cfg = cfg.Child("bigquery")
|
|
bq := BigQuery{
|
|
gce: mdb.CfgGCE(cfg),
|
|
tables: map[[2]string]*bigquery.Table{},
|
|
tableUploaders: map[[2]string]*bigquery.Uploader{},
|
|
}
|
|
bq.log = m.Log(cfg, &bq)
|
|
cfg.Start.Then(func(ctx context.Context) error {
|
|
bq.log.Info("connecting to bigquery")
|
|
var err error
|
|
bq.Client, err = bigquery.NewClient(ctx, bq.gce.Project, bq.gce.ClientOptions()...)
|
|
return mlog.ErrWithKV(err, &bq)
|
|
})
|
|
return &bq
|
|
}
|
|
|
|
// KV implements the mlog.KVer interface.
|
|
func (bq *BigQuery) KV() mlog.KV {
|
|
return bq.gce.KV()
|
|
}
|
|
|
|
// Table initializes and returns the table instance with the given dataset and
|
|
// schema information. This method caches the Table/Uploader instances it
|
|
// returns, so multiple calls with the same dataset/tableName will only actually
|
|
// create those instances on the first call.
|
|
func (bq *BigQuery) Table(
|
|
ctx context.Context,
|
|
dataset, tableName string,
|
|
schemaObj interface{},
|
|
) (
|
|
*bigquery.Table, *bigquery.Uploader, error,
|
|
) {
|
|
bq.tablesL.Lock()
|
|
defer bq.tablesL.Unlock()
|
|
|
|
key := [2]string{dataset, tableName}
|
|
if table, ok := bq.tables[key]; ok {
|
|
return table, bq.tableUploaders[key], nil
|
|
}
|
|
|
|
kv := mlog.KV{"dataset": dataset, "table": tableName}
|
|
bq.log.Debug("creating/grabbing table", kv)
|
|
|
|
schema, err := bigquery.InferSchema(schemaObj)
|
|
if err != nil {
|
|
return nil, nil, mlog.ErrWithKV(err, bq, kv)
|
|
}
|
|
|
|
ds := bq.Dataset(dataset)
|
|
if err := ds.Create(ctx, nil); err != nil && !isErrAlreadyExists(err) {
|
|
return nil, nil, mlog.ErrWithKV(err, bq, kv)
|
|
}
|
|
|
|
table := ds.Table(tableName)
|
|
meta := &bigquery.TableMetadata{
|
|
Name: tableName,
|
|
Schema: schema,
|
|
}
|
|
if err := table.Create(ctx, meta); err != nil && !isErrAlreadyExists(err) {
|
|
return nil, nil, mlog.ErrWithKV(err, bq, kv)
|
|
}
|
|
uploader := table.Uploader()
|
|
|
|
bq.tables[key] = table
|
|
bq.tableUploaders[key] = uploader
|
|
return table, uploader, nil
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
// timeFormat is the layout used for the textual form of Time. It has second
// precision, so fractional seconds are dropped when marshaling.
const timeFormat = "2006-01-02 15:04:05 MST"

// Time wraps a time.Time object and provides marshaling/unmarshaling for
// bigquery's time format.
type Time struct {
	time.Time
}

// MarshalText implements the encoding.TextMarshaler interface.
//
// The time is converted to UTC before being formatted. Formatting in the
// time's own location would emit a zone abbreviation (e.g. "PST"), and
// time.Parse can only resolve an abbreviation to an offset if the current
// location happens to know it; otherwise it assumes a zero offset and the
// marshal/unmarshal round trip silently shifts the instant. "UTC" is always
// parsed unambiguously.
func (t Time) MarshalText() ([]byte, error) {
	return []byte(t.Time.UTC().Format(timeFormat)), nil
}

// UnmarshalText implements the encoding.TextUnmarshaler interface. It returns
// the error from time.Parse, leaving t untouched, if b is not in timeFormat.
func (t *Time) UnmarshalText(b []byte) error {
	tt, err := time.Parse(timeFormat, string(b))
	if err != nil {
		return err
	}
	t.Time = tt
	return nil
}

// MarshalJSON implements the json.Marshaler interface, encoding the output of
// MarshalText as a JSON string.
//
// It uses a value receiver, like MarshalText, so that both Time and *Time
// satisfy json.Marshaler.
func (t Time) MarshalJSON() ([]byte, error) {
	b, err := t.MarshalText()
	if err != nil {
		return nil, err
	}
	return json.Marshal(string(b))
}

// UnmarshalJSON implements the json.Unmarshaler interface. It expects a JSON
// string in timeFormat. A JSON null is a no-op which leaves t untouched,
// following the json.Unmarshaler convention and matching the embedded
// time.Time.
func (t *Time) UnmarshalJSON(b []byte) error {
	if string(b) == "null" {
		return nil
	}

	var str string
	if err := json.Unmarshal(b, &str); err != nil {
		return err
	}
	return t.UnmarshalText([]byte(str))
}
|