mediocre-go-lib/mdb/mbigquery/bigquery.go
Brian Picciano 4b446a0efc mctx: refactor so that contexts no longer carry mutable data
This change required refactoring nearly every package in this project,
but it does a lot to simplify mctx and make other code using it easier
to think about.

Other code, such as mlog and mcfg, had to be slightly modified for this
change to work as well.
2019-02-07 19:42:12 -05:00

174 lines
4.7 KiB
Go

// Package mbigquery implements connecting to Google's BigQuery service and
// simplifying a number of interactions with it.
package mbigquery
import (
"context"
"encoding/json"
"sync"
"time"
"github.com/mediocregopher/mediocre-go-lib/mctx"
"github.com/mediocregopher/mediocre-go-lib/mdb"
"github.com/mediocregopher/mediocre-go-lib/merr"
"github.com/mediocregopher/mediocre-go-lib/mlog"
"github.com/mediocregopher/mediocre-go-lib/mrun"
"cloud.google.com/go/bigquery"
"google.golang.org/api/googleapi"
)
// TODO this file needs tests
func isErrAlreadyExists(err error) bool {
if err == nil {
return false
}
if gerr, ok := err.(*googleapi.Error); ok && gerr.Code == 409 {
return true
}
return false
}
// BigQuery is a wrapper around a bigquery client providing more functionality.
type BigQuery struct {
*bigquery.Client
gce *mdb.GCE
ctx context.Context
// key is dataset/tableName
tablesL sync.Mutex
tables map[[2]string]*bigquery.Table
tableUploaders map[[2]string]*bigquery.Uploader
}
// MNew returns a BigQuery instance which will be initialized and configured
// when the start event is triggered on the returned (see mrun.Start). The
// BigQuery instance will have Close called on it when the stop event is
// triggered on the returned Context (see mrun.Stop).
//
// gce is optional and can be passed in if there's an existing gce object which
// should be used, otherwise a new one will be created with mdb.MGCE.
func MNew(ctx context.Context, gce *mdb.GCE) (context.Context, *BigQuery) {
if gce == nil {
ctx, gce = mdb.MGCE(ctx, "")
}
bq := &BigQuery{
gce: gce,
tables: map[[2]string]*bigquery.Table{},
tableUploaders: map[[2]string]*bigquery.Uploader{},
ctx: mctx.NewChild(ctx, "bigquery"),
}
// TODO the equivalent functionality as here will be added with annotations
// bq.log.SetKV(bq)
bq.ctx = mrun.OnStart(bq.ctx, func(innerCtx context.Context) error {
mlog.Info(bq.ctx, "connecting to bigquery")
var err error
bq.Client, err = bigquery.NewClient(innerCtx, bq.gce.Project, bq.gce.ClientOptions()...)
return merr.WithKV(err, bq.KV())
})
bq.ctx = mrun.OnStop(bq.ctx, func(context.Context) error {
return bq.Client.Close()
})
return mctx.WithChild(ctx, bq.ctx), bq
}
// KV implements the mlog.KVer interface.
func (bq *BigQuery) KV() map[string]interface{} {
return bq.gce.KV()
}
// Table initializes and returns the table instance with the given dataset and
// schema information. This method caches the Table/Uploader instances it
// returns, so multiple calls with the same dataset/tableName will only actually
// create those instances on the first call.
func (bq *BigQuery) Table(
ctx context.Context,
dataset, tableName string,
schemaObj interface{},
) (
*bigquery.Table, *bigquery.Uploader, error,
) {
bq.tablesL.Lock()
defer bq.tablesL.Unlock()
key := [2]string{dataset, tableName}
if table, ok := bq.tables[key]; ok {
return table, bq.tableUploaders[key], nil
}
kv := mlog.KV{"bigQueryDataset": dataset, "bigQueryTable": tableName}
mlog.Debug(bq.ctx, "creating/grabbing table", kv)
schema, err := bigquery.InferSchema(schemaObj)
if err != nil {
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
}
ds := bq.Dataset(dataset)
if err := ds.Create(ctx, nil); err != nil && !isErrAlreadyExists(err) {
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
}
table := ds.Table(tableName)
meta := &bigquery.TableMetadata{
Name: tableName,
Schema: schema,
}
if err := table.Create(ctx, meta); err != nil && !isErrAlreadyExists(err) {
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
}
uploader := table.Uploader()
bq.tables[key] = table
bq.tableUploaders[key] = uploader
return table, uploader, nil
}
////////////////////////////////////////////////////////////////////////////////
const timeFormat = "2006-01-02 15:04:05 MST"
// Time wraps a time.Time object and provides marshaling/unmarshaling for
// bigquery's time format.
type Time struct {
time.Time
}
// MarshalText implements the encoding.TextMarshaler interface.
func (t Time) MarshalText() ([]byte, error) {
str := t.Time.Format(timeFormat)
return []byte(str), nil
}
// UnmarshalText implements the encoding.TextUnmarshaler interface.
func (t *Time) UnmarshalText(b []byte) error {
tt, err := time.Parse(timeFormat, string(b))
if err != nil {
return err
}
t.Time = tt
return nil
}
// MarshalJSON implements the json.Marshaler interface.
func (t *Time) MarshalJSON() ([]byte, error) {
b, err := t.MarshalText()
if err != nil {
return nil, err
}
return json.Marshal(string(b))
}
// UnmarshalJSON implements the json.Unmarshaler interface.
func (t *Time) UnmarshalJSON(b []byte) error {
var str string
if err := json.Unmarshal(b, &str); err != nil {
return err
}
return t.UnmarshalText([]byte(str))
}