4b446a0efc
This change required refactoring nearly every package in this project, but it does a lot to simplify mctx and make other code using it easier to think about. Other code, such as mlog and mcfg, had to be slightly modified for this change to work as well.
174 lines
4.7 KiB
Go
174 lines
4.7 KiB
Go
// Package mbigquery implements connecting to Google's BigQuery service and
|
|
// simplifying a number of interactions with it.
|
|
package mbigquery
|
|
|
|
import (
|
|
"context"
|
|
"encoding/json"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/mediocregopher/mediocre-go-lib/mctx"
|
|
"github.com/mediocregopher/mediocre-go-lib/mdb"
|
|
"github.com/mediocregopher/mediocre-go-lib/merr"
|
|
"github.com/mediocregopher/mediocre-go-lib/mlog"
|
|
"github.com/mediocregopher/mediocre-go-lib/mrun"
|
|
|
|
"cloud.google.com/go/bigquery"
|
|
"google.golang.org/api/googleapi"
|
|
)
|
|
|
|
// TODO this file needs tests
|
|
|
|
func isErrAlreadyExists(err error) bool {
|
|
if err == nil {
|
|
return false
|
|
}
|
|
if gerr, ok := err.(*googleapi.Error); ok && gerr.Code == 409 {
|
|
return true
|
|
}
|
|
return false
|
|
}
|
|
|
|
// BigQuery is a wrapper around a bigquery client providing more functionality.
|
|
type BigQuery struct {
|
|
*bigquery.Client
|
|
gce *mdb.GCE
|
|
ctx context.Context
|
|
|
|
// key is dataset/tableName
|
|
tablesL sync.Mutex
|
|
tables map[[2]string]*bigquery.Table
|
|
tableUploaders map[[2]string]*bigquery.Uploader
|
|
}
|
|
|
|
// MNew returns a BigQuery instance which will be initialized and configured
|
|
// when the start event is triggered on the returned (see mrun.Start). The
|
|
// BigQuery instance will have Close called on it when the stop event is
|
|
// triggered on the returned Context (see mrun.Stop).
|
|
//
|
|
// gce is optional and can be passed in if there's an existing gce object which
|
|
// should be used, otherwise a new one will be created with mdb.MGCE.
|
|
func MNew(ctx context.Context, gce *mdb.GCE) (context.Context, *BigQuery) {
|
|
if gce == nil {
|
|
ctx, gce = mdb.MGCE(ctx, "")
|
|
}
|
|
|
|
bq := &BigQuery{
|
|
gce: gce,
|
|
tables: map[[2]string]*bigquery.Table{},
|
|
tableUploaders: map[[2]string]*bigquery.Uploader{},
|
|
ctx: mctx.NewChild(ctx, "bigquery"),
|
|
}
|
|
|
|
// TODO the equivalent functionality as here will be added with annotations
|
|
// bq.log.SetKV(bq)
|
|
|
|
bq.ctx = mrun.OnStart(bq.ctx, func(innerCtx context.Context) error {
|
|
mlog.Info(bq.ctx, "connecting to bigquery")
|
|
var err error
|
|
bq.Client, err = bigquery.NewClient(innerCtx, bq.gce.Project, bq.gce.ClientOptions()...)
|
|
return merr.WithKV(err, bq.KV())
|
|
})
|
|
bq.ctx = mrun.OnStop(bq.ctx, func(context.Context) error {
|
|
return bq.Client.Close()
|
|
})
|
|
return mctx.WithChild(ctx, bq.ctx), bq
|
|
}
|
|
|
|
// KV implements the mlog.KVer interface.
|
|
func (bq *BigQuery) KV() map[string]interface{} {
|
|
return bq.gce.KV()
|
|
}
|
|
|
|
// Table initializes and returns the table instance with the given dataset and
|
|
// schema information. This method caches the Table/Uploader instances it
|
|
// returns, so multiple calls with the same dataset/tableName will only actually
|
|
// create those instances on the first call.
|
|
func (bq *BigQuery) Table(
|
|
ctx context.Context,
|
|
dataset, tableName string,
|
|
schemaObj interface{},
|
|
) (
|
|
*bigquery.Table, *bigquery.Uploader, error,
|
|
) {
|
|
bq.tablesL.Lock()
|
|
defer bq.tablesL.Unlock()
|
|
|
|
key := [2]string{dataset, tableName}
|
|
if table, ok := bq.tables[key]; ok {
|
|
return table, bq.tableUploaders[key], nil
|
|
}
|
|
|
|
kv := mlog.KV{"bigQueryDataset": dataset, "bigQueryTable": tableName}
|
|
mlog.Debug(bq.ctx, "creating/grabbing table", kv)
|
|
|
|
schema, err := bigquery.InferSchema(schemaObj)
|
|
if err != nil {
|
|
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
|
|
}
|
|
|
|
ds := bq.Dataset(dataset)
|
|
if err := ds.Create(ctx, nil); err != nil && !isErrAlreadyExists(err) {
|
|
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
|
|
}
|
|
|
|
table := ds.Table(tableName)
|
|
meta := &bigquery.TableMetadata{
|
|
Name: tableName,
|
|
Schema: schema,
|
|
}
|
|
if err := table.Create(ctx, meta); err != nil && !isErrAlreadyExists(err) {
|
|
return nil, nil, merr.WithKV(err, bq.KV(), kv.KV())
|
|
}
|
|
uploader := table.Uploader()
|
|
|
|
bq.tables[key] = table
|
|
bq.tableUploaders[key] = uploader
|
|
return table, uploader, nil
|
|
}
|
|
|
|
////////////////////////////////////////////////////////////////////////////////
|
|
|
|
const timeFormat = "2006-01-02 15:04:05 MST"
|
|
|
|
// Time wraps a time.Time object and provides marshaling/unmarshaling for
|
|
// bigquery's time format.
|
|
type Time struct {
|
|
time.Time
|
|
}
|
|
|
|
// MarshalText implements the encoding.TextMarshaler interface.
|
|
func (t Time) MarshalText() ([]byte, error) {
|
|
str := t.Time.Format(timeFormat)
|
|
return []byte(str), nil
|
|
}
|
|
|
|
// UnmarshalText implements the encoding.TextUnmarshaler interface.
|
|
func (t *Time) UnmarshalText(b []byte) error {
|
|
tt, err := time.Parse(timeFormat, string(b))
|
|
if err != nil {
|
|
return err
|
|
}
|
|
t.Time = tt
|
|
return nil
|
|
}
|
|
|
|
// MarshalJSON implements the json.Marshaler interface.
|
|
func (t *Time) MarshalJSON() ([]byte, error) {
|
|
b, err := t.MarshalText()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
return json.Marshal(string(b))
|
|
}
|
|
|
|
// UnmarshalJSON implements the json.Unmarshaler interface.
|
|
func (t *Time) UnmarshalJSON(b []byte) error {
|
|
var str string
|
|
if err := json.Unmarshal(b, &str); err != nil {
|
|
return err
|
|
}
|
|
return t.UnmarshalText([]byte(str))
|
|
}
|