Files
cloud-services/pkg/remotefileupload/batchuploader.go

260 lines
8.5 KiB
Go

package remotefileupload
import (
"bytes"
"context"
"fmt"
"net/url"
"sync"
"time"
"fiskerinc.com/modules/logger"
"fiskerinc.com/modules/utils/envtool"
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
"github.com/pkg/errors"
"fiskerinc.com/modules/utils/elptr"
)
// Notes on earlier versions of this uploader:
//   - Using a buffer instead of a byte array: it would randomly drop one number.
//     I was doing buffer.Write(separator), buffer.Write(x) and would get
//     something like 4,3,,8,1.
//   - Using an RWMutex on the map and a mutex on each []byte (read-lock to check
//     out the item, then write-lock if we were assigning): I couldn't get it to
//     work. It would drop numbers as well.
//   - Using the sync on the inner string proves to be slightly more performant
//     than locking the whole thing.
// Package-level switches and limits used by the batch uploader.
var (
// RunBatchTimer controls whether NewAzureBatchUploader starts the ticker
// goroutine that periodically calls uploadNow. Upload can still flush a body
// early when it grows past batchMaxSize, independent of this switch.
RunBatchTimer = true // For local testing, if you don't want to upload to azure, set to false
// ConnectToAzBlob controls whether a shared key credential is created in
// NewAzureBatchUploader and whether Upload generates a SAS token for new blobs.
ConnectToAzBlob = true // For local testing, set to false to not use credentials
// batchMaxSize is the pending-body size, in MiB, above which Upload flushes a
// blob's batch immediately instead of waiting for the ticker.
// NOTE(review): presumably 2 is the fallback when the environment variable is
// unset — confirm against envtool.GetEnvInt.
batchMaxSize = envtool.GetEnvInt("AZURE_STORAGE_BATCH_UPLOAD_MAX_MIB", 2)
)
// NewAzureBatchUploader builds an Uploader that buffers blocks in memory and
// appends them to Azure append blobs in batches.
//
// azureStorageContainerName is the target container, azureFileExtension is the
// suffix appended to the "raw" blob file name, and batchSeparator is written
// between blocks queued for the same blob. messageBatchTimeSeconds is the
// interval between periodic flushes; it is only used when RunBatchTimer is
// true, and in that case it must be positive because time.NewTicker panics on
// a non-positive duration.
//
// When ConnectToAzBlob is true a shared key credential is created from the
// package-level account name and key. If that fails, the error is returned
// together with the partially initialised uploader, as before.
//
// The flush goroutine started here has no stop mechanism and the ticker is
// never stopped, so both live for the remainder of the process.
func NewAzureBatchUploader(azureStorageContainerName string, azureFileExtension string, messageBatchTimeSeconds int, batchSeparator string) (Uploader, error) {
	// Reject an unusable interval up front instead of letting NewTicker panic.
	if RunBatchTimer && messageBatchTimeSeconds <= 0 {
		return nil, errors.Errorf("batch interval must be positive, got %d seconds", messageBatchTimeSeconds)
	}

	a := &AzureBatchUploader{
		accountName:   azureAccount,
		containerName: azureStorageContainerName,
		fileExtension: azureFileExtension,
	}

	var cred *azblob.SharedKeyCredential
	if ConnectToAzBlob {
		var err error
		cred, err = azblob.NewSharedKeyCredential(a.accountName, azureAccountKey)
		if err != nil {
			return a, errors.WithStack(err)
		}
	}

	a.containerPath = fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName)
	a.azureCredentials = cred
	a.separator = []byte(batchSeparator)
	a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{}}

	if RunBatchTimer {
		ticker := time.NewTicker(time.Duration(messageBatchTimeSeconds) * time.Second)
		a.batchTicker = ticker
		go func() {
			// Flush every pending batch on each tick.
			for range ticker.C {
				a.uploadNow()
			}
		}()
	}
	return a, nil
}
// AzureBatchUploader accumulates uploaded blocks in memory, grouped per blob
// URL, and appends them to Azure append blobs in batches rather than sending
// each block as it arrives.
type AzureBatchUploader struct {
// accountName is the storage account used for the credential and container URL.
accountName string
// containerName is the blob container that batches are written to.
containerName string
// fileExtension is appended to "raw" to form the blob file name.
fileExtension string
// containerPath is the container URL, "https://<account>.blob.core.windows.net/<container>/".
containerPath string
// azureCredentials stays nil when ConnectToAzBlob is false.
azureCredentials *azblob.SharedKeyCredential
logsToSend *logsMapMutex // pending bodies keyed by blob URL, guarded by its mutex
// separator is written between blocks queued for the same blob and once more
// at the end of every uploaded batch.
separator []byte
// batchTicker drives the periodic flush; it is only set when RunBatchTimer is true.
batchTicker *time.Ticker
}
// logsMapMutex pairs the map of pending batches with the mutex that guards it.
// The map is keyed by blob URL. Upload and uploadNow both hold the embedded
// mutex for their entire duration, so it protects the map and every entry's
// body.
//
// Ideally we would lock the map only when inserting a new value and otherwise
// rely on the string itself to lock.
type logsMapMutex struct {
logs map[string]*stringMutex
sync.Mutex // Not really the mutex I want, will probably swap for the sync.Map later
}
// stringMutex is one pending batch for a single blob. Despite the name it no
// longer carries its own mutex: that was removed because it could not easily
// be guaranteed that the outer map won't change while the string is being
// modified, without the mutex just becoming repetitive.
type stringMutex struct {
// Body holds the queued blocks joined by the uploader's separator.
Body []byte
// logValue is the Title/Value pair attached to log lines about this batch.
logValue LogPayload
// sasToken is the SAS query string returned to callers with the blob URL;
// it is empty when ConnectToAzBlob is false.
sasToken string
}
// Upload queues block for a deferred, batched append to the blob derived from
// filePath and returns that blob's URL with its SAS token as the query string.
//
// Nothing is sent immediately: blocks for the same blob are joined with the
// configured separator and flushed either by the batch ticker (uploadNow) or
// as soon as the pending body grows beyond batchMaxSize MiB. An early flush
// runs while the lock is held, so it delays other callers; do not make the
// limit too small.
//
// The contents of block are copied, so the caller may reuse its buffer after
// Upload returns. logValue is the Title/Value pair attached to log lines; the
// value kept for a blob is the one passed with the first block queued for it.
//
// An error is returned only when a SAS token cannot be generated for a blob
// seen for the first time. A failed early flush is logged by uploadBlock and
// the flushed data is dropped.
func (a *AzureBatchUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
	blobURL := a.azureBlobURL(a.containerPath, filePath)

	// A single lock guards the map and every pending body.
	pending := a.logsToSend
	pending.Lock()
	defer pending.Unlock()

	entry, ok := pending.logs[blobURL]
	switch {
	case !ok:
		// First block for this blob: generate the SAS token once and keep it
		// with the entry.
		var sasToken string
		if ConnectToAzBlob {
			var err error
			sasToken, err = a.generateSASToken(a.azureBlobFilePath(filePath))
			if err != nil {
				logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
				return "", err
			}
		}
		entry = &stringMutex{
			// Copy so that later appends never write into the caller's buffer.
			Body:     append([]byte(nil), block...),
			logValue: logValue,
			sasToken: sasToken,
		}
		pending.logs[blobURL] = entry
	case len(entry.Body) == 0:
		// The body is empty (for example right after an early flush, which
		// already ended with a separator). Adding a leading separator here
		// would produce a doubled separator in the blob.
		entry.Body = append(entry.Body, block...)
	default:
		entry.Body = append(append(entry.Body, a.separator...), block...)
	}

	// To keep memory bounded, send the batch early once it is large enough.
	if len(entry.Body) > 1024*1024*batchMaxSize {
		// uploadBlock logs its own failures; the batch is dropped either way.
		_ = a.uploadBlock(blobURL, entry, context.Background())
		entry.Body = nil
	}

	return blobURL + "?" + entry.sasToken, nil
}
// uploadNow flushes every pending batch; the batch ticker goroutine calls it
// once per interval.
//
// The lock is held for the whole flush, so concurrent Upload calls wait until
// all blobs have been sent. Each entry is handed to uploadBlock, and the map is
// then replaced with an empty one while the lock is still held, regardless of
// whether individual uploads succeeded.
func (a *AzureBatchUploader) uploadNow() {
	a.logsToSend.Lock()
	pending := a.logsToSend
	defer pending.Unlock()

	background := context.Background()
	for target, entry := range pending.logs {
		a.uploadBlock(target, entry, background)
	}

	// Start the next interval with an empty set of batches.
	pending.logs = map[string]*stringMutex{}
}
// uploadBlock appends the pending body of block, followed by one separator, to
// the append blob at blobURL, creating the blob first when it does not exist.
//
// An empty pending body is skipped entirely: uploading it would append a bare
// separator to the blob. The body is sent in chunks of at most 4 MiB per
// AppendBlock call.
//
// block.Body is modified (the trailing separator is appended to it); callers
// are expected to discard or reset the body afterwards. Every failure is
// logged here and also returned. A failure part-way through the chunk loop
// leaves the earlier chunks already appended to the blob.
func (a *AzureBatchUploader) uploadBlock(blobURL string, block *stringMutex, ctx context.Context) (err error) {
	// Nothing queued since the last flush: do not write a lone separator.
	if len(block.Body) == 0 {
		return nil
	}

	client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, &appendblob.ClientOptions{
		ClientOptions: policy.ClientOptions{
			Retry: policy.RetryOptions{
				MaxRetries:    1,
				MaxRetryDelay: 1 * time.Minute,
			},
		},
	})
	if err != nil {
		logger.Error().Str("Message", "Failed to create new client with shared key credential").Err(err).Send()
		return err
	}

	logValue := block.logValue
	logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block.Body), blobURL)
	block.Body = append(block.Body, a.separator...)

	// Check for the blob up front instead of appending and reacting to a
	// not-found failure afterwards.
	if _, err = client.GetProperties(ctx, nil); err != nil {
		if !bloberror.HasCode(err, bloberror.BlobNotFound) {
			logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
			return err
		}
		if _, err = client.Create(ctx, nil); err != nil {
			logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
			return err
		}
	}

	// 1024*1024*4 == 4 MiB per append call.
	const maxAppendBytes = 1024 * 1024 * 4
	remaining := block.Body
	for len(remaining) > 0 {
		chunkLen := len(remaining)
		if chunkLen > maxAppendBytes {
			chunkLen = maxAppendBytes
		}
		reader := NopCloser(bytes.NewReader(remaining[:chunkLen]))
		if _, err = client.AppendBlock(ctx, reader, nil); err != nil {
			logger.Error().Str(logValue.Title, logValue.Value).Err(err).Msgf("Max: %d, len(body): %d", chunkLen, len(remaining))
			return err
		}
		remaining = remaining[chunkLen:]
		logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
	}
	return nil
}
// azureBlobFilePath returns the blob name relative to the container: the given
// path segments joined with "/", followed by the file name "raw" plus the
// uploader's file extension (for example 19UUA56873A044568/2023/01/11/raw.log).
//
// Errors from url.JoinPath are discarded; a failed join yields an empty string
// for that step.
func (a *AzureBatchUploader) azureBlobFilePath(filepath []string) string {
	directory, _ := url.JoinPath("", filepath...)
	blobName, _ := url.JoinPath(directory, fmt.Sprint("raw", a.fileExtension))
	return blobName
}
// azureBlobURL returns the full URL of the blob a batch is appended to.
//
// basePath is the container URL (https://<account>.blob.core.windows.net/<container>/).
// The filePath segments are joined beneath it, followed by the file name "raw"
// plus the uploader's file extension.
//
// Errors from url.JoinPath are discarded; a failed join yields an empty string
// for that step.
func (a *AzureBatchUploader) azureBlobURL(basePath string, filePath []string) string {
	directoryURL, _ := url.JoinPath(basePath, filePath...)
	blobURL, _ := url.JoinPath(directoryURL, fmt.Sprint("raw", a.fileExtension))
	return blobURL
}
// generateSASToken signs a read-only, HTTPS-only SAS for blobName in the
// uploader's container using the shared key credential, and returns the
// encoded query string.
//
// blobName is the blob path inside the container, for example
// 19UUA56873A044568/2023/01/11/raw.log. The token is valid from one hour
// before now (to avoid signature issues) until roughly three years from now.
// A signing failure is logged and returned with an empty token.
func (a *AzureBatchUploader) generateSASToken(blobName string) (token string, err error) {
	readOnly := sas.BlobPermissions{Read: true}
	signatureValues := sas.BlobSignatureValues{
		Protocol:      sas.ProtocolHTTPS,
		StartTime:     time.Now().UTC().Add(-1 * time.Hour),
		ExpiryTime:    time.Now().UTC().Add(3 * 365 * 24 * time.Hour),
		Permissions:   elptr.ElPtr(readOnly).String(),
		ContainerName: a.containerName,
		BlobName:      blobName,
	}

	signed, signErr := signatureValues.SignWithSharedKey(a.azureCredentials)
	if signErr != nil {
		logger.Error().Err(signErr).Msg("Failed to sas.BlobSignatureValues")
		return "", signErr
	}
	return signed.Encode(), nil
}
/* func MutexLocked(m *sync.Mutex) bool {
state := reflect.ValueOf(m).Elem().FieldByName("state")
const mutexLocked int64 = 1
return state.Int()&mutexLocked == mutexLocked
} */