Initial cloud-services repo - gateway service + pkg modules
This commit is contained in:
259
pkg/remotefileupload/batchuploader.go
Normal file
259
pkg/remotefileupload/batchuploader.go
Normal file
@@ -0,0 +1,259 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"fiskerinc.com/modules/utils/elptr"
|
||||
)
|
||||
|
||||
// Comments on other versions I created
|
||||
// Using a buffer instead of byte array, it would randomly drop one number
|
||||
// was doing buffer.Write(separator), buffer.Write(x) and I would get something like 4,3,,8,1
|
||||
// Using an RWMutex on the map and a mutex on the []byte: read-lock to check out the item, then write-lock if we were assigning;
|
||||
// couldn't get it to work. Would drop numbers as well
|
||||
|
||||
// Using the sync on the inner string proves to be slightly more performant than locking the whole thing
|
||||
|
||||
var (
|
||||
RunBatchTimer = true // For local testing, if you don't want to upload to azure, set to false
|
||||
ConnectToAzBlob = true // For local testing, set to false to not use credentials
|
||||
batchMaxSize = envtool.GetEnvInt("AZURE_STORAGE_BATCH_UPLOAD_MAX_MIB", 2)
|
||||
)
|
||||
|
||||
func NewAzureBatchUploader(azureStorageContainerName string, azureFileExtension string, messageBatchTimeSeconds int, batchSeparator string) (Uploader, error) {
|
||||
a := &AzureBatchUploader{
|
||||
accountName: azureAccount,
|
||||
containerName: azureStorageContainerName,
|
||||
fileExtension: azureFileExtension,
|
||||
}
|
||||
|
||||
var cred *azblob.SharedKeyCredential
|
||||
var err error
|
||||
if ConnectToAzBlob {
|
||||
cred, err = azblob.NewSharedKeyCredential(a.accountName, azureAccountKey)
|
||||
if err != nil {
|
||||
return a, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
||||
containerPath := fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName)
|
||||
|
||||
a.containerPath = containerPath
|
||||
a.azureCredentials = cred
|
||||
|
||||
a.separator = []byte(batchSeparator)
|
||||
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
||||
Mutex: sync.Mutex{}}
|
||||
|
||||
if RunBatchTimer {
|
||||
a.batchTicker = time.NewTicker(time.Duration(messageBatchTimeSeconds) * time.Second)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-a.batchTicker.C:
|
||||
a.uploadNow()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// So the creation of logs to send
|
||||
type AzureBatchUploader struct {
|
||||
accountName string
|
||||
containerName string
|
||||
fileExtension string
|
||||
containerPath string
|
||||
azureCredentials *azblob.SharedKeyCredential
|
||||
logsToSend *logsMapMutex // A map of strings
|
||||
separator []byte
|
||||
batchTicker *time.Ticker
|
||||
}
|
||||
|
||||
// Ideally we lock the map on when we have to change insert a new value, otherwise
|
||||
// we rely on the string itself to lock
|
||||
type logsMapMutex struct {
|
||||
logs map[string]*stringMutex
|
||||
sync.Mutex // Not really the mutex I want, will probably swap for the sync.Map later
|
||||
}
|
||||
|
||||
// Removing mutex, can't easily guarantee that the outside map wo't change as we are trying to modify our
|
||||
// string, without the mutex just becoming repetitive
|
||||
type stringMutex struct {
|
||||
Body []byte
|
||||
logValue LogPayload
|
||||
sasToken string
|
||||
}
|
||||
|
||||
// Instead of directly uploading, we need to delay our upload
|
||||
func (a *AzureBatchUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
|
||||
blobURL := a.azureBlobURL(a.containerPath, filePath)
|
||||
// Lock a.logs
|
||||
a.logsToSend.Mutex.Lock()
|
||||
sendMap := a.logsToSend
|
||||
defer sendMap.Unlock()
|
||||
// a.logs cant' get changed
|
||||
mstring, ok := sendMap.logs[blobURL]
|
||||
if !ok {
|
||||
var sasTokenURL string
|
||||
// If we don't have the log inside, we likely need to generate the file, and the sas token. Should implement some sort of caching for this though
|
||||
if ConnectToAzBlob {
|
||||
blobPath := a.azureBlobFilePath(filePath)
|
||||
var err error
|
||||
sasTokenURL, err = a.generateSASToken(blobPath)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
sendMap.logs[blobURL] = &stringMutex{
|
||||
Body: block,
|
||||
logValue: logValue,
|
||||
sasToken: sasTokenURL,
|
||||
}
|
||||
} else {
|
||||
// Tried using a buffer, but did not seem to improve performance
|
||||
mstring.Body = append(append(mstring.Body, a.separator...), block...)
|
||||
}
|
||||
|
||||
// To prevent us from taking up too much memory, we will send out data early
|
||||
// This will slow down other logs coming in, so do not make this value too small
|
||||
if len(sendMap.logs[blobURL].Body) > 1024*1024*batchMaxSize {
|
||||
a.uploadBlock(blobURL, sendMap.logs[blobURL], context.Background())
|
||||
sendMap.logs[blobURL].Body = make([]byte, 0)
|
||||
}
|
||||
blobURL = blobURL + "?" + sendMap.logs[blobURL].sasToken
|
||||
return blobURL, nil
|
||||
}
|
||||
|
||||
// Called once the batch has been expired, we actually upload. Can probably just call the azure upload service itself,
|
||||
// but refactor later
|
||||
func (a *AzureBatchUploader) uploadNow() {
|
||||
// Swap in new log holder
|
||||
// Acquire a lock on a.logs. Now we will wait until other writes are finished, but someone could lock behind us
|
||||
a.logsToSend.Lock()
|
||||
messageMap := a.logsToSend
|
||||
// No longer swap the whole object, just replace the map. Will create a backup for the upload time unfortunately
|
||||
|
||||
// Unlocking
|
||||
defer messageMap.Unlock()
|
||||
ctx := context.Background()
|
||||
for blobURL, block := range messageMap.logs {
|
||||
a.uploadBlock(blobURL, block, ctx)
|
||||
}
|
||||
// While we still have a lock on the map, we swap it out
|
||||
messageMap.logs = make(map[string]*stringMutex)
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) uploadBlock(blobURL string, block *stringMutex, ctx context.Context) (err error) {
|
||||
client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, &appendblob.ClientOptions{
|
||||
ClientOptions: policy.ClientOptions{
|
||||
Retry: policy.RetryOptions{
|
||||
MaxRetries: 1,
|
||||
MaxRetryDelay: 1 * time.Minute,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Error().Str("Message", "Failed to create new client with shared key credential").Err(err).Send()
|
||||
return
|
||||
}
|
||||
logValue := block.logValue
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block.Body), blobURL)
|
||||
|
||||
block.Body = append(block.Body, a.separator...)
|
||||
// Instead of trying to send data to a blob, and then determining if it exists, lets just check if it exists
|
||||
_, err = client.GetProperties(ctx, nil)
|
||||
if err != nil {
|
||||
if !bloberror.HasCode(err, bloberror.BlobNotFound) {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
_, err = client.Create(ctx, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
body := block.Body
|
||||
// 2014*1024*4 == 4 MiB,
|
||||
MiB4 := 1024 * 1024 * 4
|
||||
for len(body) > 0 {
|
||||
max := MiB4
|
||||
if len(body) < max {
|
||||
max = len(body)
|
||||
}
|
||||
reader := NopCloser(bytes.NewReader(body[0:max]))
|
||||
_, err = client.AppendBlock(ctx, reader, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Msgf("Max: %d, len(body): %d", max, len(body))
|
||||
return
|
||||
}
|
||||
body = body[max:]
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) azureBlobFilePath(filepath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
||||
finalPath, _ := url.JoinPath("", filepath...)
|
||||
finalPath, _ = url.JoinPath(finalPath, fileName)
|
||||
|
||||
return finalPath
|
||||
}
|
||||
|
||||
// basePath is the url to the blob storage (<account>.azurebloburl.net/<containername>)
|
||||
// filepath will be added onto basepath /<your>/<file>/<path>
|
||||
func (a *AzureBatchUploader) azureBlobURL(basePath string, filePath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
||||
finalPath, _ := url.JoinPath(basePath, filePath...)
|
||||
finalPath, _ = url.JoinPath(finalPath, fileName)
|
||||
|
||||
return finalPath
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) generateSASToken(blobName string) (token string, err error) {
|
||||
// blob name is something like this: 19UUA56873A044568/2023/01/11/raw.log
|
||||
sasQueryParams, err := sas.BlobSignatureValues{
|
||||
Protocol: sas.ProtocolHTTPS,
|
||||
StartTime: time.Now().UTC().Add(-1 * time.Hour), // reduce an hour from current time to avoid signature issue
|
||||
ExpiryTime: time.Now().UTC().Add(3 * 365 * 24 * time.Hour), // 3 years-ish
|
||||
Permissions: elptr.ElPtr(sas.BlobPermissions{Read: true}).String(),
|
||||
ContainerName: a.containerName,
|
||||
BlobName: blobName,
|
||||
}.SignWithSharedKey(a.azureCredentials)
|
||||
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("Failed to sas.BlobSignatureValues")
|
||||
return
|
||||
}
|
||||
|
||||
token = sasQueryParams.Encode()
|
||||
return
|
||||
}
|
||||
|
||||
/* func MutexLocked(m *sync.Mutex) bool {
|
||||
state := reflect.ValueOf(m).Elem().FieldByName("state")
|
||||
const mutexLocked int64 = 1
|
||||
return state.Int()&mutexLocked == mutexLocked
|
||||
} */
|
||||
Reference in New Issue
Block a user