260 lines
8.5 KiB
Go
260 lines
8.5 KiB
Go
package remotefileupload
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"fmt"
|
|
"net/url"
|
|
"sync"
|
|
"time"
|
|
|
|
"fiskerinc.com/modules/logger"
|
|
"fiskerinc.com/modules/utils/envtool"
|
|
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
|
|
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
|
|
|
|
"github.com/pkg/errors"
|
|
"fiskerinc.com/modules/utils/elptr"
|
|
)
|
|
|
|
// Comments on other versions I created
|
|
// Using a buffer instead of byte array, it would randomly drop one number
|
|
// was doing buffer.Write(separator), buffer.Write(x) and I would get something like 4,3,,8,1
|
|
// Using a rwmutex on map, and a mutex on []byte, read to check out the item, then write if we were assigning
|
|
// couldn't get it to work. Would drop numbers as well
|
|
|
|
// Using the sync on the inner string proves to be slightly more performant than locking the whole thing
|
|
|
|
var (
|
|
RunBatchTimer = true // For local testing, if you don't want to upload to azure, set to false
|
|
ConnectToAzBlob = true // For local testing, set to false to not use credentials
|
|
batchMaxSize = envtool.GetEnvInt("AZURE_STORAGE_BATCH_UPLOAD_MAX_MIB", 2)
|
|
)
|
|
|
|
func NewAzureBatchUploader(azureStorageContainerName string, azureFileExtension string, messageBatchTimeSeconds int, batchSeparator string) (Uploader, error) {
|
|
a := &AzureBatchUploader{
|
|
accountName: azureAccount,
|
|
containerName: azureStorageContainerName,
|
|
fileExtension: azureFileExtension,
|
|
}
|
|
|
|
var cred *azblob.SharedKeyCredential
|
|
var err error
|
|
if ConnectToAzBlob {
|
|
cred, err = azblob.NewSharedKeyCredential(a.accountName, azureAccountKey)
|
|
if err != nil {
|
|
return a, errors.WithStack(err)
|
|
}
|
|
}
|
|
|
|
containerPath := fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName)
|
|
|
|
a.containerPath = containerPath
|
|
a.azureCredentials = cred
|
|
|
|
a.separator = []byte(batchSeparator)
|
|
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
|
Mutex: sync.Mutex{}}
|
|
|
|
if RunBatchTimer {
|
|
a.batchTicker = time.NewTicker(time.Duration(messageBatchTimeSeconds) * time.Second)
|
|
go func() {
|
|
for {
|
|
select {
|
|
case <-a.batchTicker.C:
|
|
a.uploadNow()
|
|
}
|
|
}
|
|
}()
|
|
}
|
|
|
|
return a, nil
|
|
}
|
|
|
|
// So the creation of logs to send
|
|
type AzureBatchUploader struct {
|
|
accountName string
|
|
containerName string
|
|
fileExtension string
|
|
containerPath string
|
|
azureCredentials *azblob.SharedKeyCredential
|
|
logsToSend *logsMapMutex // A map of strings
|
|
separator []byte
|
|
batchTicker *time.Ticker
|
|
}
|
|
|
|
// Ideally we lock the map only when we have to insert a new value; otherwise
|
|
// we rely on the string itself to lock
|
|
type logsMapMutex struct {
|
|
logs map[string]*stringMutex
|
|
sync.Mutex // Not really the mutex I want, will probably swap for the sync.Map later
|
|
}
|
|
|
|
// Removing mutex, can't easily guarantee that the outside map won't change as we are trying to modify our
|
|
// string, without the mutex just becoming repetitive
|
|
type stringMutex struct {
|
|
Body []byte
|
|
logValue LogPayload
|
|
sasToken string
|
|
}
|
|
|
|
// Instead of directly uploading, we need to delay our upload
|
|
func (a *AzureBatchUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
|
|
blobURL := a.azureBlobURL(a.containerPath, filePath)
|
|
// Lock a.logs
|
|
a.logsToSend.Mutex.Lock()
|
|
sendMap := a.logsToSend
|
|
defer sendMap.Unlock()
|
|
// a.logs cant' get changed
|
|
mstring, ok := sendMap.logs[blobURL]
|
|
if !ok {
|
|
var sasTokenURL string
|
|
// If we don't have the log inside, we likely need to generate the file, and the sas token. Should implement some sort of caching for this though
|
|
if ConnectToAzBlob {
|
|
blobPath := a.azureBlobFilePath(filePath)
|
|
var err error
|
|
sasTokenURL, err = a.generateSASToken(blobPath)
|
|
if err != nil {
|
|
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
|
return "", err
|
|
}
|
|
}
|
|
|
|
sendMap.logs[blobURL] = &stringMutex{
|
|
Body: block,
|
|
logValue: logValue,
|
|
sasToken: sasTokenURL,
|
|
}
|
|
} else {
|
|
// Tried using a buffer, but did not seem to improve performance
|
|
mstring.Body = append(append(mstring.Body, a.separator...), block...)
|
|
}
|
|
|
|
// To prevent us from taking up too much memory, we will send out data early
|
|
// This will slow down other logs coming in, so do not make this value too small
|
|
if len(sendMap.logs[blobURL].Body) > 1024*1024*batchMaxSize {
|
|
a.uploadBlock(blobURL, sendMap.logs[blobURL], context.Background())
|
|
sendMap.logs[blobURL].Body = make([]byte, 0)
|
|
}
|
|
blobURL = blobURL + "?" + sendMap.logs[blobURL].sasToken
|
|
return blobURL, nil
|
|
}
|
|
|
|
// Called once the batch has been expired, we actually upload. Can probably just call the azure upload service itself,
|
|
// but refactor later
|
|
func (a *AzureBatchUploader) uploadNow() {
|
|
// Swap in new log holder
|
|
// Acquire a lock on a.logs. Now we will wait until other writes are finished, but someone could lock behind us
|
|
a.logsToSend.Lock()
|
|
messageMap := a.logsToSend
|
|
// No longer swap the whole object, just replace the map. Will create a backup for the upload time unfortunately
|
|
|
|
// Unlocking
|
|
defer messageMap.Unlock()
|
|
ctx := context.Background()
|
|
for blobURL, block := range messageMap.logs {
|
|
a.uploadBlock(blobURL, block, ctx)
|
|
}
|
|
// While we still have a lock on the map, we swap it out
|
|
messageMap.logs = make(map[string]*stringMutex)
|
|
}
|
|
|
|
func (a *AzureBatchUploader) uploadBlock(blobURL string, block *stringMutex, ctx context.Context) (err error) {
|
|
client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, &appendblob.ClientOptions{
|
|
ClientOptions: policy.ClientOptions{
|
|
Retry: policy.RetryOptions{
|
|
MaxRetries: 1,
|
|
MaxRetryDelay: 1 * time.Minute,
|
|
},
|
|
},
|
|
})
|
|
if err != nil {
|
|
logger.Error().Str("Message", "Failed to create new client with shared key credential").Err(err).Send()
|
|
return
|
|
}
|
|
logValue := block.logValue
|
|
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block.Body), blobURL)
|
|
|
|
block.Body = append(block.Body, a.separator...)
|
|
// Instead of trying to send data to a blob, and then determining if it exists, lets just check if it exists
|
|
_, err = client.GetProperties(ctx, nil)
|
|
if err != nil {
|
|
if !bloberror.HasCode(err, bloberror.BlobNotFound) {
|
|
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
|
return
|
|
}
|
|
_, err = client.Create(ctx, nil)
|
|
if err != nil {
|
|
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
|
return
|
|
}
|
|
}
|
|
|
|
body := block.Body
|
|
// 2014*1024*4 == 4 MiB,
|
|
MiB4 := 1024 * 1024 * 4
|
|
for len(body) > 0 {
|
|
max := MiB4
|
|
if len(body) < max {
|
|
max = len(body)
|
|
}
|
|
reader := NopCloser(bytes.NewReader(body[0:max]))
|
|
_, err = client.AppendBlock(ctx, reader, nil)
|
|
if err != nil {
|
|
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Msgf("Max: %d, len(body): %d", max, len(body))
|
|
return
|
|
}
|
|
body = body[max:]
|
|
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func (a *AzureBatchUploader) azureBlobFilePath(filepath []string) string {
|
|
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
|
finalPath, _ := url.JoinPath("", filepath...)
|
|
finalPath, _ = url.JoinPath(finalPath, fileName)
|
|
|
|
return finalPath
|
|
}
|
|
|
|
// basePath is the url to the blob storage container (https://<account>.blob.core.windows.net/<containername>/)
|
|
// filepath will be added onto basepath /<your>/<file>/<path>
|
|
func (a *AzureBatchUploader) azureBlobURL(basePath string, filePath []string) string {
|
|
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
|
finalPath, _ := url.JoinPath(basePath, filePath...)
|
|
finalPath, _ = url.JoinPath(finalPath, fileName)
|
|
|
|
return finalPath
|
|
}
|
|
|
|
func (a *AzureBatchUploader) generateSASToken(blobName string) (token string, err error) {
|
|
// blob name is something like this: 19UUA56873A044568/2023/01/11/raw.log
|
|
sasQueryParams, err := sas.BlobSignatureValues{
|
|
Protocol: sas.ProtocolHTTPS,
|
|
StartTime: time.Now().UTC().Add(-1 * time.Hour), // reduce an hour from current time to avoid signature issue
|
|
ExpiryTime: time.Now().UTC().Add(3 * 365 * 24 * time.Hour), // 3 years-ish
|
|
Permissions: elptr.ElPtr(sas.BlobPermissions{Read: true}).String(),
|
|
ContainerName: a.containerName,
|
|
BlobName: blobName,
|
|
}.SignWithSharedKey(a.azureCredentials)
|
|
|
|
if err != nil {
|
|
logger.Error().Err(err).Msg("Failed to sas.BlobSignatureValues")
|
|
return
|
|
}
|
|
|
|
token = sasQueryParams.Encode()
|
|
return
|
|
}
|
|
|
|
/* func MutexLocked(m *sync.Mutex) bool {
|
|
state := reflect.ValueOf(m).Elem().FieldByName("state")
|
|
const mutexLocked int64 = 1
|
|
return state.Int()&mutexLocked == mutexLocked
|
|
} */
|