package remotefileupload import ( "bytes" "context" "fmt" "net/url" "sync" "time" "github.com/fiskerinc/cloud-services/pkg/logger" "github.com/fiskerinc/cloud-services/pkg/utils/envtool" "github.com/Azure/azure-sdk-for-go/sdk/azcore/policy" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror" "github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas" "github.com/pkg/errors" "github.com/fiskerinc/cloud-services/pkg/utils/elptr" ) // Comments on other versions I created // Using a buffer instead of byte array, it would randomly drop one number // was doing buffer.Write(separator), buffer.Write(x) and I would get something like 4,3,,8,1 // Using a rwmutex on map, and a mutex on []byte, read to check out the item, then write if we where assigning // couldn't get it to work. Would drop numbers as well // Using the sync on the inner string proves to be slightly more performant than locking the whole thing var ( RunBatchTimer = true // For local testing, if you don't want to upload to azure, set to false ConnectToAzBlob = true // For local testing, set to false to not use credentials batchMaxSize = envtool.GetEnvInt("AZURE_STORAGE_BATCH_UPLOAD_MAX_MIB", 2) ) func NewAzureBatchUploader(azureStorageContainerName string, azureFileExtension string, messageBatchTimeSeconds int, batchSeparator string) (Uploader, error) { a := &AzureBatchUploader{ accountName: azureAccount, containerName: azureStorageContainerName, fileExtension: azureFileExtension, } var cred *azblob.SharedKeyCredential var err error if ConnectToAzBlob { cred, err = azblob.NewSharedKeyCredential(a.accountName, azureAccountKey) if err != nil { return a, errors.WithStack(err) } } containerPath := fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName) a.containerPath = containerPath a.azureCredentials = cred a.separator = []byte(batchSeparator) a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{}, Mutex: sync.Mutex{}} if RunBatchTimer { a.batchTicker = time.NewTicker(time.Duration(messageBatchTimeSeconds) * time.Second) go func() { for { select { case <-a.batchTicker.C: a.uploadNow() } } }() } return a, nil } // So the creation of logs to send type AzureBatchUploader struct { accountName string containerName string fileExtension string containerPath string azureCredentials *azblob.SharedKeyCredential logsToSend *logsMapMutex // A map of strings separator []byte batchTicker *time.Ticker } // Ideally we lock the map on when we have to change insert a new value, otherwise // we rely on the string itself to lock type logsMapMutex struct { logs map[string]*stringMutex sync.Mutex // Not really the mutex I want, will probably swap for the sync.Map later } // Removing mutex, can't easily guarantee that the outside map wo't change as we are trying to modify our // string, without the mutex just becoming repetitive type stringMutex struct { Body []byte logValue LogPayload sasToken string } // Instead of directly uploading, we need to delay our upload func (a *AzureBatchUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) { blobURL := a.azureBlobURL(a.containerPath, filePath) // Lock a.logs a.logsToSend.Mutex.Lock() sendMap := a.logsToSend defer sendMap.Unlock() // a.logs cant' get changed mstring, ok := sendMap.logs[blobURL] if !ok { var sasTokenURL string // If we don't have the log inside, we likely need to generate the file, and the sas token. Should implement some sort of caching for this though if ConnectToAzBlob { blobPath := a.azureBlobFilePath(filePath) var err error sasTokenURL, err = a.generateSASToken(blobPath) if err != nil { logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send() return "", err } } sendMap.logs[blobURL] = &stringMutex{ Body: block, logValue: logValue, sasToken: sasTokenURL, } } else { // Tried using a buffer, but did not seem to improve performance mstring.Body = append(append(mstring.Body, a.separator...), block...) } // To prevent us from taking up too much memory, we will send out data early // This will slow down other logs coming in, so do not make this value too small if len(sendMap.logs[blobURL].Body) > 1024*1024*batchMaxSize { a.uploadBlock(blobURL, sendMap.logs[blobURL], context.Background()) sendMap.logs[blobURL].Body = make([]byte, 0) } blobURL = blobURL + "?" + sendMap.logs[blobURL].sasToken return blobURL, nil } // Called once the batch has been expired, we actually upload. Can probably just call the azure upload service itself, // but refactor later func (a *AzureBatchUploader) uploadNow() { // Swap in new log holder // Acquire a lock on a.logs. Now we will wait until other writes are finished, but someone could lock behind us a.logsToSend.Lock() messageMap := a.logsToSend // No longer swap the whole object, just replace the map. Will create a backup for the upload time unfortunately // Unlocking defer messageMap.Unlock() ctx := context.Background() for blobURL, block := range messageMap.logs { a.uploadBlock(blobURL, block, ctx) } // While we still have a lock on the map, we swap it out messageMap.logs = make(map[string]*stringMutex) } func (a *AzureBatchUploader) uploadBlock(blobURL string, block *stringMutex, ctx context.Context) (err error) { client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, &appendblob.ClientOptions{ ClientOptions: policy.ClientOptions{ Retry: policy.RetryOptions{ MaxRetries: 1, MaxRetryDelay: 1 * time.Minute, }, }, }) if err != nil { logger.Error().Str("Message", "Failed to create new client with shared key credential").Err(err).Send() return } logValue := block.logValue logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block.Body), blobURL) block.Body = append(block.Body, a.separator...) // Instead of trying to send data to a blob, and then determining if it exists, lets just check if it exists _, err = client.GetProperties(ctx, nil) if err != nil { if !bloberror.HasCode(err, bloberror.BlobNotFound) { logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send() return } _, err = client.Create(ctx, nil) if err != nil { logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send() return } } body := block.Body // 2014*1024*4 == 4 MiB, MiB4 := 1024 * 1024 * 4 for len(body) > 0 { max := MiB4 if len(body) < max { max = len(body) } reader := NopCloser(bytes.NewReader(body[0:max])) _, err = client.AppendBlock(ctx, reader, nil) if err != nil { logger.Error().Str(logValue.Title, logValue.Value).Err(err).Msgf("Max: %d, len(body): %d", max, len(body)) return } body = body[max:] logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete") } return } func (a *AzureBatchUploader) azureBlobFilePath(filepath []string) string { fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension) finalPath, _ := url.JoinPath("", filepath...) finalPath, _ = url.JoinPath(finalPath, fileName) return finalPath } // basePath is the url to the blob storage (.azurebloburl.net/) // filepath will be added onto basepath /// func (a *AzureBatchUploader) azureBlobURL(basePath string, filePath []string) string { fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension) finalPath, _ := url.JoinPath(basePath, filePath...) finalPath, _ = url.JoinPath(finalPath, fileName) return finalPath } func (a *AzureBatchUploader) generateSASToken(blobName string) (token string, err error) { // blob name is something like this: 19UUA56873A044568/2023/01/11/raw.log sasQueryParams, err := sas.BlobSignatureValues{ Protocol: sas.ProtocolHTTPS, StartTime: time.Now().UTC().Add(-1 * time.Hour), // reduce an hour from current time to avoid signature issue ExpiryTime: time.Now().UTC().Add(3 * 365 * 24 * time.Hour), // 3 years-ish Permissions: elptr.ElPtr(sas.BlobPermissions{Read: true}).String(), ContainerName: a.containerName, BlobName: blobName, }.SignWithSharedKey(a.azureCredentials) if err != nil { logger.Error().Err(err).Msg("Failed to sas.BlobSignatureValues") return } token = sasQueryParams.Encode() return } /* func MutexLocked(m *sync.Mutex) bool { state := reflect.ValueOf(m).Elem().FieldByName("state") const mutexLocked int64 = 1 return state.Int()&mutexLocked == mutexLocked } */