Initial cloud-services repo - gateway service + pkg modules
This commit is contained in:
4
pkg/remotefileupload/README.md
Normal file
4
pkg/remotefileupload/README.md
Normal file
@@ -0,0 +1,4 @@
|
||||
Contains code from cloud/cargo/handlers/events.go
|
||||
Needed for cloud/valet/handlers/log_trex.go
|
||||
|
||||
So code doesn't need to be in two different places
|
||||
76
pkg/remotefileupload/aws.go
Normal file
76
pkg/remotefileupload/aws.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"path/filepath"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
|
||||
"github.com/aws/aws-sdk-go/aws"
|
||||
"github.com/aws/aws-sdk-go/aws/session"
|
||||
"github.com/aws/aws-sdk-go/service/s3"
|
||||
)
|
||||
|
||||
var (
|
||||
awsBucketRegion = envtool.GetEnv("AWS_REGION", "us-west-2")
|
||||
awsBucketName = envtool.GetEnv("AWS_BUCKET_NAME", "fisker-data-test")
|
||||
awsFileExtension = envtool.GetEnv("AWS_FILE_EXTENSION", ".csv")
|
||||
)
|
||||
|
||||
// NewS3Uploader creates a new S3Uploader instance using env variables
|
||||
// requires ENV vars: AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, AWS_SESSION_TOKEN
|
||||
func NewS3Uploader(awsBucketName string) Uploader {
|
||||
var uploader *S3Uploader
|
||||
|
||||
cfg := aws.NewConfig().WithRegion(awsBucketRegion)
|
||||
sess := session.Must(session.NewSession())
|
||||
|
||||
uploader = &S3Uploader{
|
||||
service: s3.New(sess, cfg),
|
||||
bucketRegion: awsBucketRegion,
|
||||
bucketName: awsBucketName,
|
||||
fileExtension: awsFileExtension,
|
||||
}
|
||||
return uploader
|
||||
}
|
||||
|
||||
type S3Uploader struct {
|
||||
service *s3.S3
|
||||
bucketRegion string
|
||||
bucketName string
|
||||
fileExtension string
|
||||
}
|
||||
|
||||
// Upload creates a new object within S3 under the designated path
|
||||
//
|
||||
// objects can be up to 15MB before requiring multi-upload
|
||||
func (s *S3Uploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
|
||||
fileType := http.DetectContentType(block)
|
||||
|
||||
path := s.s3ObjectURL(filePath)
|
||||
input := &s3.PutObjectInput{
|
||||
Bucket: aws.String(s.bucketName),
|
||||
Key: aws.String(path),
|
||||
ContentType: aws.String(fileType),
|
||||
Body: bytes.NewReader(block),
|
||||
}
|
||||
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to aws object: %s", len(block), path)
|
||||
_, err := s.service.PutObject(input)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
|
||||
return path, nil
|
||||
}
|
||||
|
||||
// s3ObjectURL is the URL formatter for an S3 object
|
||||
func (s *S3Uploader) s3ObjectURL(filePath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", s.fileExtension)
|
||||
finalPath := filepath.Join(filepath.Join(filePath...), fileName)
|
||||
return finalPath
|
||||
}
|
||||
30
pkg/remotefileupload/aws_test.go
Normal file
30
pkg/remotefileupload/aws_test.go
Normal file
@@ -0,0 +1,30 @@
|
||||
package remotefileupload_test
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"fiskerinc.com/modules/remotefileupload"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
|
||||
"fiskerinc.com/modules/testhelper"
|
||||
)
|
||||
|
||||
func TestNewAWSUploaderIntegration(t *testing.T) {
|
||||
t.Skip()
|
||||
|
||||
awsBucketName := envtool.GetEnv("AWS_BUCKET_NAME", "fisker-data-test")
|
||||
_ = remotefileupload.NewS3Uploader(awsBucketName)
|
||||
}
|
||||
|
||||
func TestAWSUploadIntegration(t *testing.T) {
|
||||
t.Skip()
|
||||
|
||||
awsBucketName := envtool.GetEnv("AWS_BUCKET_NAME", "fisker-data-test")
|
||||
s := remotefileupload.NewS3Uploader(awsBucketName)
|
||||
|
||||
_, err := s.Upload([]byte("testblock"), remotefileupload.LogPayload{Title: "vin", Value: "TESTVIN123"}, "TESTVIN123", "TESTVERSION123")
|
||||
if err != nil {
|
||||
t.Errorf(testhelper.TestErrorTemplate, "TestAzureUploadIntegration", "error", err)
|
||||
return
|
||||
}
|
||||
}
|
||||
123
pkg/remotefileupload/azure.go
Normal file
123
pkg/remotefileupload/azure.go
Normal file
@@ -0,0 +1,123 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"net/url"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
|
||||
"github.com/pkg/errors"
|
||||
)
|
||||
|
||||
var (
|
||||
azureAccount = envtool.GetEnv("AZURE_STORAGE_ACCOUNT", "REPLACE_ME")
|
||||
azureAccountKey = envtool.GetEnv("AZURE_STORAGE_ACCESS_KEY", "REPLACE_ME")
|
||||
)
|
||||
|
||||
// NewAzureUploader creates a new AzureUploader instance using env variables
|
||||
func NewAzureUploader(azureStorageContainerName string, azureFileExtension string) (Uploader, error) {
|
||||
a := &AzureUploader{
|
||||
accountName: azureAccount,
|
||||
containerName: azureStorageContainerName,
|
||||
fileExtension: azureFileExtension,
|
||||
}
|
||||
|
||||
cred, err := azblob.NewSharedKeyCredential(a.accountName, azureAccountKey)
|
||||
if err != nil {
|
||||
return a, errors.WithStack(err)
|
||||
}
|
||||
|
||||
containerPath := fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName)
|
||||
|
||||
a.containerPath = containerPath
|
||||
a.azureCredentials = cred
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// AzureUploader stores file location and creds to perform AppendBlock operation to blobs
|
||||
type AzureUploader struct {
|
||||
accountName string
|
||||
containerName string
|
||||
fileExtension string
|
||||
containerPath string
|
||||
azureCredentials *azblob.SharedKeyCredential
|
||||
}
|
||||
|
||||
// Upload appends new chunk of data to end of blob
|
||||
// if blob doesn't exist, creates blob and then appends data
|
||||
// logName: A name for if something goes wrong: i.e.: filePath[logIndex]
|
||||
// logIndex: What piece of data should be logged if something goes wrong
|
||||
func (a *AzureUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
|
||||
ctx := context.Background()
|
||||
blobURL := a.azureBlobURL(a.containerPath, filePath)
|
||||
client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, nil)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block), blobURL)
|
||||
|
||||
reader := NopCloser(bytes.NewReader(block))
|
||||
func() {
|
||||
// Instead of trying to send data to a blob, and then determining if it exists, lets just check if it exists
|
||||
_, err := client.GetProperties(ctx, nil)
|
||||
if err != nil {
|
||||
if !bloberror.HasCode(err, bloberror.BlobNotFound) {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
_, err = client.Create(ctx, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
_, err = client.AppendBlock(ctx, reader, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
|
||||
}()
|
||||
return blobURL, nil
|
||||
}
|
||||
|
||||
// basePath is the url to the blob storage (<account>.azurebloburl.net/<containername>)
|
||||
// filepath will be added onto basepath /<your>/<file>/<path>
|
||||
func (a *AzureUploader) azureBlobURL(basePath string, filePath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
||||
finalPath, _ := url.JoinPath(basePath, filePath...)
|
||||
finalPath, _ = url.JoinPath(finalPath, fileName)
|
||||
|
||||
return finalPath
|
||||
}
|
||||
|
||||
// nopCloser adds a no-op Close to an io.ReadSeeker.
type nopCloser struct {
	io.ReadSeeker
}

// Close does nothing and always reports success.
func (nopCloser) Close() error { return nil }

// NopCloser wraps rs in an io.ReadSeekCloser whose Close method is a no-op.
// Read and Seek are forwarded to rs.
func NopCloser(rs io.ReadSeeker) io.ReadSeekCloser {
	return nopCloser{ReadSeeker: rs}
}
|
||||
|
||||
// AzureFilePathLink returns the https URL of a blob.
//
// AzureAccount is the whole storage account name, such as fiskercloudstg.
// AzureContainerName is the name of the specific container, such as trexlogs.
// PathPieces is the path to the file inside the container, e.g.
// "someVIN", "2023", "05", "03", "raw.log".
//
// The pieces are joined with url.JoinPath, whose error is returned unchanged.
func AzureFilePathLink(AzureAccount, AzureContainerName string, PathPieces ...string) (link string, err error) {
	containerURL := fmt.Sprintf("https://%s.blob.core.windows.net/%s", AzureAccount, AzureContainerName)
	link, err = url.JoinPath(containerURL, PathPieces...)
	return link, err
}
|
||||
63
pkg/remotefileupload/azure_performance_test.go
Normal file
63
pkg/remotefileupload/azure_performance_test.go
Normal file
@@ -0,0 +1,63 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"math/rand"
|
||||
"strconv"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// without the goroutine, only sends 11
|
||||
func BenchmarkAzureUpload(b *testing.B) {
|
||||
azureAccount = "fiskerclouddev"
|
||||
azureAccountKey = "REPLACE_ME"
|
||||
|
||||
uploader, err := NewAzureUploader("trex-logs", ".txt")
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
|
||||
benchmarkUploadTime(uploader, b)
|
||||
}
|
||||
|
||||
func BenchmarkAzureBatchUpload(b *testing.B) {
|
||||
azureAccount = "fiskerclouddev"
|
||||
azureAccountKey = "REPLACE_ME"
|
||||
|
||||
uploader, err := NewAzureBatchUploader("trex-logs", ".txt", 1, "\n")
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
|
||||
benchmarkUploadTime(uploader, b)
|
||||
}
|
||||
|
||||
// 1762456 messages,
|
||||
func benchmarkUploadTime(uploader Uploader, b *testing.B) {
|
||||
endTimer := time.NewTimer(time.Second * 5)
|
||||
messagesSent := 0
|
||||
Loop:
|
||||
for {
|
||||
select {
|
||||
case <-endTimer.C:
|
||||
break Loop
|
||||
default:
|
||||
SendMessage(time.Now(), uploader, b)
|
||||
messagesSent++
|
||||
}
|
||||
}
|
||||
|
||||
time.Sleep(5 * time.Second)
|
||||
b.Logf("Benchmark %s 'sent' %d messages\n", b.Name(), messagesSent)
|
||||
}
|
||||
|
||||
func SendMessage(t time.Time, uploader Uploader, b *testing.B) {
|
||||
thread := rand.Intn(10)
|
||||
_, err := uploader.Upload([]byte(t.Format(time.RubyDate)), LogPayload{
|
||||
Title: "This",
|
||||
Value: "Some",
|
||||
}, "/benchmark", strconv.Itoa(thread))
|
||||
if err != nil {
|
||||
b.Error(b)
|
||||
}
|
||||
}
|
||||
87
pkg/remotefileupload/azure_test.go
Normal file
87
pkg/remotefileupload/azure_test.go
Normal file
@@ -0,0 +1,87 @@
|
||||
package remotefileupload_test
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"fiskerinc.com/modules/remotefileupload"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
|
||||
"fiskerinc.com/modules/testhelper"
|
||||
)
|
||||
|
||||
// In order to run the integration tests on the uploader, the go routine that makes the upload needs to be not a goroutine
|
||||
func TestNewAzureUploaderIntegration(t *testing.T) {
|
||||
t.Skip()
|
||||
|
||||
azureContainerName := envtool.GetEnv("AZURE_TREX_LOGS_STORAGE_CONTAINER_NAME", "raw-can")
|
||||
_, err := remotefileupload.NewAzureUploader(azureContainerName, ".csv")
|
||||
if err != nil {
|
||||
t.Errorf(testhelper.TestErrorTemplate, "TestNewAzureUploaderIntegration", nil, err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestAzureUploadIntegration(t *testing.T) {
|
||||
t.Skip()
|
||||
|
||||
azureContainerName := envtool.GetEnv("AZURE_TREX_LOGS_STORAGE_CONTAINER_NAME", "raw-can")
|
||||
a, err := remotefileupload.NewAzureUploader(azureContainerName, ".csv")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
date := fmt.Sprintf("%04d/%02d/%02d", time.Now().Year(), time.Now().Month(), time.Now().Day())
|
||||
_, err = a.Upload([]byte("{'id':'testJson'}"), remotefileupload.LogPayload{Title: "vin", Value: "TESTVIN123"}, "TESTVIN123", date)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
func TestAzureUploadTestAppend(t *testing.T) {
|
||||
t.Skip()
|
||||
a, err := remotefileupload.NewAzureUploader("trex-logs", ".csv")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = a.Upload([]byte("Hello"), remotefileupload.LogPayload{Title: "vin", Value: "path"}, "path")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = a.Upload([]byte("goodbye"), remotefileupload.LogPayload{Title: "vin", Value: "path"}, "path")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
_, err = a.Upload([]byte("again"), remotefileupload.LogPayload{Title: "vin", Value: "path"}, "path")
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
time.Sleep(time.Minute)
|
||||
}
|
||||
|
||||
func TestURLGeneration(t *testing.T) {
|
||||
link, err := remotefileupload.AzureFilePathLink("dev-account", "trex-logs", "trex", "12345678", "2022", "log.txt")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if link != "https://dev-account.blob.core.windows.net/trex-logs/trex/12345678/2022/log.txt" {
|
||||
t.Errorf("Link did not match: %s", link)
|
||||
}
|
||||
|
||||
link, err = remotefileupload.AzureFilePathLink("dev-account", "trex-logs", "trex", "/12345678/2022", "log.txt")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
if link != "https://dev-account.blob.core.windows.net/trex-logs/trex/12345678/2022/log.txt" {
|
||||
t.Errorf("Link did not match: %s", link)
|
||||
}
|
||||
}
|
||||
318
pkg/remotefileupload/backup.go
Normal file
318
pkg/remotefileupload/backup.go
Normal file
@@ -0,0 +1,318 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"fiskerinc.com/modules/utils/elptr"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/blockblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
|
||||
)
|
||||
|
||||
var (
|
||||
backupContainerName = envtool.GetEnv("AZURE_STORAGE_BACKUP_CONTAINER", "raw-can-archive")
|
||||
ttl = envtool.GetEnvInt64("AZURE_STORAGE_BACKUP_TTL", 60*24) // 60 days
|
||||
azureRawCompressedContainerName = envtool.GetEnv("AZURE_STORAGE_RAW_COMPRESSED_CONTAINER", "raw-can-compressed")
|
||||
)
|
||||
|
||||
type Backup struct {
|
||||
azureAccount string
|
||||
azureAccountKey string
|
||||
containerName string
|
||||
cred *azblob.SharedKeyCredential
|
||||
}
|
||||
|
||||
// Log message templates used by Backup, plus the service error text that
// marks a missing blob.
var (
	errTTL    = "Failed to set ttl %s"
	errCopy   = "Failed to copy %s"
	errDelete = "Failed to delete %s"
	errClient = "Failed to create client"
	// errParquetWriter previously read "wirter"; the spelling is corrected.
	errParquetWriter = "Failed to create parquet writer %s"
	errDownload      = "Failed to download file %s"
	// blobNotExists is matched as a substring of error messages, so it must
	// stay byte-for-byte equal to the text returned by the service.
	blobNotExists = "The specified blob does not exist."
)
|
||||
|
||||
func NewBackup(azureAccount string, azureAccountKey string, containerName string) *Backup {
|
||||
bk := &Backup{
|
||||
azureAccount: azureAccount,
|
||||
azureAccountKey: azureAccountKey,
|
||||
containerName: containerName,
|
||||
}
|
||||
bk.cred, _ = azblob.NewSharedKeyCredential(azureAccount, azureAccountKey)
|
||||
return bk
|
||||
}
|
||||
|
||||
// remove deletes a file from Azure Blob Storage.
|
||||
//
|
||||
// Parameters:
|
||||
// - context: context, Backgroud as of now.
|
||||
// - filePath: The path of the file to be removed.
|
||||
//
|
||||
// Returns:
|
||||
// - err: return err if occur otherwise nil.
|
||||
//
|
||||
// Deletes appendblock blob from storage. If the removal operation encounters an error, it logs
|
||||
// an error message and returns error. Otherwise, it returns nil to indicate
|
||||
// a successful removal.
|
||||
func (b *Backup) remove(ctx context.Context, filePath string) error {
|
||||
|
||||
// Construct the full path of the file in Azure Blob Storage
|
||||
fullPath := b.azureBlobURL(b.getContainerPath(b.containerName), filePath)
|
||||
|
||||
client, err := appendblob.NewClientWithSharedKeyCredential(fullPath, b.cred, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
_, err = client.Delete(context.Background(), nil)
|
||||
return err
|
||||
}
|
||||
|
||||
// Move, copy a blob from Azure Blob Storage to Azure Blob Storage as cool tier block blob type.
|
||||
// Set TTL to new blob and remove the original blob
|
||||
// Parameters:
|
||||
// - context: context, Backgroud as of now.
|
||||
// - filePath: path of the src file.
|
||||
//
|
||||
// Returns:
|
||||
// - err: An error, if any, that occurred during the SAS token generation process.
|
||||
func (b *Backup) Move(ctx context.Context, filePath string) error {
|
||||
|
||||
backupPath := filePath
|
||||
|
||||
// Construct the full path of the src file in Azure Blob Storage
|
||||
srcPath := b.azureBlobURL(b.getContainerPath(b.containerName), filePath)
|
||||
|
||||
// Construct the full path of the dest file in Azure Blob Storage
|
||||
destPath := b.azureBlobURL(b.getContainerPath(backupContainerName), backupPath)
|
||||
|
||||
// Generate a Shared Access Signature (SAS) token for src file with read permissions
|
||||
srcSAS, _ := b.generateSASToken(filePath, sas.BlobPermissions{Read: true}, b.containerName)
|
||||
|
||||
client, err := blockblob.NewClientWithSharedKeyCredential(destPath, b.cred, &blockblob.ClientOptions{
|
||||
ClientOptions: policy.ClientOptions{
|
||||
Retry: policy.RetryOptions{
|
||||
MaxRetries: 1,
|
||||
MaxRetryDelay: 1 * time.Minute,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
tier := blob.AccessTierCool // Set cool tier type as cold tier not supported for this version of sdk
|
||||
_, err = client.UploadBlobFromURL(ctx, fmt.Sprintf("%s?%s", srcPath, srcSAS), &blockblob.UploadBlobFromURLOptions{
|
||||
Tier: &tier,
|
||||
})
|
||||
|
||||
if err != nil && !strings.Contains(err.Error(), blobNotExists) {
|
||||
logger.Err(err).Msg(fmt.Sprintf(errCopy, srcPath))
|
||||
return err
|
||||
}
|
||||
|
||||
err = b.setTTL(ctx, destPath)
|
||||
if err != nil && !strings.Contains(err.Error(), blobNotExists) {
|
||||
logger.Err(err).Msg(fmt.Sprintf(errTTL, destPath))
|
||||
}
|
||||
|
||||
err = b.remove(ctx, filePath)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), blobNotExists) {
|
||||
return nil
|
||||
}
|
||||
logger.Err(err).Msg(fmt.Sprintf(errDelete, destPath))
|
||||
}
|
||||
|
||||
return err
|
||||
|
||||
}
|
||||
|
||||
// setTTL set a Time-to-Live (TTL) expiration policy to an Azure Blob Storage file.
|
||||
//
|
||||
// Parameters:
|
||||
// - context: context, Backgroud as of now.
|
||||
// - fileUrl: The URL of the Azure Blob Storage file to which the TTL policy will be added.
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error, if any, that occurred during the TTL policy addition process. It returns nil if successful.
|
||||
//
|
||||
// The setTTL function is responsible for adding a Time-to-Live (TTL) expiration policy
|
||||
// to a specific file located in Azure Blob Storage. A TTL policy allows you to specify
|
||||
// a duration after which the file will be automatically deleted from storage.
|
||||
func (b *Backup) setTTL(ctx context.Context, fileUrl string) error {
|
||||
|
||||
blockBlobClient, err := blockblob.NewClientWithSharedKeyCredential(fileUrl, b.cred, &blockblob.ClientOptions{
|
||||
ClientOptions: policy.ClientOptions{
|
||||
Retry: policy.RetryOptions{
|
||||
MaxRetries: 1,
|
||||
MaxRetryDelay: 1 * time.Minute,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// set expiry on block blob 4 hours relative to now
|
||||
_, err = blockBlobClient.SetExpiry(context.Background(), blockblob.ExpiryTypeRelativeToNow(ttl*int64(time.Hour)), nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// validate set expiry operation
|
||||
resp, err := blockBlobClient.GetProperties(ctx, nil)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if resp.ExpiresOn == nil {
|
||||
return nil
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateAzureSASToken generates a Shared Access Signature (SAS) token for an Azure Blob Storage blob.
|
||||
//
|
||||
// Parameters:
|
||||
// - blobName: The name of the blob for which the SAS token is generated.
|
||||
// - permission: The BlobPermissions object specifying the permissions granted by the SAS token.
|
||||
// - containerName: The containerName of the blob.
|
||||
//
|
||||
// Returns:
|
||||
// - token: The generated SAS token string.
|
||||
// - err: An error, if any, that occurred during the SAS token generation process.
|
||||
func (b *Backup) generateSASToken(blobName string, permission sas.BlobPermissions, containerName string) (token string, err error) {
|
||||
// blob name is something like this: 19UUA56873A044568/2023/01/11/raw.log
|
||||
cred, err := azblob.NewSharedKeyCredential(b.azureAccount, b.azureAccountKey)
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("[backup]:[NewSharedKeyCredential]")
|
||||
return
|
||||
}
|
||||
sasQueryParams, err := sas.BlobSignatureValues{
|
||||
Protocol: sas.ProtocolHTTPS,
|
||||
StartTime: time.Now().UTC().Add(-1 * time.Hour), // reduce an hour from current time to avoid signature issue
|
||||
ExpiryTime: time.Now().UTC().Add(3 * 365 * 24 * time.Hour), // 3 years-ish
|
||||
Permissions: elptr.ElPtr(permission).String(),
|
||||
ContainerName: containerName,
|
||||
BlobName: blobName,
|
||||
}.SignWithSharedKey(cred)
|
||||
|
||||
if err != nil {
|
||||
logger.Err(err).Msg("Failed to sas.BlobSignatureValues")
|
||||
return
|
||||
}
|
||||
|
||||
token = sasQueryParams.Encode()
|
||||
return
|
||||
}
|
||||
|
||||
func (b *Backup) azureBlobURL(basePath string, filePath string) string {
|
||||
finalPath, _ := url.JoinPath(basePath, filePath)
|
||||
return finalPath
|
||||
}
|
||||
|
||||
func (b *Backup) getContainerPath(containerName string) string {
|
||||
return fmt.Sprintf("https://%s.blob.core.windows.net/%s/", b.azureAccount, containerName)
|
||||
}
|
||||
|
||||
// ToParquet converts data from an Azure Blob csv to a Parquet file and stores it in another container.
|
||||
//
|
||||
// This function takes an `blobName` representing the source Azure Blob csv and performs the following steps:
|
||||
//
|
||||
// 1. Downloads data from the source Azure Blob identified by `blobName`.
|
||||
// 2. Converts the retrieved data into a Parquet file using Parquet Writer.
|
||||
//
|
||||
// Parameters:
|
||||
// - blobName: The name of the source Azure Blob csv that contains the data to be converted to Parquet.
|
||||
//
|
||||
// Returns:
|
||||
// - error: An error logs and returns if any step of the conversion or storage process encounters an issue. It returns nil on success.
|
||||
func (b *Backup) ToParquet(blobName string, guard chan struct{}) error {
|
||||
|
||||
var err error
|
||||
srcBlobURL := b.azureBlobURL(b.getContainerPath(backupContainerName), blobName)
|
||||
parquetBlobName := b.changeFileExt(blobName, "parquet")
|
||||
parquetBlobURL := b.azureBlobURL(b.getContainerPath(azureRawCompressedContainerName), parquetBlobName)
|
||||
|
||||
client, err := blockblob.NewClientWithSharedKeyCredential(srcBlobURL, b.cred, nil)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), blobNotExists) {
|
||||
return nil
|
||||
}
|
||||
logger.Err(err).Msg(errClient)
|
||||
return err
|
||||
}
|
||||
|
||||
downloadResp, err := client.DownloadStream(context.Background(), nil)
|
||||
if err != nil {
|
||||
if strings.Contains(err.Error(), blobNotExists) {
|
||||
return nil
|
||||
}
|
||||
logger.Err(err).Msg(fmt.Sprintf(errDownload, srcBlobURL))
|
||||
return err
|
||||
}
|
||||
|
||||
defer downloadResp.Body.Close()
|
||||
|
||||
csvToParquet := NewCSVtoParquet(b.azureAccount, b.azureAccountKey, parquetBlobURL)
|
||||
|
||||
guard <- struct{}{} // for reader
|
||||
go func() {
|
||||
defer func() {
|
||||
<-guard
|
||||
}()
|
||||
csvToParquet.Read(downloadResp.Body)
|
||||
}()
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
guard <- struct{}{} // for writer
|
||||
go func(w *sync.WaitGroup) {
|
||||
defer func() {
|
||||
w.Done()
|
||||
<-guard
|
||||
}()
|
||||
err = csvToParquet.Write()
|
||||
if err != nil {
|
||||
logger.Err(err).Msg(fmt.Sprintf(errParquetWriter, parquetBlobURL))
|
||||
}
|
||||
}(&wg)
|
||||
|
||||
wg.Wait()
|
||||
return err
|
||||
|
||||
}
|
||||
|
||||
// changeFileExt updates the file extension of a given blob name and returns the modified blob name.
|
||||
//
|
||||
// This method takes an existing `blobName` and replaces its file extension with the specified `fileExt`.
|
||||
// It then returns the modified blob name as a string.
|
||||
//
|
||||
// Parameters:
|
||||
// - blobName: The original blob name, including its current file extension.
|
||||
// - fileExt: The new file extension to replace the existing one. The `fileExt` should not include the dot (e.g., "txt").
|
||||
//
|
||||
// Returns:
|
||||
// - string: The modified blob name with the updated file extension.
|
||||
func (b *Backup) changeFileExt(blobName, fileExt string) string {
|
||||
if len(fileExt) > 0 && string(fileExt[0]) == "." {
|
||||
fileExt = fileExt[1:]
|
||||
}
|
||||
if len(blobName) == 0 {
|
||||
return fmt.Sprintf(".%s", fileExt)
|
||||
}
|
||||
arr := strings.Split(blobName, ".")
|
||||
if len(arr) == 1 {
|
||||
return fmt.Sprintf("%s.%s", arr[0], fileExt)
|
||||
}
|
||||
arr[len(arr)-1] = fileExt
|
||||
return strings.Join(arr, ".")
|
||||
}
|
||||
167
pkg/remotefileupload/backup_test.go
Normal file
167
pkg/remotefileupload/backup_test.go
Normal file
@@ -0,0 +1,167 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
// guard is the limiter channel handed to ToParquet in TestToParquet; it has
// room for 100 entries.
var guard = make(chan struct{}, 100)
|
||||
|
||||
func TestAzureBlobURL(t *testing.T) {
|
||||
backup := NewBackup("", "", "")
|
||||
var inputs = []struct {
|
||||
base string
|
||||
filePath string
|
||||
expected string
|
||||
}{
|
||||
{ // Test case 1: basePath is empty, filePath is empty
|
||||
base: "",
|
||||
filePath: "",
|
||||
expected: "",
|
||||
},
|
||||
{ // Test case 2: basePath is not empty, filePath is empty
|
||||
base: "/base",
|
||||
filePath: "",
|
||||
expected: "/base",
|
||||
},
|
||||
{ // Test case 3: basePath is not empty, filePath is not empty
|
||||
base: "/base",
|
||||
filePath: "dir1/dir2",
|
||||
expected: "/base/dir1/dir2",
|
||||
},
|
||||
{ // Test case 4: basePath is empty, filePath is not empty
|
||||
base: "",
|
||||
filePath: "dir1/dir2/dir3/raw.csv",
|
||||
expected: "dir1/dir2/dir3/raw.csv",
|
||||
},
|
||||
{ // Test case 4: worng basePath, filePath file path
|
||||
base: "/base//",
|
||||
filePath: "//dir1/dir2/dir3/raw.csv",
|
||||
expected: "/base/dir1/dir2/dir3/raw.csv",
|
||||
},
|
||||
}
|
||||
|
||||
for _, input := range inputs {
|
||||
result := backup.azureBlobURL(input.base, input.filePath)
|
||||
if result != input.expected {
|
||||
t.Errorf("Expected %s, got %s", input.expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestRemove(t *testing.T) {
|
||||
t.Skip()
|
||||
ctx := context.Background()
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
fakePath := "fakeDir1/fakeDir2/file.txt"
|
||||
|
||||
err := backup.remove(ctx, fakePath)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got %v", err.Error())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestSetTTL(t *testing.T) {
|
||||
t.Skip()
|
||||
ctx := context.Background()
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
fakeFileUrl := "https://fakeAccount.blob.core.windows.net/fakeContainer/fakeDir1/fakeDir2/file.txt"
|
||||
|
||||
err := backup.setTTL(ctx, fakeFileUrl)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got %v", err.Error())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
func TestGetContainerPath(t *testing.T) {
|
||||
inputes := []struct {
|
||||
containerName string
|
||||
expected string
|
||||
}{
|
||||
{"container1", "https://fakeAccount.blob.core.windows.net/container1/"},
|
||||
{"container2", "https://fakeAccount.blob.core.windows.net/container2/"},
|
||||
{"container3", "https://fakeAccount.blob.core.windows.net/container3/"},
|
||||
}
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
|
||||
for _, input := range inputes {
|
||||
result := backup.getContainerPath(input.containerName)
|
||||
if result != input.expected {
|
||||
t.Errorf("Expected %v, got %v", input.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestMove(t *testing.T) {
|
||||
t.Skip()
|
||||
ctx := context.Background()
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
|
||||
fakeFilePath := "fakeDir1/fakeDir2/file.txt"
|
||||
|
||||
err := backup.Move(ctx, fakeFilePath)
|
||||
|
||||
if err != nil {
|
||||
t.Errorf("Expected no error, got %v", err.Error())
|
||||
}
|
||||
}
|
||||
|
||||
func TestChangeFileExt(t *testing.T) {
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
inputs := []struct {
|
||||
fileUrl string
|
||||
ext string
|
||||
expected string
|
||||
}{
|
||||
{"document.pdf", "txt", "document.txt"},
|
||||
{"dir1/dir2/document.txt", "pdf", "dir1/dir2/document.pdf"},
|
||||
{"", "pdf", ".pdf"},
|
||||
{"document", "txt", "document.txt"},
|
||||
{"document", ".txt", "document.txt"},
|
||||
{"document.txt", ".pdf", "document.pdf"},
|
||||
{"https://fakeAccount.blob.core.windows.net/fakeContainer/fakeVin/fakeVersion/yyyy/mm/dd/raw.csv", ".parquet", "https://fakeAccount.blob.core.windows.net/fakeContainer/fakeVin/fakeVersion/yyyy/mm/dd/raw.parquet"},
|
||||
{"fakeVin/fakeVersion/yyyy/mm/dd/file.txt", "pdf", "fakeVin/fakeVersion/yyyy/mm/dd/file.pdf"},
|
||||
}
|
||||
|
||||
for _, input := range inputs {
|
||||
result := backup.changeFileExt(input.fileUrl, input.ext)
|
||||
if result != input.expected {
|
||||
t.Errorf("Expected %v, got %v", input.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestToParquet(t *testing.T) {
|
||||
|
||||
t.Skip()
|
||||
backup := NewBackup("fakeAccount", "fakeAccountKey", "fakeContainer")
|
||||
|
||||
fakeFilePath := []string{
|
||||
"fakeVin1/fakeVersion1/yyyy/mm/dd/raw.csv",
|
||||
"fakeVin2/fakeVersion2/yyyy/mm/dd/raw.csv",
|
||||
"fakeVin3/fakeVersio3/yyyy/mm/dd/raw.csv",
|
||||
}
|
||||
|
||||
var wg sync.WaitGroup
|
||||
for _, url := range fakeFilePath {
|
||||
wg.Add(1)
|
||||
go func(url string) {
|
||||
defer wg.Done()
|
||||
err := backup.ToParquet(url, guard)
|
||||
if err != nil {
|
||||
t.Errorf("Expected nil error, got %v", err.Error())
|
||||
}
|
||||
}(url)
|
||||
}
|
||||
|
||||
wg.Wait()
|
||||
|
||||
}
|
||||
259
pkg/remotefileupload/batchuploader.go
Normal file
259
pkg/remotefileupload/batchuploader.go
Normal file
@@ -0,0 +1,259 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"net/url"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/azcore/policy"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/appendblob"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/bloberror"
|
||||
"github.com/Azure/azure-sdk-for-go/sdk/storage/azblob/sas"
|
||||
|
||||
"github.com/pkg/errors"
|
||||
"fiskerinc.com/modules/utils/elptr"
|
||||
)
|
||||
|
||||
// Comments on other versions I created
|
||||
// Using a buffer instead of byte array, it would randomly drop one number
|
||||
// was doing buffer.Write(separator), buffer.Write(x) and I would get something like 4,3,,8,1
|
||||
// Using a rwmutex on map, and a mutex on []byte, read to check out the item, then write if we were assigning
|
||||
// couldn't get it to work. Would drop numbers as well
|
||||
|
||||
// Using the sync on the inner string proves to be slightly more performant than locking the whole thing
|
||||
|
||||
var (
|
||||
RunBatchTimer = true // For local testing, if you don't want to upload to azure, set to false
|
||||
ConnectToAzBlob = true // For local testing, set to false to not use credentials
|
||||
batchMaxSize = envtool.GetEnvInt("AZURE_STORAGE_BATCH_UPLOAD_MAX_MIB", 2)
|
||||
)
|
||||
|
||||
func NewAzureBatchUploader(azureStorageContainerName string, azureFileExtension string, messageBatchTimeSeconds int, batchSeparator string) (Uploader, error) {
|
||||
a := &AzureBatchUploader{
|
||||
accountName: azureAccount,
|
||||
containerName: azureStorageContainerName,
|
||||
fileExtension: azureFileExtension,
|
||||
}
|
||||
|
||||
var cred *azblob.SharedKeyCredential
|
||||
var err error
|
||||
if ConnectToAzBlob {
|
||||
cred, err = azblob.NewSharedKeyCredential(a.accountName, azureAccountKey)
|
||||
if err != nil {
|
||||
return a, errors.WithStack(err)
|
||||
}
|
||||
}
|
||||
|
||||
containerPath := fmt.Sprintf("https://%s.blob.core.windows.net/%s/", a.accountName, a.containerName)
|
||||
|
||||
a.containerPath = containerPath
|
||||
a.azureCredentials = cred
|
||||
|
||||
a.separator = []byte(batchSeparator)
|
||||
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
||||
Mutex: sync.Mutex{}}
|
||||
|
||||
if RunBatchTimer {
|
||||
a.batchTicker = time.NewTicker(time.Duration(messageBatchTimeSeconds) * time.Second)
|
||||
go func() {
|
||||
for {
|
||||
select {
|
||||
case <-a.batchTicker.C:
|
||||
a.uploadNow()
|
||||
}
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
return a, nil
|
||||
}
|
||||
|
||||
// So the creation of logs to send
|
||||
type AzureBatchUploader struct {
|
||||
accountName string
|
||||
containerName string
|
||||
fileExtension string
|
||||
containerPath string
|
||||
azureCredentials *azblob.SharedKeyCredential
|
||||
logsToSend *logsMapMutex // A map of strings
|
||||
separator []byte
|
||||
batchTicker *time.Ticker
|
||||
}
|
||||
|
||||
// Ideally we lock the map on when we have to change insert a new value, otherwise
|
||||
// we rely on the string itself to lock
|
||||
type logsMapMutex struct {
|
||||
logs map[string]*stringMutex
|
||||
sync.Mutex // Not really the mutex I want, will probably swap for the sync.Map later
|
||||
}
|
||||
|
||||
// Removing mutex, can't easily guarantee that the outside map wo't change as we are trying to modify our
|
||||
// string, without the mutex just becoming repetitive
|
||||
type stringMutex struct {
|
||||
Body []byte
|
||||
logValue LogPayload
|
||||
sasToken string
|
||||
}
|
||||
|
||||
// Instead of directly uploading, we need to delay our upload
|
||||
func (a *AzureBatchUploader) Upload(block []byte, logValue LogPayload, filePath ...string) (string, error) {
|
||||
blobURL := a.azureBlobURL(a.containerPath, filePath)
|
||||
// Lock a.logs
|
||||
a.logsToSend.Mutex.Lock()
|
||||
sendMap := a.logsToSend
|
||||
defer sendMap.Unlock()
|
||||
// a.logs cant' get changed
|
||||
mstring, ok := sendMap.logs[blobURL]
|
||||
if !ok {
|
||||
var sasTokenURL string
|
||||
// If we don't have the log inside, we likely need to generate the file, and the sas token. Should implement some sort of caching for this though
|
||||
if ConnectToAzBlob {
|
||||
blobPath := a.azureBlobFilePath(filePath)
|
||||
var err error
|
||||
sasTokenURL, err = a.generateSASToken(blobPath)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
sendMap.logs[blobURL] = &stringMutex{
|
||||
Body: block,
|
||||
logValue: logValue,
|
||||
sasToken: sasTokenURL,
|
||||
}
|
||||
} else {
|
||||
// Tried using a buffer, but did not seem to improve performance
|
||||
mstring.Body = append(append(mstring.Body, a.separator...), block...)
|
||||
}
|
||||
|
||||
// To prevent us from taking up too much memory, we will send out data early
|
||||
// This will slow down other logs coming in, so do not make this value too small
|
||||
if len(sendMap.logs[blobURL].Body) > 1024*1024*batchMaxSize {
|
||||
a.uploadBlock(blobURL, sendMap.logs[blobURL], context.Background())
|
||||
sendMap.logs[blobURL].Body = make([]byte, 0)
|
||||
}
|
||||
blobURL = blobURL + "?" + sendMap.logs[blobURL].sasToken
|
||||
return blobURL, nil
|
||||
}
|
||||
|
||||
// Called once the batch has been expired, we actually upload. Can probably just call the azure upload service itself,
|
||||
// but refactor later
|
||||
func (a *AzureBatchUploader) uploadNow() {
|
||||
// Swap in new log holder
|
||||
// Acquire a lock on a.logs. Now we will wait until other writes are finished, but someone could lock behind us
|
||||
a.logsToSend.Lock()
|
||||
messageMap := a.logsToSend
|
||||
// No longer swap the whole object, just replace the map. Will create a backup for the upload time unfortunately
|
||||
|
||||
// Unlocking
|
||||
defer messageMap.Unlock()
|
||||
ctx := context.Background()
|
||||
for blobURL, block := range messageMap.logs {
|
||||
a.uploadBlock(blobURL, block, ctx)
|
||||
}
|
||||
// While we still have a lock on the map, we swap it out
|
||||
messageMap.logs = make(map[string]*stringMutex)
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) uploadBlock(blobURL string, block *stringMutex, ctx context.Context) (err error) {
|
||||
client, err := appendblob.NewClientWithSharedKeyCredential(blobURL, a.azureCredentials, &appendblob.ClientOptions{
|
||||
ClientOptions: policy.ClientOptions{
|
||||
Retry: policy.RetryOptions{
|
||||
MaxRetries: 1,
|
||||
MaxRetryDelay: 1 * time.Minute,
|
||||
},
|
||||
},
|
||||
})
|
||||
if err != nil {
|
||||
logger.Error().Str("Message", "Failed to create new client with shared key credential").Err(err).Send()
|
||||
return
|
||||
}
|
||||
logValue := block.logValue
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("sending block of length %d to azure container: %s", len(block.Body), blobURL)
|
||||
|
||||
block.Body = append(block.Body, a.separator...)
|
||||
// Instead of trying to send data to a blob, and then determining if it exists, lets just check if it exists
|
||||
_, err = client.GetProperties(ctx, nil)
|
||||
if err != nil {
|
||||
if !bloberror.HasCode(err, bloberror.BlobNotFound) {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
_, err = client.Create(ctx, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Send()
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
body := block.Body
|
||||
// 2014*1024*4 == 4 MiB,
|
||||
MiB4 := 1024 * 1024 * 4
|
||||
for len(body) > 0 {
|
||||
max := MiB4
|
||||
if len(body) < max {
|
||||
max = len(body)
|
||||
}
|
||||
reader := NopCloser(bytes.NewReader(body[0:max]))
|
||||
_, err = client.AppendBlock(ctx, reader, nil)
|
||||
if err != nil {
|
||||
logger.Error().Str(logValue.Title, logValue.Value).Err(err).Msgf("Max: %d, len(body): %d", max, len(body))
|
||||
return
|
||||
}
|
||||
body = body[max:]
|
||||
logger.Debug().Str(logValue.Title, logValue.Value).Msgf("upload complete")
|
||||
}
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) azureBlobFilePath(filepath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
||||
finalPath, _ := url.JoinPath("", filepath...)
|
||||
finalPath, _ = url.JoinPath(finalPath, fileName)
|
||||
|
||||
return finalPath
|
||||
}
|
||||
|
||||
// basePath is the url to the blob storage (<account>.azurebloburl.net/<containername>)
|
||||
// filepath will be added onto basepath /<your>/<file>/<path>
|
||||
func (a *AzureBatchUploader) azureBlobURL(basePath string, filePath []string) string {
|
||||
fileName := fmt.Sprintf("%s%s", "raw", a.fileExtension)
|
||||
finalPath, _ := url.JoinPath(basePath, filePath...)
|
||||
finalPath, _ = url.JoinPath(finalPath, fileName)
|
||||
|
||||
return finalPath
|
||||
}
|
||||
|
||||
func (a *AzureBatchUploader) generateSASToken(blobName string) (token string, err error) {
|
||||
// blob name is something like this: 19UUA56873A044568/2023/01/11/raw.log
|
||||
sasQueryParams, err := sas.BlobSignatureValues{
|
||||
Protocol: sas.ProtocolHTTPS,
|
||||
StartTime: time.Now().UTC().Add(-1 * time.Hour), // reduce an hour from current time to avoid signature issue
|
||||
ExpiryTime: time.Now().UTC().Add(3 * 365 * 24 * time.Hour), // 3 years-ish
|
||||
Permissions: elptr.ElPtr(sas.BlobPermissions{Read: true}).String(),
|
||||
ContainerName: a.containerName,
|
||||
BlobName: blobName,
|
||||
}.SignWithSharedKey(a.azureCredentials)
|
||||
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("Failed to sas.BlobSignatureValues")
|
||||
return
|
||||
}
|
||||
|
||||
token = sasQueryParams.Encode()
|
||||
return
|
||||
}
|
||||
|
||||
/* func MutexLocked(m *sync.Mutex) bool {
|
||||
state := reflect.ValueOf(m).Elem().FieldByName("state")
|
||||
const mutexLocked int64 = 1
|
||||
return state.Int()&mutexLocked == mutexLocked
|
||||
} */
|
||||
407
pkg/remotefileupload/batchuploader_test.go
Normal file
407
pkg/remotefileupload/batchuploader_test.go
Normal file
@@ -0,0 +1,407 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/gob"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestBlockUpload(t *testing.T) {
|
||||
t.Skip()
|
||||
azureAccount = "REPLACE_ME"
|
||||
azureAccountKey = "REPLACE_ME"
|
||||
|
||||
RunBatchTimer = false
|
||||
uploader, err := NewAzureBatchUploader("trex-logs", ".txt", 30, ",")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
|
||||
p, ok := uploader.(*AzureBatchUploader)
|
||||
if !ok {
|
||||
t.Error("Could not convert uploader to azure batch uploader")
|
||||
}
|
||||
typedUploader := *p
|
||||
|
||||
filePath := typedUploader.azureBlobURL(typedUploader.containerPath, []string{"4mibUpload"})
|
||||
fakeFile := stringMutex{
|
||||
Body: []byte{},
|
||||
logValue: LogPayload{Title: "4mibTetFile", Value: "4mibUpload"},
|
||||
}
|
||||
|
||||
// Making it 5 Mibs
|
||||
for x := 0; len(fakeFile.Body) < 1024*1024*5; x++ {
|
||||
fakeFile.Body = append(fakeFile.Body, []byte(fmt.Sprintf("%d,", x))...)
|
||||
}
|
||||
err = typedUploader.uploadBlock(filePath, &fakeFile, context.Background())
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlockUploadCheckPath(t *testing.T) {
|
||||
t.Skip()
|
||||
azureAccount = "REPLACE_ME"
|
||||
azureAccountKey = "REPLACE_ME"
|
||||
|
||||
uploader, err := NewAzureBatchUploader("trex-logs", ".txt", 30, ",")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
logP := LogPayload{
|
||||
Title: "Test",
|
||||
Value: "Value",
|
||||
}
|
||||
path, err := uploader.Upload([]byte("Hello This is a file path test"), logP, "/file", "test")
|
||||
if err != nil{
|
||||
t.Error(err)
|
||||
return
|
||||
}
|
||||
|
||||
p, ok := uploader.(*AzureBatchUploader)
|
||||
if !ok {
|
||||
t.Error("Could not convert uploader to azure batch uploader")
|
||||
return
|
||||
}
|
||||
typedUploader := *p
|
||||
|
||||
typedUploader.uploadNow()
|
||||
// Going to this path should give you the file
|
||||
t.Log(path)
|
||||
}
|
||||
// Adds the same number to all threads in a goroutine
|
||||
func TestMutexValues(t *testing.T) {
|
||||
a := AzureBatchUploader{
|
||||
accountName: "fakeName",
|
||||
containerName: "fakeContainer",
|
||||
fileExtension: ".txt",
|
||||
separator: []byte{','},
|
||||
}
|
||||
|
||||
ConnectToAzBlob = false
|
||||
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
||||
Mutex: sync.Mutex{}}
|
||||
|
||||
logP := LogPayload{
|
||||
Title: "Test",
|
||||
Value: "Value",
|
||||
}
|
||||
gr := sync.WaitGroup{}
|
||||
gr.Add(100)
|
||||
|
||||
for x := 0; x < 100; x++ {
|
||||
//t.Logf("Number is %d\n", x)
|
||||
go func(y int) {
|
||||
for z := 0; z < 100; z++ {
|
||||
_, _ = a.Upload([]byte(strconv.Itoa(y)), logP, fmt.Sprintf("/file/test%d", z))
|
||||
}
|
||||
gr.Done()
|
||||
}(x)
|
||||
}
|
||||
|
||||
gr.Wait()
|
||||
sendMap := *(a.logsToSend)
|
||||
|
||||
for x := 0; x < 100; x++ {
|
||||
filePath := fmt.Sprintf("file/test%d/raw.txt", x)
|
||||
mstring, ok := sendMap.logs[filePath]
|
||||
if !ok {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
t.Log(string(mstring.Body))
|
||||
if !checkNumberString(string(mstring.Body), 100, t) {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Adds all numbers to one thread in a goroutine
|
||||
func TestMutexValuesOrderSwap(t *testing.T) {
|
||||
a := AzureBatchUploader{
|
||||
accountName: "fakeName",
|
||||
containerName: "fakeContainer",
|
||||
fileExtension: ".txt",
|
||||
separator: []byte{','},
|
||||
}
|
||||
|
||||
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
||||
Mutex: sync.Mutex{}}
|
||||
|
||||
logP := LogPayload{
|
||||
Title: "Test",
|
||||
Value: "Value",
|
||||
}
|
||||
gr := sync.WaitGroup{}
|
||||
gr.Add(100)
|
||||
|
||||
for x := 0; x < 100; x++ {
|
||||
//t.Logf("Number is %d\n", x)
|
||||
go func(y int) {
|
||||
for z := 0; z < 100; z++ {
|
||||
_, _ = a.Upload([]byte(strconv.Itoa(z)), logP, fmt.Sprintf("/file/test%d", y))
|
||||
}
|
||||
gr.Done()
|
||||
}(x)
|
||||
}
|
||||
|
||||
gr.Wait()
|
||||
sendMap := *(a.logsToSend)
|
||||
|
||||
for x := 0; x < 100; x++ {
|
||||
filePath := fmt.Sprintf("file/test%d/raw.txt", x)
|
||||
mstring, ok := sendMap.logs[filePath]
|
||||
if !ok {
|
||||
t.Fail()
|
||||
}
|
||||
|
||||
t.Log(string(mstring.Body))
|
||||
if !checkNumberString(string(mstring.Body), 100, t) {
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestDoesMapSwap(t *testing.T) {
|
||||
t.Skip()
|
||||
|
||||
azureAccount = "REPLACE_ME"
|
||||
azureAccountKey = "REPLACE_ME"
|
||||
RunBatchTimer = false
|
||||
|
||||
up, err := NewAzureBatchUploader("trex-logs", ".log", 60, "\n")
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
p, ok := up.(*AzureBatchUploader)
|
||||
if !ok {
|
||||
t.Error("Could not convert uploader to azure batch uploader")
|
||||
}
|
||||
a := *p
|
||||
logP := LogPayload{
|
||||
Title: "Test",
|
||||
Value: "Value",
|
||||
}
|
||||
|
||||
wg := sync.WaitGroup{}
|
||||
wg.Add(99)
|
||||
a.Upload([]byte(strconv.Itoa(0)), logP, "/file/test")
|
||||
for x := 1; x < 100; x++ {
|
||||
//t.Logf("Number is %d\n", x)
|
||||
go func(x int) {
|
||||
time.Sleep(time.Millisecond * time.Duration(rand.Int63n(10)))
|
||||
a.Upload([]byte(strconv.Itoa(x)), logP, "/file/test")
|
||||
wg.Done()
|
||||
}(x)
|
||||
}
|
||||
time.Sleep(5 * time.Millisecond)
|
||||
a.uploadNow()
|
||||
wg.Wait()
|
||||
a.uploadNow()
|
||||
}
|
||||
|
||||
// Writes is the number of numbers to write, threads is how many cars are sending in data
|
||||
func benchmarkMutex1(writes, threads int, b *testing.B) {
|
||||
a := AzureBatchUploader{
|
||||
accountName: "fakeName",
|
||||
containerName: "fakeContainer",
|
||||
fileExtension: ".txt",
|
||||
separator: []byte{','},
|
||||
}
|
||||
|
||||
a.logsToSend = &logsMapMutex{logs: map[string]*stringMutex{},
|
||||
Mutex: sync.Mutex{}}
|
||||
|
||||
logP := LogPayload{
|
||||
Title: "Test",
|
||||
Value: "Value",
|
||||
}
|
||||
gr := sync.WaitGroup{}
|
||||
gr.Add(threads)
|
||||
for x := 0; x < threads; x++ {
|
||||
//t.Logf("Number is %d\n", x)
|
||||
go func(y int) {
|
||||
for z := 0; z < writes; z++ {
|
||||
p, _ := a.Upload([]byte(strconv.Itoa(z)), logP, fmt.Sprintf("/file/test%d", y))
|
||||
_ = p
|
||||
}
|
||||
gr.Done()
|
||||
}(x)
|
||||
|
||||
}
|
||||
gr.Wait()
|
||||
}
|
||||
|
||||
func BenchmarkMutex1w100t100(b *testing.B) {
|
||||
benchmarkMutex1(100, 100, b)
|
||||
}
|
||||
|
||||
func BenchmarkMutex1w100t1000(b *testing.B) {
|
||||
benchmarkMutex1(100, 1000, b)
|
||||
}
|
||||
|
||||
func BenchmarkMutex1w100t10000(b *testing.B) {
|
||||
benchmarkMutex1(100, 10000, b)
|
||||
}
|
||||
|
||||
// BenchmarkMutex1w100t100000-16 1000000000 0.5827 ns/op 0 B/op 0 allocs/op
|
||||
func BenchmarkMutex1w100t100000(b *testing.B) {
|
||||
benchmarkMutex1(100, 10000, b)
|
||||
}
|
||||
|
||||
// BenchmarkMutex1w1000t100000-16 1 4727771437 ns/op 8845213144 B/op 208939787 allocs/op
|
||||
// BenchmarkMutex1w1000t100000-16 1 6521959624 ns/op 9321626472 B/op 228890209 allocs/op
|
||||
func BenchmarkMutex1w1000t100000(b *testing.B) {
|
||||
benchmarkMutex1(1000, 10000, b)
|
||||
}
|
||||
|
||||
// checkNumberString reports whether the comma-separated integers in str,
// once sorted, start with 0, 1, ..., max-1. Numbers beyond the first max are
// not inspected.
//
// A field that is not an integer fails the test immediately via t.FailNow.
// Fewer than max numbers yields false instead of an index-out-of-range panic.
func checkNumberString(str string, max int, t *testing.T) (success bool) {
	fields := strings.Split(str, ",")
	numbers := make([]int, len(fields))
	for i, field := range fields {
		n, err := strconv.Atoi(field)
		if err != nil {
			t.Error(err)
			t.FailNow()
		}
		numbers[i] = n
	}

	// Too few numbers can never cover [0, max).
	if len(numbers) < max {
		return false
	}

	sort.Ints(numbers)
	for want := 0; want < max; want++ {
		if numbers[want] != want {
			return false
		}
	}
	return true
}
|
||||
|
||||
// checkNumberStringRange reports whether the comma-separated integers in str
// are, once sorted, exactly start, start+1, ..., end-1 (inclusive start,
// exclusive end).
//
// A field that is not an integer fails the test immediately via t.FailNow.
// The end bound is now enforced; it used to be ignored, so a sequence that
// stopped short of end or ran past it was accepted.
func checkNumberStringRange(str string, start int, end int, t *testing.T) (success bool) {
	fields := strings.Split(str, ",")
	numbers := make([]int, len(fields))
	for i, field := range fields {
		n, err := strconv.Atoi(field)
		if err != nil {
			t.Error(err)
			t.FailNow()
		}
		numbers[i] = n
	}

	sort.Ints(numbers)

	want := start
	for _, n := range numbers {
		if n != want {
			return false
		}
		want++
	}
	// The sequence must stop exactly at end.
	return want == end
}
|
||||
|
||||
// This uses only ~38.439708 megabytes, in actual log sizing
|
||||
func BenchmarkTotalSize(b *testing.B) {
|
||||
// This is a longish message
|
||||
testMSG := `{"level":"error","timestamp":"2022-Nov-30 22:17:26.250332","line_number":0,"filename":"dummy","msg":"ws_handshake: The WebSocket handshake was declined by the remote peer"}`
|
||||
RunBatchTimer = false
|
||||
ConnectToAzBlob = false
|
||||
uploader, err := NewAzureBatchUploader("fakeName", ".txt", 30, "\n")
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
|
||||
p, ok := uploader.(*AzureBatchUploader)
|
||||
if !ok {
|
||||
b.Error("Could not convert uploader to azure batch uploader")
|
||||
}
|
||||
typedUploader := *p
|
||||
|
||||
// Simulate getting a message over 1 minute every 15 seconds from 50,000 cars
|
||||
for x := 0; x <= 50000; x++ {
|
||||
pl := LogPayload{
|
||||
Title: "VIN",
|
||||
Value: fmt.Sprintf("VINNUMBER%d", x),
|
||||
}
|
||||
|
||||
_, err = typedUploader.Upload([]byte(testMSG), pl, pl.Value)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
_, err = typedUploader.Upload([]byte(testMSG), pl, pl.Value)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
_, err = typedUploader.Upload([]byte(testMSG), pl, pl.Value)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
_, err = typedUploader.Upload([]byte(testMSG), pl, pl.Value)
|
||||
if err != nil {
|
||||
b.Error(err)
|
||||
}
|
||||
}
|
||||
|
||||
b.Log(getRealSizeOf(typedUploader.logsToSend.logs))
|
||||
}
|
||||
|
||||
// getRealSizeOf returns the number of bytes v occupies when gob-encoded.
// It returns 0 and the encoder's error when v cannot be encoded.
func getRealSizeOf(v interface{}) (int, error) {
	var encoded bytes.Buffer
	err := gob.NewEncoder(&encoded).Encode(v)
	if err != nil {
		return 0, err
	}
	return encoded.Len(), nil
}
|
||||
|
||||
func TestOrder(t *testing.T){
|
||||
|
||||
RunBatchTimer = false
|
||||
ConnectToAzBlob = false
|
||||
uploader, err := NewAzureBatchUploader("fakeName", ".txt", 30, "\n")
|
||||
p, ok := uploader.(*AzureBatchUploader)
|
||||
if !ok {
|
||||
t.Error("Could not convert uploader to azure batch uploader")
|
||||
}
|
||||
typedUploader := *p
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
}
|
||||
pl := LogPayload{
|
||||
Title: "VIN",
|
||||
Value: "SomeFakeVin",
|
||||
}
|
||||
for x := 0; x < 1000; x ++{
|
||||
|
||||
_, err := typedUploader.Upload([]byte(fmt.Sprint(x)), pl, pl.Value)
|
||||
if err != nil {
|
||||
t.Error(err)
|
||||
t.FailNow()
|
||||
}
|
||||
}
|
||||
|
||||
val := typedUploader.logsToSend.logs["https://REPLACE_ME.blob.core.windows.net/fakeName/SomeFakeVin/raw.txt"]
|
||||
numberArray := strings.Split(string(val.Body), "\n")
|
||||
for x := 0; x < len(numberArray) - 1; x ++ {
|
||||
a, _ := strconv.Atoi(numberArray[x])
|
||||
b, _ := strconv.Atoi(numberArray[x+1])
|
||||
if a + 1 != b {
|
||||
t.Logf("Failed got %s before %s\n", numberArray[x], numberArray[x+1])
|
||||
t.Fail()
|
||||
}
|
||||
}
|
||||
}
|
||||
140
pkg/remotefileupload/csvtoparquet.go
Normal file
140
pkg/remotefileupload/csvtoparquet.go
Normal file
@@ -0,0 +1,140 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"fmt"
|
||||
"io"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"fiskerinc.com/modules/utils/envtool"
|
||||
)
|
||||
|
||||
var (
|
||||
parquetFileSizeIdeal = int64(envtool.GetEnvInt("PARQUET_FILE_SIZE_IN_COMPRESSED", 1024*1024*200))
|
||||
skipAzure = false
|
||||
)
|
||||
|
||||
// ICSVtoParquet converts CSV lines to parquet in two halves: Read feeds lines
// from a reader into the converter and Write turns them into parquet output.
type ICSVtoParquet interface {
	// Read consumes the reader line by line.
	Read(io.Reader) error
	// Write emits the lines handed over by Read.
	Write() error
}
|
||||
|
||||
// csvToParquet is the ICSVtoParquet implementation backed by Azure blobs.
type csvToParquet struct {
	// azureAccount and azureAccountKey are handed to the Azure parquet writer.
	azureAccount    string
	azureAccountKey string
	// queue carries lines from Read to Write; Read closes it when done.
	queue chan string
	// parquetBlobPath is the source blob URL with its file name removed.
	parquetBlobPath string
	// counter numbers the generated parquet files.
	counter int
}
|
||||
|
||||
func NewCSVtoParquet(azureAccount, azureAccountKey, parquetBlobUrl string) ICSVtoParquet {
|
||||
return &csvToParquet{
|
||||
azureAccount: azureAccount,
|
||||
azureAccountKey: azureAccountKey,
|
||||
queue: make(chan string, 20),
|
||||
parquetBlobPath: getPathFromURL(parquetBlobUrl),
|
||||
}
|
||||
}
|
||||
|
||||
// Read reads lines from the provided io.Reader and sends them to a buffered channel.
|
||||
// The function uses a bufio.Reader to efficiently read lines until it encounters an EOF (end of file).
|
||||
// Each read line is sent to a pre-initialized buffered channel 'queue' for further processing.
|
||||
// The channel is closed once all lines are read or if an error occurs during the process.
|
||||
//
|
||||
// Parameters:
|
||||
//
|
||||
// reader (io.Reader): The input stream from which lines are read.
|
||||
//
|
||||
// Returns:
|
||||
//
|
||||
// error: If an error occurs during reading, it is returned. Otherwise, returns nil.
|
||||
func (cp *csvToParquet) Read(reader io.Reader) error {
|
||||
defer close(cp.queue) // Close the channel when file done.
|
||||
bio := bufio.NewReader(reader)
|
||||
for {
|
||||
line, err := bio.ReadString('\n')
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
cp.queue <- line
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cp *csvToParquet) newWriter() (ParquetBlobWriter, error) {
|
||||
if skipAzure {
|
||||
cp.generateFile()
|
||||
return NewFakeAzureParquetBlobWriter()
|
||||
}
|
||||
return NewAzureParquetBlobWriter(cp.generateFile(), cp.azureAccount, cp.azureAccountKey)
|
||||
}
|
||||
|
||||
func (cp *csvToParquet) Write() error {
|
||||
var writer ParquetBlobWriter
|
||||
var err error
|
||||
writer, err = cp.newWriter()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
defer func() {
|
||||
writer.Close()
|
||||
}()
|
||||
|
||||
for line := range cp.queue {
|
||||
splitedRaw := strings.Split(line, ",")
|
||||
if len(splitedRaw) < 3 {
|
||||
continue
|
||||
}
|
||||
timeStamp, _ := strconv.ParseInt(splitedRaw[0], 10, 64)
|
||||
idAs64, _ := strconv.ParseInt(splitedRaw[1], 10, 32)
|
||||
id := int32(idAs64)
|
||||
|
||||
payload := ParquetCANMessage{
|
||||
TimestampUSec: &timeStamp,
|
||||
ID: &id,
|
||||
Data: &splitedRaw[2],
|
||||
}
|
||||
err = writer.Write(payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// if size is greater to 200MB, start writing in new file to avoid memory issue
|
||||
if writer.Size() >= parquetFileSizeIdeal {
|
||||
writer.Close()
|
||||
writer, err = cp.newWriter()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (cp *csvToParquet) generateFile() string {
|
||||
file := fmt.Sprintf("%v/%v-%d.parquet", cp.parquetBlobPath, "raw", cp.counter)
|
||||
cp.counter++
|
||||
return file
|
||||
}
|
||||
|
||||
// getPathFromURL returns file with its last "/"-separated element (the file
// name) removed, i.e. everything before the final "/".
// If the input has no "/" at all, including the empty string, the result is
// an empty string.
//
// Parameters:
//
//	file (string): The input file path from which to extract the directory path.
//
// Returns:
//
//	string: The directory path without the file name.
func getPathFromURL(file string) string {
	lastSlash := strings.LastIndex(file, "/")
	if lastSlash < 0 {
		return ""
	}
	return file[:lastSlash]
}
|
||||
161
pkg/remotefileupload/csvtoparquet_test.go
Normal file
161
pkg/remotefileupload/csvtoparquet_test.go
Normal file
@@ -0,0 +1,161 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"bytes"
|
||||
"log"
|
||||
"reflect"
|
||||
"sync"
|
||||
"testing"
|
||||
)
|
||||
|
||||
var content = `1691443566877007,816,AAAA+gAAAAA=
|
||||
1691443566877013,801,AAAAAAABAAA=
|
||||
1691443566877019,835,AAAEAAAAAAA=
|
||||
1691443566877059,1410,AAAkAAAAAAA=
|
||||
1691443566877064,821,AAAAAAgAAAA=
|
||||
1691443566877069,1304,AgAAAAAAAAA=
|
||||
1691443566877074,1298,BAAAAAAAAAA=
|
||||
1691443566877078,902,AQAAAAAAAAA=
|
||||
1691443566877082,1137,AAAAAAAAAgA=
|
||||
1691443566877085,54,CAAAAAAAAAA=
|
||||
1691443566877089,54,BAAAAAAAAAA=
|
||||
1691443566877093,1329,AAAAYagAAAA=
|
||||
1691443566877096,608,YAAAAAAAAAA=
|
||||
1691443566877100,1297,AIAAAAAAAAA=
|
||||
1691443566877118,1268,AAAAADwAAAA=
|
||||
1691443566877122,757,AAAAAAAeAAA=
|
||||
1691443566877126,882,AAAAAABOAAA=
|
||||
1691443566877143,1284,AAAAAAAH0AA=
|
||||
1691443566877147,1285,AAAAAAAAgAA=
|
||||
1691443566877167,1408,AAAAAAAtAAA=
|
||||
1691443566877173,1584,AAAAAAAAAC0=
|
||||
1691443566877512,873,AAAAAMgAAAA=
|
||||
1691443567878825,1317,AAA+pngR/pc=
|
||||
1691443567878850,816,AAAA4QAAAAA=
|
||||
`
|
||||
|
||||
var contentArray = []string{
|
||||
"1691443566877007,816,AAAA+gAAAAA=\n",
|
||||
"1691443566877013,801,AAAAAAABAAA=\n",
|
||||
"1691443566877019,835,AAAEAAAAAAA=\n",
|
||||
"1691443566877059,1410,AAAkAAAAAAA=\n",
|
||||
"1691443566877064,821,AAAAAAgAAAA=\n",
|
||||
"1691443566877069,1304,AgAAAAAAAAA=\n",
|
||||
"1691443566877074,1298,BAAAAAAAAAA=\n",
|
||||
"1691443566877078,902,AQAAAAAAAAA=\n",
|
||||
"1691443566877082,1137,AAAAAAAAAgA=\n",
|
||||
"1691443566877085,54,CAAAAAAAAAA=\n",
|
||||
"1691443566877089,54,BAAAAAAAAAA=\n",
|
||||
"1691443566877093,1329,AAAAYagAAAA=\n",
|
||||
"1691443566877096,608,YAAAAAAAAAA=\n",
|
||||
"1691443566877100,1297,AIAAAAAAAAA=\n",
|
||||
"1691443566877118,1268,AAAAADwAAAA=\n",
|
||||
"1691443566877122,757,AAAAAAAeAAA=\n",
|
||||
"1691443566877126,882,AAAAAABOAAA=\n",
|
||||
"1691443566877143,1284,AAAAAAAH0AA=\n",
|
||||
"1691443566877147,1285,AAAAAAAAgAA=\n",
|
||||
"1691443566877167,1408,AAAAAAAtAAA=\n",
|
||||
"1691443566877173,1584,AAAAAAAAAC0=\n",
|
||||
"1691443566877512,873,AAAAAMgAAAA=\n",
|
||||
"1691443567878825,1317,AAA+pngR/pc=\n",
|
||||
"1691443567878850,816,AAAA4QAAAAA=\n",
|
||||
}
|
||||
|
||||
func TestRead(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expectedData []string
|
||||
expectedErr error
|
||||
}{
|
||||
{
|
||||
name: "ReadLinesSuccessfully",
|
||||
input: content,
|
||||
expectedData: contentArray,
|
||||
expectedErr: nil,
|
||||
},
|
||||
{
|
||||
name: "EmptyInput",
|
||||
input: "",
|
||||
expectedData: []string{},
|
||||
expectedErr: nil,
|
||||
},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
t.Run(test.name, func(t *testing.T) {
|
||||
|
||||
cv := &csvToParquet{
|
||||
queue: make(chan string, 40),
|
||||
}
|
||||
reader := bufio.NewReader(bytes.NewBufferString(test.input))
|
||||
err := cv.Read(reader)
|
||||
var result []string
|
||||
for item := range cv.queue {
|
||||
result = append(result, item)
|
||||
}
|
||||
|
||||
if !reflect.DeepEqual(result, test.expectedData) && !(len(result) == 0 && len(test.expectedData) == 0) {
|
||||
t.Errorf("For test '%s', expected queue %v, but got %v", test.name, test.expectedData, result)
|
||||
}
|
||||
|
||||
if !errorsEqual(err, test.expectedErr) {
|
||||
t.Errorf("For test '%s', expected error '%v', but got '%v'", test.name, test.expectedErr, err)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// errorsEqual reports whether both errors are nil, or both are non-nil with
// the same message.
func errorsEqual(err1, err2 error) bool {
	switch {
	case err1 == nil && err2 == nil:
		return true
	case err1 == nil, err2 == nil:
		return false
	default:
		return err1.Error() == err2.Error()
	}
}
|
||||
|
||||
func TestGetPathFromURL(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{"path/to/file.txt", "path/to"},
|
||||
{"another/path/to/image.jpg", "another/path/to"},
|
||||
{"root", ""},
|
||||
{"", ""},
|
||||
{"/absolute/path/file.txt", "/absolute/path"},
|
||||
}
|
||||
|
||||
for _, test := range tests {
|
||||
result := getPathFromURL(test.input)
|
||||
if result != test.expected {
|
||||
t.Errorf("For input %s, expected %s, but got %s", test.input, test.expected, result)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func BenchmarkReadWrite(b *testing.B) {
|
||||
skipAzure = true
|
||||
parquetFileSizeIdeal = 100
|
||||
reader := bufio.NewReader(bytes.NewBufferString(content))
|
||||
|
||||
for i := 0; i < b.N; i++ {
|
||||
csvToParquet := NewCSVtoParquet("", "", "https://yourstorageaccount.blob.core.windows.net/raw.csv")
|
||||
go csvToParquet.Read(reader)
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(1)
|
||||
go func(w *sync.WaitGroup) {
|
||||
err := csvToParquet.Write()
|
||||
|
||||
if err != nil {
|
||||
log.Println(err)
|
||||
}
|
||||
w.Done()
|
||||
}(&wg)
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
}
|
||||
5
pkg/remotefileupload/errors.go
Normal file
5
pkg/remotefileupload/errors.go
Normal file
@@ -0,0 +1,5 @@
|
||||
package remotefileupload
|
||||
|
||||
import "github.com/pkg/errors"
|
||||
|
||||
// ErrInvalidUploader is the sentinel error for an invalid uploader type.
var ErrInvalidUploader = errors.New("invalid uploader type")
|
||||
95
pkg/remotefileupload/parquet.go
Normal file
95
pkg/remotefileupload/parquet.go
Normal file
@@ -0,0 +1,95 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"context"
|
||||
|
||||
"fiskerinc.com/modules/logger"
|
||||
az "github.com/Azure/azure-storage-blob-go/azblob"
|
||||
pqAZ "github.com/xitongsys/parquet-go-source/azblob"
|
||||
"github.com/xitongsys/parquet-go/source"
|
||||
"github.com/xitongsys/parquet-go/writer"
|
||||
)
|
||||
|
||||
var (
	// parquetThreadCount is the parallelism passed to writer.NewParquetWriter.
	parquetThreadCount int64 = 4

	// errOnCloseWriter is the log message used when closing a writer fails.
	errOnCloseWriter = "Unable to close writer"
)
|
||||
|
||||
// ParquetCANMessage is one row of the parquet output.
//
// Required struct to intake compressed parquet files which lists fields as
// optional, hence the pointers to int, string.
type ParquetCANMessage struct {
	// TimestampUSec is stored in the "epoch_usec" column.
	TimestampUSec *int64 `json:"epoch_usec" parquet:"name=epoch_usec, type=INT64"`
	// ID is stored in the "id" column.
	ID *int32 `json:"id" parquet:"name=id, type=INT32"`
	// Data is stored in the "data" column.
	Data *string `json:"data" parquet:"name=data, type=BYTE_ARRAY"`
}
|
||||
|
||||
// NewAzureParquetBlobWriter creates a new instance of ParquetBlobWriter that can be used to write Parquet files to Azure Blob Storage.
|
||||
//
|
||||
// Parameters:
|
||||
// - blobUrl: The URL of the Azure Blob Storage container where the Parquet files will be stored.
|
||||
//
|
||||
// Returns:
|
||||
// - ParquetBlobWriter: An instance of ParquetBlobWriter.
|
||||
// - error: An error if there was a problem creating the writer.
|
||||
func NewAzureParquetBlobWriter(blobUrl, azureAccount, azureAccountKey string) (ParquetBlobWriter, error) {
|
||||
creds, err := az.NewSharedKeyCredential(azureAccount, azureAccountKey)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
fr, err := pqAZ.NewAzBlobFileWriter(
|
||||
context.Background(),
|
||||
blobUrl,
|
||||
creds,
|
||||
pqAZ.WriterOptions{},
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
pr, err := writer.NewParquetWriter(fr, new(ParquetCANMessage), parquetThreadCount)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &AzureParquetBlobWriter{blob: fr, fileWriter: pr}, nil
|
||||
}
|
||||
|
||||
// ParquetBlobWriter is the set of operations the package needs from a
// Parquet destination: accept payloads, report a size, and be closed.
type ParquetBlobWriter interface {
	// Write hands one payload to the writer and reports any failure.
	Write(payload interface{}) error

	// Size reports the writer's current size.
	Size() int64

	// Close finishes the writer. It returns nothing, so an implementation
	// has to deal with its own close failures.
	Close()
}
|
||||
|
||||
type AzureParquetBlobWriter struct {
|
||||
blob source.ParquetFile
|
||||
fileWriter *writer.ParquetWriter
|
||||
}
|
||||
|
||||
func (w *AzureParquetBlobWriter) Write(payload interface{}) error {
|
||||
err := w.fileWriter.Write(payload)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func (w *AzureParquetBlobWriter) Size() int64 {
|
||||
return w.fileWriter.Size
|
||||
}
|
||||
|
||||
func (w *AzureParquetBlobWriter) Close() {
|
||||
err := w.fileWriter.WriteStop()
|
||||
if err != nil {
|
||||
logger.Debug().Msgf("%v: %s", err, errOnCloseWriter)
|
||||
}
|
||||
|
||||
err = w.blob.Close()
|
||||
if err != nil {
|
||||
logger.Debug().Msgf("%v:%s", err, errOnCloseWriter)
|
||||
}
|
||||
}
|
||||
31
pkg/remotefileupload/parquet_mock.go
Normal file
31
pkg/remotefileupload/parquet_mock.go
Normal file
@@ -0,0 +1,31 @@
|
||||
package remotefileupload
|
||||
|
||||
import (
|
||||
"unsafe"
|
||||
)
|
||||
|
||||
func NewFakeAzureParquetBlobWriter() (ParquetBlobWriter, error) {
|
||||
var data []interface{}
|
||||
return &FakeAzureParquetBlobWriter{
|
||||
data: data,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FakeAzureParquetBlobWriter is an in-memory stand-in for a Parquet blob
// writer: it keeps every payload in a slice and tracks a running size.
type FakeAzureParquetBlobWriter struct {
	// data holds every payload passed to Write, in call order.
	data []interface{}

	// size is the running total reported by Size.
	size int64
}

// Write records payload and grows the reported size. It always returns nil.
//
// The increment is unsafe.Sizeof applied to the interface value itself, so it
// is the same fixed amount on every call regardless of what payload holds.
func (w *FakeAzureParquetBlobWriter) Write(payload interface{}) error {
	increment := int64(unsafe.Sizeof(payload))
	w.data = append(w.data, payload)
	w.size += increment
	return nil
}

// Size returns the total accumulated by Write so far.
func (w *FakeAzureParquetBlobWriter) Size() int64 {
	return w.size
}

// Close does nothing.
func (w *FakeAzureParquetBlobWriter) Close() {}
|
||||
19
pkg/remotefileupload/uploader.go
Normal file
19
pkg/remotefileupload/uploader.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package remotefileupload
|
||||
|
||||
// Uploader type identifiers. They are declared as string variables, not
// constants, so existing code that assigns to them or takes their address
// keeps compiling.
var (
	AWSType        = "aws"
	AzureType      = "azure"
	AzureBatchType = "azure_batch"
)
|
||||
|
||||
type Uploader interface {
|
||||
// Upload filePath is the substring pieces of where you want the file stored: ex (dog,cat,mouse) => dog/cat/mouse
|
||||
Upload(data []byte, logValue LogPayload, filePath ...string) (path string, err error)
|
||||
}
|
||||
|
||||
// LogPayload labels an upload in log output. A logged failure reads along
// the lines of "hey your {Title} {Value} errored".
type LogPayload struct {
	// Title is the label placed before Value in the log message.
	Title string

	// Value is the value reported next to Title.
	Value string
}
|
||||
|
||||
// uploaderFilePathBuild is a function type that takes no arguments and
// returns nothing.
// NOTE(review): no use of this type is visible in uploader.go — confirm
// whether it is still needed.
type uploaderFilePathBuild func()
|
||||
Reference in New Issue
Block a user