// Source: cloud-services/pkg/remotefileupload/csvtoparquet_test.go
// (162 lines, 4.1 KiB, Go)

package remotefileupload
import (
"bufio"
"bytes"
"log"
"reflect"
"sync"
"testing"
)
// content is the raw CSV fixture shared by TestRead and BenchmarkReadWrite.
// It holds 24 newline-terminated records of three comma-separated fields each.
// NOTE(review): the first field looks like a microsecond Unix timestamp and
// the third like a base64-encoded payload — confirm against the producer.
var content = `1691443566877007,816,AAAA+gAAAAA=
1691443566877013,801,AAAAAAABAAA=
1691443566877019,835,AAAEAAAAAAA=
1691443566877059,1410,AAAkAAAAAAA=
1691443566877064,821,AAAAAAgAAAA=
1691443566877069,1304,AgAAAAAAAAA=
1691443566877074,1298,BAAAAAAAAAA=
1691443566877078,902,AQAAAAAAAAA=
1691443566877082,1137,AAAAAAAAAgA=
1691443566877085,54,CAAAAAAAAAA=
1691443566877089,54,BAAAAAAAAAA=
1691443566877093,1329,AAAAYagAAAA=
1691443566877096,608,YAAAAAAAAAA=
1691443566877100,1297,AIAAAAAAAAA=
1691443566877118,1268,AAAAADwAAAA=
1691443566877122,757,AAAAAAAeAAA=
1691443566877126,882,AAAAAABOAAA=
1691443566877143,1284,AAAAAAAH0AA=
1691443566877147,1285,AAAAAAAAgAA=
1691443566877167,1408,AAAAAAAtAAA=
1691443566877173,1584,AAAAAAAAAC0=
1691443566877512,873,AAAAAMgAAAA=
1691443567878825,1317,AAA+pngR/pc=
1691443567878850,816,AAAA4QAAAAA=
`
// contentArray lists the same 24 records as the content fixture, one element
// per line with the trailing "\n" kept. TestRead uses it as the expected
// sequence of items read back from the queue.
var contentArray = []string{
"1691443566877007,816,AAAA+gAAAAA=\n",
"1691443566877013,801,AAAAAAABAAA=\n",
"1691443566877019,835,AAAEAAAAAAA=\n",
"1691443566877059,1410,AAAkAAAAAAA=\n",
"1691443566877064,821,AAAAAAgAAAA=\n",
"1691443566877069,1304,AgAAAAAAAAA=\n",
"1691443566877074,1298,BAAAAAAAAAA=\n",
"1691443566877078,902,AQAAAAAAAAA=\n",
"1691443566877082,1137,AAAAAAAAAgA=\n",
"1691443566877085,54,CAAAAAAAAAA=\n",
"1691443566877089,54,BAAAAAAAAAA=\n",
"1691443566877093,1329,AAAAYagAAAA=\n",
"1691443566877096,608,YAAAAAAAAAA=\n",
"1691443566877100,1297,AIAAAAAAAAA=\n",
"1691443566877118,1268,AAAAADwAAAA=\n",
"1691443566877122,757,AAAAAAAeAAA=\n",
"1691443566877126,882,AAAAAABOAAA=\n",
"1691443566877143,1284,AAAAAAAH0AA=\n",
"1691443566877147,1285,AAAAAAAAgAA=\n",
"1691443566877167,1408,AAAAAAAtAAA=\n",
"1691443566877173,1584,AAAAAAAAAC0=\n",
"1691443566877512,873,AAAAAMgAAAA=\n",
"1691443567878825,1317,AAA+pngR/pc=\n",
"1691443567878850,816,AAAA4QAAAAA=\n",
}
// TestRead runs csvToParquet.Read over each case's input, then checks the
// items delivered on the queue and the error Read returned.
func TestRead(t *testing.T) {
	type readCase struct {
		name      string
		input     string
		wantLines []string
		wantErr   error
	}

	cases := []readCase{
		{name: "ReadLinesSuccessfully", input: content, wantLines: contentArray, wantErr: nil},
		{name: "EmptyInput", input: "", wantLines: []string{}, wantErr: nil},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			conv := &csvToParquet{queue: make(chan string, 40)}
			src := bufio.NewReader(bytes.NewBufferString(tc.input))
			readErr := conv.Read(src)

			// Drain everything Read queued; the range loop ends only once
			// the channel has been closed.
			var got []string
			for line := range conv.queue {
				got = append(got, line)
			}

			// reflect.DeepEqual treats a nil slice and an empty non-nil slice
			// as different, so two empty results are accepted explicitly.
			bothEmpty := len(got) == 0 && len(tc.wantLines) == 0
			if !(bothEmpty || reflect.DeepEqual(got, tc.wantLines)) {
				t.Errorf("For test '%s', expected queue %v, but got %v", tc.name, tc.wantLines, got)
			}

			if !errorsEqual(readErr, tc.wantErr) {
				t.Errorf("For test '%s', expected error '%v', but got '%v'", tc.name, tc.wantErr, readErr)
			}
		})
	}
}
// errorsEqual reports whether err1 and err2 are both nil, or both non-nil
// with identical Error() text. Comparison is by message only; the concrete
// error types are not considered.
func errorsEqual(err1, err2 error) bool {
	switch {
	case err1 == nil:
		// Equal only when the other side is nil as well.
		return err2 == nil
	case err2 == nil:
		return false
	default:
		return err1.Error() == err2.Error()
	}
}
// TestGetPathFromURL checks getPathFromURL against a fixed table of inputs
// and the exact string expected for each. The cases cover relative paths,
// a bare name, the empty string and an absolute path.
func TestGetPathFromURL(t *testing.T) {
	type pathCase struct {
		url  string
		want string
	}

	cases := []pathCase{
		{url: "path/to/file.txt", want: "path/to"},
		{url: "another/path/to/image.jpg", want: "another/path/to"},
		{url: "root", want: ""},
		{url: "", want: ""},
		{url: "/absolute/path/file.txt", want: "/absolute/path"},
	}

	for i := range cases {
		c := cases[i]
		if got := getPathFromURL(c.url); got != c.want {
			t.Errorf("For input %s, expected %s, but got %s", c.url, c.want, got)
		}
	}
}
// BenchmarkReadWrite runs one Read plus one Write over the content fixture
// per iteration, with Read in its own goroutine and Write awaited before the
// next iteration starts.
//
// It assigns the package-level skipAzure and parquetFileSizeIdeal variables
// (declared elsewhere in the package) before the loop and does not restore
// them afterwards.
func BenchmarkReadWrite(b *testing.B) {
	skipAzure = true
	parquetFileSizeIdeal = 100
	for i := 0; i < b.N; i++ {
		// The buffer behind the reader is consumed as it is read, so a reader
		// shared across iterations would be empty after the first pass and
		// every later iteration would benchmark empty input. Build a fresh
		// one each time.
		reader := bufio.NewReader(bytes.NewBufferString(content))

		// Named cv rather than csvToParquet to avoid shadowing the type.
		cv := NewCSVtoParquet("", "", "https://yourstorageaccount.blob.core.windows.net/raw.csv")

		// Read is not awaited here; its error is logged instead of dropped.
		go func() {
			if err := cv.Read(reader); err != nil {
				log.Println(err)
			}
		}()

		var wg sync.WaitGroup
		wg.Add(1)
		go func(w *sync.WaitGroup) {
			defer w.Done()
			if err := cv.Write(); err != nil {
				log.Println(err)
			}
		}(&wg)
		wg.Wait()
	}
}