package remotefileupload import ( "bufio" "bytes" "log" "reflect" "sync" "testing" ) var content = `1691443566877007,816,AAAA+gAAAAA= 1691443566877013,801,AAAAAAABAAA= 1691443566877019,835,AAAEAAAAAAA= 1691443566877059,1410,AAAkAAAAAAA= 1691443566877064,821,AAAAAAgAAAA= 1691443566877069,1304,AgAAAAAAAAA= 1691443566877074,1298,BAAAAAAAAAA= 1691443566877078,902,AQAAAAAAAAA= 1691443566877082,1137,AAAAAAAAAgA= 1691443566877085,54,CAAAAAAAAAA= 1691443566877089,54,BAAAAAAAAAA= 1691443566877093,1329,AAAAYagAAAA= 1691443566877096,608,YAAAAAAAAAA= 1691443566877100,1297,AIAAAAAAAAA= 1691443566877118,1268,AAAAADwAAAA= 1691443566877122,757,AAAAAAAeAAA= 1691443566877126,882,AAAAAABOAAA= 1691443566877143,1284,AAAAAAAH0AA= 1691443566877147,1285,AAAAAAAAgAA= 1691443566877167,1408,AAAAAAAtAAA= 1691443566877173,1584,AAAAAAAAAC0= 1691443566877512,873,AAAAAMgAAAA= 1691443567878825,1317,AAA+pngR/pc= 1691443567878850,816,AAAA4QAAAAA= ` var contentArray = []string{ "1691443566877007,816,AAAA+gAAAAA=\n", "1691443566877013,801,AAAAAAABAAA=\n", "1691443566877019,835,AAAEAAAAAAA=\n", "1691443566877059,1410,AAAkAAAAAAA=\n", "1691443566877064,821,AAAAAAgAAAA=\n", "1691443566877069,1304,AgAAAAAAAAA=\n", "1691443566877074,1298,BAAAAAAAAAA=\n", "1691443566877078,902,AQAAAAAAAAA=\n", "1691443566877082,1137,AAAAAAAAAgA=\n", "1691443566877085,54,CAAAAAAAAAA=\n", "1691443566877089,54,BAAAAAAAAAA=\n", "1691443566877093,1329,AAAAYagAAAA=\n", "1691443566877096,608,YAAAAAAAAAA=\n", "1691443566877100,1297,AIAAAAAAAAA=\n", "1691443566877118,1268,AAAAADwAAAA=\n", "1691443566877122,757,AAAAAAAeAAA=\n", "1691443566877126,882,AAAAAABOAAA=\n", "1691443566877143,1284,AAAAAAAH0AA=\n", "1691443566877147,1285,AAAAAAAAgAA=\n", "1691443566877167,1408,AAAAAAAtAAA=\n", "1691443566877173,1584,AAAAAAAAAC0=\n", "1691443566877512,873,AAAAAMgAAAA=\n", "1691443567878825,1317,AAA+pngR/pc=\n", "1691443567878850,816,AAAA4QAAAAA=\n", } func TestRead(t *testing.T) { tests := []struct { name string input string expectedData []string expectedErr error }{ { name: "ReadLinesSuccessfully", input: content, expectedData: contentArray, expectedErr: nil, }, { name: "EmptyInput", input: "", expectedData: []string{}, expectedErr: nil, }, } for _, test := range tests { t.Run(test.name, func(t *testing.T) { cv := &csvToParquet{ queue: make(chan string, 40), } reader := bufio.NewReader(bytes.NewBufferString(test.input)) err := cv.Read(reader) var result []string for item := range cv.queue { result = append(result, item) } if !reflect.DeepEqual(result, test.expectedData) && !(len(result) == 0 && len(test.expectedData) == 0) { t.Errorf("For test '%s', expected queue %v, but got %v", test.name, test.expectedData, result) } if !errorsEqual(err, test.expectedErr) { t.Errorf("For test '%s', expected error '%v', but got '%v'", test.name, test.expectedErr, err) } }) } } func errorsEqual(err1, err2 error) bool { if err1 == nil && err2 == nil { return true } if err1 == nil || err2 == nil { return false } return err1.Error() == err2.Error() } func TestGetPathFromURL(t *testing.T) { tests := []struct { input string expected string }{ {"path/to/file.txt", "path/to"}, {"another/path/to/image.jpg", "another/path/to"}, {"root", ""}, {"", ""}, {"/absolute/path/file.txt", "/absolute/path"}, } for _, test := range tests { result := getPathFromURL(test.input) if result != test.expected { t.Errorf("For input %s, expected %s, but got %s", test.input, test.expected, result) } } } func BenchmarkReadWrite(b *testing.B) { skipAzure = true parquetFileSizeIdeal = 100 reader := bufio.NewReader(bytes.NewBufferString(content)) for i := 0; i < b.N; i++ { csvToParquet := NewCSVtoParquet("", "", "https://yourstorageaccount.blob.core.windows.net/raw.csv") go csvToParquet.Read(reader) var wg sync.WaitGroup wg.Add(1) go func(w *sync.WaitGroup) { err := csvToParquet.Write() if err != nil { log.Println(err) } w.Done() }(&wg) wg.Wait() } }