首页 > 文章列表 > 使用字节流读取 CSV 文件

使用字节流读取 CSV 文件

251 2024-05-02
问题内容

当我将 csv 文件读入二维字节切片时,我遇到了一些奇怪的行为。前 42 行很好,之后似乎将额外的行结尾放入了数据中,这使事情变得混乱:

前 42 次中的第一行:

row 0: 504921600000000000,truck_0,south,trish,h-2,v2.3,1500,150,12,52.31854,4.72037,124,0,221,0,25

追加 43 行后的第一行:

row 0: 504921600000000000,truck_49,south,andy,f-150,v2.0,2000,200,15,38.9349,179.94282,289,0,269,0
row 1: 25

重现问题的最少代码:

package main

import (
    "bufio"
    "log"
    "os"
)

type filedatasource struct {
    scanner *bufio.scanner
}

type batch struct {
    rows [][]byte
}

func (b *batch) len() uint {
    return uint(len(b.rows))
}

func (b *batch) append(row []byte) {
    b.rows = append(b.rows, row)
    for index, row := range b.rows {
        log.printf("row %d: %sn", index, string(row))
    }
    if len(b.rows) > 43 {

        log.fatalf("asdf")
    }
}

type factory struct{}

func (f *factory) new() *batch {
    return &batch{rows: make([][]byte, 0)}
}

func main() {
    file, _ := os.open("/tmp/data1.csv")
    scanner := bufio.newscanner(bufio.newreadersize(file, 4<<20))
    b := batch{}

    for scanner.scan() {
        b.append(scanner.bytes())
    }
}

我使用的csv:

504921600000000000,truck_0,South,Trish,H-2,v2.3,1500,150,12,52.31854,4.72037,124,0,221,0,25
504921600000000000,truck_1,South,Albert,F-150,v1.5,2000,200,15,72.45258,68.83761,255,0,181,0,25
504921600000000000,truck_2,North,Derek,F-150,v1.5,2000,200,15,24.5208,28.09377,428,0,304,0,25
504921600000000000,truck_3,East,Albert,F-150,v2.0,2000,200,15,18.11037,98.65573,387,0,192,0,25
504921600000000000,truck_4,West,Andy,G-2000,v1.5,5000,300,19,81.93919,56.12266,236,0,335,0,25
504921600000000000,truck_5,East,Seth,F-150,v2.0,2000,200,15,5.00552,114.50557,89,0,187,0,25
504921600000000000,truck_6,East,Trish,G-2000,v1.0,5000,300,19,41.59689,57.90174,395,0,150,0,25
504921600000000000,truck_8,South,Seth,G-2000,v1.0,5000,300,19,21.89157,44.58919,411,0,232,0,25
504921600000000000,truck_9,South,Andy,H-2,v2.3,1500,150,12,15.67271,112.4023,402,0,75,0,25
504921600000000000,truck_10,North,Albert,F-150,v2.3,2000,200,15,35.05682,36.20513,359,0,68,0,25
504921600000000000,truck_7,East,Andy,H-2,v2.0,1500,150,12,7.74826,14.96075,105,0,323,0,25
504921600000000000,truck_11,South,Derek,F-150,v1.0,2000,200,15,87.9924,134.71544,293,0,133,0,25
504921600000000000,truck_14,North,Albert,H-2,v1.0,1500,150,12,66.68217,105.76965,222,0,252,0,25
504921600000000000,truck_18,West,Trish,F-150,v2.0,2000,200,15,67.15164,153.56165,252,0,240,0,25
504921600000000000,truck_20,North,Rodney,G-2000,v2.0,5000,300,19,38.88807,65.86698,104,0,44,0,25
504921600000000000,truck_21,East,Derek,G-2000,v2.0,5000,300,19,81.87812,167.8083,345,0,327,0,25
504921600000000000,truck_22,West,Albert,G-2000,v1.5,5000,300,19,39.9433,16.0241,449,0,42,0,25
504921600000000000,truck_23,South,Andy,F-150,v2.0,2000,200,15,73.28358,98.05159,198,0,276,0,25
504921600000000000,truck_24,West,Rodney,G-2000,v2.3,5000,300,19,22.19262,0.27462,223,0,318,0,25
504921600000000000,truck_25,North,Trish,F-150,v2.0,2000,200,15,17.26704,16.91226,461,0,183,0,25
504921600000000000,truck_26,South,Seth,F-150,v1.5,2000,200,15,45.65327,144.60354,58,0,182,0,25
504921600000000000,truck_12,East,Trish,G-2000,v1.0,5000,300,19,36.03928,113.87118,39,0,294,0,25
504921600000000000,truck_13,West,Derek,H-2,v1.0,1500,150,12,14.07479,110.77267,152,0,69,0,25
504921600000000000,truck_27,West,Seth,G-2000,v1.5,5000,300,19,79.55971,97.86182,252,0,345,0,25
504921600000000000,truck_28,West,Rodney,G-2000,v1.5,5000,300,19,60.33457,4.62029,74,0,199,0,25
504921600000000000,truck_16,South,Albert,G-2000,v1.5,5000,300,19,51.16438,121.32451,455,0,290,0,25
504921600000000000,truck_19,West,Derek,G-2000,v1.5,5000,300,19,19.69355,139.493,451,0,300,0,25
504921600000000000,truck_31,North,Albert,G-2000,v1.0,5000,300,19,0.75251,116.83474,455,0,49,0,25
504921600000000000,truck_32,West,Seth,F-150,v2.0,2000,200,15,4.07566,164.43909,297,0,277,0,25
504921600000000000,truck_33,West,Rodney,G-2000,v1.5,5000,300,19,89.19448,10.47499,407,0,169,0,25
504921600000000000,truck_34,West,Rodney,G-2000,v2.0,5000,300,19,73.7383,10.79582,488,0,170,0,25
504921600000000000,truck_35,West,Seth,G-2000,v2.3,5000,300,19,60.02428,2.51011,480,0,307,0,25
504921600000000000,truck_36,North,Andy,G-2000,v1.0,5000,300,19,87.52877,45.07308,161,0,128,0,25
504921600000000000,truck_38,West,Andy,H-2,v2.3,,150,12,63.54604,119.82031,282,0,325,0,25
504921600000000000,truck_39,East,Derek,G-2000,v1.5,5000,300,19,33.83548,3.90996,294,0,123,0,25
504921600000000000,truck_40,West,Albert,H-2,v2.0,1500,150,12,32.32773,118.43138,276,0,316,0,25
504921600000000000,truck_41,East,Rodney,F-150,v1.0,2000,200,15,68.85572,173.23123,478,0,207,0,25
504921600000000000,truck_42,West,Trish,F-150,v2.0,2000,200,15,38.45195,171.2884,113,0,180,0,25
504921600000000000,truck_43,East,Derek,H-2,v2.0,1500,150,12,52.90189,49.76966,295,0,195,0,25
504921600000000000,truck_44,South,Seth,H-2,v1.0,1500,150,12,32.33297,3.89306,396,0,320,0,25
504921600000000000,truck_30,East,Andy,G-2000,v1.5,5000,300,19,29.62198,83.73482,291,0,267,0,25
504921600000000000,truck_46,West,Seth,H-2,v2.3,1500,150,12,26.07966,118.49629,321,,267,0,25
504921600000000000,truck_37,South,Andy,G-2000,v2.0,5000,300,19,57.90077,77.20136,77,0,179,0,25
504921600000000000,truck_49,South,Andy,F-150,v2.0,2000,200,15,38.9349,179.94282,289,0,269,0,25
504921600000000000,truck_53,West,Seth,G-2000,v2.3,5000,300,19,25.02,157.45082,272,0,5,0,25
504921600000000000,truck_54,North,Andy,H-2,v2.0,1500,150,12,87.62736,106.0376,360,0,66,0,25
504921600000000000,truck_55,East,Albert,G-2000,v1.0,5000,300,19,78.56605,71.16225,295,0,150,0,25
504921600000000000,truck_56,North,Derek,F-150,v2.0,2000,200,15,23.51619,123.22682,71,0,209,0,25
504921600000000000,truck_57,South,Rodney,F-150,v2.3,2000,200,15,26.07996,159.92716,454,0,22,0,25
504921600000000000,truck_58,South,Derek,F-150,v2.0,2000,200,15,84.79333,79.23813,175,0,246,0,25
504921600000000000,truck_59,East,Andy,H-2,v2.0,1500,150,12,8.7621,82.48318,82,0,55,0,25
504921600000000000,truck_45,East,Trish,G-2000,v1.0,5000,300,19,17.48624,100.78121,306,0,193,0,25
504921600000000000,truck_47,South,Derek,G-2000,v1.5,5000,300,19,41.62173,110.80422,111,0,78,0,25
504921600000000000,truck_48,East,Trish,G-2000,v1.5,5000,300,19,63.90773,141.50555,53,0,,0,25
504921600000000000,truck_50,East,Andy,H-2,v2.3,1500,150,12,45.44111,172.39833,219,0,88,0,25
504921600000000000,truck_51,East,Rodney,F-150,v2.3,2000,200,15,89.03645,91.57675,457,0,337,0,25
504921600000000000,truck_52,West,Derek,G-2000,v1.0,5000,300,19,89.0133,97.8037,23,0,168,0,25
504921600000000000,truck_61,East,Albert,G-2000,v2.3,5000,300,19,75.91676,167.78366,462,0,60,0,25
504921600000000000,truck_62,East,Derek,H-2,v1.5,1500,150,12,54.61668,103.21398,231,0,143,0,25
504921600000000000,truck_63,South,Rodney,H-2,v2.0,1500,150,12,37.13702,149.25546,46,0,118,0,25
504921600000000000,truck_64,South,Albert,G-2000,v2.0,5000,300,19,45.04214,10.73002,447,0,253,0,25
504921600000000000,truck_60,South,Derek,H-2,v1.5,1500,150,12,57.99184,33.45994,310,0,93,0,25
504921600000000000,truck_67,South,Seth,H-2,v1.0,1500,150,12,4.62985,155.01707,308,0,22,0,25
504921600000000000,truck_68,West,Rodney,F-150,v1.5,2000,200,15,16.90741,123.03863,303,0,43,0,25
504921600000000000,truck_69,East,Derek,H-2,v2.3,1500,150,12,79.88424,120.79121,407,0,138,0,25
504921600000000000,truck_70,North,Albert,H-2,v2.0,1500,150,12,77.87592,164.70924,270,0,21,0,25
504921600000000000,truck_71,West,Seth,G-2000,v2.3,5000,300,19,72.75635,78.0365,391,0,32,0,25
504921600000000000,truck_73,North,Seth,F-150,v1.5,2000,200,15,37.67468,91.09732,489,0,103,0,25
504921600000000000,truck_74,North,Trish,H-2,v1.0,1500,150,12,41.4456,158.13897,206,0,79,0,25
504921600000000000,truck_75,South,Andy,F-150,v1.5,2000,200,15,4.11709,175.65994,378,0,176,0,25
504921600000000000,truck_66,South,Seth,G-2000,v2.0,5000,300,19,42.24286,151.8978,227,0,67,0,25
504921600000000000,truck_72,South,Andy,G-2000,v2.3,5000,300,19,82.46228,2.44504,487,0,39,0,25
504921600000000000,truck_76,South,Rodney,F-150,v2.3,2000,200,15,71.62798,121.89842,283,0,164,0,25
504921600000000000,truck_78,South,Seth,F-150,v2.0,2000,200,15,13.96218,39.04615,433,0,326,0,25
504921600000000000,truck_79,South,Andy,G-2000,v2.0,5000,300,19,56.54137,,46,0,127,0,25
504921600000000000,truck_81,West,Rodney,G-2000,v2.3,5000,300,19,59.42624,115.59744,68,0,296,0,25
504921600000000000,truck_83,South,Albert,F-150,v2.0,2000,200,15,49.20261,115.98262,449,0,132,0,25
504921600000000000,truck_84,West,Derek,H-2,v1.0,1500,150,12,70.16476,59.05399,301,0,134,0,25
504921600000000000,truck_85,West,Derek,G-2000,v1.0,5000,300,19,11.75251,142.86513,358,0,339,0,25
504921600000000000,truck_86,West,Rodney,G-2000,v1.0,5000,300,19,30.92821,127.53274,367,0,162,0,25
504921600000000000,truck_87,West,Rodney,H-2,v2.0,1500,150,12,32.86913,155.7666,122,0,337,0,25
504921600000000000,truck_88,West,Andy,G-2000,v1.5,5000,300,19,60.03367,9.5707,204,0,333,0,25
504921600000000000,truck_80,East,Andy,G-2000,v2.3,5000,300,,46.13937,137.42962,295,0,290,0,25
504921600000000000,truck_91,East,Derek,F-150,v2.0,2000,200,15,7.13401,52.78885,100,0,147,0,25
504921600000000000,truck_93,North,Derek,G-2000,v2.0,5000,300,19,11.46065,20.57173,242,0,148,0,25
504921600000000000,truck_94,North,Derek,F-150,v1.0,2000,200,15,59.53287,26.98247,427,0,341,0,25
504921600000000000,truck_95,East,Albert,G-2000,v2.0,5000,300,19,37.31513,134.40078,383,0,121,0,25
504921600000000000,truck_96,East,Albert,G-2000,v1.5,5000,300,19,15.78803,146.68255,348,0,189,0,25
504921600000000000,truck_97,South,Seth,F-150,v1.0,2000,200,15,14.08559,18.49763,369,0,34,0,25
504921600000000000,truck_98,South,Albert,G-2000,v1.5,5000,300,19,15.1474,71.85194,89,0,238,0,25
504921600000000000,truck_77,East,Trish,F-150,v2.0,2000,200,15,80.5734,17.68311,389,0,218,0,25
504921600000000000,truck_82,West,Derek,H-2,v2.0,1500,150,12,57.00976,90.13642,102,0,296,0,25
504921600000000000,truck_92,North,Derek,H-2,v1.0,1500,150,12,54.40335,153.5809,123,0,150,0,25
504921600000000000,truck_99,West,Trish,G-2000,v1.5,5000,300,19,62.73061,26.1884,309,0,202,0,25
504921610000000000,truck_1,South,Albert,F-150,v1.5,2000,200,15,72.45157,68.83919,259,0,180,2,27.5
504921610000000000,truck_2,North,Derek,F-150,v1.5,2000,200,15,24.5195,28.09369,434,6,302,0,22.1
504921610000000000,truck_3,East,Albert,F-150,v2.0,2000,200,15,18.107,98.66002,390,,190,0,21.2
504921610000000000,truck_4,West,Andy,G-2000,v1.5,5000,300,19,81.9438,56.12717,244,8,334,2,27.6
504921610000000000,truck_5,East,Seth,F-150,v2.0,2000,200,15,5.00695,114.50676,92,7,183,2,28.5
504921610000000000,truck_6,East,Trish,G-2000,v1.0,5000,300,19,41.59389,57.90166,403,0,149,0,22.7
504921610000000000,truck_7,East,Andy,H-2,v2.0,1500,150,12,7.74392,14.95756,,0,320,0,28.2
504921610000000000,truck_12,East,Trish,G-2000,v1.0,5000,300,19,36.03979,113.8752,34,0,293,1,26.3
504921610000000000,truck_13,West,Derek,H-2,v1.0,1500,150,12,14.07315,110.77235,150,0,72,,21.9
504921610000000000,truck_14,North,Albert,H-2,v1.0,1500,150,12,,105.76727,218,5,253,1,21.9
504921610000000000,truck_15,South,Albert,H-2,v1.5,1500,150,12,6.78254,166.86685,5,0,110,0,26.3
504921610000000000,truck_16,South,Albert,G-2000,v1.5,5000,300,19,51.16405,121.32556,445,0,294,3,29.9
504921610000000000,truck_17,West,Derek,H-2,v1.5,1500,150,12,8.12913,56.57343,9,0,6,4,29
504921610000000000,truck_18,West,Trish,F-150,v2.0,2000,200,15,67.15167,153.56094,260,1,239,1,23.3
504921610000000000,truck_19,West,Derek,G-2000,v1.5,5000,300,19,19.69456,139.49545,448,4,298,0,29.9
504921610000000000,truck_20,North,Rodney,G-2000,v2.0,5000,300,19,38.88968,65.86504,103,0,41,1,23.6
504921610000000000,truck_21,East,Derek,G-2000,v2.0,5000,300,19,81.88232,167.81287,345,0,326,0,20.8
504921610000000000,truck_0,South,Trish,H-2,v2.3,1500,150,12,52.32335,4.71786,128,9,225,0,25.8
504921610000000000,truck_22,West,Albert,G-2000,v1.5,5000,300,19,39.94345,16.02353,440,1,45,0,27.8
504921610000000000,truck_8,South,Seth,G-2000,v1.0,5000,300,19,21.89464,44.58628,402,0,234,0,20.3
504921610000000000,truck_23,South,Andy,F-150,v2.0,2000,200,15,73.28131,98.05635,201,7,277,0,25.3
504921610000000000,truck_24,West,Rodney,G-2000,v2.3,5000,300,19,22.19506,0.27702,217,0,321,2,29.5
504921610000000000,truck_9,South,Andy,H-2,v2.3,1500,150,12,,112.40429,402,9,75,4,29.5
504921610000000000,truck_26,South,Seth,F-150,v1.5,2000,200,15,45.65798,144.60844,59,1,183,0,21.7
504921610000000000,truck_27,West,Seth,G-2000,v1.5,5000,300,19,79.55699,97.86561,255,7,348,2,20.2
504921610000000000,truck_25,North,Trish,F-150,v2.0,2000,200,15,17.26506,16.91691,453,8,186,0,24.3
504921610000000000,truck_28,West,Rodney,G-2000,v1.5,5000,300,19,60.33272,4.61578,84,3,198,0,23.1
504921610000000000,truck_29,East,Rodney,G-2000,v2.0,5000,300,19,80.30331,146.54254,340,5,118,0,25.6
504921610000000000,truck_30,East,Andy,G-2000,v1.5,5000,300,19,29.62434,83.73246,300,0,270,4,22.3
504921610000000000,truck_33,West,Rodney,G-2000,v1.5,5000,300,19,89.19593,10.47733,403,8,170,0,29.6
504921610000000000,truck_36,North,Andy,G-2000,v1.0,5000,300,19,87.53087,45.07276,163,0,132,1,27.6

我期望行 [][]byte 包含逐行的 csv 数据


正确答案


正如已经建议的,您确实应该使用encoding/csv

也就是说,您的问题的原因在 bytes() 函数上方的 godoc:

// bytes returns the most recent token generated by a call to scan.
// the underlying array may point to data that will be overwritten
// by a subsequent call to scan. it does no allocation.
func (s *scanner) bytes() []byte {
    return s.token
}

因此,后续调用 scan() 可能会修改返回的字节切片。为了避免这种情况,您需要复制字节切片,例如

for scanner.Scan() {
    row := scanner.Bytes()
    bs := make([]byte, len(row))
    copy(bs, row)
    b.Append(bs)
}