TPCCLIB
Loading...
Searching...
No Matches
csvio.c
Go to the documentation of this file.
1
5/*****************************************************************************/
6#include "tpcclibConfig.h"
7/*****************************************************************************/
8#include <stdio.h>
9#include <stdlib.h>
10#include <math.h>
11#include <time.h>
12#include <string.h>
13/*****************************************************************************/
14#include "tpccsv.h"
15/*****************************************************************************/
16
17/*****************************************************************************/
29 CSV *csv,
31 FILE *fp
32) {
33 if(fp==NULL) return TPCERROR_CANNOT_WRITE;
34 if(csv==NULL || csv->nr<1) return TPCERROR_NO_DATA;
35
36 for(int i=0; i<csv->nr; i++)
37 if(fprintf(fp, "%d\t%d\t%s\n", 1+csv->c[i].row, 1+csv->c[i].col, csv->c[i].content)<5)
39
40 return(TPCERROR_OK);
41}
42/*****************************************************************************/
43
44/*****************************************************************************/
54 CSV *csv,
58 int regular,
60 FILE *fp,
62 TPCSTATUS *status
63) {
64 int verbose=0; if(status!=NULL) verbose=status->verbose;
65 if(fp==NULL) {
66 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
68 }
69 if(csv==NULL || csv->nr<1) {
70 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
71 return TPCERROR_NO_DATA;
72 }
73 if(csv->separator!=',' && csv->separator!=';' && csv->separator!='\t' && csv->separator!=' ') {
74 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
76 }
77 if(verbose>10) {
78 printf("%s():\n", __func__);
79 printf("regular := %d\n", regular);
80 printf("csv_nr := %d\n", csv->nr);
81 printf("csv_row_nr := %d\n", csv->row_nr);
82 printf("csv_col_nr := %d\n", csv->col_nr);
83 if(csv->separator=='\t') printf("csv_separator := tab\n");
84 else if(csv->separator==' ') printf("csv_separator := space\n");
85 else printf("csv_separator := '%c'\n", csv->separator);
86 }
87
88 /* Write in file */
89 int wn=0;
90 for(int ri=0; ri<csv->row_nr; ri++) {
91 int n=csv->col_nr;
92 if(regular==0) {n=csvRowLength(csv, ri); if(n==0) continue;}
93 for(int ci=0; ci<n; ci++) {
94 if(ci>0) wn+=fprintf(fp, "%c", csv->separator);
95 char *cptr=csvCell(csv, ri, ci);
96 if(cptr!=NULL) wn+=fprintf(fp, "%s", cptr);
97 }
98 wn+=fprintf(fp, "\n");
99 }
100 if(wn<1) {
101 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_CANNOT_WRITE);
103 }
104
105 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
106 return(TPCERROR_OK);
107}
108/*****************************************************************************/
109
110/*****************************************************************************/
126 CSV *csv,
128 FILE *fp,
130 TPCSTATUS *status
131) {
132 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_FAIL);
133 if(csv==NULL || fp==NULL) return TPCERROR_FAIL;
134 int verbose=0; if(status!=NULL) verbose=status->verbose;
135 if(verbose>10) printf("%s()\n", __func__);
136
137 /* Get the size of the ASCII part of the file */
138 size_t fsize=asciiFileSize(fp, NULL);
139 if(verbose>11) printf(" ASCII size := %d\n", (int)fsize);
140 /* If ASCII part is too small, then lets consider that an error */
141 if(fsize<1) {
142 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
143 return TPCERROR_NO_DATA;
144 }
145 /* If ASCII part is too large, then lets consider that an error */
146 if(fsize>50000000) {
147 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_TOO_BIG);
148 return TPCERROR_TOO_BIG;
149 }
150 /* Read that to a string */
151 rewind(fp);
152 char *data;
153 data=asciiFileRead(fp, NULL, fsize+1); rewind(fp);
154 if(data==NULL) {
155 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
156 return TPCERROR_NO_DATA;
157 }
158 if(verbose>20) printf(" ASCII file read\n");
159
160 /* Read one line at a time from the string and determine the field and decimal separators */
161 int i=0, j;
162 int tab_nr=0, sem_nr=0, com_nr=0, dot_nr=0, spa_nr=0;
163 char *cptr, *line, *lptr;
164 cptr=data;
165 while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
166 if(verbose>80) printf("line='%s'\n", line);
167 /* If line starts with '#' then jump over it */
168 if(line[0]=='#') {free(line); cptr+=j; continue;}
169 /* If line contains only space characters then jump over it */
170 if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
171 /* Compute the nr of dots, commas etc outside quotes */
172 lptr=line; while((lptr=strstrNoQuotation(lptr, "\t"))!=NULL) {tab_nr++;lptr++;}
173 lptr=line; while((lptr=strstrNoQuotation(lptr, ";"))!=NULL) {sem_nr++; lptr++;}
174 lptr=line; while((lptr=strstrNoQuotation(lptr, ","))!=NULL) {com_nr++; lptr++;}
175 lptr=line; while((lptr=strstrNoQuotation(lptr, "."))!=NULL) {dot_nr++; lptr++;}
176 lptr=line; while((lptr=strstrNoQuotation(lptr, " "))!=NULL) {spa_nr++; lptr++;}
177 free(line); cptr+=j; i++;
178 }
179 if(verbose>10) {
180 printf("dataline_nr := %d\n", i);
181 printf("semicolon_nr := %d\n", sem_nr);
182 printf("tabulator_nr := %d\n", tab_nr);
183 printf("dot_nr := %d\n", dot_nr);
184 printf("comma_nr := %d\n", com_nr);
185 printf("space_nr := %d\n", spa_nr);
186 }
187 if(sem_nr==0 && tab_nr==0 && dot_nr==0 && com_nr==0 && spa_nr==0) {
188 csv->separator='\t'; // the default
189 } else if(sem_nr>0) {
190 // If at least one semi-colon, then assume that it is the field separator
191 csv->separator=';';
192 } else if(tab_nr>0) {
193 // If at least one tab, then assume that it is the field separator
194 csv->separator='\t';
195 } else if(spa_nr==0) {
196 // If no spaces, then comma must be the field separator
197 csv->separator=',';
198 } else {
199 // Spaces exist, so is space or comma the field separator ?
200 if(com_nr==0) {
201 // No commas, thus space is probably field separator
202 csv->separator=' ';
203 } else if(dot_nr>0) {
204 // Dots and commas exist, probably decimal point, and comma as field separator
205 csv->separator=',';
206 } else {
207 // No dots, but commas and spaces; lets assume that the more frequent one is the field separator
208 if(com_nr>spa_nr) csv->separator=','; else csv->separator=' ';
209 }
210 }
211 if(verbose>10) {
212 if(csv->separator=='\t') printf("field_separator := tab\n");
213 else if(csv->separator==' ') printf("field_separator := space\n");
214 else printf("field_separator := %c\n", csv->separator);
215 }
216
217 /* Copy field values into CSV */
218 cptr=data; i=0; int ret;
219 while((line=strTokenDup(cptr, "\n\r", &j))!=NULL) {
220 /* If line starts with '#' then jump over it */
221 if(line[0]=='#') {free(line); cptr+=j; continue;}
222 /* If line contains only space characters then jump over it */
223 if(strIsSpaceOnly(line)) {free(line); cptr+=j; continue;}
224 /* Write contents into CSV as a new data row */
225 if(csv->separator!=' ') ret=csvPutLine(csv, line, status);
226 else ret=csvPutLineWithSpaces(csv, line, status);
227 if(verbose>1 && ret!=0) fprintf(stderr, "Warning: cannot read line %d: '%s'.\n", i, line);
228 /* Prepare for the next line */
229 free(line); cptr+=j; i++;
230 }
231 if(i==0) {
232 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
233 return TPCERROR_NO_DATA;
234 }
235
236 free(data);
237 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
238 return(TPCERROR_OK);
239}
240/*****************************************************************************/
241
242/*****************************************************************************/
253 CSV *csv,
255 const char *line,
257 TPCSTATUS *status
258) {
259 if(csv==NULL) return TPCERROR_FAIL;
260 if(line==NULL || strlen(line)<1) {
261 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
262 return TPCERROR_NO_DATA;
263 }
264 int verbose=0; if(status!=NULL) verbose=status->verbose;
265 if(verbose>10) printf("%s():\n", __func__);
266 if(verbose>12) printf("'%s'\n", line);
267
268 //size_t len=strlen(line);
269 char delimiter=csv->separator;
270
271 /* Space is not supported here */
272 if(delimiter==' ') {
273 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_INVALID_SEPARATOR);
275 }
276
277 int ret, i, last_was_delim=0;
278 int field_nr=0;
279 char *cptr=(char*)line, *lptr, *s;
280 /* If the first character is delimiter, we have had an empty field */
281 if(*cptr==delimiter) {
282 if(verbose>20) printf("first char is delimiter.\n");
283 ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
284 statusSet(status, __func__, __FILE__, __LINE__, ret);
285 return ret;
286 }
287 last_was_delim=1; cptr++; field_nr++;
288 }
289 /* Read all fields */
290 int single_quotation=0;
291 int double_quotation=0;
292 lptr=cptr;
293 while(*cptr && *lptr) {
294 if(verbose>20) printf("cptr='%s'\n", cptr);
295 /* Read next field */
296 lptr=cptr; i=0;
297 while(*lptr) {
298 // jump over quoted sequences
299 if(*lptr=='\'') {
300 if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
301 else single_quotation=0;
302 lptr++; i++; continue;
303 }
304 if(*lptr=='\"') {
305 if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
306 else double_quotation=0;
307 lptr++; i++; continue;
308 }
309 if(single_quotation==1 || double_quotation==1) {lptr++; i++; continue;}
310 // if this character is the delimiter, then stop
311 if(*lptr==delimiter) break;
312 // otherwise continue search
313 lptr++; i++;
314 }
315 s=strndup(cptr, i);
316 if(verbose>20) printf(" s='%s'\n", s);
317 ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
318 statusSet(status, __func__, __FILE__, __LINE__, ret);
319 free(s); return ret;
320 }
321 free(s); field_nr++;
322 if(*lptr==delimiter) {
323 last_was_delim=1; cptr+=(i+1);
324 } else {last_was_delim=0; cptr+=(i+1);}
325 }
326 if(verbose>20) printf("line finished.\n");
327 /* If the last character is delimiter, we have an empty field in the end */
328 if(last_was_delim) {
329 if(verbose>20) printf("last char is delimiter.\n");
330 ret=csvPutString(csv, "", !field_nr); if(ret!=TPCERROR_OK) {
331 statusSet(status, __func__, __FILE__, __LINE__, ret);
332 return ret;
333 }
334 field_nr++;
335 }
336 if(verbose>20) printf("ending %s()\n", __func__);
337 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
338 return(TPCERROR_OK);
339}
340/*****************************************************************************/
341
342/*****************************************************************************/
353 CSV *csv,
355 const char *line,
357 TPCSTATUS *status
358) {
359 if(csv==NULL) return TPCERROR_FAIL;
360 if(line==NULL || strlen(line)<1) {
361 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_NO_DATA);
362 return TPCERROR_NO_DATA;
363 }
364 int verbose=0; if(status!=NULL) verbose=status->verbose;
365 if(verbose>10) printf("%s():\n", __func__);
366 if(verbose>12) printf("'%s'\n", line);
367
368 char *cptr=(char*)line;
369 char *lptr=cptr, *s;
370 int single_quotation=0;
371 int double_quotation=0;
372 int ret;
373 size_t j;
374 int field_nr=0;
375 while(*cptr && *lptr) {
376 if(verbose>20) printf("cptr='%s'\n", cptr);
377 // Pass the spaces
378 j=strspn(cptr, " \t\n\r"); cptr+=j; if(!cptr) break;
379 // Find the end of token
380 lptr=cptr; j=0;
381 while(*lptr) {
382 // jump over quoted sequences */
383 if(*lptr=='\'') {
384 if(single_quotation==0 && strchr(lptr+1, '\'')!=NULL) single_quotation=1;
385 else single_quotation=0;
386 lptr++; j++; continue;
387 }
388 if(*lptr=='\"') {
389 if(double_quotation==0 && strchr(lptr+1, '\"')!=NULL) double_quotation=1;
390 else double_quotation=0;
391 lptr++; j++; continue;
392 }
393 if(single_quotation==1 || double_quotation==1) {lptr++; j++; continue;}
394 // if this character is the delimiter, then stop
395 if(*lptr==' ') break;
396 // otherwise continue search
397 lptr++; j++;
398 }
399 if(j==0) break;
400 s=strndup(cptr, j);
401 if(verbose>20) printf(" s='%s'\n", s);
402 ret=csvPutString(csv, s, !field_nr); if(ret!=TPCERROR_OK) {
403 statusSet(status, __func__, __FILE__, __LINE__, ret);
404 free(s); return ret;
405 }
406 free(s); cptr+=j; field_nr++;
407 if(verbose>20) printf(" csv.nr=%d\n", csv->nr);
408 }
409
410 statusSet(status, __func__, __FILE__, __LINE__, TPCERROR_OK);
411 return(TPCERROR_OK);
412}
413/*****************************************************************************/
414
415/*****************************************************************************/
424 CSV *csv
425) {
426 if(csv==NULL) return TPCERROR_FAIL;
427 for(int i=0; i<csv->nr; i++) strCleanSpaces(csv->c[i].content);
428 return(TPCERROR_OK);
429}
430/*****************************************************************************/
431
432/*****************************************************************************/
char * csvCell(CSV *csv, int row, int col)
Definition csv.c:358
int csvPutString(CSV *csv, const char *s, int newline)
Definition csv.c:144
int csvRowLength(CSV *csv, int row)
Definition csv.c:244
int csvPutLineWithSpaces(CSV *csv, const char *line, TPCSTATUS *status)
Definition csvio.c:351
int csvRead(CSV *csv, FILE *fp, TPCSTATUS *status)
Definition csvio.c:124
int csvCleanSpaces(CSV *csv)
Definition csvio.c:422
int csvWrite(CSV *csv, int regular, FILE *fp, TPCSTATUS *status)
Definition csvio.c:52
int csvList(CSV *csv, FILE *fp)
Definition csvio.c:27
int csvPutLine(CSV *csv, const char *line, TPCSTATUS *status)
Definition csvio.c:251
char * asciiFileRead(FILE *fp, char *data, size_t maxlen)
size_t asciiFileSize(FILE *fp, int *nonprintable)
void statusSet(TPCSTATUS *s, const char *func, const char *srcfile, int srcline, tpcerror error)
Definition statusmsg.c:142
char * strstrNoQuotation(const char *haystack, const char *needle)
Definition stringext.c:225
int strCleanSpaces(char *s)
Definition stringext.c:300
int strIsSpaceOnly(char *s)
Definition stringext.c:671
char * strTokenDup(const char *s1, const char *s2, int *next)
Definition stringext.c:413
char * strndup(const char *s, size_t n)
Definition stringext.c:205
int col
Definition tpccsv.h:28
int row
Definition tpccsv.h:26
char * content
Definition tpccsv.h:30
Definition tpccsv.h:36
int row_nr
Definition tpccsv.h:44
int col_nr
Definition tpccsv.h:46
char separator
Definition tpccsv.h:49
CSV_item * c
Definition tpccsv.h:38
int nr
Definition tpccsv.h:42
int verbose
Verbose level, used by statusPrint() etc.
Header file for library libtpccsv.
@ TPCERROR_INVALID_SEPARATOR
Invalid field delimiter.
@ TPCERROR_FAIL
General error.
@ TPCERROR_TOO_BIG
File is too big.
@ TPCERROR_OK
No error.
@ TPCERROR_NO_DATA
File contains no data.
@ TPCERROR_CANNOT_WRITE
Cannot write file.