1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33 package net.sf.flatpack;
34
35 import java.io.BufferedReader;
36 import java.io.IOException;
37 import java.io.Reader;
38 import java.util.List;
39
40 import org.slf4j.Logger;
41 import org.slf4j.LoggerFactory;
42
43 import net.sf.flatpack.structure.ColumnMetaData;
44 import net.sf.flatpack.structure.Row;
45 import net.sf.flatpack.util.FPConstants;
46 import net.sf.flatpack.util.ParserUtils;
47
48
49
50
51
52
53 public abstract class AbstractDelimiterParser extends AbstractParser {
54 private static final Logger LOGGER = LoggerFactory.getLogger(AbstractDelimiterParser.class);
55 private static final String LINE_BREAK = System.lineSeparator();
56
57 private char delimiter = 0;
58 private char qualifier = 0;
59 private boolean ignoreFirstRecord = false;
60
61 private int lineCount = 0;
62
63 public AbstractDelimiterParser(final Reader dataSourceReader, final String dataDefinition, final char delimiter, final char qualifier,
64 final boolean ignoreFirstRecord) {
65 super(dataSourceReader, dataDefinition);
66 this.delimiter = delimiter;
67 this.qualifier = qualifier;
68 this.ignoreFirstRecord = ignoreFirstRecord;
69 }
70
71 public AbstractDelimiterParser(final Reader dataSourceReader, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
72 super(dataSourceReader);
73 this.delimiter = delimiter;
74 this.qualifier = qualifier;
75 this.ignoreFirstRecord = ignoreFirstRecord;
76 }
77
78 @Override
79 protected DataSet doParse() {
80 try {
81 lineCount = 0;
82 return doDelimitedFile(getDataSourceReader(), shouldCreateMDFromFile());
83 } catch (final IOException e) {
84 LOGGER.error("error accessing/creating inputstream", e);
85 }
86 return null;
87 }
88
89 protected abstract boolean shouldCreateMDFromFile();
90
91 protected char getDelimiter() {
92 return delimiter;
93 }
94
95 protected void setDelimiter(final char delimiter) {
96 this.delimiter = delimiter;
97 }
98
99 protected boolean isIgnoreFirstRecord() {
100 return ignoreFirstRecord;
101 }
102
103 protected void setIgnoreFirstRecord(final boolean ignoreFirstRecord) {
104 this.ignoreFirstRecord = ignoreFirstRecord;
105 }
106
107 protected char getQualifier() {
108 return qualifier;
109 }
110
111 protected void setQualifier(final char qualifier) {
112 this.qualifier = qualifier;
113 }
114
115 protected int getLineCount() {
116 return lineCount;
117 }
118
119
120
121
122
123
124
125
126 private DataSet doDelimitedFile(final Reader dataSource, final boolean createMDFromFile) throws IOException {
127 if (dataSource == null) {
128 throw new IllegalArgumentException("dataSource is null");
129 }
130 final DefaultDataSet ds = new DefaultDataSet(getPzMetaData(), this);
131 try (BufferedReader br = new BufferedReader(dataSource)) {
132
133 ds.setPZConvertProps(ParserUtils.loadConvertProperties());
134
135 boolean processedFirst = false;
136
137 String line = null;
138 int estimatedColCount = FPConstants.SPLITLINE_SIZE_INIT;
139 while ((line = fetchNextRecord(br, getQualifier(), getDelimiter())) != null) {
140
141 if (!processedFirst && isIgnoreFirstRecord()) {
142 processedFirst = true;
143 continue;
144 } else if (!processedFirst && createMDFromFile) {
145 processedFirst = true;
146 setPzMetaData(ParserUtils.getPZMetaDataFromFile(line, delimiter, qualifier, this, isAddSuffixToDuplicateColumnNames()));
147 ds.setMetaData(getPzMetaData());
148 continue;
149 }
150
151
152
153 if (oddNumberOfQualifier(line, getQualifier())) {
154 addError(ds, "Odd number of Qualifier characters", lineCount, 1, isStoreRawDataToDataError() ? line : null);
155 continue;
156 }
157
158 List<String> columns = ParserUtils.splitLine(line, getDelimiter(), getQualifier(), estimatedColCount, isPreserveLeadingWhitespace(),
159 isPreserveTrailingWhitespace());
160 final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(getPzMetaData(), columns);
161 final List<ColumnMetaData> metaData = ParserUtils.getColumnMetaData(mdkey, getPzMetaData());
162 final int columnCount = metaData.size();
163 estimatedColCount = columnCount;
164
165 if (columns.size() > columnCount) {
166
167
168 if (isIgnoreExtraColumns()) {
169
170
171 columns = columns.subList(0, columnCount);
172 addError(ds, "Flatpack truncated line to correct number of columns", lineCount, 1, isStoreRawDataToDataError() ? line : null);
173 } else {
174 addError(ds, "Too many columns expected: " + columnCount + " Flatpack got: " + columns.size(), lineCount, 2,
175 isStoreRawDataToDataError() ? line : null);
176 continue;
177 }
178 } else if (columns.size() < columnCount) {
179 if (isHandlingShortLines()) {
180
181 while (columns.size() < columnCount) {
182 columns.add("");
183 }
184
185
186 addError(ds, "Flatpack padded line to correct number of columns", lineCount, 1, isStoreRawDataToDataError() ? line : null);
187
188 } else {
189 addError(ds, "Too few columns expected: " + columnCount + " only got: " + columns.size(), lineCount, 2,
190 isStoreRawDataToDataError() ? line : null);
191 continue;
192 }
193 }
194
195 final Row row = new Row();
196 row.setMdkey(mdkey.equals(FPConstants.DETAIL_ID) ? null : mdkey);
197
198 row.setCols(columns);
199 row.setRowNumber(lineCount);
200 if (isFlagEmptyRows()) {
201
202 row.setEmpty(ParserUtils.isListElementsEmpty(columns));
203 }
204 if (isStoreRawDataToDataSet()) {
205
206
207 row.setRawData(line);
208 }
209
210
211 ds.addRow(row);
212
213 }
214 } finally {
215 closeReaders();
216 }
217 return ds;
218 }
219
220 private boolean oddNumberOfQualifier(final String line, final char q) {
221 if (line == null || line.isEmpty()) {
222 return false;
223 }
224 int count = 0;
225 int idx = 0;
226 while ((idx = line.indexOf(q, idx)) != -1) {
227 count++;
228 idx++;
229 }
230
231 return count % 2 != 0;
232 }
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251 protected String fetchNextRecord(final BufferedReader aContentReader, final char aQualifier, final char aDelimiter) throws IOException {
252 if (aQualifier == FPConstants.NO_QUALIFIER) {
253
254 return aContentReader.readLine();
255 }
256
257 StringBuilder lineData = null;
258 String line = null;
259 boolean multiline = false;
260
261
262 while ((line = aContentReader.readLine()) != null) {
263 if (lineData == null) {
264 lineData = new StringBuilder(line);
265 } else {
266 lineData.append(LINE_BREAK).append(line);
267 }
268
269 multiline = isMultiline(line.toCharArray(), multiline, aQualifier, aDelimiter);
270 if (!multiline) {
271
272 break;
273 }
274 }
275
276 if (lineData != null) {
277 lineCount++;
278
279 final String result = lineData.toString();
280
281 return result.endsWith(LINE_BREAK) ? result.substring(0, result.length() - LINE_BREAK.length()) : result;
282 }
283
284 return null;
285 }
286
287
288
289
290
291
292
293
294
295 protected boolean isMultiline(final char[] aСhrArray, boolean aMultiline, final char aQualifier, final char aDelimiter) {
296
297
298 int position = 0;
299
300 if (aСhrArray == null || aСhrArray.length == 0) {
301 return aMultiline;
302 }
303 do {
304
305 if (!aMultiline && aСhrArray[position] == aDelimiter) {
306
307 position++;
308 } else if (!aMultiline && aСhrArray[position] != aQualifier) {
309
310
311
312
313 while (++position < aСhrArray.length) {
314 if (aСhrArray[position] == aDelimiter) {
315 position++;
316 break;
317 }
318 }
319
320 if (position >= aСhrArray.length) {
321
322
323 return false;
324 }
325 } else {
326
327
328
329
330
331 if (aMultiline && position == 0 && aСhrArray[0] == aQualifier && aСhrArray.length > 1 && aСhrArray[1] == aQualifier) {
332 position++;
333 } else if (aMultiline && position == 0 && aСhrArray[0] == aQualifier && aСhrArray.length > 1 && aСhrArray[1] != aQualifier) {
334
335
336 aMultiline = false;
337 } else {
338 aMultiline = true;
339 }
340
341 if (aСhrArray[position] == aQualifier) {
342
343 position++;
344 }
345
346
347 while (position < aСhrArray.length) {
348 if (aСhrArray[position] == aQualifier) {
349 if (position == aСhrArray.length - 1 || aСhrArray[position + 1] != aQualifier) {
350
351 position++;
352 aMultiline = false;
353 break;
354 } else {
355
356 position += 2;
357 }
358 } else {
359 position++;
360 }
361 }
362 }
363 } while (position <= aСhrArray.length - 1);
364
365 return aMultiline;
366 }
367 }