Thursday, 8 August 2013

Read empty cell (first column) using Apache POI Event model

Read empty cell (first column) using Apache POI Event model

This is an enhancement to my old question: Read empty cell using Apache
POI Event model.
Actually I am trying to read empty cells and it works when empty cells
occur in middle or end columns. However, if first column has empty cells,
it doesn't treat it as BlankRecord.sid in below code. And due to this the
value for that cell is set to empty string. I want to treat the first
column too as BlankRecord so that it will be set to null.
Here is the code for xls:
public void processRecord(Record record) {
int thisRow = -1;
String thisStr = null;
switch (record.getSid()) {
case BoundSheetRecord.sid:
boundSheetRecords.add(record);
break;
case BOFRecord.sid:
BOFRecord br = (BOFRecord)record;
if(br.getType() == BOFRecord.TYPE_WORKSHEET) {
// Works by ordering the BSRs by the location of their
BOFRecords, and then knowing that we
// process BOFRecords in byte offset order
if(orderedBSRs == null) {
orderedBSRs =
BoundSheetRecord.orderByBofPosition(boundSheetRecords);
}
// Check the existence of sheets
if(sheetIndex == 0) {
for(int i=0;i<excelSheetList.length;i++) {
boolean found = false;
for(int j=0;j<orderedBSRs.length;j++) {
if(this.getExcelSheetSpecification().equals(MSExcelAdapter.USE_WORKSHEET_NAME))
{
String sheetName = ((BoundSheetRecord)
boundSheetRecords.get(j)).getSheetname();
if(excelSheetList[i].equals(sheetName)) {
found = true;
break;
}
} else {
try {
if(Integer.parseInt(excelSheetList[i])
== j) {
found = true;
break;
}
} catch (NumberFormatException e) {
}
}
}
if(!found)
this.warning("processRecord()","Sheet: " +
excelSheetList[i] + " does not exist.");
}
}
readCurrentSheet = true;
sheetIndex++;
if(this.getExcelSheetSpecification().equals(MSExcelAdapter.USE_WORKSHEET_NAME))
{
String sheetName = ((BoundSheetRecord)
boundSheetRecords.get(sheetIndex-1)).getSheetname();
if(!canRead(sheetName)) {
readCurrentSheet = false;
}
} else {
if(!canRead(sheetIndex + "")) {
readCurrentSheet = false;
}
}
}
break;
case SSTRecord.sid:
sstRecord = (SSTRecord) record;
break;
case BlankRecord.sid:
BlankRecord brec = (BlankRecord) record;
thisRow = brec.getRow();
thisStr = null;
values.add(thisStr);
columnCount++;
break;
case FormulaRecord.sid:
FormulaRecord frec = (FormulaRecord) record;
thisRow = frec.getRow();
if(Double.isNaN( frec.getValue() )) {
// Formula result is a string
// This is stored in the next record
outputNextStringRecord = true;
nextRow = frec.getRow();
} else {
thisStr = formatListener.formatNumberDateCell(frec);
}
break;
case StringRecord.sid:
if(outputNextStringRecord) {
// String for formula
StringRecord srec = (StringRecord)record;
thisStr = srec.getString();
thisRow = nextRow;
outputNextStringRecord = false;
}
break;
case LabelSSTRecord.sid:
if(readCurrentSheet) {
LabelSSTRecord lsrec = (LabelSSTRecord) record;
thisRow = lsrec.getRow() + 1;
if(rowNumberList.contains(thisRow + "") ||
(rowNumberList.contains(END_OF_ROWS) &&
thisRow >= secondLastRow)) {
if(sstRecord == null) {
thisStr = "(No SST Record, can't identify
string)";
} else {
thisStr =
sstRecord.getString(lsrec.getSSTIndex()).toString();
}
}
}
break;
case NumberRecord.sid:
if(readCurrentSheet) {
NumberRecord numrec = (NumberRecord) record;
thisRow = numrec.getRow() + 1;
if(rowNumberList.contains(thisRow + "") ||
(rowNumberList.contains(END_OF_ROWS) &&
thisRow >= secondLastRow)) {
thisStr =
formatListener.formatNumberDateCell(numrec);
// Format
}
}
break;
default:
break;
}
// Handle missing column
if(record instanceof MissingCellDummyRecord) {
thisStr = "";
}
// If we got something to print out, do so
if(thisStr != null) {
values.add(thisStr);
columnCount++;
}
// Handle end of row
if(record instanceof LastCellOfRowDummyRecord) {
.....
}
...
Here is the code for xlsx:
/**
* Default handler for parsing an excel sheet
* @see org.xml.sax.helpers.DefaultHandler
*/
private class SheetHandler extends DefaultHandler {
private SharedStringsTable sst;
private String lastContents;
private boolean nextIsString;
private MSExcelReader reader;
private int thisColumn = -1;
private int lastColumnNumber = -1; // The last column printed to
the output stream
private SheetHandler(SharedStringsTable sst, MSExcelReader reader) {
this.sst = sst;
this.reader = reader;
}
public void startElement(String uri, String localName, String
name, Attributes attributes) throws SAXException {
// c => cell
if(name.equals("c")) {
// Figure out if the value is an index in the SST
String cellType = attributes.getValue("t");
if(cellType != null && cellType.equals("s")) {
nextIsString = true;
} else {
nextIsString = false;
}
// Get the cell reference
String r = attributes.getValue("r");
int firstDigit = -1;
for (int c = 0; c < r.length(); ++c) {
if (Character.isDigit(r.charAt(c))) {
firstDigit = c;
break;
}
}
thisColumn = nameToColumn(r.substring(0, firstDigit));
}
// Clear contents cache
lastContents = "";
}
public void endElement(String uri, String localName, String name)
throws SAXException {
// Process the last contents as required.
// Do now, as characters() may be called more than once
if(nextIsString) {
try {
int idx = Integer.parseInt(lastContents);
lastContents = new
XSSFRichTextString(sst.getEntryAt(idx)).toString();
} catch (NumberFormatException e) {
}
}
// v => contents of a cell
// Output after we've seen the string contents
if(name.equals("v")) {
for (int i = lastColumnNumber; i < thisColumn - 1; ++i)
values.add(null); // Add empty string for missing
columns
values.add(lastContents);
// Update column
if (thisColumn > -1)
lastColumnNumber = thisColumn;
}
if(name.equals("row")) {
...
Same as my old question things to mention: I am not using the usermodel
(org.apache.poi.ss.usermodel) but an Event API to process xls and xlsx
files.
I am implementing HSSFListener and overriding its processRecord(Record
record) method for xls files. For xlsx files I am using
javax.xml.parsers.SAXParser and org.xml.sax.XMLReader.
I am using JDK7 with Apache POI 3.7. Can someone please help?

No comments:

Post a Comment