/** This class contains methods for working with comma-separated-values tables. TODO: * Write out spreadsheet * Fill in undefined values * Copy selected columns to new table * Extract columns * Append column * Append row * Setter */ class CSV { /** An array of column names */ var colNames /** A dictionary of column names to column numbers. */ var colNamesToNumbers = new dict /** A dictionary of column numbers to column names. */ var colNumbersToNames = new dict /** A 2-dimensional array of data. */ var data = new array /** The number of rows in the dataset (excluding headers) */ var rows = undef /** The number of columns in the dataset */ var cols = undef /** getColumn returns the values in an entire column. The column can be indexed either by a string indicating the column name or the zero-based column number integer. */ getColumn[colID] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return undef } return data.getColumn[colNum] } /** Turns a column ID into a zero-based column number. The column can be indexed either by a string indicating the column name or the zero-based column number integer. */ colIDToColumnNumber[colID] := { if isInteger[colID] // TODO: Check validity of column number return colID else { colNum = colNamesToNumbers@colID if colNum == undef return undef // TODO: Check validity of column number return colNum } } /** Turns an array of column IDs into an array of zero-based column numbers. The columns can be indexed either by a string indicating the column name or the zero-based column number integer. */ colIDsToColumnNumbers[colIDs] := { result = new array for c = colIDs result.push[colIDToColumnNumber[c]] return result } /** Renames a column. The column can be indexed either by a string indicating the column name or the zero-based column number integer. */ renameColumn[colID, newName] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return } oldName = colNames@colNum colNames@colNum = newName colNamesToNumbers.remove[oldName] colNamesToNumbers@newName = colNum colNumbersToNames@colNum = newName } /** Checks to see if all of the values in the specified column are numeric */ areNumeric[colID] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return false } for r = 0 to rows-1 { val = data@r@colNum if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val]) return false } return true } /** Finds any rows in the column that are non-numeric and returns an array of [rownum, value] pairs. */ findNonNumeric[colID] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return false } res = new array for r = 0 to rows-1 { val = data@r@colNum if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val]) res.push[[r, val]] } return res } /** Finds any columns in the table that are non-numeric and changes them to the specified value. */ replaceNonNumeric[colID, toValue] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return false } for r = 0 to rows-1 { val = data@r@colNum if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val]) data@row@colNum = toValue } } /** Gets the value at colID, row */ get[colID, row] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return false } return data@row@colNum } /** Returns a set of all the different values in the specified column. */ differentValues[colID] := { return toSet[getColumn[colID]] } /** Returns a set of all the different values in the specified column and their counts, most common first. */ differentValuesCount[colID] := { return countToArray[getColumn[colID]] } /** Parses a file with header row. */ class parseFileWithHeader[fileDesignator] := { csv = new CSV lineNum = 0 for line = lines[fileDesignator] { if lineNum == 0 csv.setHeaders[parseQuoted[line]] else { csv.data.push[parseQuoted[line]] } lineNum = lineNum + 1 } [csv.rows, csv.cols] = csv.data.dimensions[] return csv } /** This sets up the names of the header columns. */ setHeaders[colNameArray] := { colNames = deepCopy[colNameArray] len = length[colNames] for i = 0 to len-1 { name = colNames@i colNamesToNumbers@name = i colNumbersToNames@i = name } } /** This parses a string containing an entire row of a database with possibly-quoted values. */ class parseQuoted[str, separator=",", quote="\""] := { chars = charList[str] inString = false // True if we're inside a quoted string wasString = false // True if last thing parsed was a string result = new array len = length[chars] currStr = "" pos = 0 while pos < len { c = chars@pos if c == separator { if inString currStr = currStr + c else { if wasString result.push[currStr] else result.push[maybeAsInteger[currStr]] wasString = false currStr="" } } else if c == quote { if inString // Close quote (or unescaped quote?) { inString = false wasString = true } else { // Open quote wasString = true inString = true } } else { // Raw char, append it currStr = currStr + c } pos = pos + 1 } if currStr != "" if wasString result.push[currStr] else result.push[maybeAsInteger[currStr]] return result } /** Makes the specified columns numeric. */ makeColumnsNumeric[colIDs] := { for colID = colIDs makeColumnNumeric[colID] } /** Makes the specified column numeric. */ makeColumnNumeric[colID] := { colNum = colIDToColumnNumber[colID] if colNum == undef { println["Invalid column ID $colID"] return undef } for row = 0 to rows-1 data@row@colNum = maybeAsInteger[data@row@colNum] } /** Sorts the whole table, ordering by the specified column(s). */ sortBy[colIDs, ascending=true] := { colNums = colIDsToColumnNumbers[toArray[colIDs]] sort[data, byColumns[colNums]] if !ascending reverse[data] } /** Returns true if a string represents a numeric value (integer or floating-point) that is parseable by Frink or Java (and contains no other text, including no leading nor trailing whitespace.) */ class isNumericString[str] := { if str =~ %r/^([-+]?(?:[0-9]+|(?:[0-9]+\.[0-9]*(?:[eE][-+]?[0-9]+)?)|(?:\.[0-9]+(?:[eE][-+]?[0-9]+)?)|(?:[0-9]+(?:[eE][-+]?[0-9]+))))$/ return true else return false } /** Function to turn a spreadsheet column designator into a zero-based column index. For example, this turns column name "A" to 0 and column name "Z" to 25 and column name "AA" to 26 and column name "AB" to 27. */ class ssColumnToIndex[name] := { name = uc[trim[name]] ret = 0 len = length[name] pow = 1 for i = len-1 to 0 step -1 { ret = ret + (char[substrLen[name, i, 1]] - char["A"] + 1) * pow pow = pow * 26 } return ret-1 // We want zero-based column number. } /** Function to turn a zero-based column number into an alphabetic spreadsheet column designator. For example, this turns column number 0 to "A" and column number 25 to "Z" to 25 and column number 26 to "AA" and and column number 27 to "AB" and column number 701 to "ZZ" and column number 702 to "AAA". Also see https://oeis.org/A218729 */ class ssIndexToColumn[num] := { res = "" num = num + 1 while num > 0 { res = char[((num-1) mod 26) + char["A"]] + res num = (num-1) div 26 } return res } /** Tests a string to see if it's numeric and if so, returns it either as a floating point number or an integer if the floating-point number rounds to itself. Otherwise, returns the original string. */ class maybeAsInteger[str] := { if isNumericString[str] { oval = eval[str] rval = round[oval] if rval == oval return rval else return oval } return str } }