Download or view CSV.frink in plain text format
/** This class contains methods for working with comma-separated-values tables.
TODO:
* Write out spreadsheet
* Fill in undefined values
* Copy selected columns to new table
* Extract columns
* Append column
* Append row
* Setter
*/
class CSV
{
/** An array of column names */
var colNames
/** A dictionary of column names to column numbers. */
var colNamesToNumbers = new dict
/** A dictionary of column numbers to column names. */
var colNumbersToNames = new dict
/** A 2-dimensional array of data. */
var data = new array
/** The number of rows in the dataset (excluding headers) */
var rows = undef
/** The number of columns in the dataset */
var cols = undef
/** getColumn returns the values in an entire column. The column can be
indexed either by a string indicating the column name or the zero-based
column number integer. */
getColumn[colID] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return undef
}
return data.getColumn[colNum]
}
/** Turns a column ID into a zero-based column number. The column can be
indexed either by a string indicating the column name or the zero-based
column number integer. */
colIDToColumnNumber[colID] :=
{
if isInteger[colID]
// TODO: Check validity of column number
return colID
else
{
colNum = colNamesToNumbers@colID
if colNum == undef
return undef
// TODO: Check validity of column number
return colNum
}
}
/** Turns an array of column IDs into an array of zero-based column
numbers. The columns can be indexed either by a string indicating the
column name or the zero-based column number integer. */
colIDsToColumnNumbers[colIDs] :=
{
result = new array
for c = colIDs
result.push[colIDToColumnNumber[c]]
return result
}
/** Renames a column. The column can be indexed either by a string
indicating the column name or the zero-based column number integer. */
renameColumn[colID, newName] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return
}
oldName = colNames@colNum
colNames@colNum = newName
colNamesToNumbers.remove[oldName]
colNamesToNumbers@newName = colNum
colNumbersToNames@colNum = newName
}
/** Checks to see if all of the values in the specified column are numeric */
areNumeric[colID] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return false
}
for r = 0 to rows-1
{
val = data@r@colNum
if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val])
return false
}
return true
}
/** Finds any rows in the column that are non-numeric and returns an array
of [rownum, value] pairs. */
findNonNumeric[colID] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return false
}
res = new array
for r = 0 to rows-1
{
val = data@r@colNum
if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val])
res.push[[r, val]]
}
return res
}
/** Finds any columns in the table that are non-numeric and changes them
to the specified value. */
replaceNonNumeric[colID, toValue] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return false
}
for r = 0 to rows-1
{
val = data@r@colNum
if (isString[val] and ! isNumericString[val]) or (!isString[val] and ! isUnit[val])
data@row@colNum = toValue
}
}
/** Gets the value at colID, row */
get[colID, row] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return false
}
return data@row@colNum
}
/** Returns a set of all the different values in the specified column. */
differentValues[colID] :=
{
return toSet[getColumn[colID]]
}
/** Returns a set of all the different values in the specified column and
their counts, most common first. */
differentValuesCount[colID] :=
{
return countToArray[getColumn[colID]]
}
/** Parses a file with header row. */
class parseFileWithHeader[fileDesignator] :=
{
csv = new CSV
lineNum = 0
for line = lines[fileDesignator]
{
if lineNum == 0
csv.setHeaders[parseQuoted[line]]
else
{
csv.data.push[parseQuoted[line]]
}
lineNum = lineNum + 1
}
[csv.rows, csv.cols] = csv.data.dimensions[]
return csv
}
/** This sets up the names of the header columns. */
setHeaders[colNameArray] :=
{
colNames = deepCopy[colNameArray]
len = length[colNames]
for i = 0 to len-1
{
name = colNames@i
colNamesToNumbers@name = i
colNumbersToNames@i = name
}
}
/** This parses a string containing an entire row of a database with
possibly-quoted values. */
class parseQuoted[str, separator=",", quote="\""] :=
{
chars = charList[str]
inString = false // True if we're inside a quoted string
wasString = false // True if last thing parsed was a string
result = new array
len = length[chars]
currStr = ""
pos = 0
while pos < len
{
c = chars@pos
if c == separator
{
if inString
currStr = currStr + c
else
{
if wasString
result.push[currStr]
else
result.push[maybeAsInteger[currStr]]
wasString = false
currStr=""
}
} else
if c == quote
{
if inString // Close quote (or unescaped quote?)
{
inString = false
wasString = true
} else
{ // Open quote
wasString = true
inString = true
}
} else
{
// Raw char, append it
currStr = currStr + c
}
pos = pos + 1
}
if currStr != ""
if wasString
result.push[currStr]
else
result.push[maybeAsInteger[currStr]]
return result
}
/** Makes the specified columns numeric. */
makeColumnsNumeric[colIDs] :=
{
for colID = colIDs
makeColumnNumeric[colID]
}
/** Makes the specified column numeric. */
makeColumnNumeric[colID] :=
{
colNum = colIDToColumnNumber[colID]
if colNum == undef
{
println["Invalid column ID $colID"]
return undef
}
for row = 0 to rows-1
data@row@colNum = maybeAsInteger[data@row@colNum]
}
/** Sorts the whole table, ordering by the specified column(s). */
sortBy[colIDs, ascending=true] :=
{
colNums = colIDsToColumnNumbers[toArray[colIDs]]
sort[data, byColumns[colNums]]
if !ascending
reverse[data]
}
/** Returns true if a string represents a numeric value (integer or
floating-point) that is parseable by Frink or Java (and contains no other
text, including no leading nor trailing whitespace.)
*/
class isNumericString[str] :=
{
if str =~ %r/^([-+]?(?:[0-9]+|(?:[0-9]+\.[0-9]*(?:[eE][-+]?[0-9]+)?)|(?:\.[0-9]+(?:[eE][-+]?[0-9]+)?)|(?:[0-9]+(?:[eE][-+]?[0-9]+))))$/
return true
else
return false
}
/** Function to turn a spreadsheet column designator into a zero-based
column index.
For example, this turns column name "A" to 0 and column name "Z" to 25
and column name "AA" to 26 and column name "AB" to 27.
*/
class ssColumnToIndex[name] :=
{
name = uc[trim[name]]
ret = 0
len = length[name]
pow = 1
for i = len-1 to 0 step -1
{
ret = ret + (char[substrLen[name, i, 1]] - char["A"] + 1) * pow
pow = pow * 26
}
return ret-1 // We want zero-based column number.
}
/** Function to turn a zero-based column number into an alphabetic
spreadsheet column designator.
For example, this turns column number 0 to "A" and column number
25 to "Z" to 25 and column number 26 to "AA" and and column number
27 to "AB" and column number 701 to "ZZ" and column number 702 to "AAA".
Also see https://oeis.org/A218729
*/
class ssIndexToColumn[num] :=
{
res = ""
num = num + 1
while num > 0
{
res = char[((num-1) mod 26) + char["A"]] + res
num = (num-1) div 26
}
return res
}
/** Tests a string to see if it's numeric and if so, returns it either as
a floating point number or an integer if the floating-point number
rounds to itself. Otherwise, returns the original string. */
class maybeAsInteger[str] :=
{
if isNumericString[str]
{
oval = eval[str]
rval = round[oval]
if rval == oval
return rval
else
return oval
}
return str
}
}
Download or view CSV.frink in plain text format
This is a program written in the programming language Frink.
For more information, view the Frink
Documentation or see More Sample Frink Programs.
Alan Eliasen was born 20217 days, 23 hours, 52 minutes ago.