class Dataframe

Dataframe
Reference
Object

Overview

NOTE: Although the CSV spec (RFC 4180) states that leading and trailing whitespace is part of a field and should not be ignored, some CSV implementations still pad cells with insignificant whitespace. For this reason, Dataframe ignores leading and trailing whitespace in unquoted cells.

Defined in:

builder/csv_builder.cr
column.cr
common.cr
dataframe.cr
parser/csv_lexer.cr
parser/csv_parser.cr
row.cr

Constant Summary

VERSION = "0.1.0"

Constructors

.from_csv(string_or_io : String | IO, headers : Bool = true, separator : Char = CSVLexer::DEFAULT_SEPARATOR, quote_char : Char = CSVLexer::DEFAULT_QUOTE_CHAR) : Dataframe
.new(headers : Array(String), rows : Array(Array(Type)))
Creates a new Dataframe instance with the given headers and rows.
.new(column_names : Array(String))
Creates a new Dataframe instance with the specified headers, and columns of type String, but with no data.
.new(columns)
Creates a new Dataframe instance with column names and types defined by columns, but with no data.
.new
Creates an empty Dataframe instance, with no columns or rows.

Instance Method Summary

#<<(row : Array(Type))
Append. Alias for #add_row.
#<<(row : Row)
Append. Alias for #add_row.
#==(other : Dataframe) : Bool
#[](header : String) : Dataframe::Column
#[]=(header : String, new_column : Column) : self
#add_column(header : String, type : ColumnType = String)
Adds a new empty column to self.
#add_column(header : String, data : Array(Type))
Adds a new column to self, with content specified by data.
#add_row(row : Array(Type))
Append a new row to the bottom of self.
#add_row(row : Row)
Append a new row to the bottom of self.
#columns : Hash(String, Column(String) | Column(Int32) | Column(Float64) | Column(Bool))
Returns the columns of self as a Hash with the headers as keys, and Columns as values.
#data : Array(Array(Bool | Float64 | Int32 | String | Nil))
#each(& : Array(Type) -> ) : Nil
Iterates over the rows of self.
#each_row(& : Row -> ) : Nil
Iterates over the rows of self, returning each row as an instance of Row.
#full_join(other : Dataframe, on : Array(String)) : Dataframe
Returns a new Dataframe that is the result of a full join of the receiver and other, using the headers in on to match rows.
#headers : Array(String)
Returns the column names as an Array.
#inner_join(other : Dataframe, on : Array(String)) : Dataframe
Returns a new Dataframe that is the result of an inner join of the receiver and other, using the headers in on to match rows.
#left_join(other : Dataframe, on : Array(String)) : Dataframe
Returns a new Dataframe that is the result of a left outer join of the receiver and other, using the headers in on to match rows.
#order_columns(new_headers : Array(String)) : Dataframe
Returns a new Dataframe with columns ordered by new_headers.
#order_columns!(new_headers : Array(String)) : self
Modifies self by rearranging columns in order specified by new_headers.
#reject(& : Row -> ) : Dataframe
Returns a new Dataframe containing only the rows for which the passed block is falsey.
#reject!(& : Row -> ) : self
Modifies self, deleting the rows in the collection for which the passed block is truthy.
#reject_columns(headers : Array(String)) : Dataframe
Returns a new Dataframe without the given columns.
#reject_columns!(headers : Array(String)) : self
Removes a list of columns.
#rename_column(old_header, new_header)
Changes the header of the specified column to a new value.
#right_join(other : Dataframe, on : Array(String)) : Dataframe
Returns a new Dataframe that is the result of a right outer join of the receiver and other, using the headers in on to match rows.
#row_count
Returns the number of rows in the Dataframe.
#rows : Array(Row)
Returns the data of the Dataframe as an array of Row.
#select(& : Row -> ) : Dataframe
Returns a new Dataframe with only rows for which the passed block is truthy.
#select!(& : Row -> ) : self
Modifies self, keeping only the rows for which the passed block is truthy.
#select_columns(headers : Array(String)) : Dataframe
Returns a new Dataframe with the given columns.
#select_columns!(headers : Array(String)) : self
Removes every column except the given ones.
#shape : Tuple(Int32, Int32)
Returns a Tuple of the dataframe's dimensions in the form of { rows, columns }.
#sort_by(& : Row -> ) : Dataframe
#sort_by(column : String, desc = false) : Dataframe
#sort_by!(& : Row -> ) : self
#sort_by!(column : String, desc = false) : self
#to_csv(separator : Char = CSV::DEFAULT_SEPARATOR, quote_char : Char = CSV::DEFAULT_QUOTE_CHAR) : String

Constructor Detail

def self.from_csv(string_or_io : String | IO, headers : Bool = true, separator : Char = CSVLexer::DEFAULT_SEPARATOR, quote_char : Char = CSVLexer::DEFAULT_QUOTE_CHAR) : Dataframe #

[View source]

def self.new(headers : Array(String), rows : Array(Array(Type))) #

Creates a new Dataframe instance with the given headers and rows.

Raises an InvalidDataframe error if the headers and each row don't all have the same length.

[View source]

def self.new(column_names : Array(String)) #

Creates a new Dataframe instance with the specified headers, and columns of type String, but with no data.

[View source]

def self.new(columns) #

Creates a new Dataframe instance with column names and types defined by columns, but with no data.

[View source]

def self.new #

Creates an empty Dataframe instance, with no columns or rows.

[View source]

Instance Method Detail

def <<(row : Array(Type)) #

Append. Alias for #add_row.

[View source]

def <<(row : Row) #

Append. Alias for #add_row.

[View source]

def ==(other : Dataframe) : Bool #

[View source]

def [](header : String) : Dataframe::Column #

[View source]

def []=(header : String, new_column : Column) : self #

[View source]

def add_column(header : String, type : ColumnType = String) #

Adds a new empty column to self.

[View source]

def add_column(header : String, data : Array(Type)) #

Adds a new column to self, with content specified by data.

The type is determined by the content of data.

NOTE: If data doesn't contain any non-nil values, the column type cannot be determined and a runtime error will occur.

[View source]

def add_row(row : Array(Type)) #

Append a new row to the bottom of self.

[View source]

def add_row(row : Row) #

Append a new row to the bottom of self.

[View source]

def columns : Hash(String, Column(String) | Column(Int32) | Column(Float64) | Column(Bool)) #

Returns the columns of self as a Hash with the headers as keys, and Columns as values.

NOTE: Because this method returns a union of all possible column types, it's necessary to cast the column to the proper type before calling any type-specific methods.

column = dataframe.columns["Age"].as(Dataframe::Column(Int32))

column.map! do |e|
  e.nil? ? e : e + 1
end

[View source]

def data : Array(Array(Bool | Float64 | Int32 | String | Nil)) #

[View source]

def each(& : Array(Type) -> ) : Nil #

Iterates over the rows of self.

[View source]

def each_row(& : Row -> ) : Nil #

Iterates over the rows of self, returning each row as an instance of Row.

[View source]

def full_join(other : Dataframe, on : Array(String)) : Dataframe #

Returns a new Dataframe that is the result of a full join of the receiver and other, using the headers in on to match rows.

[View source]

def headers : Array(String) #

Returns the column names as an Array.

[View source]

def inner_join(other : Dataframe, on : Array(String)) : Dataframe #

Returns a new Dataframe that is the result of an inner join of the receiver and other, using the headers in on to match rows.

[View source]

def left_join(other : Dataframe, on : Array(String)) : Dataframe #

Returns a new Dataframe that is the result of a left outer join of the receiver and other, using the headers in on to match rows.

[View source]

def order_columns(new_headers : Array(String)) : Dataframe #

Returns a new Dataframe with columns ordered by new_headers.

NOTE: Any column whose name is omitted from new_headers will not be included in the new Dataframe.

CrystalDoc.info

dataframe

class Dataframe

Overview

Defined in:

Constant Summary

Constructors

Instance Method Summary

Constructor Detail

Instance Method Detail