SQL¶
DataFusion also offers a SQL API; read the full reference here.
In [1]: import datafusion
In [2]: from datafusion import col
In [3]: import pyarrow
# create a context
In [4]: ctx = datafusion.SessionContext()
# register a CSV
In [5]: ctx.register_csv('pokemon', 'pokemon.csv')
# create a new statement via SQL
In [6]: df = ctx.sql('SELECT "Attack"+"Defense", "Attack"-"Defense" FROM pokemon')
# collect the results and convert them to a pandas DataFrame
In [7]: df.to_pandas()
Out[7]:
pokemon.Attack + pokemon.Defense pokemon.Attack - pokemon.Defense
0 98 0
1 125 -1
2 165 -1
3 223 -23
4 95 9
.. ... ...
158 190 10
159 109 19
160 149 19
161 229 39
162 200 20
[163 rows x 2 columns]