Ballista Rust ClientΒΆ
To connect to a Ballista cluster from Rust, first start by creating a BallistaContext
.
let config = BallistaConfig::builder()
.set("ballista.shuffle.partitions", "4")
.build()?;
// connect to Ballista scheduler
let ctx = BallistaContext::remote("localhost", 50050, &config);
Here is a full example using the DataFrame API.
#[tokio::main]
async fn main() -> Result<()> {
let config = BallistaConfig::builder()
.set("ballista.shuffle.partitions", "4")
.build()?;
// connect to Ballista scheduler
let ctx = BallistaContext::remote("localhost", 50050, &config);
let testdata = datafusion::test_util::parquet_test_data();
let filename = &format!("{}/alltypes_plain.parquet", testdata);
// define the query using the DataFrame trait
let df = ctx
.read_parquet(filename)?
.select_columns(&["id", "bool_col", "timestamp_col"])?
.filter(col("id").gt(lit(1)))?;
// print the results
df.show().await?;
Ok(())
}
Here is a full example demonstrating SQL usage.
#[tokio::main]
async fn main() -> Result<()> {
let config = BallistaConfig::builder()
.set("ballista.shuffle.partitions", "4")
.build()?;
// connect to Ballista scheduler
let ctx = BallistaContext::remote("localhost", 50050, &config);
let testdata = datafusion::test_util::arrow_test_data();
// register csv file with the execution context
ctx.register_csv(
"aggregate_test_100",
&format!("{}/csv/aggregate_test_100.csv", testdata),
CsvReadOptions::new(),
)?;
// execute the query
let df = ctx.sql(
"SELECT c1, MIN(c12), MAX(c12) \
FROM aggregate_test_100 \
WHERE c11 > 0.1 AND c11 < 0.9 \
GROUP BY c1",
)?;
// print the results
df.show().await?;
Ok(())
}