|
scala>
// Derives dfNoNull from dfWithDate; judging by the name, the result is meant to be null-free.
// NOTE(review): drop() is called here with no column names, which appears to remove nothing
// (neither columns nor rows). dfWithDate.na.drop() is presumably what was intended -- confirm
// before relying on dfNoNull containing no nulls.
val dfNoNull = dfWithDate.drop()
dfNoNull: org.apache.spark.sql.DataFrame = [InvoiceNo: string, StockCode: string ... 7 more fields]
scala>
// Roll up the summed Quantity over the (Date, Country) hierarchy and sort by Date.
// The aggregate is aliased to total_quantity directly, so the result has exactly the
// columns Date, Country and total_quantity.
val rolledUpDF = dfNoNull.
  rollup("Date", "Country").
  agg(sum("Quantity").as("total_quantity")).
  orderBy("Date")
rolledUpDF: org.apache.spark.sql.Dataset[org.apache.spark.sql.Row] = [Date: date, Country: string ... 1 more field]
scala>
// Print the rolled-up result to the console as a text table.
rolledUpDF.show()
+----------+--------------+--------------+
|      Date|       Country|total_quantity|
+----------+--------------+--------------+
|      null|          null|       5176450|
|2010-12-01|        Norway|          1852|
|2010-12-01|        France|           449|
|2010-12-01|          null|         26814|
|2010-12-01|     Australia|           107|
|2010-12-01|   Netherlands|            97|
|2010-12-01|       Germany|           117|
|2010-12-01|          EIRE|           243|
|2010-12-01|United Kingdom|         23949|
|2010-12-02|       Germany|           146|
|2010-12-02|          null|         21023|
|2010-12-02|          EIRE|             4|
|2010-12-02|United Kingdom|         20873|
|2010-12-03|        Poland|           140|
|2010-12-03|   Switzerland|           110|
|2010-12-03|        France|           239|
|2010-12-03|          null|         14830|
|2010-12-03|         Italy|           164|
|2010-12-03|      Portugal|            65|
|2010-12-03|         Spain|           400|
+----------+--------------+--------------+
only showing top 20 rows
|
No comments:
Post a Comment