scala> df.groupBy().pivot("Country")
res69:
org.apache.spark.sql.RelationalGroupedDataset = RelationalGroupedDataset:
[grouping expressions: [], value: [InvoiceNo: string, StockCode: string ...
6 more fields], type: Pivot]
scala>
df.groupBy().pivot("Country").count()
res70: org.apache.spark.sql.DataFrame =
[Australia: bigint, Austria: bigint ... 36 more fields]
scala>
df.groupBy().pivot("Country").count().printSchema
root
|--
Australia: long (nullable = true)
|-- Austria: long (nullable = true)
|-- Bahrain: long (nullable = true)
|-- Belgium: long (nullable = true)
|-- Brazil: long (nullable = true)
|-- Canada:
long (nullable = true)
|-- Channel
Islands: long (nullable = true)
|-- Cyprus:
long (nullable = true)
|-- Czech
Republic: long (nullable = true)
|--
Denmark: long (nullable = true)
|-- EIRE:
long (nullable = true)
|--
European Community: long (nullable = true)
|--
Finland: long (nullable = true)
|-- France:
long (nullable = true)
|--
Germany: long (nullable = true)
|-- Greece:
long (nullable = true)
|-- Hong
Kong: long (nullable = true)
|--
Iceland: long (nullable = true)
|-- Israel:
long (nullable = true)
|-- Italy:
long (nullable = true)
|-- Japan:
long (nullable = true)
|--
Lebanon: long (nullable = true)
|--
Lithuania: long (nullable = true)
|-- Malta:
long (nullable = true)
|--
Netherlands: long (nullable = true)
|-- Norway:
long (nullable = true)
|-- Poland:
long (nullable = true)
|--
Portugal: long (nullable = true)
|-- RSA:
long (nullable = true)
|-- Saudi
Arabia: long (nullable = true)
|--
Singapore: long (nullable = true)
|-- Spain:
long (nullable = true)
|-- Sweden:
long (nullable = true)
|--
Switzerland: long (nullable = true)
|-- USA:
long (nullable = true)
|-- United
Arab Emirates: long (nullable = true)
|-- United
Kingdom: long (nullable = true)
|--
Unspecified: long (nullable = true)
scala>
df.groupBy().pivot("Country").count().show
+---------+-------+-------+-------+------+------+---------------+------+--------------+-------+----+------------------+-------+------+-------+------+---------+-------+------+-----+-----+-------+---------+-----+-----------+------+------+--------+---+------------+---------+-----+------+-----------+---+--------------------+--------------+-----------+
|Australia|Austria|Bahrain|Belgium|Brazil|Canada|Channel
Islands|Cyprus|Czech Republic|Denmark|EIRE|European
Community|Finland|France|Germany|Greece|Hong
Kong|Iceland|Israel|Italy|Japan|Lebanon|Lithuania|Malta|Netherlands|Norway|Poland|Portugal|RSA|Saudi
Arabia|Singapore|Spain|Sweden|Switzerland|USA|United Arab Emirates|United
Kingdom|Unspecified|
+---------+-------+-------+-------+------+------+---------------+------+--------------+-------+----+------------------+-------+------+-------+------+---------+-------+------+-----+-----+-------+---------+-----+-----------+------+------+--------+---+------------+---------+-----+------+-----------+---+--------------------+--------------+-----------+
|
1259| 401| 19|
2069| 32| 151| 758| 622| 30| 389|8196| 61| 695|
8557| 9495| 146|
288| 182| 297|
803| 358| 45| 35|
127| 2371| 1086|
341| 1519| 58| 10| 229| 2533| 462| 2002|291| 68| 495478| 446|
+---------+-------+-------+-------+------+------+---------------+------+--------------+-------+----+------------------+-------+------+-------+------+---------+-------+------+-----+-----+-------+---------+-----+-----------+------+------+--------+---+------------+---------+-----+------+-----------+---+--------------------+--------------+-----------+
scala>
df.groupBy().pivot("Country",Seq("Canada","Germany")).count().show
+------+-------+
|Canada|Germany|
+------+-------+
|
151| 9495|
+------+-------+
|
No comments:
Post a Comment