|
| 1 | +package org.jetbrains.kotlinx.dataframe.examples.plugin |
| 2 | + |
| 3 | +import org.jetbrains.kotlinx.dataframe.DataFrame |
| 4 | +import org.jetbrains.kotlinx.dataframe.annotations.DataSchema |
| 5 | +import org.jetbrains.kotlinx.dataframe.api.add |
| 6 | +import org.jetbrains.kotlinx.dataframe.api.aggregate |
| 7 | +import org.jetbrains.kotlinx.dataframe.api.convert |
| 8 | +import org.jetbrains.kotlinx.dataframe.api.convertTo |
| 9 | +import org.jetbrains.kotlinx.dataframe.api.filter |
| 10 | +import org.jetbrains.kotlinx.dataframe.api.groupBy |
| 11 | +import org.jetbrains.kotlinx.dataframe.api.into |
| 12 | +import org.jetbrains.kotlinx.dataframe.api.max |
| 13 | +import org.jetbrains.kotlinx.dataframe.api.rename |
| 14 | +import org.jetbrains.kotlinx.dataframe.api.renameToCamelCase |
| 15 | +import org.jetbrains.kotlinx.dataframe.api.with |
| 16 | +import org.jetbrains.kotlinx.dataframe.io.readCsv |
| 17 | +import org.jetbrains.kotlinx.dataframe.io.writeCsv |
| 18 | +import org.jetbrains.kotlinx.kandy.dsl.plot |
| 19 | +import org.jetbrains.kotlinx.kandy.letsplot.export.save |
| 20 | +import org.jetbrains.kotlinx.kandy.letsplot.feature.layout |
| 21 | +import org.jetbrains.kotlinx.kandy.letsplot.layers.bars |
| 22 | +import java.net.URL |
| 23 | + |
/**
 * Row schema for the DataFrame read from `jetbrains_repositories.csv`.
 *
 * The properties are spelled in snake_case; `main` later switches the frame to
 * camelCase names via `renameToCamelCase()`.
 *
 * @property full_name Full repository name; `main` uses the part after the last `/` as the short name.
 * @property html_url Repository URL, typed as [URL].
 * @property stargazers_count Star count; renamed to `stars` in `main`.
 * @property topics Topics as one raw string; `main` strips a surrounding `[`…`]` and splits it on commas.
 * @property watchers Watcher count.
 */
@DataSchema
data class Repositories(
    val full_name: String,
    val html_url: URL,
    val stargazers_count: Int,
    val topics: String,
    val watchers: Int,
)
| 33 | + |
/** Categories that [getKind] can assign to a repository. */
enum class RepoKind {
    /** Assigned when the topics or the full name mention "kotlin". */
    Kotlin,

    /** Assigned when the topics or the full name mention "idea" or "intellij" (and not "kotlin"). */
    IntelliJ,

    /** Fallback for repositories matching neither of the rules above. */
    Other,
}
| 40 | + |
/**
 * Classifies a repository by keyword.
 *
 * A keyword counts as present when it is an exact element of [topics] or a substring of the
 * lowercased [fullName]. Note that the topic comparison is case-sensitive, while the name
 * comparison is not.
 *
 * Rules are tried in order, so "kotlin" wins over "idea"/"intellij".
 *
 * @param fullName full repository name to search for keywords.
 * @param topics topics attached to the repository.
 * @return [RepoKind.Kotlin], [RepoKind.IntelliJ], or [RepoKind.Other] when no keyword is found.
 */
fun getKind(fullName: String, topics: List<String>): RepoKind {
    // Lowercase once up front; every keyword lookup below reuses it.
    val normalizedName = fullName.lowercase()

    fun mentions(keyword: String): Boolean = keyword in topics || keyword in normalizedName

    if (mentions("kotlin")) return RepoKind.Kotlin
    if (mentions("idea") || mentions("intellij")) return RepoKind.IntelliJ
    return RepoKind.Other
}
| 51 | + |
/**
 * Example entry point for the DataFrame compiler plugin.
 *
 * Steps:
 *  1. read the JetBrains repositories CSV from a URL and convert it to the [Repositories] schema;
 *  2. show that a freshly added column is usable right away (the result is discarded);
 *  3. rename, filter and enrich the frame, then write it to `jetbrains_repositories_new.csv`;
 *  4. compute the maximum star count per [RepoKind] and save a bar chart to `kindToStars.svg`.
 */
fun main() {
    // Load the CSV and make it conform to the `Repositories` schema.
    val repos = DataFrame
        .readCsv("https://raw.githubusercontent.com/Kotlin/dataframe/master/data/jetbrains_repositories.csv")
        .convertTo<Repositories>()

    // With the compiler plugin the schema is updated after every operation, so a column that
    // was just added, renamed or retyped can be referenced in the very next call.
    // This chain only demonstrates that; its result is intentionally not kept.
    repos
        // "name" does not exist before this call...
        .add("name") { full_name.substringAfterLast("/") }
        // ...and is available as an extension property immediately afterwards.
        .filter { name.lowercase().contains("kotlin") }

    // Switch to camelCase column names, shorten "stargazersCount" to "stars",
    // and keep only repositories with more than 50 stars.
    // The extension properties follow the new column names from here on.
    val popularRepos = repos
        .renameToCamelCase()
        .rename { stargazersCount }.into("stars")
        .filter { stars > 50 }

    val reposUpdated = popularRepos
        // Turn the raw "topics" string into a list: drop a surrounding "[" … "]", then split
        // on commas and trim each item. An empty remainder yields an empty list.
        .convert { topics }.with { raw ->
            raw.removeSurrounding("[", "]")
                .takeIf { it.isNotEmpty() }
                ?.split(',')
                ?.map { it.trim() }
                .orEmpty()
        }
        // "topics" is a `List<String>` column now, so its size can be stored per row.
        .add("topicCount") { topics.size }
        // Classify every repository by its name and topics.
        .add("kind") { getKind(fullName, topics) }

    // Persist the enriched frame as CSV.
    reposUpdated.writeCsv("jetbrains_repositories_new.csv")

    // Maximum number of stars within each repository kind.
    val maxStarsByKind = reposUpdated
        .groupBy { kind }
        .aggregate {
            max { stars } into "maxStars"
        }

    // Bar chart: repository kind on the x axis, maximum stars on the y axis.
    val chart = maxStarsByKind.plot {
        bars {
            x(kind)
            y(maxStars)
        }
        layout.title = "Max stars per repo kind"
    }

    // Export the chart as SVG.
    chart.save("kindToStars.svg")
}
0 commit comments