From 084d09d62546ddd08145aad6ae4d855d281448e7 Mon Sep 17 00:00:00 2001 From: Philip Garus Date: Thu, 8 Sep 2022 10:16:57 +0200 Subject: [PATCH 1/3] implemented github API license URL extraction for remote github repository links --- .../StrategyLicenseUrlGuesser.java | 62 ++++++++++++++++-- .../tools/solicitor/reader/ort/OrtReader.java | 8 +-- .../src/main/resources/application.properties | 3 + core/src/test/resources/analyzer-result.json | 4 +- documentation/master-solicitor.asciidoc | 64 +++++++++++-------- 5 files changed, 103 insertions(+), 38 deletions(-) diff --git a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java index 0b4449f2..bd18ee70 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java @@ -3,14 +3,20 @@ */ package com.devonfw.tools.solicitor.licensetexts; +import java.io.IOException; +import java.io.InputStream; +import java.nio.charset.StandardCharsets; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; +import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; +import org.springframework.beans.factory.annotation.Value; import com.devonfw.tools.solicitor.SolicitorVersion; +import com.devonfw.tools.solicitor.common.SolicitorRuntimeException; import com.devonfw.tools.solicitor.common.content.ContentProvider; import com.devonfw.tools.solicitor.common.content.web.WebContent; @@ -24,6 +30,9 @@ public class StrategyLicenseUrlGuesser implements LicenseUrlGuesser { private ContentProvider webContentProvider; private SolicitorVersion solicitorVersion; + + @Value("${solicitor.githubtoken}") + private String token; /** * The constructor. 
@@ -56,14 +65,19 @@ private void setTrace(String trace, StringBuilder traceBuilder) { traceBuilder.append(trace).append('\n'); } - // https://github.com/nodelib/nodelib/tree/master/packages/fs/fs.stat - // https://raw.githubusercontent.com/nodelib/nodelib/master/packages/fs/fs.stat/README.md // helper method that normalizes a github url and retrieves the raw link to // a given license private String normalizeGitURL(String url, StringBuilder traceBuilder) { - String oldURL = url; - if (url.contains("github")) { + // case that github remote repository link is given + if (url.contains("github.com") && url.endsWith(".git")) { + url = githubAPILicenseUrl(url, token); + if(!url.equals(oldURL) && !url.contains("api.github.com")) { + setTrace("URL changed from " + oldURL + " to " + url, traceBuilder); + return url; + } + } + if (url.contains("github.com")) { // use https for all github URLs url = url.replace("http:", "https:"); // omit repo suffix if existent @@ -154,4 +168,44 @@ public GuessedLicenseUrlContent getContentForUri(String uri) { return new GuessedLicenseUrlContent(guessedUrl, auditLogBuilder.toString()); } + //tries to get github license file location based of vsc-link + public String githubAPILicenseUrl(String link, String token) { + + String result = ""; + if (link.contains("github.com")) { + if (link.endsWith(".git")) { + link = link.substring(0, link.length() - 4); + } + link = link.replace("git://", "https://"); + link = link.replace("ssh://", "https://"); + link = link.replace("git@", ""); + if (!link.contains("api.github.com")) { + link = link.replace("github.com/", "api.github.com/repos/"); + link = link.concat("/license"); + } + + String command = "curl -H \"Accept: application/vnd.github+json\" -H \"Authorization: token "+ token + "\" -i " + link; + ProcessBuilder processBuilder = new ProcessBuilder(command.split(" ")); + try { + Process process = processBuilder.start(); + InputStream inputStream = process.getInputStream(); + result = 
IOUtils.toString(inputStream, StandardCharsets.UTF_8); + if (result.contains("download_url")) { + result = result.substring(result.indexOf("\"download_url\": ")); + result = result.substring(17,result.indexOf(",")-1); + } + if (result.contains("\"message\": \"Moved Permanently\"")) { + String tempLink = result.substring(result.indexOf("\"url\": ")); + tempLink = tempLink.substring(17,result.indexOf(",")-1); + result = githubAPILicenseUrl(tempLink, token); + } + if (result.contains("\"message\": \"Not Found\"")) { + result = link; + } + } catch (IOException e) { + throw new SolicitorRuntimeException("Could not handle command call for api request'" + command + "'", e); + } + } + return result; + } } diff --git a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java index 45aea82a..c0aca338 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java @@ -61,12 +61,12 @@ public void readInventory(String type, String sourceUrl, Application application Map singlePackage = (Map) iterator.get("package"); String id = (String) singlePackage.get("id"); Map vcsProcessed = (Map) singlePackage.get("vcs_processed"); - String repo = (String) vcsProcessed.get("url"); + String licenseUrl = (String) vcsProcessed.get("url"); String pURL = (String) singlePackage.get("purl"); String homePage = (String) singlePackage.get("homepage_url"); if (homePage == null || homePage.isEmpty()) { - homePage = repo; + homePage = licenseUrl; } ApplicationComponent appComponent = getModelFactory().newApplicationComponent(); @@ -92,11 +92,11 @@ public void readInventory(String type, String sourceUrl, Application application List lic = (List) singlePackage.get("declared_licenses"); if (lic.isEmpty()) { // add empty raw license if no license info attached - addRawLicense(appComponent, null, null, sourceUrl); + 
addRawLicense(appComponent, null, licenseUrl, sourceUrl); } else { for (Object cl : lic) { licenseCount++; - addRawLicense(appComponent, cl.toString(), null, sourceUrl); + addRawLicense(appComponent, cl.toString(), licenseUrl, sourceUrl); } } doLogging(sourceUrl, application, componentCount, licenseCount); diff --git a/core/src/main/resources/application.properties b/core/src/main/resources/application.properties index 8d56c0fb..217af3a4 100644 --- a/core/src/main/resources/application.properties +++ b/core/src/main/resources/application.properties @@ -41,6 +41,9 @@ solicitor.classpath-guessedlicenseurl-cache-locations=licenseurls # Deprecated features are deactivated by default. If set to true they might be (temporarily) activated. solicitor.deprecated-features-allowed=false +# input for personal github api token to increase ORT-reader rate limit +solicitor.githubtoken= + ## Feature flags for activation of non-standard/experimental functionality # Incorporate scancode infos into model solicitor.feature-flag.scancode=false diff --git a/core/src/test/resources/analyzer-result.json b/core/src/test/resources/analyzer-result.json index 22a7f571..0098293f 100644 --- a/core/src/test/resources/analyzer-result.json +++ b/core/src/test/resources/analyzer-result.json @@ -49,7 +49,7 @@ }, "vcs_processed" : { "type" : "", - "url" : "", + "url" : "ssh://git@github.com/hamcrest/JavaHamcrest.git", "revision" : "", "path" : "" }, @@ -92,7 +92,7 @@ }, "vcs_processed" : { "type" : "Git", - "url" : "https://github.com/testproject.git", + "url" : "ssh://git@github.com/hamcrest/JavaHamcrest.git", "revision" : "", "path" : "testproject" } diff --git a/documentation/master-solicitor.asciidoc b/documentation/master-solicitor.asciidoc index 1ff67838..ae9b5604 100644 --- a/documentation/master-solicitor.asciidoc +++ b/documentation/master-solicitor.asciidoc @@ -11,7 +11,7 @@ SPDX-License-Identifier: Apache-2.0 == Introduction -Todays software projects often make use of large amounts of Open 
Source software. Being +Today's software projects often make use of large amounts of Open Source software. Being compliant with the license obligations of the used software components is a prerequisite for every such project. This results in different requirements that the project might need to fulfill. Those requirements can be grouped into two main categories: * Things that need to be done to actually fulfill license obligations @@ -26,14 +26,14 @@ While working on these easy looking tasks, they might get complex due to various * The number of open source components might be quite large (>> 100 for a typical webapplication based on state of the art programming frameworks) * Agile development and rapid changes of used components result in frequent changes of the inventory -* Open Source usage scenarios and license obligations might be OK in one context (e.g. in the relation between a software developer and his client) but might be completely inacceptable in another context (e.g. when the client distributes the same software to end customers) -* Legal interpretation of license conditions often differ from organisation to organisation and result in different compliance rules to be respected. +* Open Source usage scenarios and license obligations might be OK in one context (e.g. in the relation between a software developer and his client) but might be completely unacceptable in another context (e.g. when the client distributes the same software to end customers) +* Legal interpretation of license conditions often differ from organization to organization and result in different compliance rules to be respected. * License information for components is often not available in a standardized form which would allow automatic processing * Tools for supporting the license management processes are often specific to a technology or build tool and do not support all aspects of OSS license management. 
Of course there are specific commercial tool suites which address the IP rights and license domain. But due to high complexity and license costs those tools are out of reach for most projects - at least for permanent use. -_Solicitor_ tries to address some of the issues hightlighted above. In its initial version it is a tool for programmatically executing a process which was originally defined as an Excel-supported manual process. +_Solicitor_ tries to address some of the issues highlighted above. In its initial version it is a tool for programmatically executing a process which was originally defined as an Excel-supported manual process. When running _Solicitor_ three subsequent processing steps are executed: @@ -67,7 +67,7 @@ In the _normalization_ step the license information is completed and unified. Information not contained in the raw data is added. Where possible the applicable licenses are expressed by https://spdx.org/[SPDX-IDs]. -Many open source compontents are available via multi licensing models. +Many open source components are available via multi licensing models. Within _qualification_ the finally applicable licenses are selected. In the _legal assessment_ the compliance of applicable licenses will be checked based on generic rules defined in company wide policies and possibly project specific project specific extensions. @@ -82,7 +82,7 @@ image:solution.png[width=100%,scaledwidth=100%] There are three major technical components: The _reader_ and _writer_ components are performing import and export of data. The business logic - doing _normalization_, _qualification_ and _legal assessment_ is done by a _rule engine_. Rules are mainly defined via _decision tables_. _Solicitor_ comes with a starting set of rules for _normalization_ and _qualification_ but these rulesets need to be extended within the projects. Rules for legal evaluation need to be completely defined by the user. 
-_Solicitor_ is working without additional persisted data: When being executed it generates the output direcly from the read input data after processing the business rules. +_Solicitor_ is working without additional persisted data: When being executed it generates the output directly from the read input data after processing the business rules. === Data Model @@ -121,7 +121,7 @@ The internal business data model consists of 6 entities: | engagementType | EngagementType | the engagement type; possible values: INTERN, EXTERN | clientName | String | name of the client | goToMarketModel | GoToMarketModel | the go-to-market-model; possible values: LICENSE -| contractAllowsOss | boolean | does the contract explicitely allow OSS? +| contractAllowsOss | boolean | does the contract explicitly allow OSS? | ossPolicyFollowed | boolean | is the companies OSS policy followed? | customerProvidesOss | boolean | does the customer provide the OSS? |=== @@ -242,13 +242,13 @@ To get an overview of the available command line options use [listing] java -jar solicitor.jar -h -.Adressing of resources +.Addressing of resources **** -For unique adressing of resources *to be read* (configuration files, input data, rule templates and decision tables) _Solicitor_ makes use of the Spring ResourceLoader functionality, see https://docs.spring.io/spring-framework/docs/current/spring-framework-reference/core.html#resources-resourceloader . This allows to load from the classpath, the filesystem or even via http get. +For unique addressing of resources *to be read* (configuration files, input data, rule templates and decision tables) _Solicitor_ makes use of the Spring ResourceLoader functionality, see https://docs.spring.io/spring-framework/docs/current/spring-framework-reference/core.html#resources-resourceloader . This allows to load from the classpath, the filesystem or even via http get. 
If you want to reference a file in the filesystem you need to write it as follows: `file:path/to/file.txt` -Note that this only applies to resources being read. Output files are adressed without that prefix. +Note that this only applies to resources being read. Output files are addressed without that prefix. **** === Project Configuration File @@ -320,7 +320,7 @@ The leading section of the config file defines some metadata and the engagement <9> does the customer provide the OSS? (boolean) ==== Applications -Within this section the different applications (=deliverables) of the engagement are defined. Furtheron for each application at least one reader needs to be defined which imports the component and license information. +Within this section the different applications (= deliverables) of the engagement are defined. Furthermore, for each application at least one reader needs to be defined which imports the component and license information. [listing] "applications" : [ { @@ -375,7 +375,7 @@ They are defined as a sequence of rule templates and corresponding XLS (or CSV) "ruleSource" : "classpath:samples/LegalEvaluationSample.xls", "templateSource" : "classpath:com/.../rules/rule_templates/LegalEvaluation.drt", "ruleGroup" : "LegalEvaluation", - "decription" : "final legal evaluation based on the rules defined by legal" + "description" : "final legal evaluation based on the rules defined by legal" } ], <1> type of the rule; only possible value: `dt` which stands for "decision table" @@ -383,7 +383,7 @@ They are defined as a sequence of rule templates and corresponding XLS (or CSV) data (given by `ruleSource`) does not exist; if set to `false` a missing XLS/CSV table will result in program termination <3> location of the tabular decision table data. This might either point directly to the XLS or CSV file -or only give the resource name without suffix. In this case _Solictor_ will dynamically test for existing +or only give the resource name without suffix. 
In this case _Solicitor_ will dynamically test for existing resources by appending suffixes _xls_ and _csv_. <4> location of the drools rule template to be used to define the rules together with the decision table data <5> id of the group of rules; used to reference it e.g. when doing logging @@ -449,7 +449,7 @@ To simplify setting up a new project _Solicitor_ provides an option to create a java -jar solicitor.jar -wiz some/directory/path Besides the necessary configuration file this includes also empty XLS or CSV files for defining project -specific rules which amend the builtin rules. Furtheron a sample `license.xml` file is provided to +specific rules which amend the builtin rules. Furthermore, a sample `license.xml` file is provided to directly enable execution of solicitor and check functionality. This configuration then serves as starting point for project specific configuration. @@ -601,7 +601,7 @@ These configurations may also be used to overwrite options of a https://commons. Important: In case that a component has multiple licenses attached, there needs to be a separate line in the csv file for each license. -WARNING: The CSV reader currently does not fill the attribute `packageUrl`. Any functionality/reporting based on this attribute will be disfunctional for data read by the CSV reader. +WARNING: The CSV reader currently does not fill the attribute `packageUrl`. Any functionality/reporting based on this attribute will be dysfunctional for data read by the CSV reader. === NPM @@ -785,7 +785,13 @@ In _Solicitor_ the data is read with the following part of the config } ] ---- -WARNING: The ORT reader currently does not yet fill the attribute `licenseUrl`. Any functionality/reporting based on this attribute will be disfunctional for data read by the ORT reader. +It is important to note that a personal Github token is required on executing _Solicitor_ as it is needed to fill the LicenseURL data via Github API requests. 
You can generate tokens on your account https://github.com/settings/tokens[here]. This token increases the rate limit of the Github API from 60 to 5000 requests per hour. +The token can be provided via _Solicitor_ properties like this: + +---- +java -Dsolicitor.githubtoken=token -jar solicitor.jar -c file:solicitor.cfg +---- + === Gradle (Windows) @@ -917,7 +923,7 @@ is defined by the sequence of declaration in the config file. Processing of the be finished when there are no more rules to fire in that group. Processing of the next group will then start. Rule groups which have been finished processing will not be resumed even if rules within that group might have been activated again due to changes of the facts. === Extended comparison syntax -By default any condtions given in the fields of decision tables are simple textual comparisons: The condition +By default any conditions given in the fields of decision tables are simple textual comparisons: The condition is true if the property of the model is identical to the given value in the XLS (or CSV) sheet. Depending on the configuration of the rule templates for some fields, an extended syntax might be available. For those fields the following syntax applies: @@ -940,7 +946,7 @@ In this phase the license data imported via the readers is cleaned and normalize The phase itself consists of two decision tables / rule groups: -==== Decision Table: Explicitely setting Licenses +==== Decision Table: Explicitly setting Licenses With this decision table is is possible to explicitely assign NormalizedLicenses to components. This will be used if the imported RawLicense data is either incomplete or incorrect. Items which have been processed by rules of this group will not be reprocessed by the next rule group. Decision table data: `LicenseAssignmentV2*.xls/csv` @@ -993,7 +999,7 @@ Within this phase the actually applicable licenses will be selected for each com This phase consists of two decision tables. 
==== Choosing specific License in case of Multi-Licensing -This group of rules has the speciality that it might match to a group of NormalizedLicenses associated to an ApplicationComponent. In case that multiple licenses are associated to an ApplicationComponent one of them might be selected as "effective" license and the others might be marked as `Ignored`. +This group of rules has the specialty that it might match to a group of NormalizedLicenses associated to an ApplicationComponent. In case that multiple licenses are associated to an ApplicationComponent one of them might be selected as "effective" license and the others might be marked as `Ignored`. Decision table data: `MultiLicenseSelection*.xls/csv` @@ -1036,7 +1042,7 @@ icon:magic[]: On these fields the <> might be used === Phase 3: Legal evaluation -The third phase ist the legal evaluation of the licenses and the check, whether OSS usage is according to defined legal policies. Again this phase comprises two decision tables. +The third phase is the legal evaluation of the licenses and the check, whether OSS usage is according to defined legal policies. Again this phase comprises two decision tables. ==== Pre-Evaluation based on common rules Within the pre evaluation the license info is checked against standard OSS usage policies. This roughly qualifies the usage and might already determine licenses which are OK in any case or which need to be further evaluated. Furtheron they qualify whether the license text or source code needs to be included in the distribution. The rules in this decision table are only based on the `effectiveNormalizedLicense` and do not consider any project, application of component information. @@ -1088,7 +1094,7 @@ groups to be processed in sequence. 
When using the builtin default base configur To use your own rule data there are three approaches: * Include your own `rules` section in the project configuration file (so not inheriting from the builtin base configuration file) and reference your own decision tables there. -* Create your own "Solicitor Extension" which might completely redefine/replace the buitin `Solicitor` setup including all decision tables and the base configuration file. See <> for details. +* Create your own "Solicitor Extension" which might completely redefine/replace the built-in `Solicitor` setup including all decision tables and the base configuration file. See <> for details. * Make use of the optional project specific decision tables which are defined in the default base configuration: For every builtin decision table there is an optional external decision table (expected in the filesystem) which will be checked for existence. If such external decision table exists it will be processed first - before processing the builtin decision table. Thus is it possible to amend / override the builtin rules by project specific rules. When you create the starter configuration of your project as described in <>, those project specific decision tables are automatically created. == Reporting and Creating output documents @@ -1134,7 +1140,7 @@ WARNING: The result table column `OBJ_` gives access to the native So When using the command line option `-d` _Solicitor_ can determine difference information between two different data models (e.g. the difference between the licenses of the current release and a former release.) 
The difference is calculated on the result of the above described SQL statements: * First the internal reporting database is created for the current data model and all defined SQL statements are executed -* Then the internal database is recreated for the "old" data model and all defined SQL stements are executed again +* Then the internal database is recreated for the "old" data model and all defined SQL statements are executed again * Finally for each defined result table the difference between the current result and the "old" result is calculated @@ -1148,7 +1154,7 @@ The correlation algorithm will first try to match rows using `CORR_KEY_0`. It wi * there are no unmatched "new" rows OR * there are no unmatched "old" rows -The result of the correlation / difference calulation is stored in the reporting table data structure. For each row the status is accessible if +The result of the correlation / difference calculation is stored in the reporting table data structure. For each row the status is accessible if * The row is "new" (did not exist in the old data) * The row is unchanged (no changes in the field values representing the properties of the _Solicitor_ data model) @@ -1175,7 +1181,7 @@ NOTE: Above example also shows how the case sensitive column names have to be ha === Writers -The above dscribed SQL processing is identical for all Writers. Writers only differ in the +The above described SQL processing is identical for all Writers. Writers only differ in the way how the output document is created based on a template and the reporting table data obtained by the SQL transformation. @@ -1188,7 +1194,7 @@ directly put to the into Velocity Context. 
For further information see the * Velocity Documentation -* The _Solicitor_ JavaDoc (which also includes datails on how to access the diff information for rows and fields of reporting data tables) +* The _Solicitor_ JavaDoc (which also includes details on how to access the diff information for rows and fields of reporting data tables) * The samples included in _Solicitor_ ==== Excel Writer @@ -1206,7 +1212,7 @@ reporting data tables as defined in the Writer configuration like e.g.: Whenever such a string is found in a cell this indicates that this row is a template row. For each entry in the respective resporting data table a copy of this row is created and the attribute replacement will be done with the data from that reporting table. (The pattern `\#...#` will be removed when copying.) ===== Attribute replacement -Within each row which was copied in the previous step the templating logic searches for the string pattern `$someAttributeName$` where `someAttributeName` corresponds to the column names of the reporting table. Any such occurence is replaced with the corresponding data value. +Within each row which was copied in the previous step the templating logic searches for the string pattern `$someAttributeName$` where `someAttributeName` corresponds to the column names of the reporting table. Any such occurrence is replaced with the corresponding data value. ===== Representation of Diff Information In case that a difference processing (new vs. old model data) was done this will be represented @@ -1236,6 +1242,7 @@ In general it is possible to manually correct this by editing the downloaded and Currently license URL guessing is based solely on the URL given in `NormalizedLicense.effectiveNormalizedLicenseUrl`. It will try the following approaches: +* If the original URL is a remote Github repository URL (with a .git ending) then a Github API call is attempted to retrieve the correct license URL. 
* If the original URL is a Github-URL and matches patterns which are known to return HTML-formatted content then the URL is rewritten to point to a raw version of the content. * If the original URL points to a Github project page (not to a file), then the algorithm will try different typical locations (like e.g. looking for file `LICENSE`). If found it will return this URL as result. * If no "better" URL could be guessed it will return the original URL. @@ -1311,7 +1318,7 @@ The general workflow when integrating with ScanCode consists of the following 3 . Execute _Solicitor_ in a "classic" way i.e. just based on the data provided via the Readers as described in <>. Besides the normal reports/documents generated this will also create scripts for downloading the needed OSS source codes and run Scancode. -. Download source codes and run ScanCode by executing the generated scripts. The downloadad sources and ScanCode results will be saved to a directory tree in the local filesytem. +. Download source codes and run ScanCode by executing the generated scripts. The downloaded sources and ScanCode results will be saved to a directory tree in the local filesystem. . Execute _Solicitor_ a second time. For all ApplicationComponents where ScanCode information is available (stored in the local directory tree) the license data as obtained from the Readers is replaced by this information. The data model is enriched with the found copyright and notice file information. Reports (see <>) are now based on the ScanCode data (where available). @@ -1396,7 +1403,7 @@ artifacts: <5> URL pointing to license text. <6> Copyrights to set. Optional. If defined then all found copyrights will be replaced by the list of copyrights given here. <7> A single copyright. -<8> Another copyright. Note that due to YAML syntax any string containing `:` needs to be enclosed with parantheses +<8> Another copyright. 
Note that due to YAML syntax any string containing `:` needs to be enclosed with parentheses <9> Further packages to follow. ==== Decision table rules @@ -1533,6 +1540,7 @@ java -Dloader.path=path/to/the/extension.zip -jar solicitor.jar >. * Added reader for data generated by OSS Review Toolkit (ORT). See <>. * Added support for API changes of new scancode release (v31) https://github.com/nexB/scancode-toolkit/releases/tag/v31.0.1. * https://github.com/devonfw/solicitor/issues/124: Added documentation of '--production' option for npm-license-checker plugin. From e040a9d6f8fc06675335ebbe3ba8eb93cc323f52 Mon Sep 17 00:00:00 2001 From: Philip Garus Date: Mon, 7 Nov 2022 08:20:24 +0100 Subject: [PATCH 2/3] added base functionality of github API license check with Spring WebClient --- core/pom.xml | 4 + .../StrategyLicenseUrlGuesser.java | 80 ++++++++++--------- 2 files changed, 47 insertions(+), 37 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 5376ce74..835e38a4 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -61,6 +61,10 @@ org.springframework.boot spring-boot-starter-data-jpa + + org.springframework.boot + spring-boot-starter-webflux + org.hsqldb hsqldb diff --git a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java index bd18ee70..8927aaf8 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/licensetexts/StrategyLicenseUrlGuesser.java @@ -3,23 +3,22 @@ */ package com.devonfw.tools.solicitor.licensetexts; -import java.io.IOException; -import java.io.InputStream; -import java.nio.charset.StandardCharsets; import java.time.ZonedDateTime; import java.time.format.DateTimeFormatter; import java.time.temporal.ChronoUnit; -import org.apache.commons.io.IOUtils; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import 
org.springframework.beans.factory.annotation.Value; +import org.springframework.http.MediaType; +import org.springframework.web.reactive.function.client.WebClient; import com.devonfw.tools.solicitor.SolicitorVersion; -import com.devonfw.tools.solicitor.common.SolicitorRuntimeException; import com.devonfw.tools.solicitor.common.content.ContentProvider; import com.devonfw.tools.solicitor.common.content.web.WebContent; +import reactor.core.publisher.Mono; + /** * A {@link LicenseUrlGuesser} which tries to strategically find a possible better license URL. */ @@ -33,7 +32,10 @@ public class StrategyLicenseUrlGuesser implements LicenseUrlGuesser { @Value("${solicitor.githubtoken}") private String token; + + private WebClient client = WebClient.create("https://api.github.com"); + /** * The constructor. * @@ -170,42 +172,46 @@ public GuessedLicenseUrlContent getContentForUri(String uri) { //tries to get github license file location based of vsc-link public String githubAPILicenseUrl(String link, String token) { - + + String fallbackLink = link; + String result = ""; if (link.contains("github.com")) { if (link.endsWith(".git")) { link = link.substring(0, link.length() - 4); - } - link = link.replace("git://", "https://"); - link = link.replace("ssh://", "https://"); - link = link.replace("git@", ""); - if (!link.contains("api.github.com")) { - link = link.replace("github.com/", "api.github.com/repos/"); - link = link.concat("/license"); - } - - String command = "curl -H \"Accept: application/vnd.github+json\" -H \"Authorization: token "+ token + "\" -i " + link; - ProcessBuilder processBuilder = new ProcessBuilder(command.split(" ")); - try { - Process process = processBuilder.start(); - InputStream inputStream = process.getInputStream(); - result = IOUtils.toString(inputStream, StandardCharsets.UTF_8); - if (result.contains("download_url")) { - result = result.substring(result.indexOf("\"download_url\": ")); - result = result.substring(17,result.indexOf(",")-1); - } - if 
(result.contains("\"message\": \"Moved Permanently\"")) { - String tempLink = result.substring(result.indexOf("\"url\": ")); - tempLink = tempLink.substring(17,result.indexOf(",")-1); - result = githubAPILicenseUrl(tempLink, token); - } - if (result.contains("\"message\": \"Not Found\"")) { - result = link; - } - } catch (IOException e) { - throw new SolicitorRuntimeException("Could not handle command call for api request'" + command + "'", e); } + link = link.replace("git://", ""); + link = link.replace("ssh://", ""); + link = link.replace("git@", ""); + link = link.replace("https://", ""); + link = link.replace("api.github.com/", ""); + link = link.replace("github.com/", ""); + + //TODO it should be better to parse the response directly into a JSON object, not string + result = client.get() + .uri("/repos/" + link + "/license") + .header("Accept", "application/vnd.github+json") + .header("Authorization", "Bearer " + token) + .accept(MediaType.APPLICATION_JSON) + .retrieve() + .onStatus(status -> status.isError(), + response -> Mono.empty()) + .bodyToMono(String.class) + .block(); //TODO this blocks the thread probably + + if (result.contains("download_url")) { + result = result.substring(result.indexOf("\"download_url\":")); + result = result.substring(16,result.indexOf(",")-1); + } + if (result.contains("\"message\":\"Moved Permanently\"")) { + String tempLink = result.substring(result.indexOf("\"url\":")); + tempLink = tempLink.substring(7,result.indexOf(",")-1); + result = githubAPILicenseUrl(tempLink, token); + } + if (result.contains("\"message\":\"Not Found\"")) { + result = fallbackLink; + } } - return result; - } + return result; + } } From 41af3c02b5f1d6f2e2b00f25f75a781d5f52071e Mon Sep 17 00:00:00 2001 From: ohecker <8004361+ohecker@users.noreply.github.com> Date: Wed, 14 Dec 2022 15:24:56 +0100 Subject: [PATCH 3/3] fixes of merge problems --- core/pom.xml | 2 +- .../com/devonfw/tools/solicitor/reader/ort/OrtReader.java | 4 ++-- 2 files changed, 3 
insertions(+), 3 deletions(-) diff --git a/core/pom.xml b/core/pom.xml index 03a1f951..1a2b0781 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -63,7 +63,7 @@ org.springframework.boot - spring-boot-starter-webflux + spring-boot-starter-webflux org.hsqldb diff --git a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java index 2a231f30..b51f7e30 100644 --- a/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java +++ b/core/src/main/java/com/devonfw/tools/solicitor/reader/ort/OrtReader.java @@ -91,11 +91,11 @@ public void readInventory(String type, String sourceUrl, Application application List lic = (List) singlePackage.get("declared_licenses"); if (lic.isEmpty()) { // add empty raw license if no license info attached - addRawLicense(appComponent, null, licenseUrl, sourceUrl); + addRawLicense(appComponent, null, repo, sourceUrl); } else { for (Object cl : lic) { licenseCount++; - addRawLicense(appComponent, cl.toString(), licenseUrl, sourceUrl); + addRawLicense(appComponent, cl.toString(), repo, sourceUrl); } } doLogging(sourceUrl, application, componentCount, licenseCount);