# Review preamble. Text ahead of the first "diff --git" header is skipped by
# patch tools, so these notes do not alter the patch content below.
#
# Purpose: git-format unified diff touching three files.
#   1. .github/workflows/check_licenses.yml
#      - Adds a step "Process missing licenses with AI" that exposes
#        GOOGLE_API_KEY from the repository secrets and, only when
#        temporal_print.yaml is non-empty ([ -s ... ]), pip-installs
#        google-generativeai, runs licenses/processing_licenses_with_ai.py and
#        diffs licenses/licenses.yml against licenses_aux_llm.yaml into
#        patch_llm.txt ("|| true" keeps the step green when the files differ).
#      - Adds licenses_aux_llm.yaml and patch_llm.txt to the uploaded artifact
#        paths.
#   2. easystacks/software.eessi.io/2025.06/eessi-2025.06-eb-5.1.1-2024a.yml
#      - Removes one line with no visible content after the p7zip entry.
#   3. licenses/processing_licenses_with_ai.py (new file, 60 lines per the hunk
#      header)
#      - Reads GOOGLE_API_KEY with a subscript lookup on os.environ, so an
#        unset variable raises KeyError as soon as the script starts.
#      - Silences FutureWarning, configures the genai client, uploads
#        "temporal_print.yaml" as text/plain and queries "gemini-2.5-pro" with
#        the uploaded file plus the multi-line prompt.
#      - Prints response.text, strips Markdown code-fence markers from it and
#        writes the remainder to licenses_aux_llm.yaml (UTF-8).
#
# NOTE(review): the workflow invokes the script with temporal_print.yaml as a
#   command-line argument, but the script never reads its arguments; the upload
#   path is hard-coded. Confirm which of the two is intended.
# NOTE(review): the prompt refers to "temporal_print.yml" while the uploaded
#   file is "temporal_print.yaml".
# NOTE(review): the model reply is written out without any YAML validation,
#   and the workflow then diffs it against licenses/licenses.yml.
# NOTE(review): the commented-out generate_content call repeats the prompt that
#   the live "prompt" variable already holds; presumably left over and safe to
#   drop - confirm with the author.
# NOTE(review): in this copy the patch's line breaks appear collapsed, and the
#   format template at the end of the prompt shows empty keys (" : ");
#   presumably placeholders were lost in extraction - verify against the
#   original patch before applying it.
#
diff --git a/.github/workflows/check_licenses.yml b/.github/workflows/check_licenses.yml index f9973cb350..bf2797bb9d 100644 --- a/.github/workflows/check_licenses.yml +++ b/.github/workflows/check_licenses.yml @@ -155,7 +155,22 @@ jobs: diff -Naur licenses/licenses.yml licenses_aux.yaml > patch.txt || true echo "patch.txt file generated." - + - name: Process missing licenses with AI + env: + GOOGLE_API_KEY: ${{ secrets.GOOGLE_API_KEY }} + run: | + if [ -s temporal_print.yaml ]; then + echo "Asking Gemini about missing licenses..." + # Install Google library + pip install google-generativeai + + # Execute AI script + python licenses/processing_licenses_with_ai.py temporal_print.yaml + + # Create a patch file for licences found by AI + diff -Naur licenses/licenses.yml licenses_aux_llm.yaml > patch_llm.txt || true + fi + - name: Generate artifacts uses: actions/upload-artifact@v4 with: @@ -163,6 +178,8 @@ jobs: path: | missing_report.yaml patch.txt + licenses_aux_llm.yaml + patch_llm.txt - name: How to edit artifacts and apply patch diff --git a/easystacks/software.eessi.io/2025.06/eessi-2025.06-eb-5.1.1-2024a.yml b/easystacks/software.eessi.io/2025.06/eessi-2025.06-eb-5.1.1-2024a.yml index 2d0555424b..f5f4832e94 100644 --- a/easystacks/software.eessi.io/2025.06/eessi-2025.06-eb-5.1.1-2024a.yml +++ b/easystacks/software.eessi.io/2025.06/eessi-2025.06-eb-5.1.1-2024a.yml @@ -7,4 +7,3 @@ easyconfigs: - ollama-0.6.0-GCCcore-13.3.0.eb - MetaBAT-2.17-GCC-13.3.0.eb - p7zip-17.05-GCCcore-13.3.0.eb - diff --git a/licenses/processing_licenses_with_ai.py b/licenses/processing_licenses_with_ai.py new file mode 100644 index 0000000000..e6d44162ed --- /dev/null +++ b/licenses/processing_licenses_with_ai.py @@ -0,0 +1,60 @@ +import google.generativeai as genai +import warnings +import os + +secret = os.environ['GOOGLE_API_KEY'] + +warnings.filterwarnings("ignore", category=FutureWarning) +genai.configure(api_key=secret) + +file = genai.upload_file("temporal_print.yaml", 
mime_type="text/plain") +model = genai.GenerativeModel("gemini-2.5-pro") +# response = model.generate_content([file, "I want you to search ALL the 'not found' OR 'other' licenses on the temporal_print.yml, make a deep search on google or wherever to found this licenses and if they can be redistributed or not PLEASE. If there is another type of licenses which is not 'not found' OR 'other', PLEASE IGNORE IT. I need this to be 1000000000000000000 percent accurate, if you are NOT sure or didn't found something, DO NOT INVENT IT. Please, only search in official pages!!! Not a blog or something, I want the official pages of the program asked. By the way, PLEASE, GIVE ONLY THE INFORMATION, I DONT NEED MORE THINGS LIKE YOU TEXTING AND WRITING THINGS, JUST THE FORMAT ASKED, NOT EVEN IN PARENTHESES, I WANT THE SIMPLIEST NAME POSSIBLE, FOR EXAMPLE, IF ITS PROPIETARY LICENSE, DONT TELL ME THE WHOLE NAME, JUST PROPIETARY, AND THE SAME WITH OTHER EXAMPLES TOO, NOT JUST THIS ONE. IF YOU CANT FIND THE LICENSE LINK, PLEASE DONT PUT THE LICENSE TYPE, I NEED SO BAD THE RETRIEVED FROM INFORMATION, IF YOU DONT HAVE THAT, THE LICENSE TYPE MUST BE NOT FOUND. ALSO, THE NAMES MUST BE IN A SPECIFIC FORMAT, FIRST LETTER CAPITALS, THEN IN LOWERCASE LIKE THIS EXAMPLE: Apache-2.0, Propietary... IF ITS SOME TYPE OF ACRONYM, ALL CAPS PLEASE. JUST WRITE IT IN COMMON SENSE BASICALY, PLEASE, DONT IGNORE ANY 'other' OR 'not found' LICENSE, DO NOT IGNORE. IN THE LINK YOU RETRIEVED IT FROM, I NEED TO SEE THE DIRECT LINK, I DON'T NEED THE MAIN PAGE, JUST THE PAGE WHERE IT SAYS THE LICENSE. FINALLY, PLEASE PLEASE AND PLEASE, I NEED YOU TO BE CONSISTENT, SO YOU NEED TO BE 10000 PERCENT SURE, DONT TRY TO ACT COOL BY INVENTING SOME LICENSES, 10000000000 PERCENT ACCURACY, I WILL EXECUTE THIS CODE THOUSANDS OF TIMES, SO I NEED CONSISTENCY. 
PLEASE, DO NOT SEND ME 404 ERROR PAGES (not found), VERY IMPORTANT, IF YOU ARE NOT 100 PERCENT SURE ABOUT THE PAGE EXISTING, PLEASE, JUST SEND ME THE MAIN PAGE, PLEASE DO NOT TRY TO GUESS THE ROUTE TO THE LICENSE. IF THE LICENSE IS PROPIETARY, I JUST WANT TO KNOW ITS PROPIETARY, NOT A CUSTOM OR SOMETHING LIKE THAT, JUST PROPIETARY PLEASE. IF ANY CASE, IT GIVES OUT 2 LICENSES (or more), PLEASE, JUST WRITE ONE, THE MOST ACCURATE ONE, DONT ADD AND or SOMETHING LIKE THAT PLEASE, RESPECT THE yaml FORMAT, NO ANDs or ORs PLEASE. LAST THING, PLEASE DOUBLE CHECK (even TRIPLE if you can) BEFORE SENDING ME THIS. IN THIS FORMAT:\n : \n : \n License: \n Permission to redistribute: \n Retrieved from: "]) + +prompt = """ +I want you to search ALL the 'not found' OR 'other' licenses on the temporal_print.yml, +make a deep search on google or wherever to found this licenses and if they can be redistributed or not PLEASE. +If there is another type of licenses which is not 'not found' OR 'other', PLEASE IGNORE IT. + +I need this to be 1000000000000000000 percent accurate, if you are NOT sure or didn't found something, DO NOT INVENT IT. +Please, only search in official pages!!! Not a blog or something, I want the official pages of the program asked. + +By the way, PLEASE, GIVE ONLY THE INFORMATION, I DONT NEED MORE THINGS LIKE YOU TEXTING AND WRITING THINGS, JUST THE FORMAT ASKED, +NOT EVEN IN PARENTHESES, I WANT THE SIMPLIEST NAME POSSIBLE. +FOR EXAMPLE, IF ITS PROPIETARY LICENSE, DONT TELL ME THE WHOLE NAME, JUST PROPIETARY, AND THE SAME WITH OTHER EXAMPLES TOO. + +IF YOU CANT FIND THE LICENSE LINK, PLEASE DONT PUT THE LICENSE TYPE, I NEED SO BAD THE RETRIEVED FROM INFORMATION, +IF YOU DONT HAVE THAT, THE LICENSE TYPE MUST BE NOT FOUND. +ALSO, THE NAMES MUST BE IN A SPECIFIC FORMAT, FIRST LETTER CAPITALS, THEN IN LOWERCASE LIKE THIS EXAMPLE: Apache-2.0, Propietary... +IF ITS SOME TYPE OF ACRONYM, ALL CAPS PLEASE. JUST WRITE IT IN COMMON SENSE BASICALLY. 
+ +PLEASE, DONT IGNORE ANY 'other' OR 'not found' LICENSE, DO NOT IGNORE. +IN THE LINK YOU RETRIEVED IT FROM, I NEED TO SEE THE DIRECT LINK, I DON'T NEED THE MAIN PAGE, JUST THE PAGE WHERE IT SAYS THE LICENSE. + +FINALLY, PLEASE, I NEED YOU TO BE CONSISTENT, SO YOU NEED TO BE 10000 PERCENT SURE, DONT TRY TO ACT COOL BY INVENTING SOME LICENSES. +PLEASE, DO NOT SEND ME 404 ERROR PAGES (not found), VERY IMPORTANT, IF YOU ARE NOT 100 PERCENT SURE ABOUT THE PAGE EXISTING, +PLEASE, JUST SEND ME THE MAIN PAGE, PLEASE DO NOT TRY TO GUESS THE ROUTE TO THE LICENSE. + +IF THE LICENSE IS PROPIETARY, I JUST WANT TO KNOW ITS PROPIETARY, NOT A CUSTOM OR SOMETHING LIKE THAT, JUST PROPIETARY PLEASE. +IF ANY CASE, IT GIVES OUT 2 LICENSES (or more), PLEASE, JUST WRITE ONE, THE MOST ACCURATE ONE, DONT ADD AND or SOMETHING LIKE THAT PLEASE, +RESPECT THE yaml FORMAT, NO ANDs or ORs PLEASE. LAST THING, PLEASE DOUBLE CHECK (even TRIPLE if you can) BEFORE SENDING ME THIS. + +IN THIS FORMAT: + : + : + License: + Permission to redistribute: + Retrieved from: +""" + +response = model.generate_content([file, prompt]) +print(response.text) +text = response.text.replace("```yaml", "").replace("```", "").strip() +with open("licenses_aux_llm.yaml", "w", encoding="utf-8") as f: + f.write(text) + +# This is to know which models are available with the API which is being used. +# print("--- AVAILABLE MODELS ---") +# for m in genai.list_models(): +# if 'generateContent' in m.supported_generation_methods: +# print(m.name) \ No newline at end of file