From ff2c1e5b2dc562dc39aadacdde8e3b24ecc597d6 Mon Sep 17 00:00:00 2001 From: Bit-ByteBandit <134239703+Bit-ByteBandit@users.noreply.github.com> Date: Wed, 3 Apr 2024 05:58:54 +0300 Subject: [PATCH 1/3] Update README.md --- README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index b05bf12..ca85634 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # pastebin-scraper -A tool that leverages the API of https://psbdmp.ws/ to find emails and domains dumped in pastebin which could lead to finding some juicy information. +A tool that leverages the API of https://psbdmp.ws/ to find any text dumped in pastebin which could lead to finding some juicy information. ## Usage @@ -27,13 +27,13 @@ $ git clone https://github.com/streaak/pastebin-scraper.git $ cd pastebin-scraper $ sudo chmod +x ./scrape.sh $ sudo apt-get install jq -$ mkdir -p output/domain/ output/email/ output/general/ +$ mkdir -p output/general/ ``` ## Output -Outputs will either be stored in `./output/domain`, `./output/email` or `./output/general depending` depending on what you search for. The output will contain 2 files, `output.json` and `urls.txt`. `output.json` will contain the original json returned by the API in a beautified format and `urls.txt` will contain the URLs returned by the API in text format. +Outputs will be stored in `./output/general`. The output will contain 2 files, `results.txt` and `urls.txt`. `results.txt` will contain the dumped text value returned by the API and `urls.txt` will contain the URLs returned by the API. 
## TODO From 265788f10a0219aa16b93fb62a358ac93235e029 Mon Sep 17 00:00:00 2001 From: Bit-ByteBandit <134239703+Bit-ByteBandit@users.noreply.github.com> Date: Wed, 3 Apr 2024 06:00:25 +0300 Subject: [PATCH 2/3] Update README.md --- README.md | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index ca85634..1436da1 100644 --- a/README.md +++ b/README.md @@ -7,17 +7,15 @@ A tool that leverages the API of https://psbdmp.ws/ to find any text dumped in p ``` $ ./scrape.sh Usage: -Search for domains - ./script.sh -d domain.com -Search for emails - ./script.sh -e foo@bar.com General search - ./script.sh -g foobar ``` ## Example ``` -$ ./scrape.sh -d facebook.com +$ ./scrape.sh -g foobar Searching pastebin... -Task completed. Output present in ./output/domain/facebook.com/ +Saving in ./output/general/foobar/results.txt ``` ## Installation From 3c1105a46df47ee16616aca7c0b8b25fd313b7bd Mon Sep 17 00:00:00 2001 From: Bit-ByteBandit <134239703+Bit-ByteBandit@users.noreply.github.com> Date: Wed, 3 Apr 2024 06:08:02 +0300 Subject: [PATCH 3/3] updating and fixing & improvement's fixing the tool to make it work in most shell environments updating api urls and a lot of improvement's --- scrape.sh | 76 +++++++++++++++++++++++++------------------------------ 1 file changed, 34 insertions(+), 42 deletions(-) diff --git a/scrape.sh b/scrape.sh index 6017060..2abe38c 100755 --- a/scrape.sh +++ b/scrape.sh @@ -1,55 +1,47 @@ - echo -e "$(tput setaf 1) ____ ____ _____ __ ____ ____ ____ ___ ____ +echo "$(tput setaf 1) ____ ____ _____ __ ____ ____ ____ ___ ____ | \| \ / ___/ / ]| \ / || \ / _]| \ | o ) o ) _____( \_ / / | D )| o || o ) [_ | D ) | _/| || |\__ |/ / | / | || _/ _]| / | | | O ||_____|/ \ / \_ | \ | _ || | | [_ | \ | | | | \ \ || . \| | || | | || . 
\ - |__| |_____| \___|\____||__|\_||__|__||__| |_____||__|\_|$(tput sgr0)" + |__| |_____| \___|\____||__|\_||__|__||__| |_____||__|\_| +$(tput sgr0)" -echo -e "\n\t\t\t\t By @Streaak" -if [ $# -eq 0 ] || [ $1 == '-h' ]; then - echo -e "$(tput setaf 2)\nUsage:$(tput sgr0)" - echo "Search for domains - ./script.sh -d domain.com" - echo "Search for emails - ./script.sh -e foo@bar.com" +echo "\n\t\t\t\t By @Streaak" + +if [ "$#" -eq 0 ] || [ "$1" = '-h' ]; then + echo "$(tput setaf 2)\nUsage:$(tput sgr0)" echo "General search - ./script.sh -g foobar" exit 0 fi -function scrape() { - echo "Searching pastebin..." - curl -s -X GET "$url" | python -m json.tool > ./output/$dir/output.json - cat ./output/$dir/output.json | jq -r '.data[] | .id' | awk '{print "https://psbdmp.ws/" $1 }' > ./output/$dir/urls.txt - echo "Task completed. Output present in ./output/$dir/$2" - exit 0 - return 0 -} - -if [[ $1 == '-g' ]] && [[ $2 != '' ]] ; then -dir=general/$2 -url=https://psbdmp.ws/api/search/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. Try to run ./script.sh -g " - exit 0 -fi +scrape() { + url="$1" + dir="$2" + echo "Searching pastebin...\n" + response=$(curl -s -X GET $url) + if [ "$response" = "[]" ]; then + echo "Nothing found in response." + exit 0 + fi + curl -s -X GET $url | jq -r '.[] | .id' | awk '{print "https://psbdmp.ws/api/v3/dump/" $1 }' > ./output/$dir/urls.txt + curl -s -X GET $url | jq -r '.[] | .text' > "./output/$dir/results.txt" + line_count=$(wc -l < "./output/$dir/results.txt") -if [[ $1 == '-e' ]] && [[ $2 != '' ]] ; then -dir=email/$2 -url=https://psbdmp.ws/api/search/email/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. 
Try to run ./script.sh -e " - exit 0 -fi + if [ "$line_count" -le 100 ]; then + cat "./output/$dir/results.txt" + else + + echo "Output More than 100 lines\n Saving in ./output/$dir/results.txt" + fi +} -if [[ $1 == '-d' ]] && [[ $2 != '' ]] ; then -dir=domain/$2 -url=https://psbdmp.ws/api/search/domain/$2 -mkdir -p ./output/$dir/ -scrape -elif [[ $2 == '' ]]; then - echo "Missing Value. Try to run ./script.sh -d " - exit 0 +if [ "$1" = '-g' ] && [ -n "$2" ]; then + dir="general/$2" + url="https://psbdmp.ws/api/v3/search/$2" + mkdir -p "./output/$dir/" + scrape "$url" "$dir" +else + echo "Missing Value. Try to run ./script.sh -g " + exit 0 fi