diff --git a/CHANGELOG.md b/CHANGELOG.md
index 85bd19a86..0507df0b2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -23,6 +23,7 @@ script prior to upgrading to minimize the downtime.
 - In the docker container the folder /home/clowder/data is now whitelisted by default for uploading by reference.
   This can be changed using the environment variable CLOWDER_SOURCEPATH.
 - The current CLA for developers of clowder.
+- sitemap.xml route that lists the dataset pages so they can be crawled for their embedded JSON-LD, for Google Dataset Search
 
 ### Fixed
 - Send email to all admins in a single email when a user submits 'Request access' for a space
diff --git a/app/controllers/Application.scala b/app/controllers/Application.scala
index 344474f35..b61c0cb57 100644
--- a/app/controllers/Application.scala
+++ b/app/controllers/Application.scala
@@ -18,7 +18,8 @@ import scala.collection.mutable.ListBuffer
 @Singleton
 class Application @Inject()(files: FileService, collections: CollectionService, datasets: DatasetService,
                             spaces: SpaceService, events: EventService, comments: CommentService,
-                            sections: SectionService, users: UserService, selections: SelectionService) extends SecuredController {
+                            sections: SectionService, users: UserService, selections: SelectionService,
+                            tree: TreeService) extends SecuredController {
   /**
    * Redirect any url's that have a trailing /
    *
@@ -84,6 +85,58 @@ class Application @Inject()(files: FileService, collections: CollectionService,
     }
   }
 
+  import play.api.libs.json._ // TODO: move to the imports at the top of the file
+  import api.Permission.Permission // TODO: move to the imports at the top of the file
+  import models.User
+
+  /** Escapes the characters that are reserved in XML text and attribute values. */
+  private def xmlEscape(raw: String): String =
+    raw.map {
+      case '&' => "&amp;"
+      case '<' => "&lt;"
+      case '>' => "&gt;"
+      case '"' => "&quot;"
+      case '\'' => "&apos;"
+      case other => other.toString
+    }.mkString
+
+  /**
+   * Serves sitemap.xml: one url entry per dataset page, so that crawlers such as
+   * Google Dataset Search can reach the JSON-LD embedded in those pages.
+   *
+   * The document is rebuilt on every request from the datasets that
+   * tree.getDatasets returns for User.anonymous. /public/sitemap.xml is only a
+   * placeholder for a possible future cache: its content is never read, but the
+   * route answers 404 while it is missing.
+   *
+   * @return 200 with an application/xml body, or 404 if the placeholder is absent
+   */
+  def sitemap = Action { implicit request =>
+    Play.resource("/public/sitemap.xml") match { // placeholder for a future cache
+      case Some(_) => {
+        // Drop a trailing slash so that the links never contain "//datasets".
+        val baseUrl = Utils.baseUrl(request).stripSuffix("/")
+        val xml = new StringBuilder
+        xml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n")
+        xml.append("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")
+        // NOTE(review): the first argument looks like an "only my datasets" switch;
+        // false is used because nothing is owned by anonymous - confirm in TreeService.
+        tree.getDatasets(false, User.anonymous).foreach { dataset =>
+          // Entries without a string id are skipped instead of failing the whole sitemap.
+          (dataset \ "id").asOpt[String].foreach { id =>
+            xml.append("  <url><loc>")
+              .append(xmlEscape(baseUrl + "/datasets/" + id))
+              .append("</loc></url>\n")
+          }
+        }
+        xml.append("</urlset>\n")
+        // Declare the payload as XML; a bare Ok(String) is sent as text/plain.
+        Ok(xml.toString).as("application/xml")
+      }
+      case None => NotFound("Could not find sitemap.xml")
+    }
+  }
+
   /**
    * Main page.
    */
diff --git a/conf/routes b/conf/routes
index 0eb5f62b6..4cd844301 100644
--- a/conf/routes
+++ b/conf/routes
@@ -297,6 +297,12 @@ GET            /javascriptRoutes
 # ----------------------------------------------------------------------
 GET            /swagger                                 @controllers.Application.swagger
 GET            /swaggerUI                               @controllers.Application.swaggerUI
+
+# ----------------------------------------------------------------------
+# SITEMAP
+# ----------------------------------------------------------------------
+GET            /sitemap.xml                             @controllers.Application.sitemap
+GET            /sitemap                                 @controllers.Application.sitemap
 
 # ----------------------------------------------------------------------
 # RESTful API
diff --git a/conf/sitemap.xml b/conf/sitemap.xml
new file mode 100644
index 000000000..a9a734872
--- /dev/null
+++ b/conf/sitemap.xml
@@ -0,0 +1,4 @@
+=placeholder right now:
+Route setup to read from this cached file, so expects it
+ even though the caching hasn't been done yet
+ and right now it is returning it directly
diff --git a/public/sitemap.xml b/public/sitemap.xml
new file mode 100644
index 000000000..a57eac995
--- /dev/null
+++ b/public/sitemap.xml
@@ -0,0 +1,6 @@
+filler that will be replaced with cached sitemap
+though that idea might be on hold if we have to
+ worry about who has access to this sitemap
+ as clowder v1 only has public and hidden
+while v2 might get a private setting where
+ you can see that it is there but not download it w/o auth