Skip to content

Commit 0634f82

Browse files
authored
Merge pull request #33 from gessi-chatbots/datasets-hf
Datasets hf
2 parents 18591c4 + 8ca0c08 commit 0634f82

File tree

13 files changed

+46716
-44930
lines changed

13 files changed

+46716
-44930
lines changed

src/main/java/upc/edu/gessi/repo/controller/ReviewsAPI.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@
55
import org.springframework.http.ResponseEntity;
66
import org.springframework.web.bind.annotation.*;
77
import upc.edu.gessi.repo.dto.Review.ReviewDTO;
8+
import upc.edu.gessi.repo.dto.Review.ReviewFeatureDTO;
9+
10+
import java.util.List;
811

912

1013
@RestController
@@ -15,4 +18,9 @@ public interface ReviewsAPI extends CrudAPI<ReviewDTO> {
1518
ResponseEntity<byte[]> extractReviews(
1619
@RequestParam(name = "size", defaultValue = "10000", required = false) Integer size,
1720
@RequestParam(name = "market-segment", defaultValue = "Communication", required = false) String marketSegment);
21+
22+
@ApiOperation("Fetch reviews based on features")
23+
@GetMapping(value = "/by-features")
24+
ResponseEntity<List<ReviewFeatureDTO>> getReviewsByFeatures(
25+
@RequestBody List<String> features);
1826
}

src/main/java/upc/edu/gessi/repo/controller/impl/ReviewsController.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
import upc.edu.gessi.repo.controller.ReviewsAPI;
1313
import upc.edu.gessi.repo.dto.MobileApplication.MobileApplicationBasicDataDTO;
1414
import upc.edu.gessi.repo.dto.Review.ReviewDTO;
15+
import upc.edu.gessi.repo.dto.Review.ReviewFeatureDTO;
1516
import upc.edu.gessi.repo.exception.*;
17+
import upc.edu.gessi.repo.exception.Reviews.NoReviewsFoundException;
1618
import upc.edu.gessi.repo.service.MobileApplicationService;
1719
import upc.edu.gessi.repo.service.ReviewService;
1820
import upc.edu.gessi.repo.service.ServiceFactory;
@@ -82,6 +84,17 @@ public ResponseEntity<byte[]> extractReviews(final Integer size, final String ma
8284
return new ResponseEntity<>(ttlFile, HttpStatus.OK);
8385
}
8486

87+
@Override
88+
public ResponseEntity<List<ReviewFeatureDTO>> getReviewsByFeatures(List<String> features) {
89+
try {
90+
return new ResponseEntity<>(((ReviewService) useService(ReviewService.class)).getByFeatures(features),
91+
HttpStatus.OK);
92+
} catch (NoReviewsFoundException e) {
93+
return new ResponseEntity<>(null, HttpStatus.NO_CONTENT);
94+
}
95+
96+
}
97+
8598
private Object useService(Class<?> clazz) {
8699
return serviceFactory.createService(clazz);
87100
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
package upc.edu.gessi.repo.dto.Review;
2+
3+
4+
import com.fasterxml.jackson.annotation.JsonFormat;
5+
import com.fasterxml.jackson.annotation.JsonInclude;
6+
import com.fasterxml.jackson.annotation.JsonProperty;
7+
import com.fasterxml.jackson.databind.annotation.JsonDeserialize;
8+
import com.fasterxml.jackson.databind.annotation.JsonSerialize;
9+
import lombok.AllArgsConstructor;
10+
import lombok.Data;
11+
import lombok.NoArgsConstructor;
12+
import upc.edu.gessi.repo.dto.serializer.CustomDateDeserializer;
13+
import upc.edu.gessi.repo.dto.serializer.CustomDateSerializer;
14+
15+
import java.io.Serializable;
16+
import java.util.Date;
17+
import java.util.List;
18+
19+
@Data
20+
@NoArgsConstructor
21+
@AllArgsConstructor
22+
@JsonInclude(JsonInclude.Include.NON_NULL)
23+
public class ReviewFeatureDTO implements Serializable {
24+
25+
@JsonProperty("reviewId")
26+
private String id;
27+
28+
@JsonProperty("review")
29+
private String reviewText;
30+
31+
private List<FeatureDTO> featureDTOs;
32+
}

src/main/java/upc/edu/gessi/repo/repository/FeatureRepository.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,6 @@ public interface FeatureRepository extends RDFCRUDRepository<FeatureDTO> {
2424

2525
List<SentenceAndFeatureDAO> findAllReviewDistinctFeaturesWithSentence();
2626

27-
List<ReviewDatasetDAO> findReviews(List<String> features);
27+
/**
 * Exports the review dataset.
 *
 * <p>NOTE(review): the implementation visible in FeatureRepositoryImpl writes the rows to
 * a CSV file ({@code src/main/resources/reviews.csv}) and ends with {@code return null},
 * so callers should not rely on the returned list — confirm before using the result.
 *
 * @return the review dataset rows (see note above)
 */
List<ReviewDatasetDAO> findReviews();
2828
}
2929

src/main/java/upc/edu/gessi/repo/repository/ReviewRepository.java

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44

55
import org.eclipse.rdf4j.query.TupleQueryResult;
66
import upc.edu.gessi.repo.dto.Review.ReviewDTO;
7+
import upc.edu.gessi.repo.dto.Review.ReviewFeatureDTO;
78
import upc.edu.gessi.repo.dto.Review.SentenceDTO;
89
import upc.edu.gessi.repo.dto.graph.GraphReview;
910
import upc.edu.gessi.repo.exception.Reviews.NoReviewsFoundException;
@@ -32,4 +33,6 @@ void addSentenceToReview(String reviewId,
3233
List<ReviewDTO> getReviewsByAppNameAndIdentifierWithLimit(String appName,
3334
String appIdentifier,
3435
Integer limit);
36+
37+
/**
 * Finds the reviews associated with the given features.
 *
 * @param features feature names used to build the lookup query
 * @return the matching review/feature entries
 * @throws NoReviewsFoundException when the lookup yields no results
 */
List<ReviewFeatureDTO> findAllByFeatures(List<String> features) throws NoReviewsFoundException;
3538
}

src/main/java/upc/edu/gessi/repo/repository/impl/FeatureRepositoryImpl.java

Lines changed: 45 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -236,50 +236,38 @@ public List<SentenceAndFeatureDAO> findAllReviewDistinctFeaturesWithSentence() {
236236
return processService.executeExtractSentenceFromReviewsScript(reviewSentenceAndFeatureDAOS);
237237

238238
}
239-
240239
@Override
241-
public List<ReviewDatasetDAO> findReviews(List<String> features) {
240+
public List<ReviewDatasetDAO> findReviews() {
242241
String csvFilePath = Paths.get("src/main/resources/reviews.csv").toString();
243-
244242
try (BufferedWriter writer = new BufferedWriter(new FileWriter(csvFilePath))) {
245-
writer.write("ReviewText,FeatureLabel,AppIdentifier");
243+
writer.write("ApplicationId, ReviewId, ReviewText, Date, TransFeatExFeatures");
246244
writer.newLine();
247245

248-
for (String feature : features) {
249-
boolean success = false;
250-
int retryCount = 0;
251-
252-
while (!success && retryCount < 3) {
253-
try {
254-
TupleQueryResult result = runSparqlQuery(featureQueryBuilder.featureReviewTextQueryBuilder(feature));
255-
while (result.hasNext()) {
256-
BindingSet bindings = result.next();
257-
if (bindings.getBinding("reviewText") != null
258-
&& bindings.getBinding("reviewText").getValue() != null
259-
&& bindings.getBinding("appIdentifier") != null
260-
&& bindings.getBinding("appIdentifier").getValue() != null
261-
&& bindings.getBinding("featureLabel") != null
262-
&& bindings.getBinding("featureLabel").getValue() != null) {
263-
264-
String reviewText = bindings.getBinding("reviewText").getValue().stringValue();
265-
String featureLabel = bindings.getBinding("featureLabel").getValue().stringValue();
266-
String appIdentifier = bindings.getBinding("appIdentifier").getValue().stringValue();
267-
268-
String row = String.format("\"%s\",\"%s\",\"%s\"", reviewText, featureLabel, appIdentifier);
269-
writer.write(row);
270-
writer.newLine();
271-
writer.flush();
272-
}
273-
}
274-
success = true;
275-
} catch (Exception e) {
276-
retryCount++;
277-
System.err.println("Error querying feature: " + feature + ". Retrying (" + retryCount + "/3)...");
278-
e.printStackTrace();
279-
if (retryCount == 3) {
280-
System.err.println("Failed to process feature after 3 attempts: " + feature);
281-
}
282-
}
246+
TupleQueryResult result = runSparqlQuery(featureQueryBuilder.featureReviewTextQueryBuilder());
247+
while (result.hasNext()) {
248+
BindingSet bindings = result.next();
249+
if (bindings.getBinding("appIdentifier") != null
250+
&& bindings.getBinding("appIdentifier").getValue() != null
251+
&& bindings.getBinding("reviewIdentifier") != null
252+
&& bindings.getBinding("reviewIdentifier").getValue() != null
253+
&& bindings.getBinding("reviewText") != null
254+
&& bindings.getBinding("reviewText").getValue() != null
255+
&& bindings.getBinding("date") != null
256+
&& bindings.getBinding("date").getValue() != null
257+
&& bindings.getBinding("TransFeatexFeatures") != null
258+
&& bindings.getBinding("TransFeatexFeatures").getValue() != null) {
259+
260+
String appIdentifier = escapeCsv(bindings.getBinding("appIdentifier").getValue().stringValue());
261+
String reviewIdentifier = escapeCsv(bindings.getBinding("reviewIdentifier").getValue().stringValue());
262+
String reviewText = escapeCsv(bindings.getBinding("reviewText").getValue().stringValue());
263+
String date = escapeCsv(bindings.getBinding("date").getValue().stringValue());
264+
String features = escapeCsv(bindings.getBinding("TransFeatexFeatures").getValue().stringValue());
265+
266+
String row = String.format("%s,%s,%s,%s,%s",
267+
appIdentifier, reviewIdentifier, reviewText, date, features);
268+
writer.write(row);
269+
writer.newLine();
270+
writer.flush();
283271
}
284272
}
285273
} catch (IOException e) {
@@ -289,4 +277,22 @@ public List<ReviewDatasetDAO> findReviews(List<String> features) {
289277

290278
return null;
291279
}
280+
281+
private String escapeCsv(String field) {
282+
if (field == null) {
283+
return "";
284+
}
285+
286+
field = field.replace("\t", " ");
287+
288+
if (field.contains("\"")) {
289+
field = field.replace("\"", "\"\"");
290+
}
291+
292+
if (field.contains(",") || field.contains("\n") || field.contains("\r") || field.contains(" ")) {
293+
field = "\"" + field + "\"";
294+
}
295+
296+
return field;
297+
}
292298
}

src/main/java/upc/edu/gessi/repo/repository/impl/ReviewRepositoryImpl.java

Lines changed: 50 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,5 @@
11
package upc.edu.gessi.repo.repository.impl;
22

3-
import io.swagger.models.auth.In;
4-
import jdk.jshell.execution.Util;
53
import org.eclipse.rdf4j.model.IRI;
64
import org.eclipse.rdf4j.model.Statement;
75
import org.eclipse.rdf4j.model.Value;
@@ -14,26 +12,20 @@
1412
import org.eclipse.rdf4j.repository.http.HTTPRepository;
1513
import org.springframework.beans.factory.annotation.Autowired;
1614
import org.springframework.stereotype.Repository;
17-
import upc.edu.gessi.repo.dto.Review.FeatureDTO;
18-
import upc.edu.gessi.repo.dto.Review.ReviewDTO;
19-
import upc.edu.gessi.repo.dto.Review.SentenceDTO;
20-
import upc.edu.gessi.repo.dto.Review.SentimentDTO;
15+
import upc.edu.gessi.repo.dto.LanguageModel.LanguageModelDTO;
16+
import upc.edu.gessi.repo.dto.Review.*;
2117
import upc.edu.gessi.repo.dto.graph.GraphReview;
2218
import upc.edu.gessi.repo.exception.Reviews.NoReviewsFoundException;
2319
import upc.edu.gessi.repo.exception.Reviews.ReviewNotFoundException;
2420
import upc.edu.gessi.repo.repository.ReviewRepository;
25-
import upc.edu.gessi.repo.util.ExcelUtils;
2621
import upc.edu.gessi.repo.util.ReviewQueryBuilder;
2722
import upc.edu.gessi.repo.util.SchemaIRI;
2823
import upc.edu.gessi.repo.util.Utils;
2924

3025
import java.nio.charset.StandardCharsets;
3126
import java.text.ParseException;
3227
import java.text.SimpleDateFormat;
33-
import java.util.ArrayList;
34-
import java.util.Collections;
35-
import java.util.Date;
36-
import java.util.List;
28+
import java.util.*;
3729

3830
@Repository
3931
public class ReviewRepositoryImpl implements ReviewRepository {
@@ -126,10 +118,10 @@ public List<ReviewDTO> findAllPaginated(final Integer page, final Integer size)
126118
}
127119

128120
@Override
129-
public List<ReviewDTO> findListed(List<String> reviewIds) throws NoReviewsFoundException {
121+
public List<ReviewDTO> findListed(final List<String> reviewIds) throws NoReviewsFoundException {
130122
TupleQueryResult reviewsResult = runSparqlQuery(reviewQueryBuilder.findReviewsByIds(reviewIds));
131123
if (!reviewsResult.hasNext()) {
132-
throw new NoReviewsFoundException("Any review was found");
124+
throw new NoReviewsFoundException("No review was found");
133125
}
134126
List<ReviewDTO> reviewDTOs = new ArrayList<>();
135127
while (reviewsResult.hasNext()) {
@@ -139,6 +131,22 @@ public List<ReviewDTO> findListed(List<String> reviewIds) throws NoReviewsFoundE
139131
return reviewDTOs;
140132
}
141133

134+
135+
@Override
136+
public List<ReviewFeatureDTO> findAllByFeatures(final List<String> features) throws NoReviewsFoundException {
137+
TupleQueryResult reviewsResult = runSparqlQuery(reviewQueryBuilder.findReviewsByFeatures(features));
138+
if (!reviewsResult.hasNext()) {
139+
throw new NoReviewsFoundException("No review was found");
140+
}
141+
List<ReviewFeatureDTO> reviewDTOs = new ArrayList<>();
142+
while (reviewsResult.hasNext()) {
143+
ReviewFeatureDTO reviewFeatureDTO = getReviewFeatureDTO(reviewsResult.next());
144+
reviewDTOs.add(reviewFeatureDTO);
145+
}
146+
return reviewDTOs;
147+
}
148+
149+
142150
@Override
143151
public IRI insert(ReviewDTO dto) {
144152
List<Statement> statements = new ArrayList<>();
@@ -383,6 +391,35 @@ private void commitChanges(final List<Statement> statements) {
383391
repoConnection.close();
384392
}
385393

394+
private ReviewFeatureDTO getReviewFeatureDTO(final BindingSet bindings) {
395+
ReviewFeatureDTO reviewFeatureDTO = new ReviewFeatureDTO();
396+
if (existsShortReviewBinding(bindings)) {
397+
if (bindings.getBinding("id") != null && bindings.getBinding("id").getValue() != null) {
398+
String idValue = bindings.getBinding("id").getValue().stringValue();
399+
reviewFeatureDTO.setId(idValue);
400+
}
401+
402+
if (bindings.getBinding("text") != null && bindings.getBinding("text").getValue() != null) {
403+
String textValue = bindings.getBinding("text").getValue().stringValue();
404+
reviewFeatureDTO.setReviewText(textValue);
405+
}
406+
407+
}
408+
409+
FeatureDTO featureDTO = new FeatureDTO();
410+
if (bindings.getBinding("feature") != null && bindings.getBinding("feature").getValue() != null) {
411+
String feature = bindings.getBinding("feature").getValue().stringValue();
412+
featureDTO.setFeature(feature);
413+
}
414+
if (bindings.getBinding("model") != null && bindings.getBinding("model").getValue() != null) {
415+
String model = bindings.getBinding("model").getValue().stringValue();
416+
featureDTO.setLanguageModel(new LanguageModelDTO(model));
417+
}
418+
reviewFeatureDTO.setFeatureDTOs(Collections.singletonList(featureDTO));
419+
420+
421+
return reviewFeatureDTO;
422+
}
386423

387424

388425
private ReviewDTO getReviewDTO(final BindingSet bindings) {

src/main/java/upc/edu/gessi/repo/service/ReviewService.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
package upc.edu.gessi.repo.service;
22

33
import upc.edu.gessi.repo.dto.Review.ReviewDTO;
4+
import upc.edu.gessi.repo.dto.Review.ReviewFeatureDTO;
5+
import upc.edu.gessi.repo.exception.Reviews.NoReviewsFoundException;
46

57
import java.util.List;
68

@@ -10,4 +12,9 @@ public interface ReviewService extends CrudService<ReviewDTO> {
1012
List<ReviewDTO> getAllSimplified();
1113

1214
Integer getReviewCount();
15+
16+
/**
 * Returns the reviews for a single feature.
 *
 * <p>NOTE(review): the implementation in ReviewServiceImpl is a stub that returns an
 * empty list; its TODO recommends {@code getByFeatures} instead.
 *
 * @param feature feature name to look up
 * @return the matching reviews
 */
List<ReviewDTO> getByFeature(String feature);
17+
18+
/**
 * Returns the reviews associated with the given features.
 *
 * @param features feature names to look up
 * @return the matching review/feature entries
 * @throws NoReviewsFoundException when no review is found
 */
List<ReviewFeatureDTO> getByFeatures(List<String> features) throws NoReviewsFoundException;
19+
1320
}

src/main/java/upc/edu/gessi/repo/service/impl/InductiveKnowledgeServiceImpl.java

Lines changed: 4 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -126,22 +126,17 @@ public byte[] generateAnalyticalExcel() throws IOException {
126126
insert50TopNouns(workbook);
127127
logger.info("Step 9: Inserting HeatMap");
128128
insertHeatMap(workbook);
129+
/*
130+
Uncomment the two lines below to generate a review dataset.
131+
*/
129132
// logger.info("Step 10: Generating review dataset");
130133
// generateReviewDatasetCSV();
131134
logger.info("Step 10: Generating File in Byte[] format");
132135
return createByteArrayFromWorkbook(workbook);
133136
}
134137

135138
private void generateReviewDatasetCSV() {
136-
List<String> features = new ArrayList<>();
137-
for (SentenceAndFeatureDAO sentenceAndFeatureDAO : distinctFeatures) {
138-
String feature = sentenceAndFeatureDAO.getFeature();
139-
if (!features.contains(feature)) {
140-
features.add(feature);
141-
}
142-
}
143-
((FeatureRepository) useRepository(FeatureRepository.class))
144-
.findReviews(features);
139+
((FeatureRepository) useRepository(FeatureRepository.class)).findReviews();
145140
}
146141

147142
private void insertHeatMap(Workbook workbook) {

src/main/java/upc/edu/gessi/repo/service/impl/ReviewServiceImpl.java

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,13 @@
99
import upc.edu.gessi.repo.dto.Review.*;
1010
import upc.edu.gessi.repo.exception.NoObjectFoundException;
1111
import upc.edu.gessi.repo.exception.ObjectNotFoundException;
12+
import upc.edu.gessi.repo.exception.Reviews.NoReviewsFoundException;
1213
import upc.edu.gessi.repo.repository.RepositoryFactory;
1314
import upc.edu.gessi.repo.repository.ReviewRepository;
1415
import upc.edu.gessi.repo.repository.SentenceRepository;
1516
import upc.edu.gessi.repo.service.ReviewService;
1617

18+
import java.util.ArrayList;
1719
import java.util.List;
1820

1921

@@ -107,4 +109,15 @@ public List<ReviewDTO> getAllSimplified() {
107109
public Integer getReviewCount() {
108110
return ((ReviewRepository) useRepository(ReviewRepository.class)).getCount();
109111
}
112+
113+
@Override
114+
public List<ReviewDTO> getByFeature(String feature) {
115+
// TODO too much overhead due to graphDB, use the getByFeatures best
116+
return new ArrayList<>();
117+
}
118+
119+
@Override
120+
public List<ReviewFeatureDTO> getByFeatures(List<String> features) throws NoReviewsFoundException {
121+
return ((ReviewRepository) useRepository(ReviewRepository.class)).findAllByFeatures(features);
122+
}
110123
}

0 commit comments

Comments
 (0)