-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathExtractIsdocFunction.cs
More file actions
118 lines (100 loc) · 4.1 KB
/
ExtractIsdocFunction.cs
File metadata and controls
118 lines (100 loc) · 4.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
using System.Net;
using Microsoft.AspNetCore.WebUtilities;
using Microsoft.Azure.Functions.Worker;
using Microsoft.Azure.Functions.Worker.Http;
using Microsoft.Extensions.Logging;
using Microsoft.Extensions.Primitives;
using Microsoft.Net.Http.Headers;
namespace MKTestAzureFunction;
/// <summary>
/// HTTP-triggered function (POST <c>extract-isdoc</c>) that reads a PDF from the request,
/// passes it to <see cref="IsdocAttachmentExtractor.ExtractIsdocXml"/> and returns the
/// resulting XML to the caller.
/// </summary>
/// <remarks>
/// The PDF may be sent either as the raw request body or as the first file part of a
/// <c>multipart/form-data</c> payload.
/// </remarks>
public sealed class ExtractIsdocFunction(IsdocAttachmentExtractor extractor, ILogger<ExtractIsdocFunction> logger)
{
    private const string ContentTypeHeader = "Content-Type";

    private readonly IsdocAttachmentExtractor _extractor = extractor;
    private readonly ILogger<ExtractIsdocFunction> _logger = logger;

    /// <summary>
    /// Handles the HTTP request and produces the response.
    /// </summary>
    /// <param name="request">Incoming HTTP request carrying the PDF.</param>
    /// <returns>
    /// <list type="bullet">
    /// <item><description>200 with an <c>application/xml</c> body when extraction succeeds.</description></item>
    /// <item><description>400 when the request cannot be read, the PDF is empty, or processing fails unexpectedly.</description></item>
    /// <item><description>404 when the extractor throws <see cref="InvalidOperationException"/>.</description></item>
    /// </list>
    /// </returns>
    [Function(nameof(ExtractIsdocFunction))]
    public async Task<HttpResponseData> Run(
        [HttpTrigger(AuthorizationLevel.Function, "post", Route = "extract-isdoc")] HttpRequestData request)
    {
        byte[] pdfContent;
        try
        {
            pdfContent = await ReadPdfContentAsync(request);
        }
        catch (InvalidOperationException ex)
        {
            // ReadPdfContentAsync reports every client-side payload problem as
            // InvalidOperationException with a message that is safe to return.
            return await CreateTextResponseAsync(request, HttpStatusCode.BadRequest, ex.Message);
        }

        if (pdfContent.Length == 0)
        {
            return await CreateTextResponseAsync(request, HttpStatusCode.BadRequest, "PDF content is empty.");
        }

        try
        {
            using var pdfStream = new MemoryStream(pdfContent);
            var xml = _extractor.ExtractIsdocXml(pdfStream);

            var response = request.CreateResponse(HttpStatusCode.OK);
            response.Headers.Add(ContentTypeHeader, "application/xml; charset=utf-8");
            await response.WriteStringAsync(xml);
            return response;
        }
        catch (InvalidOperationException ex)
        {
            // Record the reason so 404 responses can be diagnosed from the logs.
            _logger.LogWarning(ex, "ISDOC extraction reported a failure.");
            return await CreateTextResponseAsync(request, HttpStatusCode.NotFound, ex.Message);
        }
        catch (Exception ex)
        {
            _logger.LogError(ex, "Failed to process PDF input.");
            return await CreateTextResponseAsync(request, HttpStatusCode.BadRequest, "Invalid PDF input.");
        }
    }

    /// <summary>
    /// Reads the PDF bytes from the request: the whole body when the request is not
    /// <c>multipart/form-data</c>, otherwise the body of the first <c>form-data</c> part
    /// that carries a file name.
    /// </summary>
    /// <param name="request">Incoming HTTP request.</param>
    /// <returns>The PDF bytes (possibly empty).</returns>
    /// <exception cref="InvalidOperationException">
    /// The Content-Type header cannot be parsed, the multipart boundary is missing,
    /// the multipart payload is malformed, or it contains no file part.
    /// </exception>
    private static async Task<byte[]> ReadPdfContentAsync(HttpRequestData request)
    {
        var contentType = request.Headers.TryGetValues(ContentTypeHeader, out var values)
            ? values.FirstOrDefault()
            : null;

        var isMultipart = !string.IsNullOrWhiteSpace(contentType)
            && contentType.StartsWith("multipart/form-data", StringComparison.OrdinalIgnoreCase);
        if (!isMultipart)
        {
            return await ReadToByteArrayAsync(request.Body);
        }

        if (!MediaTypeHeaderValue.TryParse(contentType, out var mediaType))
        {
            throw new InvalidOperationException("Invalid Content-Type header.");
        }

        var boundary = HeaderUtilities.RemoveQuotes(mediaType.Boundary).Value;
        if (string.IsNullOrWhiteSpace(boundary))
        {
            throw new InvalidOperationException("Missing multipart boundary.");
        }

        try
        {
            var reader = new MultipartReader(boundary, request.Body);
            MultipartSection? section;
            while ((section = await reader.ReadNextSectionAsync()) is not null)
            {
                var isFormData =
                    ContentDispositionHeaderValue.TryParse(section.ContentDisposition, out var disposition)
                    && disposition.DispositionType.Equals("form-data", StringComparison.OrdinalIgnoreCase);
                if (!isFormData)
                {
                    continue;
                }

                // Only parts that name a file are treated as the uploaded PDF;
                // plain form fields are skipped.
                var hasFileName = !StringSegment.IsNullOrEmpty(disposition!.FileName)
                    || !StringSegment.IsNullOrEmpty(disposition.FileNameStar);
                if (!hasFileName)
                {
                    continue;
                }

                return await ReadToByteArrayAsync(section.Body);
            }
        }
        catch (Exception ex) when (ex is InvalidDataException or IOException)
        {
            // The multipart reader signals malformed or truncated payloads with these
            // exception types. Translate them so the caller answers 400 rather than
            // letting the failure surface as an unhandled server error.
            throw new InvalidOperationException("Malformed multipart payload.", ex);
        }

        throw new InvalidOperationException("No file part found in multipart payload.");
    }

    /// <summary>
    /// Copies the remaining content of <paramref name="stream"/> into a new byte array.
    /// </summary>
    /// <param name="stream">Stream to drain.</param>
    /// <returns>All bytes read from the stream.</returns>
    /// <remarks>
    /// NOTE(review): the whole payload is buffered in memory with no size cap here;
    /// confirm that the host's request size limit is acceptable for this endpoint.
    /// </remarks>
    private static async Task<byte[]> ReadToByteArrayAsync(Stream stream)
    {
        using var buffer = new MemoryStream();
        await stream.CopyToAsync(buffer);
        return buffer.ToArray();
    }

    /// <summary>
    /// Creates a plain-text response with the given status code and message.
    /// </summary>
    /// <param name="request">Request the response is created for.</param>
    /// <param name="statusCode">HTTP status code of the response.</param>
    /// <param name="message">Text written to the response body.</param>
    /// <returns>The populated response.</returns>
    private static async Task<HttpResponseData> CreateTextResponseAsync(
        HttpRequestData request,
        HttpStatusCode statusCode,
        string message)
    {
        var response = request.CreateResponse(statusCode);

        // Declare the body type explicitly, mirroring the success path, so clients
        // do not have to guess how to interpret the message.
        response.Headers.Add(ContentTypeHeader, "text/plain; charset=utf-8");
        await response.WriteStringAsync(message);
        return response;
    }
}