RESTinio
Loading...
Searching...
No Matches
multipart_body.hpp
Go to the documentation of this file.
1/*
2 * RESTinio
3 */
4
5/*!
6 * @file
7 * @brief Various tools for working with multipart bodies.
8 *
9 * @since v.0.6.1
10 */
11
12#pragma once
13
14#include <restinio/helpers/string_algo.hpp>
15#include <restinio/helpers/easy_parser.hpp>
16#include <restinio/helpers/http_field_parsers/basics.hpp>
17#include <restinio/helpers/http_field_parsers/content-type.hpp>
18
19#include <restinio/http_headers.hpp>
20#include <restinio/request_handler.hpp>
21#include <restinio/expected.hpp>
22
23#include <restinio/impl/string_caseless_compare.hpp>
24
25#include <restinio/utils/metaprogramming.hpp>
26
27#include <iostream>
28
29namespace restinio
30{
31
33{
34
35//
36// split_multipart_body
37//
38/*!
39 * @brief Helper function for splitting a multipart body into a series of
40 * separate parts.
41 *
42 * @return A list of separate parts. This list will be empty if no parts
43 * are found or if there is some error in the body format (for example if
44 * some part is opened by @a boundary but is not closed properly).
45 *
46 * @note
47 * A user should extract the value of @a boundary from the Content-Type
48 * field and modify it in the proper way (two leading hyphens should be added to the
49 * value of "boundary" parameter) by him/herself. Helper function
50 * detect_boundary_for_multipart_body() can be used for that purpose.
51 *
52 * Usage example:
53 * @code
54 * using namespace restinio::multipart_body;
55 *
56 * const auto boundary = detect_boundary_for_multipart_body(
57 * req, "multipart", "form-data" );
58 * if( boundary )
59 * {
60 * const auto parts = split_multipart_body( req.body(), *boundary );
61 * for( restinio::string_view_t one_part : parts )
62 * {
63 * ... // Handling of a part.
64 * }
65 * }
66 * @endcode
67 *
68 * @since v.0.6.1
69 */
70[[nodiscard]]
71inline std::vector< string_view_t >
73 string_view_t body,
74 string_view_t boundary )
75{
76 using namespace restinio::string_algo;
77
78 std::vector< string_view_t > result;
79 std::vector< string_view_t > tmp_result;
80
81 const string_view_t eol{ "\r\n" };
82 const string_view_t last_separator{ "--\r\n" };
83
84 // Find the first boundary.
85 auto boundary_pos = body.find( boundary );
86 if( string_view_t::npos == boundary_pos )
87 // There is no initial separator in the body.
88 return result;
89
90 // The first boundary can be at the very beginning of the body or
91 // there should be CRLF before the initial boundary.
92 if( boundary_pos != 0u &&
93 (boundary_pos < eol.size() ||
94 body.substr( boundary_pos - eol.size(), eol.size() ) != eol) )
95 return result;
96
97 auto remaining_body = body.substr( boundary_pos + boundary.size() );
98 if( starts_with( remaining_body, last_separator ) )
99 // The start boundary is the last boundary.
100 return result;
101
102 while( starts_with( remaining_body, eol ) )
103 {
104 remaining_body = remaining_body.substr( eol.size() );
105
106 boundary_pos = remaining_body.find( boundary );
107 if( string_view_t::npos == boundary_pos )
108 return result;
109
110 // There should be CRLF before the next boundary.
111 if( boundary_pos < eol.size() ||
112 remaining_body.substr( boundary_pos - eol.size(), eol.size() ) != eol )
113 return result;
114
115 tmp_result.push_back(
116 remaining_body.substr( 0u, boundary_pos - eol.size() ) );
117
118 remaining_body = remaining_body.substr( boundary_pos + boundary.size() );
119 // Is this boundary the last one?
120 if( starts_with( remaining_body, last_separator ) )
121 {
122 // Yes, our iteration can be stopped and we can return the result.
123 swap( tmp_result, result );
124 return result;
125 }
126 }
127
128 // We didn't find the last boundary. Or some error encountered in the format
129 // of the body.
130 //
131 // Empty result should be returned.
132 return result;
133}
134
135//
136// parsed_part_t
137//
138/*!
139 * @brief A description of parsed content of one part of a multipart body.
140 *
141 * @since v.0.6.1
142 */
144{
145 //! HTTP-fields local for that part.
146 /*!
147 * @note
148 * It can be empty if no HTTP-fields are found for that part.
149 */
151 //! The body of that part.
153};
154
155namespace impl
156{
157
159{
160
161using namespace restinio::http_field_parsers;
162
163namespace easy_parser = restinio::easy_parser;
164
165constexpr char CR = '\r';
166constexpr char LF = '\n';
167
168//
169// body_producer_t
170//
171/*!
172 * @brief A special producer that consumes the whole remaining
173 * content from the input stream.
174 *
175 * @attention
176 * This producer can be seen as a hack. It can't be used safely
177 * outside the context for that this producer was created. It's because
178 * body_producer_t doesn't shift the current position in the input
179 * stream.
180 *
181 * @since v.0.6.1
182 */
185{
186 [[nodiscard]]
188 try_parse( easy_parser::impl::source_t & from ) const noexcept
189 {
190 // Return the whole content from the current position.
191 return from.fragment( from.current_position() );
192 }
193};
194
195//
196// field_value_producer_t
197//
198/*!
199 * @brief A special producer that consumes the rest of the current
200 * line in the input stream until CR/LF will be found.
201 *
202 * @note
203 * CR and LF symbols are not consumed from the input stream.
204 *
205 * @since v.0.6.1
206 */
208 : public easy_parser::impl::producer_tag< std::string >
209{
210 [[nodiscard]]
212 try_parse( easy_parser::impl::source_t & from ) const
213 {
214 std::string accumulator;
215 auto ch = from.getch();
216 while( !ch.m_eof && ch.m_ch != CR && ch.m_ch != LF )
217 {
218 accumulator += ch.m_ch;
219 ch = from.getch();
220 }
221
222 if( ch.m_eof )
223 return make_unexpected( easy_parser::parse_error_t{
224 from.current_position(),
226 } );
227
228 // CR or LF symbol should be returned back.
229 from.putback();
230
231 return { std::move(accumulator) };
232 }
233};
234
235} /* namespace parser_details */
236
237//
238// make_parser
239//
240/*!
241 * @brief A factory function for a parser of a part of multipart message.
242 *
243 * Handles the following rule:
244@verbatim
245part := *( token ':' OWS field-value CR LF ) CR LF body
246@endverbatim
247 *
248 * Produces parsed_part_t instance.
249 *
250 * @since v.0.6.1
251 */
252[[nodiscard]]
253auto
255{
256 using namespace parser_details;
257
258 return produce< parsed_part_t >(
259 produce< http_header_fields_t >(
260 repeat( 0, N,
261 produce< http_header_field_t >(
262 token_p() >> to_lower() >> custom_consumer(
263 [](auto & f, std::string && v) {
264 f.name(std::move(v));
265 } ),
266 symbol(':'),
267 ows(),
268 field_value_producer_t{} >> custom_consumer(
269 [](auto & f, std::string && v) {
270 f.value(std::move(v));
271 } ),
272 symbol(CR), symbol(LF)
273 ) >> custom_consumer(
274 [](auto & to, http_header_field_t && v) {
275 to.add_field( std::move(v) );
276 } )
277 )
278 ) >> &parsed_part_t::fields,
279 symbol(CR), symbol(LF),
280 body_producer_t{} >> &parsed_part_t::body );
281}
282
283} /* namespace impl */
284
285//
286// try_parse_part
287//
288/*!
289 * @brief Helper function for parsing content of one part of a multipart body.
290 *
291 * This function is intended to be used with split_multipart_body():
292 * @code
293 * using namespace restinio::multipart_body;
294 *
295 * const auto boundary = detect_boundary_for_multipart_body(
296 * req, "multipart", "form-data" );
297 * if( boundary )
298 * {
299 * const auto parts = split_multipart_body( req.body(), *boundary );
300 * for( restinio::string_view_t one_part : parts )
301 * {
302 * const auto parsed_part = try_parse_part( one_part );
303 * if( parsed_part )
304 * {
305 * ... // Handle the content of the parsed part.
306 * }
307 * }
308 * }
309 * @endcode
310 *
311 * @since v.0.6.1
312 */
313[[nodiscard]]
315try_parse_part( string_view_t part )
316{
317 namespace easy_parser = restinio::easy_parser;
318
319 easy_parser::impl::source_t source{ part };
320
321 auto actual_producer = impl::make_parser();
322
323 return easy_parser::impl::top_level_clause_t< decltype(actual_producer) >{
324 std::move(actual_producer)
325 }.try_process( source );
326}
327
328//
329// handling_result_t
330//
331/*!
332 * @brief The result to be returned from user-provided handler of
333 * parts of multipart body.
334 *
335 * @since v.0.6.1
336 */
338{
339 //! Enumeration of parts should be continued.
340 //! If there is another part the user-provided handler will
341 //! be called for it.
343 //! Enumeration of parts should be stopped.
344 //! All remaining parts of multipart body will be skipped.
345 //! But the result of the enumeration will be successful.
347 //! Enumeration of parts should be ignored.
348 //! All remaining parts of multipart body will be skipped and
349 //! the result of the enumeration will be a failure.
351};
352
353//
354// enumeration_error_t
355//
356/*!
357 * @brief The result of an attempt to enumerate parts of a multipart body.
358 *
359 * @since v.0.6.1
360 */
362{
363 //! Content-Type field is not found.
364 //! If Content-Type is absent there is no way to detect 'boundary'
365 //! parameter.
367 //! Unable to parse Content-Type field value.
369 //! Content-Type field value parsed but doesn't contain an appropriate
370 //! value. For example there can be media-type different from 'multipart'
371 //! or 'boundary' parameter can be absent.
373 //! Value of 'boundary' parameter is invalid (for example it contains
374 //! some illegal characters).
376 //! No parts of a multipart body actually found.
378 //! Enumeration of parts was aborted by user-provided handler.
379 //! This code is returned when user-provided handler returns
380 //! handling_result_t::terminate_enumeration.
382 //! Some unexpected error encountered during the enumeration.
384};
385
386namespace impl
387{
388
390{
391
392// From https://tools.ietf.org/html/rfc1521:
393//
394// boundary := 0*69<bchars> bcharsnospace
395//
396// bchars := bcharsnospace / " "
397//
398// bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" / "+" /"_"
399// / "," / "-" / "." / "/" / ":" / "=" / "?"
400//
401[[nodiscard]]
402constexpr bool
404{
405 return (ch >= '0' && ch <= '9') // DIGIT
406 || ((ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z')) // ALPHA
407 || ch == '\''
408 || ch == '('
409 || ch == ')'
410 || ch == '+'
411 || ch == '_'
412 || ch == ','
413 || ch == '-'
414 || ch == '.'
415 || ch == '/'
416 || ch == ':'
417 || ch == '='
418 || ch == '?';
419}
420
421[[nodiscard]]
422constexpr bool
423is_bchar( char ch )
424{
425 return is_bcharnospace(ch) || ch == ' '; // A bchar is a bcharnospace or a space.
426}
427
428} /* namespace boundary_value_checkers */
429
430} /* namespace impl */
431
432//
433// check_boundary_value
434//
435/*!
436 * @brief A helper function for checking the validity of 'boundary' value.
437 *
438 * The allowed format for 'boundary' value is defined here:
439 * https://tools.ietf.org/html/rfc2046
440@verbatim
441 boundary := 0*69<bchars> bcharsnospace
442
443 bchars := bcharsnospace / " "
444
445 bcharsnospace := DIGIT / ALPHA / "'" / "(" / ")" /
446 "+" / "_" / "," / "-" / "." /
447 "/" / ":" / "=" / "?"
448@endverbatim
449 *
450 * @return enumeration_error_t::illegal_boundary_value if @a value has
451 * illegal value or an empty optional if there are no errors detected.
452 *
453 * @since v.0.6.1
454 */
455[[nodiscard]]
456inline std::optional< enumeration_error_t >
457check_boundary_value( string_view_t value )
458{
459 using namespace impl::boundary_value_checkers;
460
461 if( value.size() >= 1u && value.size() <= 70u )
462 {
463 const std::size_t last_index = value.size() - 1u;
464 for( std::size_t i = 0u; i != last_index; ++i )
465 if( !is_bchar( value[i] ) )
467
468 if( !is_bcharnospace( value[ last_index ] ) )
470 }
471 else
473
474 return std::nullopt;
475}
476
477//
478// detect_boundary_for_multipart_body
479//
480/*!
481 * @brief Helper function for parsing Content-Type field and extracting
482 * the value of 'boundary' parameter.
483 *
484 * It finds Content-Type field, then parses it, then checks the value
485 * of media-type, then finds 'boundary' parameter, then checks the validity
486 * of 'boundary' value and then adds two leading hyphens to the value of
487 * 'boundary' parameter.
488 *
489 * The returned value (if there is no error) can be used for splitting
490 * a multipart body to separate parts.
491 *
492 * @since v.0.6.1
493 */
494template< typename Extra_Data >
495[[nodiscard]]
498 const generic_request_t< Extra_Data > & req,
499 string_view_t expected_media_type,
500 std::optional< string_view_t > expected_media_subtype )
501{
502 namespace hfp = restinio::http_field_parsers;
503 using restinio::impl::is_equal_caseless;
504
505 // Content-Type header field should be present.
506 const auto content_type = req.header().opt_value_of(
507 restinio::http_field::content_type );
508 if( !content_type )
509 return make_unexpected(
511
512 // Content-Type field should be successfully parsed and should
513 // contain a value that corresponds to the expected media-type.
514 const auto parse_result = hfp::content_type_value_t::try_parse(
515 *content_type );
516 if( !parse_result )
517 return make_unexpected(
519
520 const auto & media_type = parse_result->media_type;
521 if( !is_equal_caseless( expected_media_type, media_type.type ) )
522 {
523 return make_unexpected(
525 }
526 if( expected_media_subtype &&
527 !is_equal_caseless( *expected_media_subtype, media_type.subtype ) )
528 {
529 return make_unexpected(
531 }
532
533 // `boundary` param should be present in parsed Content-Type value.
534 const auto boundary = hfp::find_first(
535 parse_result->media_type.parameters,
536 "boundary" );
537 if( !boundary )
538 return make_unexpected(
540
541 // `boundary` should have valid value.
542 const auto boundary_check_result = check_boundary_value( *boundary );
543 if( boundary_check_result )
544 return make_unexpected( *boundary_check_result );
545
546 // Actual value of boundary mark can be created.
547 std::string actual_boundary_mark;
548 actual_boundary_mark.reserve( 2 + boundary->size() );
549 actual_boundary_mark.append( "--" );
550 actual_boundary_mark.append( boundary->data(), boundary->size() );
551
552 return { std::move(actual_boundary_mark) };
553}
554
555namespace impl
556{
557
558/*!
559 * @brief A function that parses every part of a multipart body and
560 * calls a user-provided handler for every parsed part.
561 *
562 * @return the count of parts successfully handled by @a handler or
563 * error code in the case if some error is detected.
564 *
565 * @since v.0.6.1
566 */
567template< typename Handler >
568[[nodiscard]]
571 const std::vector< string_view_t > & parts,
572 Handler && handler )
573{
574 std::size_t parts_processed{ 0u };
575 std::optional< enumeration_error_t > error;
576
577 for( auto current_part : parts )
578 {
579 // The current part should be parsed to headers and the body.
580 auto part_parse_result = try_parse_part( current_part );
581 if( !part_parse_result )
582 return make_unexpected( enumeration_error_t::unexpected_error );
583
584 // NOTE: parsed_part is passed as rvalue reference!
585 const handling_result_t handler_ret_code = handler(
586 std::move(*part_parse_result) );
587
588 if( handling_result_t::terminate_enumeration != handler_ret_code )
589 ++parts_processed;
590 else
591 error = enumeration_error_t::terminated_by_handler;
592
593 if( handling_result_t::continue_enumeration != handler_ret_code )
594 break;
595 }
596
597 if( error )
598 return make_unexpected( *error );
599
600 return parts_processed;
601}
602
603//
604// valid_handler_type
605//
606template< typename, typename = restinio::utils::metaprogramming::void_t<> >
608
609template< typename T >
611 T,
614 std::is_same<
616 decltype(std::declval<T>()(std::declval<parsed_part_t>()))
617 >::value,
618 bool
619 >
620 >
621 > : public std::true_type
622{};
623
624} /* namespace impl */
625
626//
627// enumerate_parts
628//
629/*!
630 * @brief A helper function for enumeration of parts of a multipart body.
631 *
632 * This function:
633 *
634 * - finds Content-Type field for @a req;
635 * - parses Content-Type field, checks the media-type and extracts
636 * the value of 'boundary' parameter. The extracted 'boundary'
637 * parameter is checked for validity;
638 * - splits the body of @a req using value of 'boundary' parameter;
639 * - enumerates every part of body, parses every part and calls
640 * @a handler for every parsed part.
641 *
642 * Enumeration stops if @a handler returns handling_result_t::stop_enumeration
643 * or handling_result_t::terminate_enumeration. If @a handler returns
644 * handling_result_t::terminate_enumeration the enumerate_parts() returns
645 * enumeration_error_t::terminated_by_handler error code.
646 *
647 * A handler passed as @a handler argument should be a function or
648 * lambda/functor with one of the following formats:
649 * @code
650 * handling_result_t(parsed_part_t part);
651 * handling_result_t(parsed_part_t && part);
652 * handling_result_t(const parsed_part_t & part);
653 * @endcode
654 * Note that enumerate_parts() passes parsed_part_t instance to
655 * @a handler as rvalue reference. And this reference will be invalidated
656 * after the return from @a handler.
657 *
658 * Usage example:
659 * @code
660 * auto on_post(const restinio::request_handle_t & req) {
661 * using namespace restinio::multipart_body;
662 * const auto result = enumerate_parts( *req,
663 * [](parsed_part_t part) {
664 * ... // Some actions with the current part.
665 * return handling_result_t::continue_enumeration;
666 * },
667 * "multipart", "form-data" );
668 * if(result) {
669 * ... // Producing positive response.
670 * }
671 * else {
672 * ... // Producing negative response.
673 * }
674 * return restinio::request_accepted();
675 * }
676 * @endcode
677 *
678 * @return the count of parts successfully handled by @a handler or
679 * error code in the case if some error is detected.
680 *
681 * @since v.0.6.1
682 */
683template< typename User_Type, typename Handler >
684[[nodiscard]]
687 //! The request to be handled.
688 const generic_request_t< User_Type > & req,
689 //! The handler to be called for every parsed part.
690 Handler && handler,
691 //! The expected value of 'type' part of 'media-type' from Content-Type.
692 //! If 'type' part is not equal to @a expected_media_type then
693 //! enumeration won't be performed.
694 //!
695 //! @note
696 //! The special value '*' is not handled here.
697 string_view_t expected_media_type = string_view_t{ "multipart" },
698 //! The optional expected value of 'subtype' part of 'media-type'
699 //! from Content-Type. If @a expected_media_subtype is specified and
700 //! mismatches the 'subtype' part then enumeration won't be performed.
701 //!
702 //! @note
703 //! The special value '*' is not handled here.
704 std::optional< string_view_t > expected_media_subtype = std::nullopt )
705{
706 static_assert(
707 impl::valid_handler_type< std::decay_t<Handler> >::value,
708 "Handler should be callable object, "
709 "should accept parsed_part_t by value, const or rvalue reference, "
710 "and should return handling_result_t" );
711
712 const auto boundary = detect_boundary_for_multipart_body(
713 req,
714 expected_media_type,
715 expected_media_subtype );
716 if( boundary )
717 {
718 const auto parts = split_multipart_body( req.body(), *boundary );
719
720 if( parts.empty() )
721 return make_unexpected(
723
724 return impl::enumerate_parts_of_request_body(
725 parts,
726 std::forward<Handler>(handler) );
727 }
728
729 return make_unexpected( boundary.error() );
730}
731
732} /* namespace multipart_body */
733
734} /* namespace restinio */
The class that implements "input stream".
void putback() noexcept
Return one character back to the input stream.
character_t getch() noexcept
Get the next character from the input stream.
Information about parsing error.
error_reason_t
Reason of parsing error.
@ unexpected_eof
Unexpected end of input is encountered when some character is expected.
expected_t< std::size_t, enumeration_error_t > enumerate_parts_of_request_body(const std::vector< string_view_t > &parts, Handler &&handler)
A function that parses every part of a multipart body and calls a user-provided handler for every par...
auto make_parser()
A factory function for a parser of a part of multipart message.
handling_result_t
The result to be returned from user-provided handler of parts of multipart body.
@ stop_enumeration
Enumeration of parts should be stopped. All remaining parts of multipart body will be skipped....
@ terminate_enumeration
Enumeration of parts should be ignored. All remaining parts of multipart body will be skipped and the...
@ continue_enumeration
Enumeration of parts should be continued. If there is another part the user-provided handler will be ...
std::vector< string_view_t > split_multipart_body(string_view_t body, string_view_t boundary)
Helper function for splitting a multipart body into a series of separate parts.
expected_t< parsed_part_t, restinio::easy_parser::parse_error_t > try_parse_part(string_view_t part)
Helper function for parsing content of one part of a multipart body.
std::optional< enumeration_error_t > check_boundary_value(string_view_t value)
A helper function for checking the validity of 'boundary' value.
enumeration_error_t
The result of an attempt to enumerate parts of a multipart body.
@ content_type_field_inappropriate_value
Content-Type field value parsed but doesn't contain an appropriate value. For example there can be me...
@ no_parts_found
No parts of a multipart body actually found.
@ illegal_boundary_value
Value of 'boundary' parameter is invalid (for example it contains some illegal characters).
@ content_type_field_not_found
Content-Type field is not found. If Content-Type is absent there is no way to detect 'boundary' param...
@ terminated_by_handler
Enumeration of parts was aborted by user-provided handler. This code is returned when user-provided h...
@ content_type_field_parse_error
Unable to parse Content-Type field value.
@ unexpected_error
Some unexpected error encountered during the enumeration.
expected_t< std::string, enumeration_error_t > detect_boundary_for_multipart_body(const generic_request_t< Extra_Data > &req, string_view_t expected_media_type, std::optional< string_view_t > expected_media_subtype)
Helper function for parsing Content-Type field and extracting the value of 'boundary' parameter.
expected_t< std::size_t, enumeration_error_t > enumerate_parts(const generic_request_t< User_Type > &req, Handler &&handler, string_view_t expected_media_type=string_view_t{ "multipart" }, std::optional< string_view_t > expected_media_subtype=std::nullopt)
A helper function for enumeration of parts of a multipart body.
http_field_t http_field
Helper alias to omit _t suffix.
A special producer that consumes the whole remaining content from the input stream.
expected_t< string_view_t, easy_parser::parse_error_t > try_parse(easy_parser::impl::source_t &from) const noexcept
A special producer that consumes the rest of the current line in the input stream until CR/LF will be...
expected_t< std::string, easy_parser::parse_error_t > try_parse(easy_parser::impl::source_t &from) const
A description of parsed content of one part of a multipart body.
http_header_fields_t fields
HTTP-fields local for that part.
string_view_t body
The body of that part.