Skip to content

Commit 86c6c96

Browse files
committed
win32/default: Enforce only 1 successful putback guarantee
https://en.cppreference.com/w/c/io/ungetc states that only 1 byte is guaranteed to be put back. Even if Linux/macOS support 4 KB, Windows and other platforms are very strict about the 1-character limit, and they can fail if you attempt 2, 3, or 4 consecutive ungetc calls. To that end, no more characters than needed are ever consumed. Also, when parsing from a file, whitespace that has been consumed remains consumed; it is not put back, similar to fscanf. https://pubs.opengroup.org/onlinepubs/000095399/functions/fscanf.html
1 parent 717e6f8 commit 86c6c96

4 files changed

Lines changed: 109 additions & 76 deletions

File tree

include/scn/scan.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4418,6 +4418,15 @@ class basic_scan_buffer {
44184418
SCN_NODISCARD range_type get();
44194419
SCN_NODISCARD common_range_type get_common_range();
44204420

4421+
void set_skip_whitespace(bool skip)
4422+
{
4423+
m_skip_whitespace = skip;
4424+
}
4425+
SCN_NODISCARD bool get_skip_whitespace() const
4426+
{
4427+
return m_skip_whitespace;
4428+
}
4429+
44214430
protected:
44224431
friend class forward_iterator;
44234432
friend class common_forward_iterator;
@@ -4444,6 +4453,7 @@ class basic_scan_buffer {
44444453
std::basic_string_view<char_type> m_current_view{};
44454454
std::basic_string<char_type> m_putback_buffer{};
44464455
bool m_is_contiguous{false};
4456+
bool m_skip_whitespace{false};
44474457
};
44484458

44494459
template <typename CharT>

src/scn/impl.cpp

Lines changed: 16 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1822,6 +1822,19 @@ auto scan_int_exhaustive_valid_impl(std::string_view source) -> T
18221822
impl::parse_integer_value_exhaustive_valid(source, value);
18231823
return value;
18241824
}
1825+
1826+
template <typename Source>
1827+
auto get_failed_sync_position(Source& source) -> std::ptrdiff_t
1828+
{
1829+
const auto& buffer = source.get_segment_starting_at(0);
1830+
if (source.get_skip_whitespace() && !buffer.empty()) {
1831+
auto it = std::find_if(buffer.begin(), buffer.end(), [](auto ch) {
1832+
return !impl::is_ascii_space(ch);
1833+
});
1834+
return std::distance(buffer.begin(), it);
1835+
}
1836+
return 0;
1837+
}
18251838
} // namespace detail
18261839

18271840
scan_expected<void> vinput(std::string_view format, scan_args args)
@@ -1836,7 +1849,7 @@ scan_expected<void> vinput(std::string_view format, scan_args args)
18361849
}
18371850
return {};
18381851
}
1839-
if (SCN_UNLIKELY(!buffer.sync_all())) {
1852+
if (SCN_UNLIKELY(!buffer.sync(detail::get_failed_sync_position(buffer)))) {
18401853
return detail::unexpected_scan_error(
18411854
scan_error::invalid_source_state,
18421855
"Failed to sync with underlying FILE");
@@ -1861,7 +1874,8 @@ scan_expected<std::ptrdiff_t> sync_after_vscan(
18611874
}
18621875
}
18631876
else {
1864-
if (SCN_UNLIKELY(!source.sync_all())) {
1877+
if (SCN_UNLIKELY(
1878+
!source.sync(detail::get_failed_sync_position(source)))) {
18651879
return detail::unexpected_scan_error(
18661880
scan_error::invalid_source_state,
18671881
"Failed to sync with underlying source");

src/scn/impl.h

Lines changed: 82 additions & 73 deletions
Original file line numberDiff line numberDiff line change
@@ -1197,12 +1197,7 @@ bool basic_scan_file_buffer<FileInterface>::sync(std::ptrdiff_t position)
11971197
static_cast<std::ptrdiff_t>(this->putback_buffer().size())) {
11981198
putback_wrapper wrapper{m_file};
11991199
auto segment = this->get_segment_starting_at(position);
1200-
for (auto it = segment.rbegin(); it != segment.rend(); ++it) {
1201-
if (!m_file.putback(*it)) {
1202-
return false;
1203-
}
1204-
}
1205-
return true;
1200+
return segment.empty();
12061201
}
12071202

12081203
m_file.unsafe_advance_n(position - static_cast<std::ptrdiff_t>(
@@ -1219,12 +1214,7 @@ bool basic_scan_file_buffer<FileInterface>::sync(std::ptrdiff_t position)
12191214
m_file.putback(m_current_view.front());
12201215

12211216
auto segment = std::string_view{this->putback_buffer()}.substr(position);
1222-
for (auto it = segment.rbegin(); it != segment.rend(); ++it) {
1223-
if (!m_file.putback(*it)) {
1224-
return false;
1225-
}
1226-
}
1227-
return true;
1217+
return segment.empty();
12281218
}
12291219

12301220
} // namespace detail
@@ -1990,7 +1980,7 @@ struct read_code_point_into_result {
19901980
};
19911981

19921982
template <typename Range>
1993-
auto read_code_point_into(Range range)
1983+
auto extract_code_point_into(Range range)
19941984
-> read_code_point_into_result<ranges::const_iterator_t<Range>,
19951985
detail::char_t<Range>>
19961986
{
@@ -2001,24 +1991,63 @@ auto read_code_point_into(Range range)
20011991
const auto len = detail::code_point_length_by_starting_code_unit(*it);
20021992

20031993
if (SCN_UNLIKELY(len == 0)) {
2004-
++it;
2005-
it = get_start_for_next_code_point(ranges::subrange{it, range.end()});
20061994
return {it, {}};
20071995
}
20081996

1997+
if (len == 1) {
1998+
return {it, string_type(1, *it)};
1999+
}
2000+
ranges::advance(it, static_cast<std::ptrdiff_t>(len), range.end());
2001+
return {it, string_type(range.begin(), it)};
2002+
}
2003+
2004+
template <typename Range>
2005+
auto advance_code_point_into(
2006+
Range range,
2007+
const read_code_point_into_result<ranges::const_iterator_t<Range>,
2008+
detail::char_t<Range>>& result)
2009+
-> ranges::const_iterator_t<Range>
2010+
{
2011+
auto it = result.iterator;
2012+
const auto len = result.codepoint.size();
2013+
2014+
if (SCN_UNLIKELY(len == 0)) {
2015+
++it;
2016+
return get_start_for_next_code_point(ranges::subrange{it, range.end()});
2017+
}
20092018
if (len == 1) {
20102019
++it;
2011-
return {it, string_type(1, *range.begin())};
20122020
}
2021+
return it;
2022+
}
20132023

2014-
ranges::advance(it, static_cast<std::ptrdiff_t>(len), range.end());
2015-
return {it, string_type{range.begin(), it}};
2024+
template <typename Range>
2025+
auto read_code_point_into(Range range)
2026+
-> read_code_point_into_result<ranges::const_iterator_t<Range>,
2027+
detail::char_t<Range>>
2028+
{
2029+
auto result = extract_code_point_into(range);
2030+
auto it = advance_code_point_into(range, result);
2031+
return {it, std::move(result.codepoint)};
20162032
}
20172033

20182034
template <typename Range>
20192035
auto read_code_point(Range range) -> ranges::const_iterator_t<Range>
20202036
{
2021-
return read_code_point_into(range).iterator;
2037+
SCN_EXPECT(!is_range_eof(range));
2038+
2039+
auto it = range.begin();
2040+
const auto len = detail::code_point_length_by_starting_code_unit(*it);
2041+
2042+
if (SCN_UNLIKELY(len == 0)) {
2043+
++it;
2044+
return get_start_for_next_code_point(ranges::subrange{it, range.end()});
2045+
}
2046+
if (len == 1) {
2047+
return ++it;
2048+
}
2049+
ranges::advance(it, static_cast<std::ptrdiff_t>(len), range.end());
2050+
return it;
20222051
}
20232052

20242053
template <typename Range>
@@ -2158,16 +2187,16 @@ auto read_until_code_point(Range range, function_ref<bool(char32_t)> pred)
21582187
{
21592188
auto it = range.begin();
21602189
while (it != range.end()) {
2161-
const auto val =
2162-
read_code_point_into(ranges::subrange{it, range.end()});
2190+
auto subrange = ranges::subrange{it, range.end()};
2191+
const auto val = extract_code_point_into(subrange);
21632192
if (SCN_LIKELY(val.is_valid())) {
21642193
const auto cp = detail::decode_code_point_exhaustive(
21652194
std::basic_string_view<detail::char_t<Range>>{val.codepoint});
21662195
if (pred(cp)) {
21672196
return it;
21682197
}
21692198
}
2170-
it = val.iterator;
2199+
it = advance_code_point_into(subrange, val);
21712200
}
21722201

21732202
return it;
@@ -2242,32 +2271,31 @@ template <typename Range>
22422271
auto read_matching_code_unit(Range range, detail::char_t<Range> ch)
22432272
-> parse_expected<ranges::const_iterator_t<Range>>
22442273
{
2245-
auto it = read_code_unit(range);
2246-
if (SCN_UNLIKELY(!it)) {
2247-
return unexpected(make_eof_parse_error(it.error()));
2274+
if (auto e = eof_check(range); SCN_UNLIKELY(!e)) {
2275+
return unexpected(make_eof_parse_error(e));
22482276
}
22492277

22502278
if (SCN_UNLIKELY(*range.begin() !=
22512279
static_cast<detail::char_t<Range>>(ch))) {
22522280
return unexpected(parse_error::error);
22532281
}
22542282

2255-
return *it;
2283+
return ranges::next(range.begin());
22562284
}
22572285

22582286
template <typename Range>
22592287
auto read_matching_code_point(Range range, char32_t cp)
22602288
-> parse_expected<ranges::const_iterator_t<Range>>
22612289
{
2262-
auto val = read_code_point_into(range);
2290+
auto val = extract_code_point_into(range);
22632291
if (!val.is_valid()) {
22642292
return unexpected(parse_error::error);
22652293
}
22662294
auto decoded_cp = decode_code_point_exhaustive(val.codepoint);
22672295
if (SCN_UNLIKELY(cp != decoded_cp)) {
22682296
return unexpected(parse_error::error);
22692297
}
2270-
return val.iterator;
2298+
return advance_code_point_into(range, val);
22712299
}
22722300

22732301
template <typename Range>
@@ -2286,33 +2314,6 @@ auto read_matching_string(Range range,
22862314
return it;
22872315
}
22882316

2289-
template <typename Range>
2290-
auto read_matching_string_classic(Range range, std::string_view str)
2291-
-> parse_expected<ranges::const_iterator_t<Range>>
2292-
{
2293-
SCN_TRY(it, read_exactly_n_code_units(
2294-
range, static_cast<std::ptrdiff_t>(str.size()))
2295-
.transform_error(make_eof_parse_error));
2296-
2297-
if constexpr (std::is_same_v<detail::char_t<Range>, char>) {
2298-
auto sv = make_contiguous_buffer(ranges::subrange{range.begin(), it});
2299-
if (SCN_UNLIKELY(sv.view() != str)) {
2300-
return unexpected(parse_error::error);
2301-
}
2302-
return it;
2303-
}
2304-
else {
2305-
auto range_it = range.begin();
2306-
for (size_t i = 0; i < str.size(); ++i, (void)++range_it) {
2307-
if (SCN_UNLIKELY(*range_it !=
2308-
static_cast<detail::char_t<Range>>(str[i]))) {
2309-
return unexpected(parse_error::error);
2310-
}
2311-
}
2312-
return it;
2313-
}
2314-
}
2315-
23162317
// Ripped from fast_float
23172318
constexpr bool fast_streq_nocase(const char* a, const char* b, size_t len)
23182319
{
@@ -2348,16 +2349,15 @@ auto read_matching_string_classic_nocase(Range range, std::string_view str)
23482349
static_cast<char_type>('a' - 'A'));
23492350
};
23502351

2351-
SCN_TRY(it, read_exactly_n_code_units(
2352-
range, static_cast<std::ptrdiff_t>(str.size()))
2353-
.transform_error(make_eof_parse_error));
2354-
2355-
if (SCN_UNLIKELY(!std::equal(
2356-
range.begin(), it, str.begin(), [&](auto a, auto b) {
2357-
return ascii_tolower(a) ==
2358-
static_cast<detail::char_t<Range>>(b);
2359-
}))) {
2360-
return unexpected(parse_error::error);
2352+
auto it = range.begin();
2353+
for (std::size_t i = 0; i < str.size(); ++i, (void)++it) {
2354+
if (it == range.end()) {
2355+
return unexpected(make_eof_parse_error(eof_error::eof));
2356+
}
2357+
if (SCN_UNLIKELY(ascii_tolower(*it) !=
2358+
static_cast<detail::char_t<Range>>(str[i]))) {
2359+
return unexpected(parse_error::error);
2360+
}
23612361
}
23622362

23632363
return it;
@@ -2368,14 +2368,13 @@ template <typename Range>
23682368
auto read_one_of_code_unit(Range range, std::string_view str)
23692369
-> parse_expected<ranges::const_iterator_t<Range>>
23702370
{
2371-
auto it = read_code_unit(range);
2372-
if (SCN_UNLIKELY(!it)) {
2373-
return unexpected(make_eof_parse_error(it.error()));
2371+
if (auto e = eof_check(range); SCN_UNLIKELY(!e)) {
2372+
return unexpected(make_eof_parse_error(e));
23742373
}
23752374

23762375
for (auto ch : str) {
23772376
if (*range.begin() == static_cast<detail::char_t<Range>>(ch)) {
2378-
return *it;
2377+
return ranges::next(range.begin());
23792378
}
23802379
}
23812380

@@ -5426,11 +5425,11 @@ struct bool_reader_base {
54265425
auto read_textual_classic(Range range, bool& value) const
54275426
-> scan_expected<ranges::const_iterator_t<Range>>
54285427
{
5429-
if (auto r = read_matching_string_classic(range, "true")) {
5428+
if (auto r = read_matching_string_classic_nocase(range, "true")) {
54305429
value = true;
54315430
return *r;
54325431
}
5433-
if (auto r = read_matching_string_classic(range, "false")) {
5432+
if (auto r = read_matching_string_classic_nocase(range, "false")) {
54345433
value = false;
54355434
return *r;
54365435
}
@@ -5605,15 +5604,15 @@ class code_point_reader<char32_t> {
56055604
auto read(const SourceRange& range, char32_t& cp)
56065605
-> scan_expected<ranges::const_iterator_t<SourceRange>>
56075606
{
5608-
auto result = read_code_point_into(range);
5607+
auto result = extract_code_point_into(range);
56095608
if (SCN_UNLIKELY(!result.is_valid())) {
56105609
return detail::unexpected_scan_error(
56115610
scan_error::invalid_scanned_value, "Invalid code point");
56125611
}
56135612
cp = detail::decode_code_point_exhaustive_valid(
56145613
std::basic_string_view<detail::char_t<SourceRange>>{
56155614
result.codepoint});
5616-
return result.iterator;
5615+
return advance_code_point_into(range, result);
56175616
}
56185617
};
56195618

@@ -5826,6 +5825,16 @@ auto skip_ws_before_if_required(bool is_required, Range range)
58265825
return unexpected(e);
58275826
}
58285827

5828+
if constexpr (std::is_same_v<
5829+
ranges::const_iterator_t<Range>,
5830+
typename detail::basic_scan_buffer<
5831+
detail::char_t<Range>>::forward_iterator>) {
5832+
auto beg = range.begin();
5833+
if (beg.stores_parent()) {
5834+
beg.parent()->set_skip_whitespace(is_required);
5835+
}
5836+
}
5837+
58295838
if (!is_required) {
58305839
return range.begin();
58315840
}

tests/unittests/examples_test_runner.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,5 +82,5 @@ def check(i, input, expected_output):
8282
check(4, "", "[{1: 2, 3: 4}, {5: 6}]")
8383

8484
check(5, "123 456", "Write two integers:\nTwo integers: 123 456\n")
85-
check(5, "123 abc", "Write two integers:\nFirst integer: 123, rest of the line: abc")
85+
check(5, "123 abc", "Write two integers:\nFirst integer: 123, rest of the line: abc")
8686
check(5, "abc def", "Write two integers:\nEntire line: abc def")

0 commit comments

Comments
 (0)