From 71024de0fe991fb1536dcc237e2a2d4d23969f52 Mon Sep 17 00:00:00 2001
From: Ken Micklas
Date: Mon, 14 Aug 2023 21:32:33 +0100
Subject: [PATCH] Fix ignored source attribute

---
 logos-codegen/src/lib.rs | 33 +++++++++++++++++----
 tests/tests/source.rs    | 62 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 90 insertions(+), 5 deletions(-)
 create mode 100644 tests/tests/source.rs

diff --git a/logos-codegen/src/lib.rs b/logos-codegen/src/lib.rs
index 1177dc4d..aa915079 100644
--- a/logos-codegen/src/lib.rs
+++ b/logos-codegen/src/lib.rs
@@ -23,7 +23,7 @@ use parser::{IgnoreFlags, Mode, Parser};
 use quote::ToTokens;
 use util::MaybeVoid;
 
-use proc_macro2::{TokenStream, TokenTree};
+use proc_macro2::{Delimiter, TokenStream, TokenTree};
 use quote::quote;
 use syn::parse_quote;
 use syn::spanned::Spanned;
@@ -202,10 +202,14 @@ pub fn generate(input: TokenStream) -> TokenStream {
     let error_type = parser.error_type.take();
     let extras = parser.extras.take();
 
-    let source = match parser.mode {
-        Mode::Utf8 => quote!(str),
-        Mode::Binary => quote!([u8]),
-    };
+    let source = parser
+        .source
+        .take()
+        .map(strip_wrapping_parens)
+        .unwrap_or(match parser.mode {
+            Mode::Utf8 => quote!(str),
+            Mode::Binary => quote!([u8]),
+        });
     let logos_path = parser
         .logos_path
         .take()
@@ -346,3 +350,22 @@ fn is_logos_attr(attr: &syn::Attribute) -> bool {
         || attr.path().is_ident(TOKEN_ATTR)
         || attr.path().is_ident(REGEX_ATTR)
 }
+
+fn strip_wrapping_parens(t: TokenStream) -> TokenStream {
+    let tts: Vec<TokenTree> = t.into_iter().collect();
+
+    if tts.len() != 1 {
+        tts.into_iter().collect()
+    } else {
+        match tts.into_iter().next().unwrap() {
+            TokenTree::Group(g) => {
+                if g.delimiter() == Delimiter::Parenthesis {
+                    g.stream()
+                } else {
+                    core::iter::once(TokenTree::Group(g)).collect()
+                }
+            }
+            tt => core::iter::once(tt).collect(),
+        }
+    }
+}
diff --git a/tests/tests/source.rs b/tests/tests/source.rs
new file mode 100644
index 00000000..0659d34a
--- /dev/null
+++ b/tests/tests/source.rs
@@ -0,0 +1,62 @@
+use std::ops::Range;
+
+use logos::{Logos as _, Source};
+use logos_derive::Logos;
+
+struct RefSource<'s, S: ?Sized + Source>(&'s S);
+
+impl<'s, S: ?Sized + Source> Source for RefSource<'s, S> {
+    type Slice = S::Slice;
+
+    fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    fn read<'a, Chunk>(&'a self, offset: usize) -> Option<Chunk>
+    where
+        Chunk: logos::source::Chunk<'a>,
+    {
+        self.0.read(offset)
+    }
+
+    unsafe fn read_unchecked<'a, Chunk>(&'a self, offset: usize) -> Chunk
+    where
+        Chunk: logos::source::Chunk<'a>,
+    {
+        self.0.read_unchecked(offset)
+    }
+
+    fn slice(&self, range: Range<usize>) -> Option<&Self::Slice> {
+        self.0.slice(range)
+    }
+
+    unsafe fn slice_unchecked(&self, range: Range<usize>) -> &Self::Slice {
+        self.0.slice_unchecked(range)
+    }
+
+    fn is_boundary(&self, index: usize) -> bool {
+        self.0.is_boundary(index)
+    }
+}
+
+/// A simple regression test that it is possible to define a custom source.
+///
+/// Note that currently parenthesis are required around types with multiple
+/// generic arguments.
+#[derive(Logos, Debug, Clone, Copy, PartialEq)]
+#[logos(source = (RefSource<'s, str>))]
+enum Token {
+    #[regex(".")]
+    Char,
+}
+
+#[test]
+fn custom_source() {
+    let source = RefSource("abc");
+    let mut lex = Token::lexer(&source);
+
+    assert_eq!(lex.next(), Some(Ok(Token::Char)));
+    assert_eq!(lex.next(), Some(Ok(Token::Char)));
+    assert_eq!(lex.next(), Some(Ok(Token::Char)));
+    assert_eq!(lex.next(), None);
+}