From 3da52dbb9737afa09073853423ac10bfc4be1d8b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wi=C5=9Bniewski?= Date: Sat, 29 Jun 2024 15:53:31 +0200 Subject: [PATCH 1/6] Add C++ based Gerber parser prototype --- .clang-format | 39 ++ CMakeLists.txt | 7 + main.cpp | 344 ++++++++++++++++++ output.jpeg | Bin 0 -> 10182 bytes output.svg | 40 ++ test/assets/gerberx3/tokens/d_codes/D01.grb | 5 + test/assets/gerberx3/tokens/d_codes/D02.grb | 4 + test/assets/gerberx3/tokens/d_codes/D03.grb | 5 + test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb | 1 + test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb | 1 + test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb | 1 + test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb | 1 + test/assets/gerberx3/tokens/g_codes/G01.grb | 1 + test/assets/gerberx3/tokens/g_codes/G02.grb | 1 + test/assets/gerberx3/tokens/g_codes/G03.grb | 1 + test/assets/gerberx3/tokens/g_codes/G04.grb | 1 + .../gerberx3/tokens/g_codes/G04_text.grb | 1 + test/assets/gerberx3/tokens/g_codes/G36.grb | 1 + test/assets/gerberx3/tokens/g_codes/G37.grb | 1 + .../basic/sample-5/source.1/image.png | Bin 0 -> 5790 bytes 20 files changed, 455 insertions(+) create mode 100644 .clang-format create mode 100644 CMakeLists.txt create mode 100644 main.cpp create mode 100644 output.jpeg create mode 100644 output.svg create mode 100644 test/assets/gerberx3/tokens/d_codes/D01.grb create mode 100644 test/assets/gerberx3/tokens/d_codes/D02.grb create mode 100644 test/assets/gerberx3/tokens/d_codes/D03.grb create mode 100644 test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb create mode 100644 test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb create mode 100644 test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb create mode 100644 test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G01.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G02.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G03.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G04.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G04_text.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G36.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G37.grb create mode 100644 test/gerberx3/test_rasterized_2d/reference/basic/sample-5/source.1/image.png diff --git a/.clang-format b/.clang-format new file mode 100644 index 000000000..f0247f1fc --- /dev/null +++ b/.clang-format @@ -0,0 +1,39 @@ +Language: Cpp +BasedOnStyle: LLVM +IndentWidth: 4 +IndentExternBlock: Indent +ColumnLimit: 100 +NamespaceIndentation: All +PointerAlignment: Left +ReferenceAlignment: Left +PackConstructorInitializers: Never +AlignConsecutiveDeclarations: AcrossComments +AlignConsecutiveAssignments: AcrossComments +AlignConsecutiveBitFields: AcrossComments +AlignTrailingComments: true +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseLabelsOnASingleLine: false +AllowShortFunctionsOnASingleLine: Empty +AllowShortIfStatementsOnASingleLine: Never +AlwaysBreakTemplateDeclarations: Yes +IndentPPDirectives: BeforeHash +PPIndentWidth: 4 +AlignAfterOpenBracket: BlockIndent +AllowAllArgumentsOnNextLine: true +AllowAllParametersOfDeclarationOnNextLine: true +BinPackParameters: false +BinPackArguments: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: Always +AlignEscapedNewlines: Left +AlignOperands: AlignAfterOperator +AllowShortEnumsOnASingleLine: false +AllowShortLambdasOnASingleLine: Empty +BreakBeforeBraces: Attach +BreakConstructorInitializers: AfterColon +BreakStringLiterals: true +MaxEmptyLinesToKeep: 1 +QualifierAlignment: Left +SeparateDefinitionBlocks: Always +UseTab: Never +IndentCaseLabels: true diff --git a/CMakeLists.txt b/CMakeLists.txt new file mode 100644 index 000000000..c613edceb --- /dev/null +++ b/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.21) +project(MyProject) + +set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD_REQUIRED ON) + +add_executable(MyExecutable main.cpp) diff --git a/main.cpp b/main.cpp new file mode 100644 index 000000000..2458cc686 --- /dev/null +++ b/main.cpp @@ -0,0 +1,344 @@ +#include +#include +#include +#include +#include +#include + +#define DEBUG 1 + +#ifdef DEBUG + #define LOG_DEBUG(...) printf(__VA_ARGS__); +#elif + #define LOG_DEBUG(...) +#endif + +#ifdef _WIN32 + #include + +struct FileMapping { + HANDLE hFile; + HANDLE hMapFile; + LPVOID lpBase; + LARGE_INTEGER size; + bool is_ok = false; + + FileMapping(const char* filename) { + hFile = CreateFile( + filename, + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_NORMAL, + NULL + ); + + if (hFile == INVALID_HANDLE_VALUE) { + std::cerr << "Could not open file." << std::endl; + return; + } + + if (!GetFileSizeEx(hFile, &size)) { + std::cerr << "Could not get file size." << std::endl; + CloseHandle(hFile); + return; + } + + hMapFile = CreateFileMapping(hFile, NULL, PAGE_READONLY, 0, 0, NULL); + + if (hMapFile == NULL) { + std::cerr << "Could not create file mapping object." << std::endl; + CloseHandle(hFile); + return; + } + + lpBase = MapViewOfFile(hMapFile, FILE_MAP_READ, 0, 0, 0); + + if (lpBase == NULL) { + std::cerr << "Could not map view of file." << std::endl; + CloseHandle(hMapFile); + CloseHandle(hFile); + return; + } + + is_ok = true; + } + + ~FileMapping() { + UnmapViewOfFile(this->lpBase); + CloseHandle(this->hMapFile); + CloseHandle(this->hFile); + } + + void* GetMapping() { + return lpBase; + } + + uint64_t GetSize() { + return this->size.QuadPart; + } +}; + +#elif __unix__ + + #include + #include + #include + #include + +class FileMapping { + int fd; + void* map; + size_t size; + bool is_ok = false; + + FileMapping(const char* filename) { + fd = open(filename, O_RDONLY); + if (fd == -1) { + std::cerr << "Could not open file." << std::endl; + return; + } + + struct stat st; + if (fstat(fd, &st) == -1) { + std::cerr << "Could not get file size." << std::endl; + close(fd); + return; + } + + size = st.st_size; + map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + + if (map == MAP_FAILED) { + std::cerr << "Could not map file." << std::endl; + close(fd); + return; + } + + is_ok = true; + } + + ~FileMapping() { + munmap(map, size); + close(fd); + } + + void* GetMapping() { + return lpBase; + } + + uint64_t GetSize() { + return this->size; + } +} + +#endif + +enum class TokenType { + UNKNOWN, + INVALID, + INTEGER, + SIGN, + G_CODE, + COORDINATE_CODE, + D_CODE, + END_COMMAND, + STATEMENT_BOUNDARY, +}; + +struct Token { + std::string content; + enum TokenType type; +}; + +struct CommandToken : Token {}; + +struct ExtendedCommand : Token {}; + +struct GerberParser { + + class EndOfMapping : std::exception {}; + + class InvalidToken : std::exception {}; + + enum class Result { + CONSUMED, + ABORTED, + }; + + char* gerber_code; + uint64_t gerber_code_size; + uint64_t char_pointer = 0; + + enum TokenType token_type = TokenType::UNKNOWN; + std::vector parsed_tokens_vector; + std::vector buffer; + + GerberParser(char* file_mapping, uint64_t file_size) : + gerber_code(file_mapping), + gerber_code_size(file_size), + buffer(16384) {} + + bool has_next_char() { + return char_pointer < gerber_code_size; + } + + char get_next_char_throw() { + std::optional current = get_next_char(); + if (!current.has_value()) { + throw EndOfMapping(); + } + return current.value(); + } + + std::optional get_next_char() { + if (!has_next_char()) { + return std::nullopt; + } + char next = gerber_code[char_pointer]; + char_pointer++; + LOG_DEBUG("get_next_char(): %c \n", next); + return next; + } + + std::optional get_prev_char() { + if (char_pointer <= 0) { + return std::nullopt; + } + char_pointer--; + char next = gerber_code[char_pointer]; + return next; + } + + bool parse() { + while (char_pointer < gerber_code_size) { + LOG_DEBUG("char_pointer: %llu file_size: %llu \n", char_pointer, gerber_code_size); + try { + parse_next(); + } catch (GerberParser::EndOfMapping) { + return true; + } catch (GerberParser::InvalidToken) { + return false; + } + } + return true; + } + + void parse_next() { + char current = get_next_char_throw(); + switch (current) { + case '*': + LOG_DEBUG("Found * character. \n"); + parsed_tokens_vector.push_back( + {std::string(1, current), TokenType::END_COMMAND} + ); + break; + case 'G': + LOG_DEBUG("Found G character. \n"); + parsed_tokens_vector.push_back({std::string(1, current), TokenType::G_CODE} + ); + break; + case 'X': + case 'Y': + case 'I': + case 'J': + LOG_DEBUG("Found X|Y character. \n"); + parsed_tokens_vector.push_back( + {std::string(1, current), TokenType::COORDINATE_CODE} + ); + break; + case '%': + LOG_DEBUG("Found %% character. \n"); + parsed_tokens_vector.push_back( + {std::string(1, current), TokenType::STATEMENT_BOUNDARY} + ); + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + parse_integer(current); + break; + case '-': + case '+': + parsed_tokens_vector.push_back({std::string(1, current), TokenType::SIGN}); + break; + case ' ': + case '\t': + case '\n': + case '\r': + break; + } + } + + void parse_integer(char current) { + LOG_DEBUG("Found %c character. \n", current); + buffer.push_back(current); + parse_integer_tail(); + + LOG_DEBUG("parsed_tokens_vector %llu \n", parsed_tokens_vector.size()); + parsed_tokens_vector.push_back( + {std::string{buffer.begin(), buffer.end()}, TokenType::INTEGER} + ); + buffer.clear(); + } + + void parse_integer_tail() { + while (has_next_char()) { + std::optional current = get_next_char(); + if (!current.has_value()) { + return; + } + if (parse_number(current.value()) == Result::CONSUMED) { + buffer.push_back(current.value()); + continue; + } + // Don't consume not matching character. + get_prev_char(); + break; + } + } + + Result parse_number(char current) { + switch (current) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + LOG_DEBUG("Found %c character. \n", current); + return Result::CONSUMED; + break; + } + return Result::ABORTED; + } +}; + +int main() { + auto mapping_handler = + FileMapping("C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_" + "codes\\G01.grb"); + char* mapping = (char*)(mapping_handler.GetMapping()); + + auto size = mapping_handler.GetSize(); + GerberParser parser(mapping, size); + parser.parse(); + + for (auto token : parser.parsed_tokens_vector) { + std::cout << token.content << " " << int(token.type) << std::endl; + } + + return 0; +} diff --git a/output.jpeg b/output.jpeg new file mode 100644 index 0000000000000000000000000000000000000000..47756e8a6fad947c24b905e7532ebb8f57a152d0 GIT binary patch literal 10182 zcmeHtcT`j9+HVjLM2aX#2c-xDh*T-kH53a-4ImH!0RaIa)X)?KG&BJz0up*Bk>0xk z(tGH=_g)inH=da@Gw0m1*1g}i?)~fAS?}I!Cp&MRUVcyTL-;Asm4`~IN+1FP5QqSH zf$*cC`ygUMA|fI}V&FtfOni}qoRkDOs4ic=L{3e0^(r+LHTAXY%yidi7-^}g>2K0A zvaqnRv0bBM=VWK)WM*Y!J>P@?_$tXo5=v51N>&9iZP2f(wL10F|Vd$Swm96kh>dARr{X0MHLG06gsld=4U_Ccb)8 zNa5l&4HFV}2O8nGVM(MMiuv`lnmt>bBBqYMm&mTu(K9e|-Qwo?jaO9cuDFDx)cprw zC1n-Whg#Y?x)42m1G5+A7M3rqtYJ>hF0O9w@OSV1`~yCGM1)6tj*No0MLXl~J(I`)uV=M#JB6t!n zAbx8J=E!D`vXIar(IBnA^JSbC9 z#^c-m;@e})Gjfgl^a%Q12~2YG86OO~qs8rd!oDQ2V%zB_?6)UOpU66d3O_bmE~Tvmd+a&Jb8&4!G0-Enw+sDE zgUDxanwvd)nJLui$XD`9#)8eelA?IfTD8d8+d-ehX`cp^hEX7uDjyHJtiC^PrG545 zWtFng1wF;Pi>!LX$LmctcXt~YJu62>^z(*hUv|zz>vnvkmX>m@p9>Cn^W3T27obOc z8+knCe1~l1?)Ty{ItP%NmOnk)xLoMtyHa2#;n2l|p}0VE;+R?TcQVc1 z_d>mipF`R)js1;X;mL(E2F@m$(mk$ZJjwZ@!0FAuEnb^PRvU9d&oaK;&>U4@xZzy4 z;T0gpl%lj2lBTc5pi)oLJ>Xagd!}Gy-1ssX5#~puAy53jU0Pv`%GzN5_Se}LRb_qO zu-On{#4iz1f8m1pX-rg;G>)*;M3Yr!KXP%mayWEbs(FWO-n%^Aah>JXB==RrGTdAO zaxK(;@t7bFY949MM_HYwv*``pQoqa)Z%nh3B+96F zQ>tt9mRD{v2#^hpTzxIL%lv#1`Pq4PqFjg`E6emS98;NDFkcMs=_<3$B74#?IN$3Xbt1qu$_@n0~@vIu%&*;C7YRbHNik( z1qqS`(=>g5uMm>?XK;JO-f5KH2JQL_yY*H}2}0NX8)VB#@g{utVEV_)co3*H{bG?= zF^`K3{_&n`@|;094NMi^&6A)k2qlO&!PLG8n47oIewz9@2$sD?CLYyKj- z>^YAIB{*JTD!t!*C{xS0V1dwkA@qc)nSCFkNs6U_4=A`>+PK9D}_arR#nn3_~c6f za=N|0a?f=iWu{srJ`!1-N%WEVVm-)j<-6sm*osOZBQLw34!sT`h)Myf@M&t`VMtL) z$VBZHmADZ4C!0mZnPLx^cE9-pZJXP>AHPv;h?c0nn3GO5hm^Lhcr0dMij2huE0#-O z@IZekjzliO=Z=P&`Q9+}wYtGPZ z*4chb+6Cv^xx3E{Ik!~IA}MtOtoMbny(sFPYO_&n$YV81+QyE+A~I(YcGF9z=AXIS5xVJjQP%?qSG*h&OEBmafGliHhxNs`r#{)kFtyS!su6te#m$#@=SfJ<`?yd#TR) z3BOE^6`|=#4J*}gCSt4WTS`hDh&hn7g*04IJ-523bE~@wDZStCDg*fJynWH?>Q(SS zZW#I7vY;%9PC`lw_*ahPmY|TNNPh;E7LIOAAfjbS_b}@e-DY2j4Ox-&Q?a_OG3Nwv z&NyqqEo4+@#xMpYDGf3rQX zy)d96-&tcuyIC2A^nC73n*~iYFY*exPQ6n*;W@~gA)_ytyGgymHeB(zFzb8X4)|<) zvwLrDciBL!!EI-)BPZ`0w`~Q1FUf?RDx3dgYli3#>ANgH?e?HT(>$jJSu!DB7hQKkQ?r&c3ep=CtJWNHe+E z74f{gYH-z*^No~KlgtO&y)ik2s}t8sEp)Yi2gb|Wn!I-xNCGGCTBH|$9Qc@FZZ(=) z;&ygh#?*YC`jG2_UF5E;7gFR6k>1S@4&7!B3avA#m72|&5_u@wOe>LBdfJuRpuL!K zokj!vn&l6volB0u_{;Q{vz|o#pQUyj!$FH1*m9hRv8D0Hsb<}+^)g+etk-p{0S*#! zZ`rNU5*ouQ2- zQirH#kgbEHjEx9OB1|&9_n8W>yd)l_zt@$(@#V z=_DhdsO@l;;Hk!@ zT^=2M%$mpS%o}Um-jIyoYrRLMU+ibQo?NQ<@cUbugfc?r{?8iF^wjVI*(a5ie(q{1 z!eiES;-sYd=9W+4Q{nDtT{Js7ZbtH+mT_&;%Dj!foIJ44e5fH1-a&s|03EKvc^%{B zB(UT%nQy7@@Sr5Jd7G3YFFfexm)h0w+C47xecp%=VG36q#et;>VLT|zZuN`+*eB1u!_)o>Xa1ue?^lP8m{Y(_6~HtQ zfA}wT{ci9CMwrIUW5OqR5R?KBO3Zs*JcN~sY#09&g&Qdl4sznW;T%*jHA7<2t z4dJghekaw+lUy@)y!97#{jN4;=U|3$-|V2mYi~%h63G~&f-|g%s(+O2s&KU5FRn0? zc5Wgu#8A)3++q=!eF4euh&V%^8E|_$G(Uzl!Fr3U?$3mu<`tEEH-FtX<-d4ZgM_xx zcQ~TfISMPUTS!`Pb0W2%Y;iN+BAJ4DGVUy#mCCf!GIm4a3f6J2;MKWp4z{!n7p`kl zChpfWETrbvk{RMMBxSQ4xRw0^1A%mbJIR=n<9NN}ogSO=N(vz{qhGfJ#uMVygz%tH@5nX?c#1@4aE`q0`deRLczjkJmH?a6-8wfLn2v$P z(W4cWT>*L~+3o^v5BhT%mVS`d{B$<^=tY#;{Z=I%56_y68n~ASY{tRqTlZon9@KHd z*qb6%P^3|uV?Ai_!UOf9E#8!~9+q&lH?_NN$Xc9hv{K-GT++bg_@J5&t6sh@@#AT% zzQw0D(NNCdfv+FAw?$iz*1T;XbQ^gbqqSKZCf{DCf_!lOE0C&j&_%C8_6k9BlgQ_er2Di7?$^0T zo*vk{ohkdp^v-3qR|M*_^7qviiAL}Ai8P+JY)Q)I^ht+IB)gVEXS6Kp_d^vnz;HNw6XHXXjuNeW9bvaF#9uVBcg_1% z+{ZUo1#q^Fkg*rL6U8|jjzO0jdaBNTv!9>yJQJs0&oD0_YZw6rog4VKaul;=j}3~d-IBu*S_-13+~Nkzpioh%rlz{-OmUd z98+`1r=QN2Z+4k%wxAd{R%SNL#M-;~11eeL7YVyz-H?wlmZDpSes|zdUm5bd7joJ;UP527GGhtQWQ6IFFxK<{B|s<-#crfOOP zO8)%{KJLcYeyQRbN{(`c(-xf12Y|z2d0`kn5Glh*xdl&D-#7AEPPh9@u z?h}1%cwl9=tnS6o(PKczcq@@u|^Flc-+ktTg9wfiob-Fb0&{;X^JK2y> z=@aD>mR)jyI`{A(k)PjZ(;(v^9{?2{ zP+mbxZ+mBou_(V#KD>VXK$1wPi=TAuwcH&zW;?3PS5mZ6X>5I`eQK*FTSgr{zc-sR z53Bm2j!TH^Lm#ttLj4_Zb2aBFuOlJxFBR0UYD)XWsm(s~nV|%iX{6Dc)i(+=iOJc* z4%UwnV=MAki!?cMKcIpb^vv~`ePzJ6y<0K`KXw<~}sX zs{UY}T9D+*J@#oe9X5WQ;S^VAd#g*a3oj5tG9AJc1;OA56p3PJS9 znu)}zDJpw=PhGXFETa{zOLp_)rT1vad41soIQSOtSI-cW_|EF-M=EFh2j7NB`wla5 z0Mo4!>Heo5{SF0i6(`u(yid49?wHj2ZQc0Y^t$7uwx7l@dnL1|{^^)ARHJ$N1Sy%p zQDlb-()m2E$9%;L45ue=N~biip{qEnh#!g{@5Wg^ISgDc{5!-jjh>1gygGROZ&PwRT5>_Xp)BvD>?l=82Dd;%gR>Y{4 ziWohl^VNav%6(%wL44^H4E+2xoY{14El{WRDC2t|Ba_VC=bepcUe^4HBOn z>-92ED5a41`!Qm9Hgxb@WR2}N|FN(;U_9vBhL@TU)Dj||5|G4Xb1Modkl{y8v(oR{ zV}u8(3}LEtd;LrMgg+?ge(|T@#mdIJO7D)+w)TKMQG9d|T3PGRGlDhE6f&Wrry5|| zcI!VS_19Di2GNS84g5%Lu(3+`jl@~Ty^|_)HYxLE27^RpMrQ02{jXvHK>{;DY2y`! z37t$P*w>_cOT@O%+|Vk~t}(j`L1yitk|(wu?l{^Njm~_h$a3^5&Tua4O^S@I{r$eA zLD4L;tGAL~5iGIFXY5D-_}Karku%@BK8HA;rm70Wx*=G1q@G^7fUUjjaXH2*(5uEA z%kAPY{}t%f#%2obrJ)%=s%?{Fr~U87+v;M~0n@mSXiE3_7!{`K1H~v#NA*4%k2{G* z6f=eEjs>cG;JDPr9JUuU4O7xFwD- z7Z2KOz=QnQ{&r`W9BT6D7&6BCKhOOqxpeycaJeqHL?_&6Zp$24NFK(+vB*+~BCBR< zg6`ZN_oK#L=9d-*@6Y&B)L z5VrUZN{5)Yyt0hH1MQ;i9&vm0DDWaotbWv&p#|HPUFVIAZ3r8 zym?zW&%ap-U3r@!0H{$At_kNo-*WR$;(vnky58tqz2)bZzg!CQ*A-LBCutJ?NX64)<6FgZ(%IFv_kOS83RXMEgGb2f8fCJayKsy~om zK(o7_T^DHGoIiWDu5W9BPFng5;YvHI6IttjPI_rZMWJrFpViBqWDAu(=w@fz>&AKY z!7OiMHJtvHEx9Ic3_ah!kQ0#gXy(CZ4X`u)ABmTN_3wxSTs)|D?pXyM^u}k&#ux@= z`7XB!G?`@B;SF4iu{QG=ifO&?l!|#!>n-)rYVub-LwOE9rcxL{s?+Z#Q1U3@W@c zOfp{>7^eEc(Kjn$?>Z%!G0__Oy))zwpfwRt*Zo6U>5@3(RiN;MuuLPbNqjP}4&P9! zQ;o|$Xk?w8+&x&cS^gH&>Q)BpJFQ6hiB@znZ@U3-m-+p-zx5eW2+(dV&C+_hF&#qW zK$2=>RC2^43+J$YG$;m7$T6c#G2_au^K6_q70Is@Jb^@aU27C+0p3+9kN}OIYe7I|q1Ly}{df-8FEkUt-24WdX)~{V&eryoq zXuBP1sH{(IL@)fU^_wIS9AQwuB{pjKazZF&dC~P`ydaBIvAog(#mJkoRZ?GGCv~d#;at}R% zEOf#6&#vv`t^k$WH0y`+CIXUv3CLKfC%eruaJf6HlM4T^}uR zFW|Lna)=1Rxect6iSxHo7FY5`JctEn)7qKk@t6~-V~O(&86!Hj;Tl^3 zHrrPN4D}zFZd-BfG5Mqq<~#C)I@`J1ZKG#;P_|#qto*6=2`z{k8#-%<^L7qOT{h(= z8Aoa;T06~h4FTF(5IYYSQg~2++zuXu2ff0B4y;#m=<1_Ft&);H?cQT}vquDylc$<} zrgL~~#Qo?JX(58AtFoMQZl2u=P| z^ht@zkZ`;u5i?@{^Iu1V=hsZ7WYH@PIO^nvkmZoOX`EbnGs{bGEXTZu* zWHW9ta^r2kggYTQs@oB^zkGzK&>mOKyge)u9%P8T-*>J={$u%HN`%c&Vc#YJv;!Ho z+n_>fu7+Jr(cl^XJEEeHc*`JS+3Ha&Vt3byXKG<`KWpF}1brhsQ zwO}M!`P?xP)z2aow0jf}>#ZOH2h&n3W(V=UeM6Fnh7g3Qs;X0$+V4tiOZwVGMNixY z{P3y!A2|3$<-g@DH>vgFVm{zZA+r&8};?|EcI-`?onyx4jJRc#jUo^ zbJRTnrc@5bS=wTG0+TvLI~Ghv?jiD$_i@Sq%(@DRh@G!}7Ah*sb#&|#Hu~;|2c2c1 zbQ7X`z{vwAIi!B+Wk2CUBA4W0GxK(bCFX~Anv~{zY@y6jrwAmlOauJL-d?8hbE&^- z40$A&-(mH~!zYzDOKn?nsQ!J{1svf}jV)$w;X0OR$do(JNeJh6LM;HK;_ z>WN=N{b}TEL{a=%>KQ$%?fXb#@7~OckB@-n4bm2g(;=CFBRD1w(!PE1w*J%5930iN z+sXTxXUeD^FzsG*_#fHmKV4tAwVi=0(MKd_{>6)8|27$M1qOpv%~-x&%kn6nODAXN zkkeM^`TYIYfM(5;33W3>Mqs_90{g98T09wffBD!8-nzrP;wd*19dO8)BAvf=^{H^% z=)ykBE@pLfd8cYd2$5p64wSlbyVCI%#3hUB8}}am_Nk4g=c7p&XK;nE;EgkOBvdWd zdTGb_EszCnaN{NW$89_ce*++wpJAg2JH0NmtV$u9=0dS zXI%O=7YiN^hYSoqop`(IwgGvDXRsGaFlsskwpE h@pyzyDU@UB^^o;L>S%!yB`^@hRfo>a)dxTPzW`AYE%g8Z literal 0 HcmV?d00001 diff --git a/output.svg b/output.svg new file mode 100644 index 000000000..9ea00d187 --- /dev/null +++ b/output.svg @@ -0,0 +1,40 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/test/assets/gerberx3/tokens/d_codes/D01.grb b/test/assets/gerberx3/tokens/d_codes/D01.grb new file mode 100644 index 000000000..faddad0d5 --- /dev/null +++ b/test/assets/gerberx3/tokens/d_codes/D01.grb @@ -0,0 +1,5 @@ +%FSLAX26Y26*% +%MOMM*% +%ADD100C,0.5*% +D100* +X0000000Y2000000D01* diff --git a/test/assets/gerberx3/tokens/d_codes/D02.grb b/test/assets/gerberx3/tokens/d_codes/D02.grb new file mode 100644 index 000000000..7999c2744 --- /dev/null +++ b/test/assets/gerberx3/tokens/d_codes/D02.grb @@ -0,0 +1,4 @@ +%FSLAX26Y26*% +%FSLAX26Y26*% +%MOMM*% +X0000000Y2000000D02* diff --git a/test/assets/gerberx3/tokens/d_codes/D03.grb b/test/assets/gerberx3/tokens/d_codes/D03.grb new file mode 100644 index 000000000..13043e532 --- /dev/null +++ b/test/assets/gerberx3/tokens/d_codes/D03.grb @@ -0,0 +1,5 @@ +%FSLAX26Y26*% +%MOMM*% +%ADD100C,0.5*% +D100* +X1500000Y1500000D03* diff --git a/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb b/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb new file mode 100644 index 000000000..a74e8d5f5 --- /dev/null +++ b/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb @@ -0,0 +1 @@ +%FSLAX26Y26*% \ No newline at end of file diff --git a/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb b/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb new file mode 100644 index 000000000..d6767a154 --- /dev/null +++ b/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb @@ -0,0 +1 @@ +%FSLIX66Y66*% \ No newline at end of file diff --git a/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb b/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb new file mode 100644 index 000000000..acd039aff --- /dev/null +++ b/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb @@ -0,0 +1 @@ +%FSLAX66Y66*% \ No newline at end of file diff --git a/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb b/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb new file mode 100644 index 000000000..1ad531c84 --- /dev/null +++ b/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb @@ -0,0 +1 @@ +%FSTAX66Y66*% \ No newline at end of file diff --git a/test/assets/gerberx3/tokens/g_codes/G01.grb b/test/assets/gerberx3/tokens/g_codes/G01.grb new file mode 100644 index 000000000..7ce799f27 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G01.grb @@ -0,0 +1 @@ +G01* diff --git a/test/assets/gerberx3/tokens/g_codes/G02.grb b/test/assets/gerberx3/tokens/g_codes/G02.grb new file mode 100644 index 000000000..5c7088657 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G02.grb @@ -0,0 +1 @@ +G02* diff --git a/test/assets/gerberx3/tokens/g_codes/G03.grb b/test/assets/gerberx3/tokens/g_codes/G03.grb new file mode 100644 index 000000000..739ac51f4 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G03.grb @@ -0,0 +1 @@ +G03* diff --git a/test/assets/gerberx3/tokens/g_codes/G04.grb b/test/assets/gerberx3/tokens/g_codes/G04.grb new file mode 100644 index 000000000..76d830703 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G04.grb @@ -0,0 +1 @@ +G04* diff --git a/test/assets/gerberx3/tokens/g_codes/G04_text.grb b/test/assets/gerberx3/tokens/g_codes/G04_text.grb new file mode 100644 index 000000000..e4bde58f8 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G04_text.grb @@ -0,0 +1 @@ +G04 Comment text :D * diff --git a/test/assets/gerberx3/tokens/g_codes/G36.grb b/test/assets/gerberx3/tokens/g_codes/G36.grb new file mode 100644 index 000000000..eb99e13d2 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G36.grb @@ -0,0 +1 @@ +G36* diff --git a/test/assets/gerberx3/tokens/g_codes/G37.grb b/test/assets/gerberx3/tokens/g_codes/G37.grb new file mode 100644 index 000000000..d5619b082 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G37.grb @@ -0,0 +1 @@ +G37* diff --git a/test/gerberx3/test_rasterized_2d/reference/basic/sample-5/source.1/image.png b/test/gerberx3/test_rasterized_2d/reference/basic/sample-5/source.1/image.png new file mode 100644 index 0000000000000000000000000000000000000000..8eb4f0e0153190cf6e689d1869206deac9afa870 GIT binary patch literal 5790 zcmeHJ`#+TV7eCKnjLW!X#h4gr+P>xL4^wLECCOWSnWlx(rO zs9Yv(($$nUvx}q%i4{fCMk|pp-}6|n?|y&#{sCX}!!w_AKIeYk=RD7M9;;U{6d8u7j1_j}t##i&3&XG)L9+fXjz6v@!FWslZGHW-2gKftd=-RN()l0%F6H z@%rL2qnhS{%E|2?KkX=8>d3ah$`&W&$ese4PWbq1qA6&W^?abAt`3vpph$6qfSSmAQ@^d59;zg8NJl^PP(}q z9F%`4>7~AUF3OO~GLdOfc)>a6Mextwv3z`u1|Y0{yMu}bdUjfXB-Yuw+X~}WYkOR5 zS8L0tymk9o;$AZ8!xN}yX0+=!O7y^cKkZXU%w_aWyB1iwFRK$ z`{mfP5|X5PIX#bV00MCADfpaVHLd ztmQVttQScjXUhrwSX%iAFhqAb|`RkK0MsqaoASw|f8%CQeNDm-6L+RBr|H zh4RJd7W_^WnAO*8g_gTfa$$jsu#oB%a*(HPk6Jbgz%i)`(NDp1yc)o+x!A8fE&=>t zisg+gWe0rl-DyDHVPHuMQO$S(rOQnw_94sK0Wa0t@brcYF9>sj5SPd!DKsg>Ot%!{$#zv z>1E6pdIo6nTo^!28H1Qx#zbTo5HRSyX5Xbyc@;p#EOs;Ve z5EcYP98}{hK^y$l!323$n%ldtX#8z8mugrw9U>mL_17HcaOPh{#*4@+=bJTen~sXF z@rIvwB&^u4lj$uFYNw!6#ogq6fy$LL9R%F#QEX@C_-U{G@O0hb>v-MV?y@wrpjaU- zE}(cV?|d@AyP|D_WxQsV?=j!<>t3ne3{cBNFglElexNzN+3TPQ5OV7^<=qbN=JoCwd z&~3?%4cmNNUvoixbGJq08dgA^QH1-ELOU#C6fxy<8 zRnf}w-@i!I95!4s|B5CEmi(G?byLn(<|2=~M!ZsU-e(}tbJo4upu2VdLXW4fqD$qj zAWOw7r@Vo+weDXYZ6VR6yl5>DBzozVcQ7xdX@(TXwD-H#$$|Ff%R$9nYl{du7a@<~ zkxIb*rZa06wQGl3Ca(!prZoltX~Jwc9Ud;^WsBV?ykzlH=?_Z2(*XWgyQm)dZk8#G0V1@6qMA5b!XY z38cM~zOX=^4Nx!(k?VwJ`HS;)-O1z5dPfW7=KvU%5eQ$$Wz+E0;>l**Y)9u8g!{RC zC-wB6LIV9|Ab#BDc8S;~jR4v|;sniA$8`Wt5p8bsy%_=}L_LnPcpsHT7^3Xh5ekTl z5kRvtwGi$e2kxtLG8Sxj_$><{8dm^g?(dy!t_02PM@$MAP>6ns6O{6u8m$Et7JP+^ zNk|!4ixj|0KGH`P<4FajXtR(xXpbzPeUwZ!@-&Xw$ck_a3|iv2YFti7QEM06d6HTK z67|Sx%>Ix%>b;nUS3t^Qa0;zROIDy%V=Way8)fGNZ_1A7CrS)7(Ehw&Opqf_06fO& zgrd)}0-KInIRdUc=tmvACGhrC7(VV{`kj(2%G?NE9Xm-m|AfwRFJW|Q2aw7PQX(kP zXY(o5sYrYRDHkfpM0XstFrTt*f-zTwmQyK4{ih7#g{I*|1rud*De0*QMLCq0xPL-9 zQTq#e6WgKI-ALW(llf)j8SA5yd@Au+Dv#NepD0|zhz%Mtcb^DU5{D!8w?BJSbqty+ z(`uAZ+>R@%48b%lt=D6Ye&7A$*SS_C*BOPpTFe;hN>b0M5IwTm@N}O`(NigF@~X7Z zjLYAS_;_4yE0671iKm^EA8o7^{-_R2yYu`DUxR#e)VMH6y%7DUyS%7K z%KMhB%)UL~$pPb%hJh<1NemD=UuKdy%+BZjp)XJZ-_(#-?WOOBXUM&%0a;~lKkj6w zP-NL67Gy0OJ*7b|=%}sS2{6YJ!vA5yMm1 z9w<_QWy%tr2bd6vR!TobjSP&Be<&E+Da;g71@7v)?td{q zr0!vodyTQ!JH#k$Z>Po;J;*4jp^FVurG5S_ih{+dc9?!^YcMqG9r{vP%F`dvNbMfK zi37mj9aBjco%XJZ&r~DhbS9H|jf%jz9z^i}G}yurb6N4|J!S>LFZMg%6!G;?ulBs& zZXPyt^xPPo-@jhJ@+D+l2AcrxPL7j&o9U zFpRcmQ}9y2L7E;?oUg$%Be)}W;5Xe@w+;}HXHIqCzp*+EH(Wb;u`oE)cHoifM`6Pc z>B|$z*`n+*0s{On9fW4=*IFCw$_IULq>Kv|>Ow(L`vHv4AeSijv;fj=+WR^si1*zy zFfr6u)nqMLJWK;gk0}&3Fos!oLnnOdy4W{Yi)~)Zf^g07X7EmjlJUz%ISHN1hY65e z_$?TIVg1gOvLC&WIfoxQoXZ7}7v#S#!LoE>H0QomTsx`ynn0{R-a!=dPT%;eesRuT zK5q9{gFNy~dUTatXvn0&(t#xG%Ip(Am;RA75&o_!NjefdQW Date: Wed, 3 Jul 2024 01:01:49 +0200 Subject: [PATCH 2/6] Redesign C++ based tokenizer --- main.cpp | 302 ++++++++++-------- test/assets/gerberx3/tokens/g_codes/G01.grb | 2 + test/assets/gerberx3/tokens/g_codes/G02.grb | 2 + test/assets/gerberx3/tokens/g_codes/G03.grb | 2 + test/assets/gerberx3/tokens/g_codes/G04.grb | 2 + .../gerberx3/tokens/g_codes/G04_text.grb | 1 + test/assets/gerberx3/tokens/g_codes/G36.grb | 1 + test/assets/gerberx3/tokens/g_codes/G37.grb | 1 + test/assets/gerberx3/tokens/g_codes/G70.grb | 2 + test/assets/gerberx3/tokens/g_codes/G71.grb | 2 + test/assets/gerberx3/tokens/g_codes/G73.grb | 2 + test/assets/gerberx3/tokens/g_codes/G74.grb | 2 + 12 files changed, 195 insertions(+), 126 deletions(-) create mode 100644 test/assets/gerberx3/tokens/g_codes/G70.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G71.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G73.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G74.grb diff --git a/main.cpp b/main.cpp index 2458cc686..5c8a624ae 100644 --- a/main.cpp +++ b/main.cpp @@ -1,8 +1,7 @@ -#include #include #include +#include #include -#include #include #define DEBUG 1 @@ -140,7 +139,13 @@ enum class TokenType { INVALID, INTEGER, SIGN, - G_CODE, + G01_CODE, + G02_CODE, + G03_CODE, + G04_CODE, + G36_CODE, + G37_CODE, + STRING, COORDINATE_CODE, D_CODE, END_COMMAND, @@ -158,9 +163,23 @@ struct ExtendedCommand : Token {}; struct GerberParser { - class EndOfMapping : std::exception {}; + class EndOfFile : std::exception {}; - class InvalidToken : std::exception {}; + class InvalidToken : std::exception { + + std::string message; + uint64_t failure_char_index; + + public: + InvalidToken(uint64_t failure_char_index) : + failure_char_index(failure_char_index) { + message = std::format("Invalid token at index: {}", failure_char_index); + } + + const char* what() const noexcept override { + return message.c_str(); + } + }; enum class Result { CONSUMED, @@ -169,145 +188,152 @@ struct GerberParser { char* gerber_code; uint64_t gerber_code_size; - uint64_t char_pointer = 0; - enum TokenType token_type = TokenType::UNKNOWN; std::vector parsed_tokens_vector; - std::vector buffer; + std::vector token_buffer; GerberParser(char* file_mapping, uint64_t file_size) : gerber_code(file_mapping), gerber_code_size(file_size), - buffer(16384) {} + token_buffer(16384) {} - bool has_next_char() { - return char_pointer < gerber_code_size; + bool parse() { + uint64_t current_char_index = 0; + try { + while (current_char_index < gerber_code_size) { + current_char_index = parse_next(current_char_index); + } + } catch (GerberParser::EndOfFile) { + return true; + } + return true; } - char get_next_char_throw() { - std::optional current = get_next_char(); - if (!current.has_value()) { - throw EndOfMapping(); - } - return current.value(); + std::string make_substring(uint64_t begin_token_index, uint64_t current_char_index) { + return std::string(gerber_code + begin_token_index, gerber_code + current_char_index); } - std::optional get_next_char() { - if (!has_next_char()) { - return std::nullopt; - } - char next = gerber_code[char_pointer]; - char_pointer++; - LOG_DEBUG("get_next_char(): %c \n", next); - return next; + void make_token(uint64_t begin_token_index, uint64_t current_char_index, enum TokenType type) { + parsed_tokens_vector.push_back({make_substring(begin_token_index, current_char_index), type} + ); } - std::optional get_prev_char() { - if (char_pointer <= 0) { - return std::nullopt; + uint64_t parse_next(uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + throw EndOfFile(); } - char_pointer--; - char next = gerber_code[char_pointer]; - return next; + char current_char = gerber_code[current_char_index]; + + switch (current_char) { + case '*': + return parse_asterisk(current_char_index, current_char_index); + case 'G': + return parse_g_code(current_char_index, current_char_index + 1); + case ' ': + case '\t': + case '\n': + case '\r': + return current_char_index + 1; + default: + throw InvalidToken(current_char_index); + } + return current_char_index + 1; } - bool parse() { - while (char_pointer < gerber_code_size) { - LOG_DEBUG("char_pointer: %llu file_size: %llu \n", char_pointer, gerber_code_size); - try { - parse_next(); - } catch (GerberParser::EndOfMapping) { - return true; - } catch (GerberParser::InvalidToken) { - return false; - } + uint64_t parse_asterisk(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; } - return true; + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '*': + make_token(begin_token_index, current_char_index + 1, TokenType::END_COMMAND); + return current_char_index + 1; + } + throw InvalidToken(current_char_index); } - void parse_next() { - char current = get_next_char_throw(); - switch (current) { + uint64_t parse_string(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + make_token(begin_token_index, current_char_index, TokenType::STRING); + return current_char_index - 1; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { case '*': - LOG_DEBUG("Found * character. \n"); - parsed_tokens_vector.push_back( - {std::string(1, current), TokenType::END_COMMAND} - ); - break; - case 'G': - LOG_DEBUG("Found G character. \n"); - parsed_tokens_vector.push_back({std::string(1, current), TokenType::G_CODE} - ); - break; - case 'X': - case 'Y': - case 'I': - case 'J': - LOG_DEBUG("Found X|Y character. \n"); - parsed_tokens_vector.push_back( - {std::string(1, current), TokenType::COORDINATE_CODE} - ); - break; case '%': - LOG_DEBUG("Found %% character. \n"); - parsed_tokens_vector.push_back( - {std::string(1, current), TokenType::STATEMENT_BOUNDARY} - ); - break; + make_token(begin_token_index, current_char_index, TokenType::STRING); + return current_char_index - 1; + default: + return parse_string(begin_token_index, current_char_index + 1); + } + throw InvalidToken(current_char_index); + } + + uint64_t parse_g_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { case '0': + return parse_g_code(begin_token_index, current_char_index + 1); case '1': + if (is_a_number(current_char_index + 1)) + return begin_token_index; + make_token(begin_token_index, current_char_index + 1, TokenType::G01_CODE); + return parse_asterisk(current_char_index + 1, current_char_index + 1); case '2': + if (is_a_number(current_char_index + 1)) + return begin_token_index; + make_token(begin_token_index, current_char_index + 1, TokenType::G02_CODE); + return parse_asterisk(current_char_index + 1, current_char_index + 1); case '3': + if (is_a_number(current_char_index + 1)) { + return parse_g3_code(begin_token_index, current_char_index + 1); + } else { + make_token(begin_token_index, current_char_index + 1, TokenType::G03_CODE); + return parse_asterisk(current_char_index + 1, current_char_index + 1); + } case '4': - case '5': - case '6': - case '7': - case '8': - case '9': - parse_integer(current); - break; - case '-': - case '+': - parsed_tokens_vector.push_back({std::string(1, current), TokenType::SIGN}); - break; - case ' ': - case '\t': - case '\n': - case '\r': - break; + if (is_a_number(current_char_index + 1)) + return begin_token_index; + make_token(begin_token_index, current_char_index + 1, TokenType::G04_CODE); + if (!is_asterisk(current_char_index + 1)) { + current_char_index = + parse_string(current_char_index + 1, current_char_index + 1); + } + return parse_asterisk(current_char_index + 1, current_char_index + 1); } + throw InvalidToken(current_char_index); } - void parse_integer(char current) { - LOG_DEBUG("Found %c character. \n", current); - buffer.push_back(current); - parse_integer_tail(); - - LOG_DEBUG("parsed_tokens_vector %llu \n", parsed_tokens_vector.size()); - parsed_tokens_vector.push_back( - {std::string{buffer.begin(), buffer.end()}, TokenType::INTEGER} - ); - buffer.clear(); - } - - void parse_integer_tail() { - while (has_next_char()) { - std::optional current = get_next_char(); - if (!current.has_value()) { - return; - } - if (parse_number(current.value()) == Result::CONSUMED) { - buffer.push_back(current.value()); - continue; - } - // Don't consume not matching character. - get_prev_char(); - break; + uint64_t parse_g3_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '6': + if (is_a_number(current_char_index + 1)) + return begin_token_index; + make_token(begin_token_index, current_char_index + 1, TokenType::G36_CODE); + return parse_asterisk(current_char_index + 1, current_char_index + 1); + case '7': + if (is_a_number(current_char_index + 1)) + return begin_token_index; + make_token(begin_token_index, current_char_index + 1, TokenType::G37_CODE); + return parse_asterisk(current_char_index + 1, current_char_index + 1); } + throw InvalidToken(current_char_index); } - Result parse_number(char current) { - switch (current) { + bool is_a_number(uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return false; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { case '0': case '1': case '2': @@ -318,26 +344,50 @@ struct GerberParser { case '7': case '8': case '9': - LOG_DEBUG("Found %c character. \n", current); - return Result::CONSUMED; - break; + return true; } - return Result::ABORTED; + return false; + } + + bool is_asterisk(uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return false; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '*': + return true; + } + return false; } }; int main() { - auto mapping_handler = - FileMapping("C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_" - "codes\\G01.grb"); - char* mapping = (char*)(mapping_handler.GetMapping()); - auto size = mapping_handler.GetSize(); - GerberParser parser(mapping, size); - parser.parse(); - - for (auto token : parser.parsed_tokens_vector) { - std::cout << token.content << " " << int(token.type) << std::endl; + std::vector paths = { + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G01.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G02.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G03.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04_text.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G36.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G37.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G70.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G71.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G73.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G74.grb", + }; + for (auto path : paths) { + auto mapping_handler = FileMapping(path.c_str()); + char* mapping = (char*)(mapping_handler.GetMapping()); + + auto size = mapping_handler.GetSize(); + GerberParser parser(mapping, size); + parser.parse(); + std::cout << path << std::endl; + for (auto token : parser.parsed_tokens_vector) { + std::cout << token.content << " " << int(token.type) << std::endl; + } } return 0; diff --git a/test/assets/gerberx3/tokens/g_codes/G01.grb b/test/assets/gerberx3/tokens/g_codes/G01.grb index 7ce799f27..f06a06440 100644 --- a/test/assets/gerberx3/tokens/g_codes/G01.grb +++ b/test/assets/gerberx3/tokens/g_codes/G01.grb @@ -1 +1,3 @@ +G1* G01* +G001* diff --git a/test/assets/gerberx3/tokens/g_codes/G02.grb b/test/assets/gerberx3/tokens/g_codes/G02.grb index 5c7088657..9d6ca973d 100644 --- a/test/assets/gerberx3/tokens/g_codes/G02.grb +++ b/test/assets/gerberx3/tokens/g_codes/G02.grb @@ -1 +1,3 @@ +G2* G02* +G002* diff --git a/test/assets/gerberx3/tokens/g_codes/G03.grb b/test/assets/gerberx3/tokens/g_codes/G03.grb index 739ac51f4..c7bfeb5ea 100644 --- a/test/assets/gerberx3/tokens/g_codes/G03.grb +++ b/test/assets/gerberx3/tokens/g_codes/G03.grb @@ -1 +1,3 @@ +G3* G03* +G003* diff --git a/test/assets/gerberx3/tokens/g_codes/G04.grb b/test/assets/gerberx3/tokens/g_codes/G04.grb index 76d830703..836f326ca 100644 --- a/test/assets/gerberx3/tokens/g_codes/G04.grb +++ b/test/assets/gerberx3/tokens/g_codes/G04.grb @@ -1 +1,3 @@ +G4* G04* +G004* diff --git a/test/assets/gerberx3/tokens/g_codes/G04_text.grb b/test/assets/gerberx3/tokens/g_codes/G04_text.grb index e4bde58f8..5dde3b7a8 100644 --- a/test/assets/gerberx3/tokens/g_codes/G04_text.grb +++ b/test/assets/gerberx3/tokens/g_codes/G04_text.grb @@ -1 +1,2 @@ G04 Comment text :D * +G04Comment text :D* diff --git a/test/assets/gerberx3/tokens/g_codes/G36.grb b/test/assets/gerberx3/tokens/g_codes/G36.grb index eb99e13d2..53b5748d9 100644 --- a/test/assets/gerberx3/tokens/g_codes/G36.grb +++ b/test/assets/gerberx3/tokens/g_codes/G36.grb @@ -1 +1,2 @@ G36* +G036* diff --git a/test/assets/gerberx3/tokens/g_codes/G37.grb b/test/assets/gerberx3/tokens/g_codes/G37.grb index d5619b082..6b7cf47d4 100644 --- a/test/assets/gerberx3/tokens/g_codes/G37.grb +++ b/test/assets/gerberx3/tokens/g_codes/G37.grb @@ -1 +1,2 @@ G37* +G037* diff --git a/test/assets/gerberx3/tokens/g_codes/G70.grb b/test/assets/gerberx3/tokens/g_codes/G70.grb new file mode 100644 index 000000000..e11c6a974 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G70.grb @@ -0,0 +1,2 @@ +G70* +G070* diff --git a/test/assets/gerberx3/tokens/g_codes/G71.grb b/test/assets/gerberx3/tokens/g_codes/G71.grb new file mode 100644 index 000000000..f14637a2d --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G71.grb @@ -0,0 +1,2 @@ +G71* +G071* diff --git a/test/assets/gerberx3/tokens/g_codes/G73.grb b/test/assets/gerberx3/tokens/g_codes/G73.grb new file mode 100644 index 000000000..c5bf6da0d --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G73.grb @@ -0,0 +1,2 @@ +G73* +G073* diff --git a/test/assets/gerberx3/tokens/g_codes/G74.grb b/test/assets/gerberx3/tokens/g_codes/G74.grb new file mode 100644 index 000000000..d6cc6c2b6 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G74.grb @@ -0,0 +1,2 @@ +G74* +G074* From 0c620c2061d8a5ecbf4a9fafe6cbecb3e12b8348 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wi=C5=9Bniewski?= Date: Thu, 4 Jul 2024 00:42:10 +0200 Subject: [PATCH 3/6] Add G codes and D codes to C++ tokenizer --- main.cpp | 373 +++++++++++++++--- test/assets/gerberx3/tokens/d_codes/D01.grb | 5 +- test/assets/gerberx3/tokens/d_codes/D02.grb | 3 - test/assets/gerberx3/tokens/d_codes/D03.grb | 4 - test/assets/gerberx3/tokens/d_select/D11.grb | 2 + test/assets/gerberx3/tokens/d_select/D12.grb | 2 + test/assets/gerberx3/tokens/d_select/D301.grb | 2 + test/assets/gerberx3/tokens/d_select/D999.grb | 2 + test/assets/gerberx3/tokens/g_codes/G54.grb | 2 + test/assets/gerberx3/tokens/g_codes/G55.grb | 2 + test/assets/gerberx3/tokens/g_codes/G73.grb | 2 - test/assets/gerberx3/tokens/g_codes/G75.grb | 2 + test/assets/gerberx3/tokens/g_codes/G90.grb | 2 + test/assets/gerberx3/tokens/g_codes/G91.grb | 2 + 14 files changed, 327 insertions(+), 78 deletions(-) create mode 100644 test/assets/gerberx3/tokens/d_select/D11.grb create mode 100644 test/assets/gerberx3/tokens/d_select/D12.grb create mode 100644 test/assets/gerberx3/tokens/d_select/D301.grb create mode 100644 test/assets/gerberx3/tokens/d_select/D999.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G54.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G55.grb delete mode 100644 test/assets/gerberx3/tokens/g_codes/G73.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G75.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G90.grb create mode 100644 test/assets/gerberx3/tokens/g_codes/G91.grb diff --git a/main.cpp b/main.cpp index 5c8a624ae..a3f1091bb 100644 --- a/main.cpp +++ b/main.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include #define DEBUG 1 @@ -135,19 +136,30 @@ class FileMapping { #endif enum class TokenType { - UNKNOWN, - INVALID, - INTEGER, - SIGN, - G01_CODE, - G02_CODE, - G03_CODE, - G04_CODE, - G36_CODE, - G37_CODE, - STRING, - COORDINATE_CODE, - D_CODE, + UNKNOWN = 0, + INVALID = 1, + INTEGER = 2, + SIGN = 3, + G01_CODE = 4, + G02_CODE = 5, + G03_CODE = 6, + G04_CODE = 7, + G36_CODE = 8, + G37_CODE = 9, + G54_CODE = 10, + G55_CODE = 11, + G70_CODE = 12, + G71_CODE = 13, + G74_CODE = 14, + G75_CODE = 15, + G90_CODE = 16, + G91_CODE = 17, + D01_CODE = 18, + D02_CODE = 19, + D03_CODE = 20, + DNN_SELECT = 21, + STRING = 22, + COORDINATE_CODE = 23, END_COMMAND, STATEMENT_BOUNDARY, }; @@ -161,7 +173,7 @@ struct CommandToken : Token {}; struct ExtendedCommand : Token {}; -struct GerberParser { +struct GerberTokenizer { class EndOfFile : std::exception {}; @@ -171,10 +183,9 @@ struct GerberParser { uint64_t failure_char_index; public: - InvalidToken(uint64_t failure_char_index) : - failure_char_index(failure_char_index) { - message = std::format("Invalid token at index: {}", failure_char_index); - } + InvalidToken(uint64_t failure_char_index, std::string message) : + failure_char_index(failure_char_index), + message(message) {} const char* what() const noexcept override { return message.c_str(); @@ -186,24 +197,21 @@ struct GerberParser { ABORTED, }; - char* gerber_code; - uint64_t gerber_code_size; - - std::vector parsed_tokens_vector; - std::vector token_buffer; + char* gerber_code; + uint64_t gerber_code_size; + std::vector tokens_vector; - GerberParser(char* file_mapping, uint64_t file_size) : + GerberTokenizer(char* file_mapping, uint64_t file_size) : gerber_code(file_mapping), - gerber_code_size(file_size), - token_buffer(16384) {} + gerber_code_size(file_size) {} - bool parse() { + bool tokenize() { uint64_t current_char_index = 0; try { while (current_char_index < gerber_code_size) { - current_char_index = parse_next(current_char_index); + current_char_index = tokenize_next(current_char_index); } - } catch (GerberParser::EndOfFile) { + } catch (GerberTokenizer::EndOfFile) { return true; } return true; @@ -214,11 +222,30 @@ struct GerberParser { } void make_token(uint64_t begin_token_index, uint64_t current_char_index, enum TokenType type) { - parsed_tokens_vector.push_back({make_substring(begin_token_index, current_char_index), type} + tokens_vector.push_back({make_substring(begin_token_index, current_char_index), type}); + + std::cout << tokens_vector.back().content << " " << int(tokens_vector.back().type) + << std::endl; + } + + [[noreturn]] void throw_invalid_token(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + if (begin_token_index > 0) { + begin_token_index--; + } + if (current_char_index > 0) { + current_char_index--; + } + } + std::string message = std::format( + "Invalid token '{}' at index: {}", + make_substring(begin_token_index, current_char_index + 1), + begin_token_index ); + throw InvalidToken(begin_token_index, message); } - uint64_t parse_next(uint64_t current_char_index) { + uint64_t tokenize_next(uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { throw EndOfFile(); } @@ -226,21 +253,29 @@ struct GerberParser { switch (current_char) { case '*': - return parse_asterisk(current_char_index, current_char_index); + return tokenize_asterisk(current_char_index, current_char_index); case 'G': - return parse_g_code(current_char_index, current_char_index + 1); + return tokenize_g_code(current_char_index, current_char_index + 1); + case 'D': + return tokenize_d_code(current_char_index, current_char_index + 1); + case 'X': + case 'Y': + case 'I': + case 'J': + make_token(current_char_index, current_char_index + 1, TokenType::COORDINATE_CODE); + return tokenize_signed_integer(current_char_index + 1, current_char_index + 1); case ' ': case '\t': case '\n': case '\r': return current_char_index + 1; default: - throw InvalidToken(current_char_index); + throw_invalid_token(current_char_index, current_char_index + 1); } - return current_char_index + 1; + throw_invalid_token(current_char_index, current_char_index + 1); } - uint64_t parse_asterisk(uint64_t begin_token_index, uint64_t current_char_index) { + uint64_t tokenize_asterisk(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { return begin_token_index; } @@ -250,10 +285,10 @@ struct GerberParser { make_token(begin_token_index, current_char_index + 1, TokenType::END_COMMAND); return current_char_index + 1; } - throw InvalidToken(current_char_index); + throw_invalid_token(begin_token_index, current_char_index + 1); } - uint64_t parse_string(uint64_t begin_token_index, uint64_t current_char_index) { + uint64_t tokenize_string(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { make_token(begin_token_index, current_char_index, TokenType::STRING); return current_char_index - 1; @@ -265,50 +300,65 @@ struct GerberParser { make_token(begin_token_index, current_char_index, TokenType::STRING); return current_char_index - 1; default: - return parse_string(begin_token_index, current_char_index + 1); + return tokenize_string(begin_token_index, current_char_index + 1); } - throw InvalidToken(current_char_index); + throw_invalid_token(begin_token_index, current_char_index + 1); } - uint64_t parse_g_code(uint64_t begin_token_index, uint64_t current_char_index) { + uint64_t tokenize_g_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { return begin_token_index; } char current_char = gerber_code[current_char_index]; switch (current_char) { case '0': - return parse_g_code(begin_token_index, current_char_index + 1); + return tokenize_g_code(begin_token_index, current_char_index + 1); case '1': if (is_a_number(current_char_index + 1)) - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); make_token(begin_token_index, current_char_index + 1, TokenType::G01_CODE); - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '2': if (is_a_number(current_char_index + 1)) - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); make_token(begin_token_index, current_char_index + 1, TokenType::G02_CODE); - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '3': if (is_a_number(current_char_index + 1)) { - return parse_g3_code(begin_token_index, current_char_index + 1); + return tokenize_g3_code(begin_token_index, current_char_index + 1); } else { make_token(begin_token_index, current_char_index + 1, TokenType::G03_CODE); - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } case '4': if (is_a_number(current_char_index + 1)) - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); make_token(begin_token_index, current_char_index + 1, TokenType::G04_CODE); if (!is_asterisk(current_char_index + 1)) { current_char_index = - parse_string(current_char_index + 1, current_char_index + 1); + tokenize_string(current_char_index + 1, current_char_index + 1); } - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '5': + if (is_a_number(current_char_index + 1)) { + return tokenize_g5_code(begin_token_index, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + case '7': + if (is_a_number(current_char_index + 1)) { + return tokenize_g7_code(begin_token_index, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + case '9': + if (is_a_number(current_char_index + 1)) { + return tokenize_g9_code(begin_token_index, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); } - throw InvalidToken(current_char_index); + throw_invalid_token(begin_token_index, current_char_index + 1); } - uint64_t parse_g3_code(uint64_t begin_token_index, uint64_t current_char_index) { + uint64_t tokenize_g3_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { return begin_token_index; } @@ -316,16 +366,198 @@ struct GerberParser { switch (current_char) { case '6': if (is_a_number(current_char_index + 1)) - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); make_token(begin_token_index, current_char_index + 1, TokenType::G36_CODE); - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '7': if (is_a_number(current_char_index + 1)) - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); make_token(begin_token_index, current_char_index + 1, TokenType::G37_CODE); - return parse_asterisk(current_char_index + 1, current_char_index + 1); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_g5_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '4': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G54_CODE); + // tokenize D code. + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '5': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G55_CODE); + // tokenize D03 code. + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_g7_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '0': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G70_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '1': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G71_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '4': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G74_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '5': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G75_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_g9_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '0': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G90_CODE); + // tokenize D code. + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '1': + if (is_a_number(current_char_index + 1)) + throw_invalid_token(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G91_CODE); + // tokenize D03 code. + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_d_code(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '0': + return tokenize_d_code(begin_token_index, current_char_index + 1); + case '1': + if (is_a_number(current_char_index + 1)) + return tokenize_d_select(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::D01_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '2': + if (is_a_number(current_char_index + 1)) + return tokenize_d_select(begin_token_index, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::D02_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + case '3': + if (is_a_number(current_char_index + 1)) { + return tokenize_d_select(begin_token_index, current_char_index + 1); + } else { + make_token(begin_token_index, current_char_index + 1, TokenType::D03_CODE); + return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + } + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return tokenize_d_select(begin_token_index, current_char_index + 1); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_d_select(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return tokenize_d_select(begin_token_index, current_char_index + 1); + default: + make_token(begin_token_index, current_char_index, TokenType::DNN_SELECT); + return tokenize_asterisk(current_char_index, current_char_index); } - throw InvalidToken(current_char_index); + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_signed_integer(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + if (begin_token_index == current_char_index) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '-': + case '+': + return tokenize_unsigned_integer(begin_token_index, current_char_index + 1); + default: + return tokenize_unsigned_integer(begin_token_index, current_char_index); + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_unsigned_integer(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + if (begin_token_index == current_char_index) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + return begin_token_index; + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + return tokenize_signed_integer(begin_token_index, current_char_index + 1); + default: + if (begin_token_index == current_char_index) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + make_token(begin_token_index, current_char_index, TokenType::INTEGER); + return current_char_index; + } + throw_invalid_token(begin_token_index, current_char_index + 1); } bool is_a_number(uint64_t current_char_index) { @@ -372,22 +604,33 @@ int main() { "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04_text.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G36.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G37.grb", + //"C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G54.grb", + //"C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G55.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G70.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G71.grb", - "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G73.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G74.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G75.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G90.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G91.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_select\\D11.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_select\\D12.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_select\\D301.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_select\\D999.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D01.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D02.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D03.grb", }; for (auto path : paths) { auto mapping_handler = FileMapping(path.c_str()); char* mapping = (char*)(mapping_handler.GetMapping()); - auto size = mapping_handler.GetSize(); - GerberParser parser(mapping, size); - parser.parse(); + auto size = mapping_handler.GetSize(); std::cout << path << std::endl; - for (auto token : parser.parsed_tokens_vector) { - std::cout << token.content << " " << int(token.type) << std::endl; - } + GerberTokenizer tokenizer(mapping, size); + tokenizer.tokenize(); + // for (auto token : tokenizer.tokenized_tokens_vector) { + // std::cout << token.content << " " << int(token.type) << std::endl; + // } } return 0; diff --git a/test/assets/gerberx3/tokens/d_codes/D01.grb b/test/assets/gerberx3/tokens/d_codes/D01.grb index faddad0d5..e9a837c5c 100644 --- a/test/assets/gerberx3/tokens/d_codes/D01.grb +++ b/test/assets/gerberx3/tokens/d_codes/D01.grb @@ -1,5 +1,2 @@ -%FSLAX26Y26*% -%MOMM*% -%ADD100C,0.5*% -D100* X0000000Y2000000D01* +X060000Y000000I000000J060000D01* diff --git a/test/assets/gerberx3/tokens/d_codes/D02.grb b/test/assets/gerberx3/tokens/d_codes/D02.grb index 7999c2744..d13d35f4e 100644 --- a/test/assets/gerberx3/tokens/d_codes/D02.grb +++ b/test/assets/gerberx3/tokens/d_codes/D02.grb @@ -1,4 +1 @@ -%FSLAX26Y26*% -%FSLAX26Y26*% -%MOMM*% X0000000Y2000000D02* diff --git a/test/assets/gerberx3/tokens/d_codes/D03.grb b/test/assets/gerberx3/tokens/d_codes/D03.grb index 13043e532..2fa0782c5 100644 --- a/test/assets/gerberx3/tokens/d_codes/D03.grb +++ b/test/assets/gerberx3/tokens/d_codes/D03.grb @@ -1,5 +1 @@ -%FSLAX26Y26*% -%MOMM*% -%ADD100C,0.5*% -D100* X1500000Y1500000D03* diff --git a/test/assets/gerberx3/tokens/d_select/D11.grb b/test/assets/gerberx3/tokens/d_select/D11.grb new file mode 100644 index 000000000..e529776de --- /dev/null +++ b/test/assets/gerberx3/tokens/d_select/D11.grb @@ -0,0 +1,2 @@ +D11* +D011* diff --git a/test/assets/gerberx3/tokens/d_select/D12.grb b/test/assets/gerberx3/tokens/d_select/D12.grb new file mode 100644 index 000000000..e4d7933a1 --- /dev/null +++ b/test/assets/gerberx3/tokens/d_select/D12.grb @@ -0,0 +1,2 @@ +D12* +D012* diff --git a/test/assets/gerberx3/tokens/d_select/D301.grb b/test/assets/gerberx3/tokens/d_select/D301.grb new file mode 100644 index 000000000..5429c1d3d --- /dev/null +++ b/test/assets/gerberx3/tokens/d_select/D301.grb @@ -0,0 +1,2 @@ +D301* +D0301* diff --git a/test/assets/gerberx3/tokens/d_select/D999.grb b/test/assets/gerberx3/tokens/d_select/D999.grb new file mode 100644 index 000000000..690601c0a --- /dev/null +++ b/test/assets/gerberx3/tokens/d_select/D999.grb @@ -0,0 +1,2 @@ +D999* +D0999* diff --git a/test/assets/gerberx3/tokens/g_codes/G54.grb b/test/assets/gerberx3/tokens/g_codes/G54.grb new file mode 100644 index 000000000..eacc17f90 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G54.grb @@ -0,0 +1,2 @@ +G54D11* +G054D11* diff --git a/test/assets/gerberx3/tokens/g_codes/G55.grb b/test/assets/gerberx3/tokens/g_codes/G55.grb new file mode 100644 index 000000000..975517c2b --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G55.grb @@ -0,0 +1,2 @@ +G55X0Y0D03* +G055X0Y0D03* diff --git a/test/assets/gerberx3/tokens/g_codes/G73.grb b/test/assets/gerberx3/tokens/g_codes/G73.grb deleted file mode 100644 index c5bf6da0d..000000000 --- a/test/assets/gerberx3/tokens/g_codes/G73.grb +++ /dev/null @@ -1,2 +0,0 @@ -G73* -G073* diff --git a/test/assets/gerberx3/tokens/g_codes/G75.grb b/test/assets/gerberx3/tokens/g_codes/G75.grb new file mode 100644 index 000000000..6eea00678 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G75.grb @@ -0,0 +1,2 @@ +G75* +G075* diff --git a/test/assets/gerberx3/tokens/g_codes/G90.grb b/test/assets/gerberx3/tokens/g_codes/G90.grb new file mode 100644 index 000000000..56d269941 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G90.grb @@ -0,0 +1,2 @@ +G90* +G090* diff --git a/test/assets/gerberx3/tokens/g_codes/G91.grb b/test/assets/gerberx3/tokens/g_codes/G91.grb new file mode 100644 index 000000000..1f41ef3f4 --- /dev/null +++ b/test/assets/gerberx3/tokens/g_codes/G91.grb @@ -0,0 +1,2 @@ +G91* +G091* From d7b3b76ec0811028f3842e858cd05b873d2c8ab8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wi=C5=9Bniewski?= Date: Thu, 4 Jul 2024 01:45:24 +0200 Subject: [PATCH 4/6] Add FS extended command tokenization --- main.cpp | 241 ++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 185 insertions(+), 56 deletions(-) diff --git a/main.cpp b/main.cpp index a3f1091bb..0bec7c102 100644 --- a/main.cpp +++ b/main.cpp @@ -136,32 +136,36 @@ class FileMapping { #endif enum class TokenType { - UNKNOWN = 0, - INVALID = 1, - INTEGER = 2, - SIGN = 3, - G01_CODE = 4, - G02_CODE = 5, - G03_CODE = 6, - G04_CODE = 7, - G36_CODE = 8, - G37_CODE = 9, - G54_CODE = 10, - G55_CODE = 11, - G70_CODE = 12, - G71_CODE = 13, - G74_CODE = 14, - G75_CODE = 15, - G90_CODE = 16, - G91_CODE = 17, - D01_CODE = 18, - D02_CODE = 19, - D03_CODE = 20, - DNN_SELECT = 21, - STRING = 22, - COORDINATE_CODE = 23, + UNKNOWN = 0, + INVALID = 1, + INTEGER = 2, + STRING = 3, + G01_COMMAND = 4, + G02_COMMAND = 5, + G03_COMMAND = 6, + G04_COMMAND = 7, + G36_COMMAND = 8, + G37_COMMAND = 9, + G54_COMMAND = 10, + G55_COMMAND = 11, + G70_COMMAND = 12, + G71_COMMAND = 13, + G74_COMMAND = 14, + G75_COMMAND = 15, + G90_COMMAND = 16, + G91_COMMAND = 17, + D01_COMMAND = 18, + D02_COMMAND = 19, + D03_COMMAND = 20, + DNN_SELECT = 21, + COORDINATE_COMMAND = 23, + FS_EXTENDED_COMMAND = 24, + FS_ZERO_OMISSION = 26, + FS_COORDINATE_TYPE = 25, + FS_AXIS_FORMAT = 27, + MODE_EXTENDED_COMMAND = 28, END_COMMAND, - STATEMENT_BOUNDARY, + EXTENDED_COMMAND_BOUNDARY, }; struct Token { @@ -262,8 +266,15 @@ struct GerberTokenizer { case 'Y': case 'I': case 'J': - make_token(current_char_index, current_char_index + 1, TokenType::COORDINATE_CODE); + make_token( + current_char_index, current_char_index + 1, TokenType::COORDINATE_COMMAND + ); return tokenize_signed_integer(current_char_index + 1, current_char_index + 1); + case '%': + make_token( + current_char_index, current_char_index + 1, TokenType::EXTENDED_COMMAND_BOUNDARY + ); + return tokenize_extended_command(current_char_index + 1, current_char_index + 1); case ' ': case '\t': case '\n': @@ -277,7 +288,7 @@ struct GerberTokenizer { uint64_t tokenize_asterisk(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { @@ -288,6 +299,22 @@ struct GerberTokenizer { throw_invalid_token(begin_token_index, current_char_index + 1); } + uint64_t + tokenize_extended_command_boundary(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case '%': + make_token( + begin_token_index, current_char_index + 1, TokenType::EXTENDED_COMMAND_BOUNDARY + ); + return current_char_index + 1; + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + uint64_t tokenize_string(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { make_token(begin_token_index, current_char_index, TokenType::STRING); @@ -307,7 +334,7 @@ struct GerberTokenizer { uint64_t tokenize_g_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { @@ -316,24 +343,24 @@ struct GerberTokenizer { case '1': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G01_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G01_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '2': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G02_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G02_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '3': if (is_a_number(current_char_index + 1)) { return tokenize_g3_code(begin_token_index, current_char_index + 1); } else { - make_token(begin_token_index, current_char_index + 1, TokenType::G03_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G03_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } case '4': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G04_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G04_COMMAND); if (!is_asterisk(current_char_index + 1)) { current_char_index = tokenize_string(current_char_index + 1, current_char_index + 1); @@ -360,19 +387,19 @@ struct GerberTokenizer { uint64_t tokenize_g3_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { case '6': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G36_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G36_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '7': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G37_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G37_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } throw_invalid_token(begin_token_index, current_char_index + 1); @@ -380,51 +407,49 @@ struct GerberTokenizer { uint64_t tokenize_g5_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { case '4': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G54_CODE); - // tokenize D code. - return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G54_COMMAND); + return current_char_index + 1; case '5': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G55_CODE); - // tokenize D03 code. - return tokenize_asterisk(current_char_index + 1, current_char_index + 1); + make_token(begin_token_index, current_char_index + 1, TokenType::G55_COMMAND); + return current_char_index + 1; } throw_invalid_token(begin_token_index, current_char_index + 1); } uint64_t tokenize_g7_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { case '0': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G70_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G70_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '1': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G71_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G71_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '4': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G74_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G74_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '5': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G75_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G75_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } throw_invalid_token(begin_token_index, current_char_index + 1); @@ -432,20 +457,20 @@ struct GerberTokenizer { uint64_t tokenize_g9_code(uint64_t begin_token_index, uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { - return begin_token_index; + throw_invalid_token(begin_token_index, current_char_index + 1); } char current_char = gerber_code[current_char_index]; switch (current_char) { case '0': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G90_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G90_COMMAND); // tokenize D code. return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '1': if (is_a_number(current_char_index + 1)) throw_invalid_token(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::G91_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::G91_COMMAND); // tokenize D03 code. return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } @@ -463,18 +488,18 @@ struct GerberTokenizer { case '1': if (is_a_number(current_char_index + 1)) return tokenize_d_select(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::D01_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::D01_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '2': if (is_a_number(current_char_index + 1)) return tokenize_d_select(begin_token_index, current_char_index + 1); - make_token(begin_token_index, current_char_index + 1, TokenType::D02_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::D02_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); case '3': if (is_a_number(current_char_index + 1)) { return tokenize_d_select(begin_token_index, current_char_index + 1); } else { - make_token(begin_token_index, current_char_index + 1, TokenType::D03_CODE); + make_token(begin_token_index, current_char_index + 1, TokenType::D03_COMMAND); return tokenize_asterisk(current_char_index + 1, current_char_index + 1); } case '4': @@ -560,6 +585,105 @@ struct GerberTokenizer { throw_invalid_token(begin_token_index, current_char_index + 1); } + uint64_t tokenize_extended_command(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + char current_char = gerber_code[current_char_index]; + switch (current_char) { + case 'F': + return tokenize_fs_extended_command(begin_token_index, current_char_index + 1); + // case 'M': { + // uint64_t next_char_index = current_char_index + 1; + // if (current_char_index >= gerber_code_size) { + // throw_invalid_token(begin_token_index, current_char_index + 1); + // } + // char next_char = gerber_code[next_char_index]; + // switch (next_char) { + // case 'O': + // make_token( + // begin_token_index, next_char_index, + // TokenType::MODE_EXTENDED_COMMAND + // ); + // return tokenize_fs_extended_command(next_char_index, next_char_index + // + 1); + // } + // throw_invalid_token(begin_token_index, current_char_index + 1); + // } + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + uint64_t tokenize_fs_extended_command(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + char current_char = gerber_code[current_char_index]; + if (current_char == 'S') { + make_token(begin_token_index, current_char_index + 1, TokenType::FS_EXTENDED_COMMAND); + current_char_index++; + begin_token_index = current_char_index; + } else { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + current_char_index = tokenize_one_of( + "LT", 2, TokenType::FS_ZERO_OMISSION, begin_token_index, current_char_index + ); + begin_token_index = current_char_index; + + current_char_index = tokenize_one_of( + "AI", 2, TokenType::FS_COORDINATE_TYPE, begin_token_index, current_char_index + ); + begin_token_index = current_char_index; + + for (int i = 0; i < 2; i++) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + current_char = gerber_code[current_char_index]; + switch (current_char) { + case 'X': + case 'Y': + make_token( + begin_token_index, current_char_index + 1, TokenType::FS_AXIS_FORMAT + ); + current_char_index = + tokenize_unsigned_integer(current_char_index + 1, current_char_index + 1); + begin_token_index = current_char_index; + break; + default: + throw_invalid_token(begin_token_index, current_char_index + 1); + } + } + + current_char_index = tokenize_asterisk(begin_token_index, current_char_index); + current_char_index = + tokenize_extended_command_boundary(current_char_index, current_char_index); + + return current_char_index + 1; + } + + uint64_t tokenize_one_of( + const char* chars, + uint32_t length, + enum TokenType token_type, + uint64_t begin_token_index, + uint64_t current_char_index + ) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + char current_char = gerber_code[current_char_index]; + for (int i = 0; i < length; i++) { + if (current_char == chars[i]) { + make_token(begin_token_index, current_char_index + 1, token_type); + return current_char_index + 1; + } + } + throw_invalid_token(begin_token_index, current_char_index + 1); + } + bool is_a_number(uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { return false; @@ -601,11 +725,12 @@ int main() { "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G02.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G03.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04.grb", - "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04_text.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G04_text." + "grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G36.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G37.grb", - //"C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G54.grb", - //"C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G55.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G54.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G55.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G70.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G71.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\g_codes\\G74.grb", @@ -619,6 +744,10 @@ int main() { "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D01.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D02.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\d_codes\\D03.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSLAX26Y26.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSLAX66Y66.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSLIX26Y26.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSTIX26Y27.grb", }; for (auto path : paths) { auto mapping_handler = FileMapping(path.c_str()); From 84fd95d69ae4655832485257f53661efd5a4650c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wi=C5=9Bniewski?= Date: Fri, 5 Jul 2024 00:31:45 +0200 Subject: [PATCH 5/6] Fix asset style --- test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb | 2 +- test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb | 2 +- test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb | 2 +- test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb | 2 +- test/assets/gerberx3/tokens/set_codes/as.grb | 2 ++ test/assets/gerberx3/tokens/set_codes/mo.grb | 2 ++ 6 files changed, 8 insertions(+), 4 deletions(-) create mode 100644 test/assets/gerberx3/tokens/set_codes/as.grb create mode 100644 test/assets/gerberx3/tokens/set_codes/mo.grb diff --git a/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb b/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb index a74e8d5f5..ba50de888 100644 --- a/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb +++ b/test/assets/gerberx3/tokens/fs/FSLAX26Y26.grb @@ -1 +1 @@ -%FSLAX26Y26*% \ No newline at end of file +%FSLAX26Y26*% diff --git a/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb b/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb index d6767a154..e9f73c693 100644 --- a/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb +++ b/test/assets/gerberx3/tokens/fs/FSLAX66Y66.grb @@ -1 +1 @@ -%FSLIX66Y66*% \ No newline at end of file +%FSLIX66Y66*% diff --git a/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb b/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb index acd039aff..9211cacdc 100644 --- a/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb +++ b/test/assets/gerberx3/tokens/fs/FSLIX26Y26.grb @@ -1 +1 @@ -%FSLAX66Y66*% \ No newline at end of file +%FSLAX66Y66*% diff --git a/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb b/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb index 1ad531c84..e2b95967c 100644 --- a/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb +++ b/test/assets/gerberx3/tokens/fs/FSTIX26Y27.grb @@ -1 +1 @@ -%FSTAX66Y66*% \ No newline at end of file +%FSTAX66Y66*% diff --git a/test/assets/gerberx3/tokens/set_codes/as.grb b/test/assets/gerberx3/tokens/set_codes/as.grb new file mode 100644 index 000000000..db7c052d4 --- /dev/null +++ b/test/assets/gerberx3/tokens/set_codes/as.grb @@ -0,0 +1,2 @@ +%ASAYBX*% +%ASAXBY*% diff --git a/test/assets/gerberx3/tokens/set_codes/mo.grb b/test/assets/gerberx3/tokens/set_codes/mo.grb new file mode 100644 index 000000000..a034bf6a6 --- /dev/null +++ b/test/assets/gerberx3/tokens/set_codes/mo.grb @@ -0,0 +1,2 @@ +%MOMM*% +%MOIN*% From 61049a60dc47d5bc2ba87d1d8388bbfef9e58fdb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Krzysztof=20Wi=C5=9Bniewski?= Date: Fri, 5 Jul 2024 00:32:11 +0200 Subject: [PATCH 6/6] Add MO extended command parsing --- main.cpp | 137 +++++++++++++++++++++++++++++++++++++------------------ 1 file changed, 92 insertions(+), 45 deletions(-) diff --git a/main.cpp b/main.cpp index 0bec7c102..1221d9e80 100644 --- a/main.cpp +++ b/main.cpp @@ -136,34 +136,35 @@ class FileMapping { #endif enum class TokenType { - UNKNOWN = 0, - INVALID = 1, - INTEGER = 2, - STRING = 3, - G01_COMMAND = 4, - G02_COMMAND = 5, - G03_COMMAND = 6, - G04_COMMAND = 7, - G36_COMMAND = 8, - G37_COMMAND = 9, - G54_COMMAND = 10, - G55_COMMAND = 11, - G70_COMMAND = 12, - G71_COMMAND = 13, - G74_COMMAND = 14, - G75_COMMAND = 15, - G90_COMMAND = 16, - G91_COMMAND = 17, - D01_COMMAND = 18, - D02_COMMAND = 19, - D03_COMMAND = 20, - DNN_SELECT = 21, - COORDINATE_COMMAND = 23, - FS_EXTENDED_COMMAND = 24, - FS_ZERO_OMISSION = 26, - FS_COORDINATE_TYPE = 25, - FS_AXIS_FORMAT = 27, - MODE_EXTENDED_COMMAND = 28, + UNKNOWN = 0, + INVALID = 1, + INTEGER = 2, + STRING = 3, + G01_COMMAND = 4, + G02_COMMAND = 5, + G03_COMMAND = 6, + G04_COMMAND = 7, + G36_COMMAND = 8, + G37_COMMAND = 9, + G54_COMMAND = 10, + G55_COMMAND = 11, + G70_COMMAND = 12, + G71_COMMAND = 13, + G74_COMMAND = 14, + G75_COMMAND = 15, + G90_COMMAND = 16, + G91_COMMAND = 17, + D01_COMMAND = 18, + D02_COMMAND = 19, + D03_COMMAND = 20, + DNN_SELECT = 21, + COORDINATE_COMMAND = 23, + FS_EXTENDED_COMMAND = 24, + FS_ZERO_OMISSION = 26, + FS_COORDINATE_TYPE = 25, + FS_AXIS_FORMAT = 27, + MODE_EXTENDED_COMMAND = 28, + MODE_EXTENDED_COMMAND_VALUE = 29, END_COMMAND, EXTENDED_COMMAND_BOUNDARY, }; @@ -593,23 +594,8 @@ struct GerberTokenizer { switch (current_char) { case 'F': return tokenize_fs_extended_command(begin_token_index, current_char_index + 1); - // case 'M': { - // uint64_t next_char_index = current_char_index + 1; - // if (current_char_index >= gerber_code_size) { - // throw_invalid_token(begin_token_index, current_char_index + 1); - // } - // char next_char = gerber_code[next_char_index]; - // switch (next_char) { - // case 'O': - // make_token( - // begin_token_index, next_char_index, - // TokenType::MODE_EXTENDED_COMMAND - // ); - // return tokenize_fs_extended_command(next_char_index, next_char_index - // + 1); - // } - // throw_invalid_token(begin_token_index, current_char_index + 1); - // } + case 'M': + return tokenize_mode_extended_command(begin_token_index, current_char_index + 1); } throw_invalid_token(begin_token_index, current_char_index + 1); } @@ -684,6 +670,66 @@ struct GerberTokenizer { throw_invalid_token(begin_token_index, current_char_index + 1); } + uint64_t + tokenize_mode_extended_command(uint64_t begin_token_index, uint64_t current_char_index) { + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + char current_char = gerber_code[current_char_index]; + if (current_char != 'O') { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + make_token(begin_token_index, current_char_index + 1, TokenType::MODE_EXTENDED_COMMAND); + + current_char_index++; + begin_token_index = current_char_index; + + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + current_char = gerber_code[current_char_index]; + switch (current_char) { + case 'M': + current_char = next_or_throw(begin_token_index, current_char_index); + if (current_char != 'M') { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + make_token( + begin_token_index, + current_char_index + 1, + TokenType::MODE_EXTENDED_COMMAND_VALUE + ); + break; + case 'I': + current_char = next_or_throw(begin_token_index, current_char_index); + if (current_char != 'N') { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + make_token( + begin_token_index, + current_char_index + 1, + TokenType::MODE_EXTENDED_COMMAND_VALUE + ); + break; + default: + throw_invalid_token(begin_token_index, current_char_index + 1); + } + + current_char_index = tokenize_asterisk(current_char_index + 1, current_char_index + 1); + current_char_index = + tokenize_extended_command_boundary(current_char_index, current_char_index); + + return current_char_index + 1; + } + + char next_or_throw(uint64_t begin_token_index, uint64_t& current_char_index) { + current_char_index++; + if (current_char_index >= gerber_code_size) { + throw_invalid_token(begin_token_index, current_char_index + 1); + } + return gerber_code[current_char_index]; + } + bool is_a_number(uint64_t current_char_index) { if (current_char_index >= gerber_code_size) { return false; @@ -748,6 +794,7 @@ int main() { "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSLAX66Y66.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSLIX26Y26.grb", "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\fs\\FSTIX26Y27.grb", + "C:\\Users\\argma\\dev\\pygerber\\test\\assets\\gerberx3\\tokens\\set_codes\\mo.grb", }; for (auto path : paths) { auto mapping_handler = FileMapping(path.c_str());