diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..4125888
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,29 @@
+
+doc/*.vsdx
+doc/*.~vsdx
+
+working/*.un~
+
+*.o
+bin/aurora*
+*.swp
+bin/compile_proto
+.DS_Store
+
+bin/
+
+*.log.*
+
+work/aurora*.INFO
+work/aurora*.WARNING
+work/aurora*.ERROR
+work/aurora*.FATAL
+
+work/*/aurora.INFO
+work/*/aurora.WARNING
+work/*/aurora.ERROR
+work/*/aurora.FATAL
+
+work/*/raft.binlog.*
+x64
+doc/~$grpc-issue.xlsx
\ No newline at end of file
diff --git a/CMakeLists.txt b/CMakeLists.txt
new file mode 100644
index 0000000..eba6a45
--- /dev/null
+++ b/CMakeLists.txt
@@ -0,0 +1,42 @@
+# CMAKE_CXX_STANDARD 17 needs CMake >= 3.8 (list(FILTER) needs >= 3.6).
+cmake_minimum_required(VERSION 3.8)
+project(Aurora)
+
+set(CMAKE_CXX_STANDARD 17)
+set(CMAKE_CXX_STANDARD_REQUIRED ON)
+
+# NOTE(review): declared as a boolean option although the name suggests a
+# path; nothing in this file reads it -- confirm the intended use.
+option(GLOG_INC_DIR "Use include" ON)
+
+message(STATUS "project src: ${PROJECT_SOURCE_DIR}")
+
+# Collect every .cc file below src/; re-run CMake after adding or removing files.
+file(GLOB_RECURSE ALL_SRC "${PROJECT_SOURCE_DIR}/src/*.cc")
+if(NOT ALL_SRC)
+  message(FATAL_ERROR "no .cc sources found under ${PROJECT_SOURCE_DIR}/src")
+endif()
+
+foreach(ARG IN LISTS ALL_SRC)
+  message(STATUS "one source: ${ARG}")
+endforeach()
+
+# Excluded from both executables.
+list(FILTER ALL_SRC EXCLUDE REGEX "trivial_lock_(deque|hash|list|queue)\\.cc$")
+
+# Give each executable a single entry point: aurora drops gtest_main.cc,
+# aurora_test drops main.cc.
+set(MAIN_SRC ${ALL_SRC})
+set(TEST_SRC ${ALL_SRC})
+list(FILTER MAIN_SRC EXCLUDE REGEX "gtest_main\\.cc$")
+list(FILTER TEST_SRC EXCLUDE REGEX "[/\\\\]main\\.cc$")
+
+message(STATUS "main src found: ${MAIN_SRC}")
+message(STATUS "test src found: ${TEST_SRC}")
+
+# add the executables
+add_executable(aurora ${MAIN_SRC})
+add_executable(aurora_test ${TEST_SRC})
+
+target_include_directories(aurora PRIVATE "${PROJECT_SOURCE_DIR}/src")
+target_include_directories(aurora_test PRIVATE "${PROJECT_SOURCE_DIR}/src")
diff --git a/LICENSE b/LICENSE
new file mode 100644
index 0000000..8224297
--- /dev/null
+++ b/LICENSE
@@ -0,0 +1,674 @@
+ GNU GENERAL PUBLIC LICENSE
+ Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc.
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+ + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + aurora + Copyright (C) 2019 arthur + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + aurora Copyright (C) 2019 arthur + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<https://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<https://www.gnu.org/licenses/why-not-lgpl.html>.
diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..498b6fa --- /dev/null +++ b/Makefile @@ -0,0 +1,169 @@ + +BUILD_TYPE=debug + +CXX = g++ +CXXFLAGS = -std=c++17 -D_RAFT_UNIT_TEST_ + +ifeq ($(BUILD_TYPE),debug) +CXXFLAGS += -g3 +else +ifneq ($(BUILD_TYPE),release) +$(error input 'BUILD_TYPE' parameter can be either 'debug' or 'release') +endif +CXXFLAGS += -O3 +endif + +SRCDIR = src +BINDIR = bin +OBJDIR = $(BINDIR)/$(BUILD_TYPE)/object + +THIRD_PARTY_DIR=./third_party + +INC = -I./src \ + -I./src/protocol \ + -I$(THIRD_PARTY_DIR)/glog/src \ + -I$(THIRD_PARTY_DIR)/gflags/build-dir/include \ + -I$(THIRD_PARTY_DIR)/boost_1_68_0 \ + -I$(THIRD_PARTY_DIR)/googletest/googletest/include\ + -I$(THIRD_PARTY_DIR)/grpc/include\ + +LIB = /usr/local/lib/libglog.a \ + /usr/local/lib/libgflags.a \ + /usr/local/lib/libgtest.a \ + /usr/local/lib/libgflags_nothreads.a \ + /usr/local/lib/libboost_filesystem.a \ + /usr/local/lib/libboost_system.a \ + /usr/local/lib/libboost_thread.a \ + /usr/local/lib/libprotobuf.a \ + /usr/local/lib/libgrpc++.a \ + /usr/local/lib/libgrpc.a \ + /usr/local/lib/libgrpc++_reflection.a \ + /usr/local/lib/libtcmalloc.a \ + -lz -ldl -lpthread \ + +UNAME_S := $(shell uname -s) +ifeq ($(UNAME_S),Linux) + LIB += -latomic +endif + +PROTOS_PATH = ./src/protocol + +vpath %.proto $(PROTOS_PATH) + +PROTO_FLAG=$(BINDIR)/compile_proto +MAIN_PROGRAM=$(BINDIR)/$(BUILD_TYPE)/aurora +MAIN_TEST=$(BINDIR)/$(BUILD_TYPE)/aurora_test + +-include prepare $(PROTO_FLAG) + +.PHONY: all +all: system-check $(MAIN_PROGRAM) $(MAIN_TEST) + +.PHONY: prepare +prepare: + mkdir -p $(OBJDIR) + +ALL_SRC_FILES=$(wildcard src/*.cc src/*/*.cc) +TPL_CC_FILES=%src/tools/lock_free_deque.cc %src/tools/lock_free_hash.cc \ + %src/tools/trivial_lock_double_list.cc %src/tools/lock_free_queue.cc \ + %src/tools/lock_free_queue.cc %src/tools/lock_free_single_list.cc \ + %src/common/request_base.cc %src/client/client_framework.cc \ + %src/follower/follower_request.cc 
%src/leader/connection_pool.cc \ + %src/leader/client_pool.cc %src/leader/leader_request.cc \ + %src/service/ownership_delegator.cc %src/candidate/candidate_request.cc \ + %src/tools/data_structure_base.cc %src/tools/lock_free_unordered_single_list.cc \ + %src/tools/trivial_lock_list_base.cc %src/tools/lock_free_single_list.cc\ + %src/tools/trivial_lock_single_list.cc %src/tools/lock_free_hash_specific.cc\ + %src/common/react_group.cc \ + +COMPILE_SRC_FILES = $(filter-out $(TPL_CC_FILES), $(ALL_SRC_FILES) ) + +OBJ = $(patsubst %.cc, $(OBJDIR)/%.o, $(COMPILE_SRC_FILES)) + +EXE_MAIN_OBJ=%/main.o +UTEST_MAIN_OBJ=%gtest_main.o +EXE_OBJ = $(filter-out $(UTEST_MAIN_OBJ), $(OBJ) ) +UTEST_OBJ = $(filter-out $(EXE_MAIN_OBJ), $(OBJ) ) + +.PHONY:test +test:$(PROTO_FLAG) + @echo "all:" $(ALL_SRC_FILES) + @echo "src:" $(COMPILE_SRC_FILES) + @echo "object:" $(OBJ) + +$(OBJDIR)/%.o: %.cc + @mkdir -p $(OBJDIR)/$(dir $<) + $(CXX) $(CXXFLAGS) $(INC) -c $< -o $@ + +$(MAIN_PROGRAM): $(EXE_OBJ) + $(CXX) $(CXXFLAGS) $^ $(LIB) -o $@ + +$(MAIN_TEST): $(UTEST_OBJ) + $(CXX) $(CXXFLAGS) $^ $(LIB) -o $@ + +PROTOC = protoc +GRPC_CPP_PLUGIN = grpc_cpp_plugin +GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` + +$(PROTO_FLAG): $(PROTOS_PATH)/raft.proto + $(PROTOC) -I $(PROTOS_PATH) --cpp_out=$(PROTOS_PATH) $< + $(PROTOC) -I $(PROTOS_PATH) --grpc_out=$(PROTOS_PATH) --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $< + touch $@ + +.PHONY: clean +clean: + rm -rf $(OBJDIR) $(PROTOS_PATH)/*.h $(PROTOS_PATH)/*.cc $(PROTO_FLAG) $(MAIN_PROGRAM) $(MAIN_TEST) + + +# The following is to test your system and ensure a smoother experience. +# They are by no means necessary to actually compile a grpc-enabled software. 
+ +PROTOC_CMD = which $(PROTOC) +PROTOC_CHECK_CMD = $(PROTOC) --version | grep -q libprotoc.3 +PLUGIN_CHECK_CMD = which $(GRPC_CPP_PLUGIN) +HAS_PROTOC = $(shell $(PROTOC_CMD) > /dev/null && echo true || echo false) +ifeq ($(HAS_PROTOC),true) +HAS_VALID_PROTOC = $(shell $(PROTOC_CHECK_CMD) 2> /dev/null && echo true || echo false) +endif +HAS_PLUGIN = $(shell $(PLUGIN_CHECK_CMD) > /dev/null && echo true || echo false) + +SYSTEM_OK = false +ifeq ($(HAS_VALID_PROTOC),true) +ifeq ($(HAS_PLUGIN),true) +SYSTEM_OK = true +endif +endif + +system-check: +ifneq ($(HAS_VALID_PROTOC),true) + @echo " DEPENDENCY ERROR" + @echo + @echo "You don't have protoc 3.0.0 installed in your path." + @echo "Please install Google protocol buffers 3.0.0 and its compiler." + @echo "You can find it here:" + @echo + @echo " https://github.com/google/protobuf/releases/tag/v3.0.0" + @echo + @echo "Here is what I get when trying to evaluate your version of protoc:" + @echo + -$(PROTOC) --version + @echo + @echo +endif +ifneq ($(HAS_PLUGIN),true) + @echo " DEPENDENCY ERROR" + @echo + @echo "You don't have the grpc c++ protobuf plugin installed in your path." + @echo "Please install grpc. 
You can find it here:" + @echo + @echo " https://github.com/grpc/grpc" + @echo + @echo "Here is what I get when trying to detect if you have the plugin:" + @echo + -which $(GRPC_CPP_PLUGIN) + @echo + @echo +endif +ifneq ($(SYSTEM_OK),true) + @false +endif diff --git a/README.md b/README.md new file mode 100644 index 0000000..2728953 --- /dev/null +++ b/README.md @@ -0,0 +1,148 @@ + +![logo](doc/images/logo/transparent-black-v2.png) + + +![AUR license](https://img.shields.io/aur/license/aurora.svg) + + +## Table of Contents + +* [About the Project](#about-the-project) +* [Getting Started](#getting-started) + * [Prerequisites](#prerequisites) + * [Building](#building) + * [Usage](#usage) +* [Contributing](#contributing) +* [License](#license) +* [Contact](#contact) +* [Donation](#donation) + + +## About The Project + +**Aurora is a [Raft](https://raft.github.io) based K-V database**. The basics of the current architecture is : + +![sys_arch](doc/images/system_architecture.png) + +*Note : The name `Aurora` **has nothing to do with** neither [Apache Aurora](http://aurora.apache.org/) nor [Amazon aurora](https://aws.amazon.com/rds/aurora/).* + +It's inspired by the [raft protocol](https://raft.github.io) which often being known as a contrast of another widely known protocol : `paxos`. Aurora comes along with all of raft dominant features supported: +* **Log Replication**: A two phase commit data replication solution. +* **Leader Election** : A majority & term based failover solution. +* **Membership Change**: A smart approach dealing with cluster topology changing. + +Besides the consensus protocol, it also comes with a local storage subsystem whose idea is inspired and being the same with [leveldb](https://github.com/google/leveldb) : to enhance blind writing operation throughput. + +Last but not least, it's implemented by using the modern cpp(11/14/17) which may contributes to the popularization of the new lanaguage standards. After all, newer is probably better. 
+ + +## Getting Started + +Project root directory explanation: +* src : source code. +* doc : documents. +* bin : binaries & objects after a successful build. +* working : running directory of aurora. +* third_party : the third party dependencies. + +Aurora has some basic components that you need to build first before running it. + +### Prerequisites + +* [protobuf](https://github.com/protocolbuffers/protobuf). >=3.0.0. +* [grpc](https://github.com/grpc/grpc). >=1.8.x. +* [boost](https://www.boost.org/). >=1.64.0. +* [glog](https://github.com/google/glog). >=0.3.3. +* [gflags](https://github.com/gflags/gflags). >=2.2.0. +* [gtest](https://github.com/google/googletest). >=1.7.0. +* [gperftools](https://github.com/gperftools/gperftools). >=7.0. + +Create a `third_party` directory and build the above dependencies under it. + +*Note: How to build the dependencies is beyond the scope of this document, and you may need to consult the documentation for each of them. Make sure the headers and libraries are correctly installed on your system. That may be tedious, but there is no way around it.* + +After successfully building all the above dependencies, your working directory should look something like this: +``` +|-- src +|-- doc +|-- bin +|-- working +|-- third_party + |-- boost_1_68_0 + |-- protobuf + |-- grpc + |-- glog + |-- gflags + |-- googletest +``` + +### Building +Now, you are ready to compile aurora itself. + +* unix & linux & osx: +```console +cd aurora && make -j4 +``` + +> Note: some gcc versions(like gcc (GCC) 8.3.1) don't fully support std::atomic, thus you might need to install `libatomic` manually. + +* windows: +see [building under windows](doc/windows.md) + + +## Usage + +First, take a look at the configuration files: +* `election.config` : internal use, for election. +* `membership-change.config` : internal use, for membership change. +* `topology.config` : sets up a cluster: + * `leader` :leader node. + * `followers` :follower nodes. + * `candidates` :candidate nodes.
+ * all nodes use the format : `xx.xx.xx.xx:port`, like `192.168.0.100:10010`. + +*Only `topology.config` is intended to be controlled by users, the others are either managed by the system or for debugging purposes.* + +Second, after you finish configuring `topology.config`, you can start a node by : +* running command under *nix: + +```console + cd aurora/working/ + nohup ../bin/aurora > aurora.log 2>&1 & +``` + +* running command under windows: + +```console +cd aurora\working\ +..\aurora\working\aurora.exe +``` + + + + +## Contributing +It is strongly recommended to read the [developer guide](doc/developer_guide.md) for details. + + +## License +Distributed under the **GPLv3** License. See the [license file](LICENSE) for more information. + + +## Contact +Arthur - pplorins@gmail.com + + +## Donation +This project took a lot of the author's time, and if you think it helps, don't hesitate to show your generosity :moneybag: :moneybag: :moneybag: + +* paypal: [paypal.me link](https://paypal.me/arthurCoo) +* wechat: + + ![wechat-pay](doc/images/pay_wechat.png) + +* alipay: + + ![alipay](doc/images/pay_alipay.png) + + diff --git a/build/.gitignore b/build/.gitignore new file mode 100644 index 0000000..94548af --- /dev/null +++ b/build/.gitignore @@ -0,0 +1,3 @@ +* +*/ +!.gitignore diff --git a/doc/benchmark.md b/doc/benchmark.md new file mode 100644 index 0000000..e9758ae --- /dev/null +++ b/doc/benchmark.md @@ -0,0 +1,55 @@ +# Benchmark + +This is the environment for benchmark. + +### Machines: + +|machine|OS|Compiler|Cpu|Memory|Disk| +|--|--|--|--|--|--| +|machineA|win10 64 pro|MSVC 19.00.24215.1 for x86|i7 8(4*2)core 4.0GHZ|16G|SSD| +|machineB|Mac 64 High Sierra 10.13.6|Apple LLVM version 10.0.0 (clang-1000.10.44.4)|i7 8(4*2)core 2.2GHZ|16G|SSD| + +### Network: +Home router: tplink TL-WR880N 450Mbps. Latency: +* machineA --> machineB : 1~100ms(unstable) +* machineB --> machineA : 1~5ms(stable) + +### Deploy: +Leader is deployed on machineA, all followers are deployed on machineB.
+ +### Config: + +#### leader config: +--do_heartbeat=false --iterating_wait_timeo_us=2000000 --port=10010 --leader_append_entries_rpc_timeo_ms=5000 --leader_commit_entries_rpc_timeo_ms=5000 --client_cq_num=2 --client_thread_num=2 --notify_cq_num=2 --notify_cq_threads=4 --call_cq_num=2 --call_cq_threads=2 --iterating_threads=2 --client_pool_size=50000 + +#### follower config: +--checking_heartbeat=false --iterating_wait_timeo_us=50000 --disorder_msg_timeo_ms=100000 --port=${port} --notify_cq_num=1 --notify_cq_threads=4 --call_cq_num=1 --call_cq_threads=4 --iterating_threads=2 + +### Logic + +Using the `TestLeaderServiceClient.Benchmark` test case in `src\gtest\service\test_leader_service.h` with arguments: --gtest_filter=TestLeaderServiceClient.Benchmark --client_write_timo_ms=10000 --benchmark_client_cq_num=1 --benchmark_client_thread_num_per_cq=1 --value_len=1 --leader_svc_benchmark_req_count=20000 + +Which is, using asynchronous way of sending 2w requests each time, counting overall write throughput & average write latency. Reading performance is not considered. The influential factors being tested are : **data length** and **number of followers**. + +### Result: + +#### Taking **data length** as the factor: + +![dl-factor](../doc/images/datalen-factor.png) + +![tp-dl](../doc/images/benchmark-throughput-datalen.png) + +![latency-dl](../doc/images/benchmark-latency-datalen.png) + +#### Taking **number of followers** as the factor: + +![F-factor](../doc/images/followers-factor.png) + +![tp-#F](../doc/images/benchmark-throughput-followers.png) + +![tp-#F](../doc/images/benchmark-latency-followers.png) + +### Bottleneck analysis: +If we remove all the logics leaving only the skeleton(where leader broadcasting requests to all its followers) left, we'll get a result of ~2w throughput & ~500ms latency. 
After a more detailed investigation, you'll find that it's `UnaryAsyncClient::EntrustRequest()::this->m_reader->Finish` that made the watershed, probably because it would trigger the broadcasting process to the followers which further bring a performance drawback. So the **bottleneck is on the grpc framework itself** under my experiments. Better practices for how to utilize grpc is needed. + + diff --git a/doc/developer_guide.md b/doc/developer_guide.md new file mode 100644 index 0000000..3c893fa --- /dev/null +++ b/doc/developer_guide.md @@ -0,0 +1,1015 @@ + +# Developer guide + + +## Table of Contents + +* [Architecture Overview](#architecture-overview) + * [Overview](#overview) + * [About the RPC](#about-the-rpc) + * [RPC framework](#rpc-framework) + * [Asynchronous in GRPC](#asynchronous-in-grpc) + * [RPC interfaces](#rpc-interfaces) +* [Details Design](#details-design) + * [Data Structures](#data-structure) + * [Trivial lock double list](#trivial-lock-double-list) + * [Trivial lock single list](#trivial-lock-single-list) + * [Lockfree hash](#lockfree-hash) + * [Lockfree deque](#lockfree-deque) + * [Lockfree MPMC queue](#lockfree-mpmc-queue) + * [Lockfree priority queue](#lockfree-priority-queue) + * [Basic workflow](#basic-workflow) + * [Follower workflow](#follower-workflow) + * [Leader workflow](#leader-workflow) + * [Connection pool](#connection-pool) + * [Client pool](#client-pool) + * [Workflow anatomy](#workflow-anatomy) + * [Asynchronous Framework](#asynchronous-framework) + * [Threading model](#threading-model) + * [Parallel Replicating](#parallel-replicating) + * [CGG problem](#the-cgg-problem) + * [Lifetime management](#lifetime-management) + * [Global Timer](#global-timer) + * [About The Binlog](#about-the-binlog) + * [Meta data](#meta-data-part) + * [Data part](#data-part) + * [Supported operations](#supported-operation) + * [Storage Layer](#storage-layer) + * [GC](#storage-gc) + * [Election](#election) + * [The basics](#the-basics) + * [About 
prevoting](#about-prevoting) + * [The live lock problem & solution](#the-live-lock-problem--solution) + * [Membership Change](#membership-change) + +* [Contributing](#contributing) + * [Future Work](#future-work) + * [Naming Conventions](#naming-conventions) + * [Unit Test](#unit-test) + * [Development history](#dev-his) + + + +## Architecture Overview + +### Overview + +As you can see in the [README](../README.md) file , aurora's cluster structure has nothing special than a +typical raft cluster ,it has a leader exposed to the clients serving their read & write(only +[blind writing](https://en.wikipedia.org/wiki/Blind_write) is supported for now) requests. + +![sys_arch](../doc/images/system_architecture.png) + +### About the RPC + +#### RPC framework + +The RPC framework for each running node is [google's grpc](https://github.com/grpc/grpc), main benefits enjoyed by choosing it from the others are : +* taking the advantage of the event-driven asynchronous IO model. +* utilizing multi cores with its multithreading model which is even customizable. +* the sophisticated serialization solution : [protobuf](https://github.com/protocolbuffers/protobuf). +* rich documents come in many ways. + +If we choose the grpc as the underlying network library , one important thing should be clearly understood, that is the difference of its `synchronous model` and `asynchronous model`. In short: +* synchronous : user cannot manage the application level threads. +* asynchronous : It's user's duty to control the application level threads. + +In the synchronous model, the fixed threading model makes programmers' life easier by telling them that you just need to fill in the virtual RPC interfaces. This is pretty good in many ways, but not for all. + +In the asynchronous model, by allowing the programmers to customize the threading model, a grpc node can be fully event-driven whereas the synchronous model cannot. 
This is further achieved by **utilizing the `CompletionQueue`**. [This talk](https://groups.google.com/forum/#!topic/grpc-io/DuBDpK96B14) is very +helpful and worth reading carefully. For more information about how the grpc core's polling mechanism works, look at [this](https://github.com/grpc/grpc/blob/master/doc/core/epoll-polling-engine.md). + +#### Asynchronous in GRPC + +There are not many materials telling people how to design an asynchronous server except [the simple official example](https://grpc.io/docs/tutorials/async/helloasync-cpp/). It's time-consuming work to find out the correct way of writing an asynchronous c++ grpc server, especially for the *bidirectional streaming* version. Talking about it in detail is beyond this topic, here are some examples one can consult when they want to have a basic understanding of how it works: +* a simple asynchronous grpc c++ server, say *SA*, check [this](https://gist.github.com/ppLorins/d75e60dbbbbd84e218928d9fe6781869). +* a bidirectional stream asynchronous grpc c++ server, say *SB*, check [this](https://gist.github.com/ppLorins/6e4cc625c2c5b8fd16ced3172b1ada09). +* a mixed asynchronous version of *SA* and *SB*, check [this](https://gist.github.com/ppLorins/d3392111992097c68454e9381d53b446). +* a proxy server for forwarding the unary and bidirectional stream request to *SA* and *SB*, this is the most complicated one for it demonstrates how to do client reacting together with server reacting, and this is exactly how aurora's leader works, check [this](https://gist.github.com/ppLorins/6b492beadecd07eddcd0b548ea0d12c9). + +And there are several implicit behaviors(or even pitfalls) you need to pay attention to when developing, here is a list of them: +* `CompletionQueue` delivers messages to the application in the reverse of the order in which the client entrusted them, check [this](https://github.com/grpc/grpc/issues/19658).
This can lead to messages arriving out of order, increasing the possibility of `AppendEntries` timeout on the follower side since the order plays an important role in the whole `Log Replication` process. +* c++ async client unexpectedly times out if I don't call `CQ::Next()`, check [this](https://github.com/grpc/grpc/issues/19573). The downside of this behavior is that it forces us to fetch the RPC result ASAP when there is a result waiting to be fetched, otherwise a timeout error would be thrown by grpc even though no timeout has actually happened. To adapt, dedicated CQs and threads for client reacting are introduced. +* calling the server side's [grpc::ServerAsyncReaderWriter< W, R >::Finish](https://grpc.github.io/grpc/cpp/classgrpc_1_1_server_async_reader_writer.html#a75342152acd961b7fcf1317bec0b8c3a) interface will trigger two notifications immediately on the server side, check [this](https://github.com/grpc/grpc/issues/17222) and [this](https://github.com/grpc/grpc/issues/19159), I suppose their meanings might be: + + * First notification: the final status set by the server has already been sent to the wire. + * Second notification: the stream has already been closed now. + + Thus if there are multiple threads polling on the same CQ, synchronization between them is needed. + +#### RPC interfaces + +The RPC interfaces are defined in [the protocol file](../src/protocol/raft.proto), where you can see all the internal logic between the leader and followers, here is a brief summary of them: + +* For Client & Admin : + * `Read` : for client invoking , read request. + * `Write` : for client invoking , write request. + * `MembershipChange` : for admin invoking , topology changing request. + +* For Log Replication : + * `AppendEntries` : Log replication 2PC's phaseI. + * `CommitEntries` : Log replication 2PC's phaseII. + * `SyncData` : re-sync all data from leader to follower, this will erase all existing data first on the follower side .
+ +* For Election : + * `PreVote` : an additional step before starting the real election process to prevent few false-failure nodes from disrupting the entire cluster by issuing new rounds of election. + * `Vote` : candidate requesting votes from the others. + * `HeartBeat` : doing heartbeat. + +* For Membership change: + * `PropagateMemberChange` : membership change phaseI. + * `MemberChangeCommit` : membership change phaseII. + +## Details Design + +This section describing the fundamental concepts and components. + +### Data Structure + +Since `grpc` is multi-threaded, the business threads should considering the thread-safety problems for almost all of its operations , therefore some lockfree data structures must be introduced to fit the needs. + +*Note : Nearly all of the code for CAS operations in Aurora are using the `std::atomic::compare_exchange_strong` api instead of `std::atomic::compare_exchange_weak`, exchanging for readability with little acceptable performance payment.* + +#### Trivial Lock Double List + +This is a doubly linked **sorted** list. It support two primary operations: +* Insert : insert at a proper position based on the new node's value. +* Cuthead : find the longest continuous sublist from head and cut it off from the list containing it.(This demand is the most peculiar one out of all the data structure operations.) + +Relationship between the two operations: + +* Insert vs Insert is lock free. +* Insert vs Cuthead is lock free. +* Cuthead vs Cuthead need to be exclusive from each other, this is where locking exists and the name prefix `trivial` comes from. + +The `TrivialLockDoubleList` always having a `Head Node` representing the minimum value of the template type of the list and a `Tail Node` representing the maximum value. This sets both the lower and upper boundaries for a certain list instance, getting rid of dealing with corner cases for inserting and cutting head operations. 
+ +We'll see how the lockfree semantics are implemented step by step : + +##### 1. Inserting at different positions. +This is the simplest and most common case , there is no need to worry about multiple threads doing this at the same time, since no conflicts can exist : +![inserting at different positions](images/list-insert-1-1.png) + +Insert operations in this case will get finished correctly and independently : +![inserting at different positions](images/list-insert-1-2.png) + +##### 2. Inserting at the same position. + +Think about how to deal with the following case in a lockfree way ? +![inserting at the same position](images/list-insert-1-3.png) + +I believe plenty of solutions might have been invented although I didn't consult any of them: I figured out my own version and the answer is not as difficult as you might think if you follow these rules: + +###### R1. Steps of a race condition operation must each *strictly follow the same order*. +###### R2. Repeat the operations from the right place when conflicts are detected. + +*Note: The above two principles apply not only to `TrivialLockDoubleList` but also to all of the other lockfree data structures in aurora.* + +Taking a closer look at it : + +* **Step 1**, make the `pre` and `next` pointer of the new node pointing to its neighbours. E.g. `Node 13`'s `pri` and `next` pointer pointing to `Node 11` and `Node max` respectively in the above picture. + +* **Step 2**, the previous node's `next` pointer switching its target from the next node to new node(CAS). E.g.
`Node 11`'s `next` now pointing to `Node 15`, switched from pointing to `Node max`: + ![insert 1-4](images/list-insert-1-4.png) + + * If the above CAS fails,meaning there are other threads are **doing exactly the same thing at the same position**, conflicts are detected and the thread inserting `Node 13` will start **iterating from `P_NEXT` again**, conforms [R2](#R2) : + ![insert 2-1](images/list-insert-2-1.png) + + * It could fail for a second time for the thread inserting `Node 13` when retrying: + ![insert 2-2](images/list-insert-2-2.png) + but doesn't matter, just repeat until succeed. + +* **Step 3**, the next node's `pri` pointer switching its target from the previous node to new node(CAS). E.g. `Node max`'s `pri` now pointing to `Node 15`, switched from pointing `Node 11`: +![insert 2-3](images/list-insert-2-3.png) + + * After that, the thread inserting `Node 13` will recognize that current position is not correct anymore(by founding 15 > 13 ), therefore it goes ahead toward `Node min`'s' direction and will eventually find that the new right position for `Node 13`: between `Node 11` and `Node 15`, deciding to insert `Node 13` there. + + * This picture also illustrated that by following the above steps **all the conflicts among those threads will eventually get resolved** as the involved threads will either : + + * **win a CAS contention** : so it can proceed on or + * **fail a CAS contention** : it will retry and letting others go ahead. + + Neither of the two behaviors will result in an invalid coming out, the only downside is on performance : the cost of retrying. + +This achievement can be generally named as `thread re-conciliating` as the result of following [R1](#R1) and [R2](#R2), and is the **essential factor of how threads could cooperating correctly with each other**. + +Finally, reconsidering for a while about the above procedure, there is no point of view where race +condition could happen, also thanks to the CAS semantics. 
+ +*Note : Understanding the knowledge described above, especially [R1](#R1) and [R2](#R2), is critical for reading the following data structure sections.* + +##### 3. Inserting vs cutting head + +###### 3.1 What is cutting head + +Cutting head means taking the nodes that are continuous counted from the first one off from the list, leaving the remaining elements as the new list, for example, assume we have a list instance like this : + +![cuthead 1-1](images/list-cuthead-1-1.png) + +starting from `Node 4`, `Node 4` & `Node 5` & `Node 6` are continuous , cutting head will take them off from the list, leaving `Node 9` alone inside the original list : + +![cuthead 1-5](images/list-cuthead-1-5.png) + +###### 3.2 General steps + +Following [R1](#R1), cutting head operations all share the same order : + +* **Step 1**, set the pointers with CAS like this, this is mainly for clarifying the slicing point (in this case between `Node 6` and `Node 9`): +![cuthead 1-2](images/list-cuthead-1-2.png) Since the `CutHead` operations are exclusive from each other, no second `CutHead` operation can be issued by any other thread, only under this protection can we move further. + +* **Step 2**, change `Node min`'s `next` pointer's target from the first node(`Node 4`) of the list to the first nonadjacent node(`Node 9`) of the list: + +![cuthead 1-3](images/list-cuthead-1-3.png) + +Note that this CAS operation(modifying `Node min`'s `next` pointer from `Node 4`) could fail due to the fact that there may be threads trying to insert a node between `Node min` and `Node 4`, the solution for this case is : + + * First, revert the modified `next` pointer of `Node 6` to its previous position (pointing to `Node 9`). Checking this operation as **must succeed** otherwise something has gone wrong. + * Second, [recursively] start a new `CutHead` operation all over again.
+ + Same for the CAS operation of inserting `Node 3`: It could also fail when modifying `Node min`'s `next` pointer from `Node 4` in which case a new insertion will be recursively triggered, since you can't insert a node to a already cut off list, and `Node 3` will eventually being inserted between `Node min` and `Node 9` as is in the original list. All the recursive invocation are incurred by conflicts operations happened at boundaries, so we call this kind of problem as `boundary trouble`. We'll see similiar scenarios later in the `TrivialLockSingleList` section. + +* **Step 3**,after **Step 2** succeed, change `Node 9`'s `pri` pointer from pointing to the last continuous node (`Node 6`) to pointing to the `Head Node` : +![cuthead 1-4](images/list-cuthead-1-4.png) + + * If the third CAS operation succeed, congratulations, your luck is not that bad and got the `CutHead` operation almost accomplished with the final step of modify `Node 4`'s `pre` pointer to pointing to null and get the result like this: +![cuthead 1-5](images/list-cuthead-1-5.png) + + Yet there is still a situation where the third CAS operation could fail, but this time I'm not going to tell you the answers right now, trying to imagine it out yourself, doing a practice and having a break :). 
+ + *one minute later...* + + *two minutes later...* + + *X minutes later...* + + *Okay, now I suppose you've really tried :).* + + * Thinking about a scenario where the last continuous node (`Node 7`) is the node that being inserted by another thread at the moment we're doing the third CAS and that insert operation is not completed yet : + ![cuthead 2-1](images/list-cuthead-2-1.png) + + We can extend the continuous list to containing `Node 7` even it hasn't been completely inserted since the thread which is doing the insertion **MUST** have modified its previous node (from the insertion point of view which is `Node 6`)'s `next` pointer to pointing to the new node (`Node 7`) from pointing to the first nonadjacent node (`Node 9`), yet hasn't modified the first nonadjacent node (`Node 9`)'s `pre` pointer from pointing to its previous node (`Node 6`) to pointing to the new node (Node 7) : + ![cuthead 2-2](images/list-cuthead-2-2.png) + + *Although making that sounds like a tongue twister,I still would like to describe it in a formal way, once you are confusing about the description, just ignore it and look at the picture above.* + + In this case, the third CAS operation would fail. But it's quite simple to cope with : the cutting head thread just looping on the CAS, it will get finished after the inserting operation has finished : + ![cuthead 2-3](images/list-cuthead-2-3.png) + + Okay, we've done cutting head from a list now, finally! The result will the same as expected: + + ![cuthead 2-4](images/list-cuthead-2-4.png) + +###### 3.3 The node lossing problem + +Okay, here comes the last one issue we should consider about: there may have unfinished insertions even **after** we've successfully cut some nodes off from the original list: + + ![cuthead 2-5](images/list-cuthead-2-5.png) + + If we start using(iterating) the cut off list immediately, we may lost `Node 4.5` and `Node 5.5` which are not yet finished inserting, this is certainly unacceptable. 
The simplest workaround is to just sleep for a little while (~1-9us), long enough for the threads to get their job done. But we can't define a proper value for it, which makes this kind of solution inelegant. How aurora copes with it is a little bit complicated: + + * the list maintains a Hash with the inserting thread's id as the key and a flag as the value; the flag indicates whether the thread is inserting something or not at the moment. + * every inserting thread will set the flag before it starts inserting and erase it after it has finished inserting. + * before `CutHead` returns, the thread will wait for the flags to be all cleared in a spin manner (with `std::this_thread::yield` enabled to reduce cpu wasting). + + This ensures that the `CutHead` function can return only after all the inserting threads are done with their jobs. But this solution is still not good enough: there can be threads (say `threads X`) inserting into the original list (not the cut-off list) while the `CutHead` thread is waiting, and `threads X` could prevent `CutHead` from returning forever: + + ![cuthead 2-6](images/list-cuthead-2-6.png) + + So we have to distinguish the threads which are truly relevant to the cut-off list from `threads X`. But how can we achieve this? Well, note that the value being inserted by each inserting thread is the best factor for this: we can judge whether a thread is relevant by comparing its value with the last element in the cut-off list. Now the Hash's value isn't a flag anymore; it has become the value being inserted.
However, since the value is a template one, recording (copying) it may have variable overhead, so aurora makes a compromise : it records a snapshot (a pointer to the node immediately after `Head Node`) of the list when a thread starts inserting, rather than the value being inserted. This solution will still prevent most of the `threads X` from disturbing the cutting head operation, which is good enough for us even though it may have a `false positive` (incorrectly judging some irrelevant threads as relevant) effect in some cases. + + *Note: the hash used here is also lockfree, which will be introduced shortly.* + +###### 3.4 Recap + +By following [R1](#R1) and [R2](#R2) and enjoying their positive consequence of `thread re-conciliating`, conflicting operations like `insert` and `cuthead` can run correctly, thus implementing the lockfree semantics. And this is all there is to lockfree in aurora. + +##### 4. Deleting from the list. +There are two types of deleting elements from a set : +* physically : the element no longer exists in the set, cannot be reached by iterating, and disappears from memory. +* logically : the element still exists in the set, can be found by iterating, but with a `deletion flag` set to true. + +It's difficult to understand the correctness of physically deleting in a lockfree way, let alone implementing it. Aurora chooses the easier approach : logically deleting. All you need to keep in mind is that : +* newly inserted nodes have the `deletion flag` set to false. +* delete operations will set the node's `deletion flag` to true. +* deleted elements are ignored when iterating over the list. +* If deleted nodes are encountered when doing `insert` or `cuthead`, treat them as non-deleted. + +The last rule will not incur errors; without it we could not easily decide the right position at which to `insert` or `cuthead`.
+ +#### Trivial Lock Single List + +It would be much easier to understand this other kind of list, `TrivialLockSingleList`, if you've already got a correct comprehension of all the above things about `TrivialLockDoubleList`. The demonstrations will be kept brief since the design ideas for these two kinds of list are basically the same; you can figure out everything by applying the rules of [R1](#R1) and [R2](#R2). + +##### 1. Inserting into the list. + +![singlelist-1](images/singlelist-1.png) + +Conflicting insertions like those between `Node 9` and `Node 10` will be resolved by allowing only one of the insertions to succeed, under the constraint of CAS. + +##### 2. Cutting head from the list. + +![singlelist-2](images/singlelist-2.png) + +Nothing special to emphasize, but note the [boundary-trouble](#boundary-trouble) and [node-lossing](#node-lossing) problems also exist here. + +#### Lockfree hash + +This hash supports two types of usage: +* unary container, like `std::unordered_set<>`. +* binary container, aka the kv semantics, like `std::unordered_map<>`. + +Compared to `TrivialLockDoubleList`, `LockFreeHash` is simpler : +![hash](images/hash.png) + +If you have basic knowledge about hash, you can understand this easily. + +##### 1. inserting into hash +* find the right slot by key. +* insert the new element at head in a CAS manner. + +##### 2. deleting from hash +* find the right slot by key. +* traverse through the list, setting the required element's `deletion flag` to true if found. Otherwise do nothing. + +##### 3. finding from hash +* find the right slot by key. +* traverse through the list, find the node whose key is equal and whose `deletion flag` is false. + +Easy peasy japanesey! + +##### 4. the mapping operation +There is a special operation called `map`: it iterates over the hash slots from left to right, and within each slot it iterates from top to bottom, executing the given function (e.g., increasing every element's key by 1) on every element in the hash.
But the function could modify the key, changing the position where the node should be, thus the modified node probably needs to move from one slot to another. This is done by first deleting the node at the old position, then inserting it at the right new position. + +There is a side effect of moving a node from one place to another : the node could potentially be executed by the function more than once, which is probably not what the caller wants. For example, in the above picture, `Node kx` will be executed for the first time when the iteration reaches `slot 3`; its key gets modified (say the new key is `kx'`) by the function, the new position is calculated to be `slot 13`, and `Node kx'` is moved (first deleted then inserted) to that place. So far so good. But when the iteration reaches `slot 13`, it doesn't know that `Node kx'` actually comes from an old node and has already been executed once, leading to `Node kx'` being executed again, moving to a new slot after `slot 13` and being executed a third time, repeating again and again. + +To get around this trouble, a `tag` is added to each node, acting as a `traverse ID`; it's initialized to `0` for each newly inserted node. When a new `map` operation is issued, a random value, say `X`, will be generated to uniquely identify this iteration. `X` will be assigned to every moved node, and nodes whose `tag` equals `X` will be recognized as already executed during the current iteration. Thus no node will be executed more than once within one `map` operation. + +##### 5. Downside + +The downside of this design is wasted memory : **the deleted nodes never get physically freed, so memory usage will be monotonically increasing**. In aurora it's acceptable since the hash is used for caching some data (like meta data in binlog & sstable) that will be released at a certain point, and the hash object will therefore be released together with it.
But if you plan to use the hash in some other scenarios, pay attention to its memory downside. + +#### Lockfree deque + +The name of `deque` is for history reasons, it's actually a MPMC queue in a variant single linked list form.I failed to figure out an elegant name for it, so just let it go. Not everything is perfect in this world, say nothing of a name. + +Okay, MPMC is short for `multiple producers and multiple consumer`, usually used in the context of talking about lockfree stuff. They are many articles about this, and I believe you might have ever investigated it before, if you are confident to know what it is, just skip this section. + +The deque is initialized like this: + +![deque-push-1](images/deque-push-1.png) + +A dummy node always be there with both head and tail pointer pointing to it.Producing and consuming are the two basic operations for any MPMC implementations. + +##### 1. Producing + +Since this is a list, we append the new elements wrapped in a unified node to the end of the list, absolutely in a CAS manner, again. + +![deque-push-2](images/deque-push-2.png) + +In this picture, different color of lines stands for different threads, remember [R1](#R1) and [R2](#R2) ? I suggest taking a looking back if you forgot them, the two fundamental rules will be applied always the time. + +The order of producing for each thread is : +* allocate a new node with its tail points to the dummy node. +* take a snapshot of the current tail. +* try to append the new node to that snapshot tail. + * if succeed, lucky dog, producing is done. + * if failed, means some other threads already successfully inserted, current thread needs to redo the operation from a proper restarting point. + +This quite straight forward, after all threads finished their jobs, the deque will be look like this: + +![deque-push-3](images/deque-push-3.png) + +A little weird? Looks more like a ringbuffer in a list form? 
Yes, it is, **a ringbuffer without capacity limitation, a ringbuffer with a dummy node indicating its empty status**. + +##### 2. Consuming + +The basic idea of consuming is very like that of producing, and keep in mind that the dummy node is a boundary. + +![deque-pop-1](images/deque-pop-1.png) + +The overall process of consuming is trying to move `dummy->next` to its next one by one. Details will not be explained, reference the counterpart of producing, they are basically the same. + +##### 3. About deque's wait-free +This is the tricky part of `LockFreeDeque`: To achieve the [wait-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm) semantic as much as possible, there is no boundary between the nodes being produced and the nodes being consumed. Therefore, a node can be immediately consumed so long as it emerged on the list regardless whether the `tail` pointer has passed over it or not, and a node can also be immediately produced once it became producible(empty), vice versa. This is an aggressive strategy, a double-edged sword, it pushes us more close to `wait-free` but also has its own downside: we cannot freeing a node just after it has been consumed since the node may still being used by other threads which are trying to move the `tail` cursor forward. The freeing operation has to be deferred: we first push the consumed node into a `garbage list`, a dedicated thread will polling periodically from it and the node be physically freed later. + +The `deferred freeing` procedure can also be found in the `TrivialLockDoubleList`. + +#### Lockfree MPMC queue + +This is the standard MPMC queue in a ringbuffer form. Also, for achieving the [wait-free](https://en.wikipedia.org/wiki/Non-blocking_algorithm) semantic, a `working status` is introduced to each slot. There are several materials explaining the relationship and difference between `lock-free` and `wait-free`, talking about it is beyond the scope. 
Simply speaking, the `wait-free` semantics can be easily implemented on one side (producing or consuming), but are hard to achieve on both sides. With the help of the `working status` the queue gets very close to it. + +Working status for a slot: +* **Empty**: this is an empty slot (the initialized status), producing inside it is welcomed. +* **Produced**: this slot is filled, consuming is welcomed. +* **Producing**: this slot is being filled but is not filled yet; if a thread wants to consume, it must wait for the resource to be available, aka to switch to the `produced` status. +* **Consuming**: this slot is being consumed but consuming is not finished; if a thread wants to produce, it must wait for the slot to be empty. + +The initial state of the queue is like this, imagine its head & tail are connected as a ring : + +![queue-1](images/queue-1.png) + +##### 1. Queue Producing + +* judge if the queue is full. +* move head to the next position in CAS manner. + * if failed: + * not due to full, retry at a proper position like before. + * due to full, return. + * if succeeded: + * wait if the `working status` of the slot just occupied is not `Empty`. + * set the `working status` of the slot to `Producing`, in CAS. + * producing is a simple pointer set operation, just do it. + * set the `working status` to `Produced` in CAS, allowing consuming operations on this slot. + +![queue-2](images/queue-2.png) + +One possible state of the queue is described by the above picture: producing threads pushed the head of the queue quickly to `slot 11`, leaving slots [0-10] behind, each in one of three possible statuses: + +* empty : just after pushing head, haven't changed the slot status. +* producing : changed the slot status, setting the pointer now. +* produced : finished producing. + +**All three statuses are valid to the thread consuming this slot**; it just waits if the status hasn't been changed to `produced` by the producer. After the waiting, it starts its own consuming.
The behavior of P & C will making slots' status looks like being randomized. + +##### 2. Queue Consuming + +Consuming is just the counterpart of producing with very similar behaviors, the above knowledge for P is enough for understanding C. There is just a slight difference about judging the boundary, check it on the code. + +Consuming: + +![queue-3](images/queue-3.png) + +##### 3. About queue's waitfree + +As previously said, `wait-free` is achieved on unilateral side: +* produce : no threads have to wait until some other threads to finish producing, CAS pushing head to advance quickly. +* consume : no threads have to wait until some other threads to finish consuming, CAS pushing tail to advance quickly. + +For both sides, it has been very close to `wait-free`: + +* produce-consume : consumers can always do consuming once a slot become available to be consumed with only one exception that, as showing in the above picture, some consumers have to stuck on `Node 6` & `Node 7` & `Node 8` to wait for their availability whereas `Node 9` & `Node 10` are already available for them. And this will not happen, if there are enough consumers. This is what `very-close-to` mean. + +#### Lockfree priority queue + +This is just a combination of a fixed size of the above MPMC queues with each representing a specific task type. It's an array for iterating by multiple PC threads. + +![pri-queue](images/pri-queue.png) + +--- +This is all about the data structures in aurora. + +#### Basic workflow + +*Note: The `workflow` in the next few sections is meaning for the `writing workflow`.* + +Like the example shown in the [official document](https://grpc.io/docs/tutorials/async/helloasync-cpp/), aurora also wraps each request and its related variable & functions into a class called `RequestBase`. A concrete request class derives from which subclass of `RequestBase` depending on what kind of reqeust it represents. 
The basic hierarchy is: + +![reqeust-classes-hierarchy](images/hierarchy.png) + +Leaf nodes are the classes represeting a concrete requests, their multi-level parents contain basic logics about how to complete a RPC. + +There are three main components in the asynchronous mode: +* grpc [CompletionQueue](https://grpc.github.io/grpc/cpp/classgrpc__impl_1_1_completion_queue.html), the fundamental reactor, all event driven mechanisms are based on it, there can be many instances serving for the server at the same time. +* The wrapped class of `RequestBase`, each request will entrust its IO intent to one of the CQs. +* The threads polling on the CQ, these are the working threads and are the limited resources worthy to be treasured. + +In aurora's implementation, the most complicated part is the process of how to write a value to the state machine, obviously it's also the core component of the raft protocol. We'll take a deep look at the whole process. + +#### Follower workflow + +First, let's see how does the follower dealing with the `AppendEntries` requests: + +When an appending log request comes, the working thread first compare its `previous entity ID`, says `pre_id` with the `Last Replicate LogID`, says `LRL`, of the binlog: + + * If `pre_id` < `LRL`, means a log conflict, starting the log reverting process. + * If `pre_id` > `LRL`, an out of order request has just come, pushing it into a `pending_list` which is a `TrivialLockSingleList` and designed to hold such kind of requests. + * If `pre_id` = `LRL`, good, this is the ideal case, cut all the entries in the above `pending_list` which are smaller than the current request's logID and inserting them to another list, says `committing_list`, finally appending them together to the binlog tail. + +The `committing_list` is designed for holding the logs which are succesfully appended but not yet committed for the commit phase. The committing operation will be done when a commit request arrived later. 
+ +But how to deal with the out-of-order logs in the case of `pre_id` > `LRL`? Well, there are dedicated threads (say `Thread-X`) waiting for new elements to be pushed into `pending_list` and then checking whether any elements in the list satisfy `element's log ID <= LRL`, which means the request has already been properly processed; if some do satisfy the requirement, the result for that request is returned. Since there are multiple threads doing this work, some CAS-based control flags are introduced. The weird thing here is that the request is processed by another thread instead of the thread which first adopted it: the duty of finishing the request has been taken over! Yeah, this is on purpose. As has been said before, the working threads are the most valuable resource; we cannot let them wait on anything, otherwise the whole server's performance will easily get degraded or even stuck, the last thing we want to see. + +*Note: The above check for the condition `element's log ID <= LRL` reveals another important fact: the status of the binlog, more specifically the `LRL`, is the only criterion that should be used for judging whether a request has been successfully processed or not, because it stands for the persistence of a request.* + +![follower-iterating-thread](images/follower-iterating-thread.png) + +Okay, after the iterating thread has found that some requests can be returned, it will invoke the corresponding methods wrapped in the request objects, and then start a new round of waiting. Here is a diagram showing how the threads interact with each other: + +![binlog-coordinating](images/binlog-coordinating.png) + +*`Thread 1-3` are the front threads processing, `Thread-X` is the backend one.* + +Now, we've got a detailed view of how the follower works on appending log entries.
+ +#### Leader workflow + +Before we stepping further into how does the leader work, we need to first have a glance at the two kind of pools used by leader: + +##### Connection pool + +In grpc, a connection is represented by a distinguishable(different channel arguments) `::grpc::Channel` object. Since HTTP2 is multiplexed we can share one `::grpc::Channel` object among several `Stub` objects(which stands for a `stream` in HTTP2), here is [detailed](https://stackoverflow.com/questions/47022097/should-i-share-grpc-stubs-or-channels/56375224#56375224) discussion about this. + +And aurora maintains a configurable fixed number of connection between each pair of combinations. These tcp long connections will be reused each time when it's needed. + +##### Client pool + +Clients are used on the leader side, all the RPCs leader issued to the followers are done through them. They may share the same channel, the same `CompletionQueue`, and different clients can customize their own reacting procedures depending on their own RPC logics. It's a high frequently used resources in the view of leader, allocating each time we need it will bring heavy overhead, so they are organized into a pool. + +The pool itself is an object pool powered by `LockFreeDeque`, support general opertions `Fetch` & `Back` for a pool. One thing to note here is the lifetime management stuff: +* it takes part in its associated request object's lifetime mangement. +* after being fetched from the object pool, the client object could become an orphan(no one is responsible for its lifetime) and being released sometime if we don't extend its life. + +`Lifetime management` is an smart mechanism to extend an object's life and recycle it at a proper time. Go [there](#lifetime) to get the detailed explanations. + +##### Workflow anatomy + +Okay, for now, let's look at how does the leader serve writing requests. First we need to understand some basic concepts: +* `phaseX statistic` : a counter for the succeed/fail/... 
numbers in a replicating process and is maintained for each writing request. +* `implicitly fail` : a `false negative` case indicator, basically standing for the *timeout* case. +* `execution stream` : a series of jobs that need to be done for dealing with an `AppendEntries` RPC response. Each stream corresponds to a certain response; different streams can proceed in parallel in different threads. +* `determined result` : a majority result (whether success or failure) for a writing request, depending on which the leader can return a result to the client. + +First, the leader generates a GUID (globally unique ID) for each writing request, and then pushes the wrapped request into the `LeaderView::m_entity_list` which is used for holding the unfinished requests. It then starts the [parallel replicating](#parallel-replicating) phase, which issues an `AppendEntries` RPC to each of the followers and to the nodes which are trying to join the cluster at the moment but are still in a `JointConsensus` (not finished joining) state. The job of issuing requests is done by the `CompletionQueue` of grpc; something implicit here is that the CQ is driven by the application's threads, which means that the CQ will send the requests out on the wire only when there are some threads polling (invoking `Next` or `AsyncNext`) on it. + +*Digression: I just heard the classical `Dreamtale` from my sound*: +> As the last ship sailed toward the distant horizon, I sat there watching on a rock, my mind slowly drifting away, forming in to my dreamtale... + +*It just woke up memories which were left behind 10+ years ago, in that beautiful campus. Can't stop myself from writing this down...* + +Since the leader works in a *multiple thread* and *asynchronous* manner, the subsequent processing of responses for the above requests will be split into several independent `execution stream`s.
One thread can take on the obligation of executing more than one stream, as long as there are unadopted responses in the CQ when the thread comes back from executing one stream and polls on it again. + +The most complicated thing in the current workflow is how the threads work together; here is a diagram in time-sequence view to illustrate the above steps: + +![parallel-replicating](images/parallel-replicating.png) + +Before explaining the `execution stream` in detail, we'd better know some features and temporary variables that will be needed: + +* `phaseII_ready_list`: a variable which is used for holding the indicators of followers that need to do the committing jobs later. +* `permission flag` : a variable acting as a switch which ensures that only one thread can take it (by successfully finishing a CAS operation) and then step further to do the subsequent jobs. +* `group commit` : This is a feature commonly used in many database products, which is intended to reduce the overhead caused by frequently committing requests, especially in two-phase committing scenarios; the `Log Replication` here is just one of them. +So you'll see that not every successful `AppendEntries` response will trigger a corresponding `CommitEntries` later; they will get grouped into one `CommitEntries` request once a certain number has accumulated. + +The `execution stream` contains the following steps: +* update the `phaseI_statistic`. +* judge if the current write request could get a `determined result` after the above updating: + * `Yes` : try to get the `permission flag`: + * Got : step further. + * Lost : do nothing. + * `No`: judge if the number of logs which are replicated but not yet committed has reached the limit of `group commit`. + * not reached : do nothing. + * reached : push the current follower entity into the `phaseII_ready_list`.
+ +At this point, only one thread for a certain write request can reach here, and it's the one which will try to commit the written value to the leader's state machine. It will: +* entrust all commit requests gathered before. +* try to CutHead a list of logs off from the `entity_pending_list` which is in the global leader view. + * if nothing is cut off, it means another thread with a greater log has done this. Push the current write request to another list (say ListX) which is used for holding such kind of requests. + * if something is cut off, append them to the binlog and apply them to the state machine. + +*Note: if one log got majority confirmed, all its preceding logs must also be majority confirmed; this is guaranteed by the raft protocol, so ListX has a strong reason to wait for all its elements to be eventually appended to the binlog.* + +The ListX here is essentially the same as the [pending_list](#pending-listX) mentioned before in the [Follower workflow](#follower-workflow) section. There are also dedicated threads (say `Thread-X`) iterating over it at a high frequency, checking whether any entries satisfy certain conditions and returning a result to the client if some do. The interaction between the thread (say `Thread-Y`) which successfully appended items to the binlog and `Thread-X` is very much like that of the follower's : `Thread-Y` notifies `Thread-X` that the `LRL` has advanced and there may exist some requests that can be returned to the client. + +Ok, let's recap with an overview of what the whole `execution stream` procedure looks like: + +![replicate-callback](images/replicate-callback.png) + +Finally, I'd like to emphasize a special design for getting rid of [this implicit behavior of grpc](#client-timeout-if-dont-fetch) introduced before: in short, the client side CQ will report a timeout error if you don't fetch the result quickly.
What prevents us from fetching it out ASAP is that the polling threads may be busy with other things, with no time to take care of the about-to-timeout responses, and they also can't fetch them with a higher priority than the other tasks remaining in the CQ. Thus we have to provide dedicated CQs & threads to meet the special requirement of client responses: + +![polling-cq-thread](images/polling-CQ-thread.png) + +This is the design for the above purpose: the `Backend-CQ-*` & their corresponding threads (say `threads Y`) are **only** responsible for reacting to client responses, and not the other way around. Further, to keep the work of `threads Y` as simple as possible, which also contributes to fetching responses ASAP, `threads Y` will wrap the reacting info, by which the remaining work can be found, into a struct and push that struct into a [priority queue](priorify-queue) (say `PriQueue X`), so in the end it is the background threads (say `threads Z`) polling on the `PriQueue X` that will actually do the subsequent jobs. So the threads `F1-Fn` in the previous picture are not the threads directly polling on the CQ; they are actually `threads Z`, which is a little convoluted. + +### Asynchronous Framework + +#### Threading model + +Let's recap all kinds of threads in aurora server: + +* Grpc inner threads: related to grpc's inner design, we don't need to spend too much time on them. +* Working threads: as mentioned above, these are the real workers doing RPC jobs. All network io operations that need to be done within these threads are entrusted to a `CompletionQueue` and thus turned asynchronous. +* Background MPMC threads: these are the consumers quickly fetching jobs from the MPMC queue: + +![mpmc-thread](images/MPMC-thread.png) + +* Global timer thread: there is a special thread periodically doing the jobs registered in a heap, details are [here](#global-timer). +* Follower background thread : As described above, this is the dedicated thread for iterating over the out-of-order message list.
It plays an important role in the `AppendEntries` RPC. +* Leader background thread : the leader's dedicated thread for iterating over the `LeaderView::m_entity_list`, checking it and returning a result to the client if necessary. + +#### The CGG problem + +CGG is short for `Concurrent generating GUID`. In the raft protocol, each log entry has its own globally unique ID; either the `log index` or the `[log term + log index]` pair acts as the global factor. But what's the problem with CGG? Well, there is no problem with it under normal situations, but there will be problems if we hit a failure and the GUID still keeps increasing monotonically: consider that one log entry fails when replicating to the followers due to an unstable network; since the GUID increases monotonically, all its subsequent logs will get a greater ID, and these IDs will fail again, because raft forces all successfully written IDs to be continuous but their preceding one is lost. In this kind of scenario, **there will be no chance for the leader to recover, a single error would disable the entire cluster from serving new writing requests**. + +Thus we **MUST** have a mechanism to make the leader recoverable from a single failure, more specifically, to tune the `Last Released GUID` (say `LRG`) back to the last successfully replicated LogID if we detect a failure. + +There are two kinds of failures for the leader during replicating: +* `explicit failure` : the requesting side knows exactly that its request cannot be done on the receiver side, e.g., the receiver sends a msg back telling that the request is invalid, or the request triggered some unexpected logic on the receiver side and was therefore aborted. There are few cases of this kind of failure. +* `implicit failure` : the sender didn't get a response from the receiver within a certain period of time, resulting in a timeout error. This is the most likely kind of failure in a real production environment.
+ +The `implicit failure` is worthy to be taken care of carefully: we'd better not immediately telling the client that:"your write request failed" simply by encountering an `implicit failure`, because it may actually succeed: the response just arrived a little later than the timeout deadline but it indeed has been executed on the receiver side, this is a common `false negative` scenario for all the timeout errors. One way to mitigate the disruptions from it is that **making decisions only after the `LRG` got a determined result**. + +Why it relies on `LRG`? Let's see this: + +![cgg-1](images/cgg-1.png) + +There are four status for a GUID or a log entry in the leader side: +* Committed : applied to the state machine. +* Replicated : successfully replicated to the majority of the cluster but not yet committed. +* Sent but not confirmed : trying to be replicated to the majority, but hasn't been finished. +* Not Released : not yet generated. + +And according to raft safety guarantee, we can conclude that : + +**1> a failed log means all its succeeding logs also failed** : + +![cgg-2](images/cgg-2.png) + +In this case, we cannot say that the preceeding undetermined logs(2 & 3) also failed just because of `log4` failed. Threads processing `log 2 & 3` will indeed waiting on a condition variable(CV) which will be notified after `LRG` resolved to determine its own result. + +**2> a succeeded log means all its preceeding logs also succeeded** : + +![cgg-3](images/cgg-3.png) + +If we can be sure that the `LRG` succeeded, then all its preceeding undetermined logs can be also deducted to succeed. 
+ +So, to determine the result for the logs in the `Sent but not confirmed` state, we can firstly determine the last log's result, this process is also called `LastLogResolve` in aurora: + +![cgg-4](images/cgg-4.png) + +As said before, this strategy cannot ensure that all the `implicit failure` logs could get an accurate judgement on whether it's failed or not, but it can help reducing the `false negative` cases. + +So the final steps dealing with an `implicit failure` are: +* Step1. Set server status to `halted` to stop receiving new incoming requests, this is to stop server from releasing new GUIDs. +* Step2. Waiting for the last GUID to be generated. +* Step3. Waiting for the log with the last released ID to be resolved. +* Step4. Determine the failed log final result by comparing the `LRL` with current logID: + * `current_log_id` > `LRL` : return a negative result to clients. + * `current_log_id` < `LRL` : the current log actually succeed, return a positive result to clients. + +The reason for `Step2` is that there is a tiny time window where the server status has been set to `halted` but the `LRG` hasn't been generated due to the multiple thread environment. Thus we need to sleep for a little while to get the real *last* one when need it. + +For `Step3`, there is a `gap` variable indicating how many requests there are between `LRL` and `last GUID` at the moment server status is set to `halted`, and another variable `gap_processed` which stands for the #requests that have already been processed(whether successfully or unsuccessfully) within that section. + +And the `waiting` operation means waiting in a spin manner for the following condition to be satisfied: `gap_processed` >= `gap`. Adding that both `gap_processed` and `gap` are relatively accurate values, we can't make them absolutely right because of the simultaneous way of working. 
So there is a patch mending the inaccuration: waiting for a configurable value to ensure the `gap_processed` >= `gap` condition finally matched. This kind of waiting is absolutely not a perfect solution, but it's still acceptable considering the time already spent on dealing with the CGG problem. + +(~~waiting on a CV with a reasonable timeout value after which elapsed the leader will just return a negative result to the client anyway. The waiting condition on the CV is very like that in the [leader & follower's dedicated thread](#bg-thread) : waiting for the `LRL` to be advanced over its current logID~~). + +#### Lifetime management + +There are scenarios where we need to extend the lifetime for certain objects : + +* `Write` represents for the request that are currently being processed by the leader. There will be a result return to the client immediately after the request get a majority result, but there are still other jobs to do at the moment, like finish the replication to the remain followers and start phaseII when necessary. All these subsequent jobs need a living `Write` object. +* `Write` & `AppendEntries` both need a living object to finish the `checking & return` related logic in the backend thread. +* some clients like `AppendEntriesAsyncClient` & `CommitEntriesAsyncClient` need to callback into the corresponding living request objects. +* some background tasks like `CutEmptyContext` & `DisorderMessageContext` need the request context of the iving request objects. + +The simplest way to achieve life extention is to use a `shared_ptr`, besides the automatic extending of object's lifetime we can also facilitate the thread safe property of its control block, which is great helpful in the multi-thread environment. 
There is one thing need to pay attention to : we must have an initiate `shared_ptr` object to hold the ownership and acting as the original assigner for the first ownership copy request, since there is no fixed originations to serve such kind of requests and play the role as a sustainable holder. + +The solution for the initiate `shared_ptr` is to group the lifetime management logics into a wrapper of `OwnershipDelegator`, some core concepts: +* the inherited classes will share the management right with the wrapper. +* the ownership can be copied out as a form of extending the object's lifetime. +* the inherited classes can either be destructed by its own destructor or by the wrapper, but **NOT** both. + +The basic structure of the wrapper looks like this : +```c++ +template <typename T> +class OwnershipDelegator { +public: + + OwnershipDelegator() { this->m_p_shp_delegator = new std::shared_ptr<T>(nullptr, [](T *p) {}); } + + virtual ~OwnershipDelegator() { delete this->m_p_shp_delegator; } + + void ResetOwnership(T *src) noexcept { this->m_p_shp_delegator->reset(src); } + + void ReleaseOwnership() noexcept { this->m_p_shp_delegator->reset(); } + + std::shared_ptr<T> GetOwnership()noexcept { return *this->m_p_shp_delegator; } + + void CopyOwnership(std::shared_ptr<T> from)noexcept { *this->m_p_shp_delegator = from; } + +private: + std::shared_ptr<T> *m_p_shp_delegator = nullptr; +}; +``` + +#### Parallel Replicating + +This is a review and re-emphasize on all the above techniques applied to enhance the parallelism and further the throughput. Parallel replicating is the most complicated part of `Log Replication`. To enhance parallelism as much as possible, taking advantage of multicore is imperative: leader duplicating the writing requests to the followers as fast as it can, saturating the downstream nodes, after which the bottle neck of throughput should lie on the disk operations, theoretically. 
+ +*Note: After got an unexpected [benchmark result](https://mail.google.com/mail/u/0/#sent/KtbxLxgKJJPkwvGplJvRNNrQlPCVDjKDGV) of grpc, the actual bottle neck is now on grpc itself.* + +Some great efforts have been made to approach this goal: +* use an asynchronous mode of grpc. +* support processing disorder message on the follower side. +* make the processing for write requests as independent as possible. + * several lockfree data structures are introduced. + * free the working threads when they are about to do synchronous operations(typically are the waiting(on CV) operations for `LRL` to advance) by using dedicated background threads to finish the inevitable intersection parts of the processing. +* some general optimizations: + * use a channel(connection) pool & client pool to keep long connections between leader and follower. + * avoid memory copies as much as possible. + +### Global timer + +There are some background jobs like sending heartbeats to follower, checking heartbeats from leader, garbage collecting, etc, that need to be periodically executed, thus a dedicated global timer thread is introduced for this purpose. + +The task itself is represented with three fields: + +* a timestamp indicating the next time it should be executed. +* an interval_ms representing the intervals between each execution. +* a function object describing what the task actually is. + +Tasks are organized as a *min heap*, and the task that is the most recent to do will be located on the root of the heap. + +The timer thread works in such a way that: + +* first, checking the timestamp(say *T*) value of the root node: + * if *T* > *now*, it means the execution time has not yet come, continue. + * if *T* <= *now*, do the job by invoking the callable function object. +* after successfully finished a job, checking whether it need to be scheduled again. If so, calculating the next timestamp of its execution by the interval and insert the new task back to the heap. 
+ +![global-timer-thread](images/global-timer-thread.png) + +Note: *In this design, the thread acting as both a scheduler and an executor, thus task execution may get delayed if its previous task takes a long time to finish. But in the current scenarios, there is no such kind of task registered into the heap, so it's okay to just let it be.* + +### About The Binlog + +This is the log logging for raft's log entries and also acting as the [WAL](https://en.wikipedia.org/wiki/Write-ahead_logging) in the leveled storage subsystem.Let's first take a look at the file format: + +![binlog](images/storage-binlog.png) + +#### Meta Data Part + +The meta data is the index of the data part, locating in memory, callers can easily find the corresponding offset of each entry. The meta part is appended to the binlog each time the binlog file is considered large enough to be dumped to disk, since its length varies according to how many records it contains, it must be located at the end of the file rather than at the head of the file. The footer is an identifier identifying the completeness of the file. + +#### Data Part + +This is just several log entries each serialized by [protobuf](https://github.com/protocolbuffers/protobuf), concatenating together, inside each record, there is an additional 4 bytes to show the length of the serialized buffer. + +Basic fields for each log entry : +* key : the key. +* val : key's value. +* ID : to identify the entry itself. +* pre-ID : to identify the entry it bases on. + +Raft's logs are in strict order, each one is based on its previous one, contributing the `consistency semantics` in the distributed environment as a whole, for more details see the [raft paper](https://raft.github.io/raft.pdf). 
+ +*Note: The `previous` term above is on an `ID monotonicity` basis, e.g., if logA is the previous one of logB, then logA's ID must satisfy LogAID <= LogBID - 1, i.e., it is not necessarily equal to LogBID - 1.* + +#### Supported operation + +There are operations binlog need to support : +* Append : the most basic operation. +* Revert : in raft, existing log entries could be rewritten for several reasons (like a new leader's log is inconsistent with followers), reverting will be called once in a while. +* SetHead : in general cases, binlog file must contain at least one entry to be the previous entry of its next following. Thus once a new empty follower joined the cluster and finished synchronizing data from the leader, the last data item just synchronized will be set as the first entry, by the SetHead operation. + +### Storage Layer + +Let's take an overview first: + +![storage](../doc/images/storage-overview.png) + +As a KV database where throughput is a critical factor, a leveled manner storage layer has been chosen. You can think of it as a simpler version of the [leveldb](https://github.com/google/leveldb). Just skip this section if you are already familiar with that. + +There are two components in the storage subsystem: + +* memory table : a hash based unordered in-memory data collection, in lockfree manner. +* SStable : sorted string table with index enabled, nothing specific than the typical [sstable](http://distributeddatastore.blogspot.com/2013/08/cassandra-sstable-storage-format.html). + +There are few differences in file format between sstable and binlog. They are sharing the same +design philosophy: +* data items start at the beginning of the file. +* meta data and the file's completeness identifier(footer in sstable) are appended after the data zone. 
+ +![storage](../doc/images/storage-sstable.png) + +Some optimization features(like the `minimal key prefix compression`, where the data area is divided in to several smaller groups inside each of the group there is a `common prefix`(usually selected as the first key) for all the keys in that group, the data items' key only stores the tailing part, sharing the common prefix, thus reducing the space requirements for storing data), are not supported yet, since the sstable file size is not a critical factor for now. + +Choosing an unorderd data structure acting as the memory table has its own pros & cons compared to a typical choice: an ordered data structure, like a [LSM Tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree). + +* Pros: + * writing complexity downgraded to O(1) from O(log2N). + * can taking the advantage of the existing lockfree feature for writing operations. +* Cons: + * Can't do [rolling merging ](https://blog.acolyer.org/2014/11/26/the-log-structured-merge-tree-lsm-tree/) when migrating data from the upper level(C0) to the lower level(C1) which has a more fine grained control over it. + +The memory tables from `memory table 2` to `memory table N` can seldom been seen as they only occur when the previous memory tables still haven't been finished dumping at the moment they are going to be dumped. The dumping process for each memory table can proceed simultaneously, this also bring some tricks into the code: + +![storage](../doc/images/storage-memory-dump.png) + +First of all, when a memory table(says `M`) finished dumping, it needs to cutoff from its least ancestor table(says `N`) by changing the `next` pointer of `N` from pointing to `M` to pointing to null. 
But some memory table(s,like `memory table 3` in the above) can be finished dumping earlier than its succeeding ones(`memory table 4` till `memory table N`, says `Q`), if we do cutting off immediately at the moment(says `T0`), there is a time windows starts from `T0` and ends till to all the subsequent memory tables finished dumping(says `T1`), during which querys from clients will not get any data in the tables of `Q`. Thus we have to wait for `T1` reached before cutting off `N` from `M`, this is done by a CAS operation. + +The sstables will be periodically merged together from the oldest to the lastest. This is as the same with the typical merging semantics of [LSM Tree](https://en.wikipedia.org/wiki/Log-structured_merge-tree). + +Since the storage layer sharing so many concepts and features with leveldb, you might ask why just using the leveldb library instead of implementing an owned version? Well, there two important reasons to do this: +* we already have a binlog which records exactly what the `WAL` needs, we must to utilize it getting around of leveldb doing a duplicated job. +* with a customized memory table, we can take the advantage of lockfree feature for writing. + +### GC + + +### Election + +#### The basics. + +Election is the second fundamental component of the entire raft protocol. Detailedly discussing about it is beyond the scope, just keep in mind of the following basic principles: +* *Term Based* : Different round of elections are distinguished by their terms. Only one leader could be elected for one specific term, whereas **two leaders can existing at the same time in different terms**. +* *Majority Based* : Contributing to the above principle. +* *Heartbeat Timeout Triggered*: A follower becoming candidate once it cannot hear from the leader. +* *Election Timeout* : If one candidate can't win an election in a given term for a specific period of time, it will falling into a random sleep and restarting the election after that. 
+* *Respecting With Higher Term*: When a node learns about(by whatsoever means) that there is another node which has a higher term than its own, it will definitely step down to a follower status and conforming instructions from the node it just learned about. + +#### About prevoting. + +Pre-vote is an additional step for the candidates before they issuing the real voting requests. Purpose of this is to prevent the occasionally disconnected followers from disrupting the entire cluster: consider a network issue occurred between the leader and one (says FX) of the followers, but not all the others. FX will starting an election sending voting requests to the other nodes, but the request will certainly fail since the it's already been disconnected from the cluster, then FX gaining its term and starting a new round election again, this process repeating until network heals. After the healing, all nodes know about FX is requesting vote with a higher term and they step down to followers showing respects to the higher term. Now all the nodes take part in the election, and FX cannot win since it has been disconnected for a period of time during which the logs could have advanced a lot. Each of the other nodes could be elected as the new leader. + +But the whole thing is stupid because the old leader is actually without any problem to serve the clients, FX is just a trouble maker. + +To work it around, every time a node thinks it has to issue a vote request, it must firstly ask the others that: *do you agree me to start an election*? Only if the node being asked also detected leader's gone, could it response with "Yes", otherwise "No". + +#### The live lock problem & solution. + +There is one widely known shortcoming of raft's election design: the live lock problem. Besides the one that the [raft paper](https://raft.github.io/raft.pdf) has mentioned: + +> **Raft uses randomized election timeouts to ensure that split votes are rare and that they are resolved quickly**. 
To prevent split votes in the first place, election timeouts are chosen randomly from a fixed interval (e.g., 150–300ms). This spreads out the servers so that in most cases only a single server will time out; it wins the election and sends heartbeats before any other servers time out. The same mechanism is used to handle split votes. Each candidate restarts its randomized election timeout at the start of an election, and it waits for that timeout to elapse before starting the next election; this reduces the likelihood of another split vote in the new election. + +which can be nearly solved by setting a randomized timeout value before starting each round of election, yet there is still another serious problem I came across, suppose a scenario like this: + +The leader crashes at some point leaving the followers with different log entries at the moment: + +![election-overview-1](../doc/images/election-overview-1.png) + +`Server 3` & `Server 4` & `Server 5` are all possible to be elected as the new leader since each of them can get a majority vote. But will they definitely being elected out just because of this? What if the servers with a lower index(says ServerLOW) increasing their terms **always faster** than the servers with a higher index(says ServerHIGH)? See below: + +![election-overview-2](../doc/images/election-overview-2.png) + +*Note: the sleeping times in each server doesn't need to be the same with each other, they are drawn as equal just for convenience.* + +In the picture, all nodes turned into candidate state and going issue an election. 
ServerLOW *always* have relatively smaller sleeping intervals compared to its ServerHIGH between each round of election(represented by green), leading to each time ServerHIGH starting to do election under a certain `term X`, they will found that `term X` cannot get a majority vote due to one of the two reasons: +* the term has already been voted by its ServerLOW to themselves as mentioned in the paper: + +> To begin an election, a follower increments its current term and transitions to candidate state. It then votes for itself and issues RequestVote RPCs in parallel to each of the other servers in the cluster. + + * the lastest log is earlier than the nodes it sending the vote request to. + +This process continues resulting into either none of nodes in `Set 2` could win an election or nodes in `Set 2` win an election but with a long time. Neither of them are wanted by the applications. + +Here we named the above abnormal phenomenon as `diff-speed-caused abortion`, shorted as `DSCA`. + +For `DSCA`'s happening, the election existing sequence for each term doesn't need to be exactly the same as the above picture illustrated. It just need to make sure that when each of the nodes in `Set 2` start their elections, the nodes in ServerLOW can just rightly prevent its success. E.g., when `Server 3` start electing, just needing one member from `Server 1` & `Server 2` refuse the vote, written as `[1,2] !-> #1`, the other cases are: +* `[1,2,3] !-> #2`, for `Server 4`. +* `[1,2,3,4] !-> #3`, for `Server 5`. + +You might retort to say that this is still an extreme case and will rarely to be seen. For the precondition of that *each server with different log entries when the leader crashes (as the pictured described)*, right, it is not common to see. But we'd better focus on the election protocol itself instead of presuming anything before it. 
And for the protocol itself, I tried to give an formula to describe the relations between the probability of the `DSCA` and several other factors(e.g., the lower and upper bound of sleeping interval as Smin and Smax, the time elapsed as T, the number server nodes as N), joined together as: + +

+ +

+ +, but found it was **a quite complicated mathematic problem** (contributions are very welcomed). However, in [this unit test](../src/gtest/election/test_election.h), I indeed came across that phenomenon many times which impelled me to take an action, finally a solution worked out like this: +* Each candidate locally records the terms which are carried by the RPC `preVote` and `vote` when they are invoked, as `used terms`. +* When a candidate issues a new round of an election, it increases its term by skipping over the `used terms` it recorded. +* If an election fails, the candidate sets its term's value back to the value where it started. + +For example, a candidate says C0 initially with a term value *3*, but it knows that *term 4* and *term 5* have already been used by the other candidates as their new electing terms, thus C0 will start its next round of election by using *term 6* instead of the adjacent *term 4*, mitigating the conflicts. + +Under my test, this solution perfectly solved the `DSCA` problem. Now, it's almost impossible for the terms to conflict with each other during each round of elections. + +### Membership change + +Now we're coming to the last but most hard to understand part of the raft protocol. I suggest you to read the chapter of `6 Cluster membership changes` in the raft paper in depth before reading this section because there are several differences in aurora's implementation, you need to have that basic knowledge to understand why these differences are introduced and still being correct. + +The phrase *membership change* means that the whole cluster stays available to the clients and can perform the functions of `log replication` and `leader election` correctly while it's changing the topology by adding/removing nodes to/from it. 
Essentially, this is a two phase commit procedure, and it the paper it was suggested to be done with inserting the configure changing events into the log entries, this approach is great in the perspective of taking advantage of the existing binlog's version control(the changing event itself is represented as a log entry) and quite easy to implement. But in aurora's implementation, that *2PC* process is extracted out as an independent module to keep the binlog related logic as pure as possible. This brings some extra work to do: + +* two separate RPC interfaces(`PropagateMemberChange` & `MemberChangeCommit`) are needed. +* each server in the cluster maintains a separate configure(`membership-change.config`) for its membership status. +* the candidates take the version of membership configure into account when deciding whether to vote for a node or not. + +The version of a membership configure plays an important role since it's used to find out which configure is newer in elections. It simulates the logID as if the changing events are transferred with log entries. + +Another difference in aurora is that the log entries are firstly being replicated to the majorities and will not be appended to the local binlog until they got majority confirmed, this order is the reversed version of that in the paper, and this reversing also applies in the membership change module: the Cnew will be broadcasted to the cluster at first, then the leader waiting for the majority confirmation, only after that the Cold,new would take effect. + +Now, let's recap why the `joint consensus` is needed, first thinking about a scenario without the *joint consensus*: + +![membership-change-1](../doc/images/membership-change-1.png) + +The cluster wants to shrink from 7 nodes (leader & F1-F6) to 3 nodes (leader & F1-F2), the leader replicate the change event to F1-F6, and suddenly crashes leaving only F1 receives that change event. 
Now, in F2-F6's point of view the cluster still having 7 nodes but there are only 3 nodes in from F1's view. As a consequence, F1 could be elected as the new leader with votes from F1 & F2, in the meantime F3 could also win an election under the same term with votes from F3 & F4 & F5 & F6. Similar cases could also be enumerated **as long as Cold and Cnew can making unilateral decisions at the same time**. This is why `joint consensus` are introduced in raft. + +The next difference is that aurora simplifies the status transition from a process of + +Cold --> 2PC --> Cold,new --> 2PC --> Cnew , to : + +Cold --> Cold,new --> Cnew + +The omitted 2PCs are indeed unnecessary, raft using it may because of just making the boundaries clear. The new approach is now looking like this : + +![membership-change-2](../doc/images/membership-change-2.png) + +Note the starting point of where Cnew `can making decisions alone` exists later the original one from the raft paper. + +Some other differences for dealing with problems in `membership change` are: +* new servers may not initially store any log entries : leader will first syncing data to all the new nodes and **won't start replicating the configure change event until the new nodes are all fully synced**. +* to prevent removed servers (those not in C-new) from disrupting the cluster : after committing the configure change event, a node will **shutdown itself once detected that it's no longer in the new cluster**, making life easier. + +## Contributing + +There are about 20k lines of code in the first version in aurora. But they are in a well organized directory structure, each directory & file & class name are straight forward, I believe you can get the point of a code snippet just by taking a look at its upper wrapper's name. + +### Future work + +#### Improvements + +In raft's design, all read & writes requests are forwarded to the leader, it's an obvious bottle neck of the whole system. 
There are several architecture level optimizations we can do. + +##### 0. support cmake + +Awkward...But it hasn't been supported yet... + +##### 1. (done) improve working threads' way of polling +At present, for the `AppendEntries` interface on the follower side, the working threads have to block on a condition variable when some out of order messages arrived, and will be and only be notified when some certain conditions(in this scenario it's the accomplishments of processing its preceding messages) matched. This resulting into the working threads wasting their time, and getting worse as the number of the out of order message arrivals increasing, this can be get around by inventing some new polling mechanisms for the working threads : +* there is a back ground task queue designed to hold uncompleted `AppendEntries` requests. +* the working threads wrapping the current job into a task and delivering it to the above queue when they found they need to wait on a CV, and then go back to polling on the CQ. +* there are dedicated threads ready to handle the wrapped task and take over the waiting and its subsequent jobs. + +This won't help in reducing the average latency for requests, but can releasing the occupied working threads, further improve overall throughput, also introducing a more complex program design, of course. + +*Data sharding is an old topic in database design, you can seek for other articles about this topic.* + +##### 2. LockFreeDeque dynamic shrink & expand + +This is for the varing requests payloads a leader may face. There is a time slice between the client is fetched and client is returned back to the client pool which is implemented by the `LockFreeDeque`, otherwise there is risk of exhuasting client resources during heavy enough payloads. + +##### 3. Storage. + +###### 3.1 SSTable do not maintain index data in memory. 
+ +In the current design of the storage layer, each sstable object maintains the index part(the offset for each key) for each sstable file in memory. This could result in unlimited memory usage if data always grows faster than GC's reclaim. This is a legacy problem for an original intention of accelerating sstable access. A better approach is the compromise of the two factors of *memory usage* and *access speed* : only maintain the first several sstables' index data in memory, and leave them alone for the others. + +Besides, we also need a way to stop sstable files' size from growing unlimitedly as the consequence of [periodically merging](#storage-gc). + +###### 3.2 Disk space + +The meta data part of a SSTable is relatively large for now. We need to figure out a data compression solution for the meta part to save disk space usage. + +###### 3.3 Memory table + +###### 3.3.1 Dump speed + +It will cost ~3.5 seconds(win10 debug) to dump a 20K items memory table to a SSTable. The iterating process in the `LockFreeHash::GetOrderedByKey::_rb_tree` is the bottle neck, haven't figured out a way to get around of using the `_rb_tree` temporary variable to customize the order comparator. + +###### 3.3.2 Limit by memory usage. + +There is only a `memory_table_max_item` config to control when to dump the memory to SSTable, but it is hard for the users to decide the number. Thus something like `memory_table_max_memory_mb` is needed to give an option that can be used to control by size of memory it used. + +##### 4. Binlog GC. + +Number of binlog files will increase as data grows, in the current design, only at the startup time for the server can we know which binlog files can be deleted, manually. We need an elegant way to delete the big files as promptly as possible to free disk space. + +##### 5. Bi-directional streaming rpc. + +The communications between leader & followers are now in a way of unary rpc. 
Throughput is kinda lower compared to the way of bi-directional streaming rpc. It's worth improving it in that way, and this can alos mitigate [this problem](https://github.com/grpc/grpc/issues/19658). + +#### New features + +##### 1. apportion reading requests + +Several possible solutions: + +* Followers itself can also serve the read requests by query the lastest committed value for a key to the majority of the cluster. But this will multiply the network traffic by N times. + +* Adding slave(s) for the leader, committed data will be replicated to these nodes in either a strong consistency or an eventual consistency manner, depending on the needs. Those nodes don't take participate in the election and log replication phase. This is a more feasible choice. + +##### 2. data slicing with multi-raft + +This is the ultimate solution for horizontal scalability. The data set that aurora served will be sliced into different pieces, upon each of them there will be a raft instance running. Therefore we can deploying many raft instances among a fixed number of machines, any node in the cluster can acting as the leader for a certain set of data, at the meanwhile acting as a follower for another set of data. Different instances running independently from each other, as a whole, making use of the bindwidth of the local network as much as possible. The challenging part is the slicing strategy, different applications get different inclinations. In general, consistent hashing should be the compromised and default choice. + +##### 3. multiple value format + +The value formats can be diversified to a wide range of choices, like list, set, ordered set in redis, consider the follow things when plan to import any of them: +* how to storage data in this format on disk in an efficient way. +* how to storage data in this format in memory. +* design suitable operations on this format. + +##### 4. [distributed] transactions + +This is a very big topic and really hard to get right. 
Here are just some considerations: +* redo & undo mechanisms on a single node transaction view. +* 2PC on a global transaction view. +* a globally unique monotonic transaction ID. + +##### 5. dockerize + +Can be released & deployed as a whole by being a docker service. + +### Naming Conventions + +Aurora conforms to the [google C++ style guide](https://google.github.io/styleguide/cppguide.html) with the following exceptions: +* local variables are prefixed with a single underscore. +* class & function names are in camel style. + +### Unit Test + Make sure all the unit tests related to your modification pass before committing. Add new test cases if new code or modules are added. + +### Development history +Consult [this](https://gitlab.com/pplorins/aurora/blob/master/doc/dev_log.md) to see the very detailed development history, it remains open most of the time. + + diff --git a/doc/glossary.md b/doc/glossary.md new file mode 100644 index 0000000..7f0d174 --- /dev/null +++ b/doc/glossary.md @@ -0,0 +1,7 @@ +|Term|Short For|Scenarios| +|--|--|--| +|LRL|Last Replicated Log|Leader and Follower server and their binlog status| +|ID-LRL|ID of the Last Replicated Log|ID of the Leader and Follower server and their binlog status| +|ID-LCL|ID of the Last Committed Log|ID of the Leader and Follower server and their binlog status| +|LRG|Last Released GUID|The latest generated guid for write operation in Leader| +|CV|Condition Variable, especially refers to the std::condition_variable|many multi-threaded places| \ No newline at end of file diff --git a/doc/images/MPMC-thread.png b/doc/images/MPMC-thread.png new file mode 100644 index 0000000..1bbe49a Binary files /dev/null and b/doc/images/MPMC-thread.png differ diff --git a/doc/images/benchmark-latency-datalen.png b/doc/images/benchmark-latency-datalen.png new file mode 100644 index 0000000..f70ef44 Binary files /dev/null and b/doc/images/benchmark-latency-datalen.png differ diff --git 
a/doc/images/benchmark-latency-followers.png b/doc/images/benchmark-latency-followers.png new file mode 100644 index 0000000..2ddb2f7 Binary files /dev/null and b/doc/images/benchmark-latency-followers.png differ diff --git a/doc/images/benchmark-throughput-datalen.png b/doc/images/benchmark-throughput-datalen.png new file mode 100644 index 0000000..2760431 Binary files /dev/null and b/doc/images/benchmark-throughput-datalen.png differ diff --git a/doc/images/benchmark-throughput-followers.png b/doc/images/benchmark-throughput-followers.png new file mode 100644 index 0000000..09ac59e Binary files /dev/null and b/doc/images/benchmark-throughput-followers.png differ diff --git a/doc/images/binlog-coordinating.png b/doc/images/binlog-coordinating.png new file mode 100644 index 0000000..6bd3e3c Binary files /dev/null and b/doc/images/binlog-coordinating.png differ diff --git a/doc/images/cgg-1.png b/doc/images/cgg-1.png new file mode 100644 index 0000000..9ab41f6 Binary files /dev/null and b/doc/images/cgg-1.png differ diff --git a/doc/images/cgg-2.png b/doc/images/cgg-2.png new file mode 100644 index 0000000..1ebf150 Binary files /dev/null and b/doc/images/cgg-2.png differ diff --git a/doc/images/cgg-3.png b/doc/images/cgg-3.png new file mode 100644 index 0000000..ea4e60e Binary files /dev/null and b/doc/images/cgg-3.png differ diff --git a/doc/images/cgg-4.png b/doc/images/cgg-4.png new file mode 100644 index 0000000..0430b03 Binary files /dev/null and b/doc/images/cgg-4.png differ diff --git a/doc/images/datalen-factor.png b/doc/images/datalen-factor.png new file mode 100644 index 0000000..b0c2e25 Binary files /dev/null and b/doc/images/datalen-factor.png differ diff --git a/doc/images/deque-pop-1.png b/doc/images/deque-pop-1.png new file mode 100644 index 0000000..1e2f084 Binary files /dev/null and b/doc/images/deque-pop-1.png differ diff --git a/doc/images/deque-pop-2.png b/doc/images/deque-pop-2.png new file mode 100644 index 0000000..44d6e36 Binary files 
/dev/null and b/doc/images/deque-pop-2.png differ diff --git a/doc/images/deque-push-1.png b/doc/images/deque-push-1.png new file mode 100644 index 0000000..56fd351 Binary files /dev/null and b/doc/images/deque-push-1.png differ diff --git a/doc/images/deque-push-2.png b/doc/images/deque-push-2.png new file mode 100644 index 0000000..5c1b916 Binary files /dev/null and b/doc/images/deque-push-2.png differ diff --git a/doc/images/deque-push-3.png b/doc/images/deque-push-3.png new file mode 100644 index 0000000..6f207f1 Binary files /dev/null and b/doc/images/deque-push-3.png differ diff --git a/doc/images/election-overview-1.png b/doc/images/election-overview-1.png new file mode 100644 index 0000000..2618870 Binary files /dev/null and b/doc/images/election-overview-1.png differ diff --git a/doc/images/election-overview-2.png b/doc/images/election-overview-2.png new file mode 100644 index 0000000..28ca675 Binary files /dev/null and b/doc/images/election-overview-2.png differ diff --git a/doc/images/follower-iterating-thread.png b/doc/images/follower-iterating-thread.png new file mode 100644 index 0000000..4883779 Binary files /dev/null and b/doc/images/follower-iterating-thread.png differ diff --git a/doc/images/followers-factor.png b/doc/images/followers-factor.png new file mode 100644 index 0000000..19ef3fc Binary files /dev/null and b/doc/images/followers-factor.png differ diff --git a/doc/images/formula.png b/doc/images/formula.png new file mode 100644 index 0000000..d2577c2 Binary files /dev/null and b/doc/images/formula.png differ diff --git a/doc/images/global-timer-thread.png b/doc/images/global-timer-thread.png new file mode 100644 index 0000000..a1dc5d6 Binary files /dev/null and b/doc/images/global-timer-thread.png differ diff --git a/doc/images/guid.png b/doc/images/guid.png new file mode 100644 index 0000000..f8093a3 Binary files /dev/null and b/doc/images/guid.png differ diff --git a/doc/images/hash.png b/doc/images/hash.png new file mode 100644 index 
0000000..6371778 Binary files /dev/null and b/doc/images/hash.png differ diff --git a/doc/images/hierarchy.png b/doc/images/hierarchy.png new file mode 100644 index 0000000..eccace4 Binary files /dev/null and b/doc/images/hierarchy.png differ diff --git a/doc/images/list-cuthead-1-1.png b/doc/images/list-cuthead-1-1.png new file mode 100644 index 0000000..f5d6086 Binary files /dev/null and b/doc/images/list-cuthead-1-1.png differ diff --git a/doc/images/list-cuthead-1-2.png b/doc/images/list-cuthead-1-2.png new file mode 100644 index 0000000..c617713 Binary files /dev/null and b/doc/images/list-cuthead-1-2.png differ diff --git a/doc/images/list-cuthead-1-3.png b/doc/images/list-cuthead-1-3.png new file mode 100644 index 0000000..edfc6cc Binary files /dev/null and b/doc/images/list-cuthead-1-3.png differ diff --git a/doc/images/list-cuthead-1-4.png b/doc/images/list-cuthead-1-4.png new file mode 100644 index 0000000..2e12f47 Binary files /dev/null and b/doc/images/list-cuthead-1-4.png differ diff --git a/doc/images/list-cuthead-1-5.png b/doc/images/list-cuthead-1-5.png new file mode 100644 index 0000000..c405f33 Binary files /dev/null and b/doc/images/list-cuthead-1-5.png differ diff --git a/doc/images/list-cuthead-2-1.png b/doc/images/list-cuthead-2-1.png new file mode 100644 index 0000000..1e6fa04 Binary files /dev/null and b/doc/images/list-cuthead-2-1.png differ diff --git a/doc/images/list-cuthead-2-2.png b/doc/images/list-cuthead-2-2.png new file mode 100644 index 0000000..30bf458 Binary files /dev/null and b/doc/images/list-cuthead-2-2.png differ diff --git a/doc/images/list-cuthead-2-3.png b/doc/images/list-cuthead-2-3.png new file mode 100644 index 0000000..63e43e5 Binary files /dev/null and b/doc/images/list-cuthead-2-3.png differ diff --git a/doc/images/list-cuthead-2-4.png b/doc/images/list-cuthead-2-4.png new file mode 100644 index 0000000..45ae616 Binary files /dev/null and b/doc/images/list-cuthead-2-4.png differ diff --git 
a/doc/images/list-cuthead-2-5.png b/doc/images/list-cuthead-2-5.png new file mode 100644 index 0000000..aeb5a09 Binary files /dev/null and b/doc/images/list-cuthead-2-5.png differ diff --git a/doc/images/list-cuthead-2-6.png b/doc/images/list-cuthead-2-6.png new file mode 100644 index 0000000..c0d3c86 Binary files /dev/null and b/doc/images/list-cuthead-2-6.png differ diff --git a/doc/images/list-insert-1-1.png b/doc/images/list-insert-1-1.png new file mode 100644 index 0000000..3363c5f Binary files /dev/null and b/doc/images/list-insert-1-1.png differ diff --git a/doc/images/list-insert-1-2.png b/doc/images/list-insert-1-2.png new file mode 100644 index 0000000..fc33806 Binary files /dev/null and b/doc/images/list-insert-1-2.png differ diff --git a/doc/images/list-insert-1-3.png b/doc/images/list-insert-1-3.png new file mode 100644 index 0000000..613677d Binary files /dev/null and b/doc/images/list-insert-1-3.png differ diff --git a/doc/images/list-insert-1-4.png b/doc/images/list-insert-1-4.png new file mode 100644 index 0000000..71cca5b Binary files /dev/null and b/doc/images/list-insert-1-4.png differ diff --git a/doc/images/list-insert-2-1.png b/doc/images/list-insert-2-1.png new file mode 100644 index 0000000..0456c27 Binary files /dev/null and b/doc/images/list-insert-2-1.png differ diff --git a/doc/images/list-insert-2-2.png b/doc/images/list-insert-2-2.png new file mode 100644 index 0000000..abd5049 Binary files /dev/null and b/doc/images/list-insert-2-2.png differ diff --git a/doc/images/list-insert-2-3.png b/doc/images/list-insert-2-3.png new file mode 100644 index 0000000..ea714e0 Binary files /dev/null and b/doc/images/list-insert-2-3.png differ diff --git a/doc/images/list-insert-3-1.png b/doc/images/list-insert-3-1.png new file mode 100644 index 0000000..af7a738 Binary files /dev/null and b/doc/images/list-insert-3-1.png differ diff --git a/doc/images/list-insert-3-2.png b/doc/images/list-insert-3-2.png new file mode 100644 index 0000000..bcc6ad6 
Binary files /dev/null and b/doc/images/list-insert-3-2.png differ diff --git a/doc/images/list-insert-3-3.png b/doc/images/list-insert-3-3.png new file mode 100644 index 0000000..b26e97f Binary files /dev/null and b/doc/images/list-insert-3-3.png differ diff --git a/doc/images/list-insert-3-4.png b/doc/images/list-insert-3-4.png new file mode 100644 index 0000000..3f88863 Binary files /dev/null and b/doc/images/list-insert-3-4.png differ diff --git a/doc/images/list-insert-3-5.png b/doc/images/list-insert-3-5.png new file mode 100644 index 0000000..bbd86f1 Binary files /dev/null and b/doc/images/list-insert-3-5.png differ diff --git a/doc/images/list-insert-4-1.png b/doc/images/list-insert-4-1.png new file mode 100644 index 0000000..ec86222 Binary files /dev/null and b/doc/images/list-insert-4-1.png differ diff --git a/doc/images/list-insert-4-2.png b/doc/images/list-insert-4-2.png new file mode 100644 index 0000000..0ef89bd Binary files /dev/null and b/doc/images/list-insert-4-2.png differ diff --git a/doc/images/list-insert-4-3.png b/doc/images/list-insert-4-3.png new file mode 100644 index 0000000..3a3d1a5 Binary files /dev/null and b/doc/images/list-insert-4-3.png differ diff --git a/doc/images/list-insert-4-4.png b/doc/images/list-insert-4-4.png new file mode 100644 index 0000000..b4eca6b Binary files /dev/null and b/doc/images/list-insert-4-4.png differ diff --git a/doc/images/logo/transparent-black-v2.png b/doc/images/logo/transparent-black-v2.png new file mode 100644 index 0000000..987e125 Binary files /dev/null and b/doc/images/logo/transparent-black-v2.png differ diff --git a/doc/images/logo/transparent-blue-v2.png b/doc/images/logo/transparent-blue-v2.png new file mode 100644 index 0000000..19ecb93 Binary files /dev/null and b/doc/images/logo/transparent-blue-v2.png differ diff --git a/doc/images/logo/transparent-red-v2.png b/doc/images/logo/transparent-red-v2.png new file mode 100644 index 0000000..1537736 Binary files /dev/null and 
b/doc/images/logo/transparent-red-v2.png differ diff --git a/doc/images/membership-change-1.png b/doc/images/membership-change-1.png new file mode 100644 index 0000000..b28126c Binary files /dev/null and b/doc/images/membership-change-1.png differ diff --git a/doc/images/membership-change-2.png b/doc/images/membership-change-2.png new file mode 100644 index 0000000..2c2e3e9 Binary files /dev/null and b/doc/images/membership-change-2.png differ diff --git a/doc/images/parallel-replicating.png b/doc/images/parallel-replicating.png new file mode 100644 index 0000000..500f899 Binary files /dev/null and b/doc/images/parallel-replicating.png differ diff --git a/doc/images/pay_alipay.png b/doc/images/pay_alipay.png new file mode 100644 index 0000000..0245339 Binary files /dev/null and b/doc/images/pay_alipay.png differ diff --git a/doc/images/pay_wechat.png b/doc/images/pay_wechat.png new file mode 100644 index 0000000..1057a91 Binary files /dev/null and b/doc/images/pay_wechat.png differ diff --git a/doc/images/polling-CQ-thread.png b/doc/images/polling-CQ-thread.png new file mode 100644 index 0000000..724fc71 Binary files /dev/null and b/doc/images/polling-CQ-thread.png differ diff --git a/doc/images/pri-queue.png b/doc/images/pri-queue.png new file mode 100644 index 0000000..a79d7f9 Binary files /dev/null and b/doc/images/pri-queue.png differ diff --git a/doc/images/queue-1.png b/doc/images/queue-1.png new file mode 100644 index 0000000..0019f76 Binary files /dev/null and b/doc/images/queue-1.png differ diff --git a/doc/images/queue-2.png b/doc/images/queue-2.png new file mode 100644 index 0000000..a8af807 Binary files /dev/null and b/doc/images/queue-2.png differ diff --git a/doc/images/queue-3.png b/doc/images/queue-3.png new file mode 100644 index 0000000..3410ebd Binary files /dev/null and b/doc/images/queue-3.png differ diff --git a/doc/images/replicate-callback.png b/doc/images/replicate-callback.png new file mode 100644 index 0000000..36154cb Binary files 
/dev/null and b/doc/images/replicate-callback.png differ diff --git a/doc/images/singlelist-1.png b/doc/images/singlelist-1.png new file mode 100644 index 0000000..26f0720 Binary files /dev/null and b/doc/images/singlelist-1.png differ diff --git a/doc/images/singlelist-2.png b/doc/images/singlelist-2.png new file mode 100644 index 0000000..dda37d4 Binary files /dev/null and b/doc/images/singlelist-2.png differ diff --git a/doc/images/storage-binlog.png b/doc/images/storage-binlog.png new file mode 100644 index 0000000..a262c2c Binary files /dev/null and b/doc/images/storage-binlog.png differ diff --git a/doc/images/storage-memory-dump.png b/doc/images/storage-memory-dump.png new file mode 100644 index 0000000..eeb3c9a Binary files /dev/null and b/doc/images/storage-memory-dump.png differ diff --git a/doc/images/storage-overview.png b/doc/images/storage-overview.png new file mode 100644 index 0000000..1fdd907 Binary files /dev/null and b/doc/images/storage-overview.png differ diff --git a/doc/images/storage-sstable.png b/doc/images/storage-sstable.png new file mode 100644 index 0000000..d627a38 Binary files /dev/null and b/doc/images/storage-sstable.png differ diff --git a/doc/images/system_architecture.png b/doc/images/system_architecture.png new file mode 100644 index 0000000..bf0f817 Binary files /dev/null and b/doc/images/system_architecture.png differ diff --git a/doc/images/write-requests-coordinating.png b/doc/images/write-requests-coordinating.png new file mode 100644 index 0000000..ab90eaf Binary files /dev/null and b/doc/images/write-requests-coordinating.png differ diff --git a/doc/windows.md b/doc/windows.md new file mode 100644 index 0000000..08d7143 --- /dev/null +++ b/doc/windows.md @@ -0,0 +1,68 @@ +# build under windows: +##### 0. Requirements : + * visual studio >= 2015. + * vs compiler >= 19.00.24215.1. + +*Note: make sure to change the directory prefix to your own in each of the following example configurations.* + +##### 1. 
configure preprocessor +* Go to `Project properties->c/c++->Preprocessor->Preprocessor Definitions` and add these values: + * _CRT_SECURE_NO_WARNINGS + * _SCL_SECURE_NO_WARNINGS + * _WIN32_WINNT=0x0A00 + * GLOG_NO_ABBREVIATED_SEVERITIES + * GFLAGS_DLL_DEFINE_FLAG= + * GTEST_HAS_TR1_TUPLE=0 + * _HAS_AUTO_PTR_ETC +* configure `Project properties->c/c++->All options->Additional Options` with `/std:c++latest`. + +##### 2. configure Include Directories +Go to `Project properties->c/c++->General->Additional Include Directories` and add an entry for each lib. Example of mine: + +``` +C:\Users\95\Documents\Visual Studio 2015\Projects\apollo\raft\src +D:\third_party\boost_1_64_0 +C:\Users\95\.babun\cygwin\home\arthur\git\glog\src\windows +C:\Users\95\.babun\cygwin\home\arthur\git\gflags\cmake\build\include +C:\Users\95\.babun\cygwin\home\arthur\git\protobuf\src +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\include +C:\Users\95\.babun\cygwin\home\arthur\git\googletest\googletest\include +``` + +##### 3. configure Library Directories +* Go to `Project properties->Linker->General->Additional Library Directories.` Example of mine: +``` +C:\Users\95\.babun\cygwin\home\arthur\git\googletest\googletest\build\Debug +D:\third_party\boost_1_64_0\bin.v2\libs_summary +C:\Users\95\.babun\cygwin\home\arthur\git\glog\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\gflags\cmake\build\lib\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\protobuf\cmake\build\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\build-dir\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\build-dir\third_party\boringssl\crypto\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\build-dir\third_party\boringssl\ssl\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\build-dir\third_party\zlib\Debug +C:\Users\95\.babun\cygwin\home\arthur\git\grpc\third_party\cares\cares\build-dir\bin\Release +C:\Users\95\.babun\cygwin\home\arthur\git\gperftools\x64\Release-Patch +``` + +* Go to `Project properties->Linker->General->Input->Additional 
Dependencies.` Example of mine: +``` +gtest.lib +grpc.lib +grpc++.lib +libprotobuf.lib +libprotobuf-lite.lib +gpr.lib +ws2_32.lib +gflags_static.lib +libglog_static.lib +shlwapi.lib +zlib.lib +ssl.lib +crypto.lib +cares.lib +address_sorting.lib +libtcmalloc_minimal.lib +``` + + diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..ed71ea0 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1,474 @@ + +#my own addings +*raft.pb.h +*raft.pb.cc +*raft.grpc.pb.h +*raft.grpc.pb.cc + + +# Created by https://www.gitignore.io/api/vim,git,c++,macos,windows,visualstudio +# Edit at https://www.gitignore.io/?templates=vim,git,c++,macos,windows,visualstudio + +### C++ ### +# Prerequisites +*.d + +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app + +### Git ### +# Created by git for backups. 
To disable backups in Git: +# $ git config --global mergetool.keepBackup false +*.orig + +# Created by git when using merge tools for conflicts +*.BACKUP.* +*.BASE.* +*.LOCAL.* +*.REMOTE.* +*_BACKUP_*.txt +*_BASE_*.txt +*_LOCAL_*.txt +*_REMOTE_*.txt + +### macOS ### +# General +.DS_Store +.AppleDouble +.LSOverride + +# Icon must end with two \r +Icon + +# Thumbnails +._* + +# Files that might appear in the root of a volume +.DocumentRevisions-V100 +.fseventsd +.Spotlight-V100 +.TemporaryItems +.Trashes +.VolumeIcon.icns +.com.apple.timemachine.donotpresent + +# Directories potentially created on remote AFP share +.AppleDB +.AppleDesktop +Network Trash Folder +Temporary Items +.apdisk + +### Vim ### +# Swap +[._]*.s[a-v][a-z] +[._]*.sw[a-p] +[._]s[a-rt-v][a-z] +[._]ss[a-gi-z] +[._]sw[a-p] + +# Session +Session.vim + +# Temporary +.netrwhist +*~ +# Auto-generated tag files +tags +# Persistent undo +[._]*.un~ + +### Windows ### +# Windows thumbnail cache files +Thumbs.db +ehthumbs.db +ehthumbs_vista.db + +# Dump file +*.stackdump + +# Folder config file +[Dd]esktop.ini + +# Recycle Bin used on file shares +$RECYCLE.BIN/ + +# Windows Installer files +*.cab +*.msi +*.msix +*.msm +*.msp + +# Windows shortcuts +*.lnk + +### VisualStudio ### +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. 
+## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Ll]og/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUNIT +*.VisualState.xml +TestResult.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.iobj +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# JustCode is a .NET coding add-in +.JustCode + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* 
+AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. 
+!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ +# ASP.NET Core default setup: bower directory is configured as wwwroot/lib/ and bower restore is true +**/wwwroot/lib/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- Backup*.rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# JetBrains Rider +.idea/ +*.sln.iml + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ +*.pyc + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# End of https://www.gitignore.io/api/vim,git,c++,macos,windows,visualstudio diff --git a/src/binlog/binlog_meta_data.cc b/src/binlog/binlog_meta_data.cc new file mode 100644 index 0000000..313f109 --- /dev/null +++ b/src/binlog/binlog_meta_data.cc @@ -0,0 +1,322 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+ +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "boost/crc.hpp" + +#include "config/config.h" +#include "tools/utilities.h" +#include "binlog/binlog_meta_data.h" + +#define _TYPE_LOG_IDX_OFFSET_ (0x01) + +namespace RaftCore::BinLog { + +FileMetaData::IdxPair::IdxPair(uint32_t _a, uint64_t _b, uint32_t _c, uint32_t _d, uint32_t _e) { /* arguments, in order: term, log index, file offset, key CRC32, value CRC32 */ + this->m_term = _a; + this->m_index = _b; + this->m_offset = _c; + this->key_crc32 = _d; + this->value_crc32 = _e; +} + +bool FileMetaData::IdxPair::operator<(const IdxPair& _other)const noexcept { + return this->LogIdentifier::operator<(_other); +} + +bool FileMetaData::IdxPair::operator>(const IdxPair& _other)const noexcept { + return this->LogIdentifier::operator>(_other); +} + +bool FileMetaData::IdxPair::operator>(const LogIdentifier& _other)const noexcept { + return this->LogIdentifier::operator>(_other); +} + +bool FileMetaData::IdxPair::operator<(const LogIdentifier& _other)const noexcept { + return this->LogIdentifier::operator<(_other); +} + +bool FileMetaData::IdxPair::operator<=(const LogIdentifier& _other)const noexcept { + return this->LogIdentifier::operator<=(_other); +} + +bool FileMetaData::IdxPair::operator>=(const LogIdentifier& _other)const noexcept { + return this->LogIdentifier::operator>=(_other); +} + +bool FileMetaData::IdxPair::operator<(const ::raft::EntityID &_other)const noexcept { /* ordered by term first, then by index */ + if (this->m_term < _other.term()) + return true; + + if (this->m_term > _other.term()) + return false; + + return this->m_index < _other.idx(); +} + +bool FileMetaData::IdxPair::operator==(const ::raft::EntityID &_other)const noexcept { + return (this->m_term == _other.term()) && (this->m_index==_other.idx()); +} + +bool FileMetaData::IdxPair::operator!=(const ::raft::EntityID &_other)const noexcept { + return !this->operator==(_other); +} + +bool FileMetaData::IdxPair::operator>(const ::raft::EntityID &_other)const noexcept { + if (this->m_term > 
_other.term()) + return true; + + if (this->m_term < _other.term()) + return false; + + return this->m_index > _other.idx(); +} + +bool FileMetaData::IdxPair::operator==(const IdxPair &_other) const noexcept{ /* equality looks at (term, index) only; offset and CRCs are not compared */ + return (this->m_term == _other.m_term && this->m_index == _other.m_index); +} + +const FileMetaData::IdxPair& FileMetaData::IdxPair::operator=(const IdxPair &_other) noexcept { + this->m_term = _other.m_term; + this->m_index = _other.m_index; + this->m_offset = _other.m_offset; + this->key_crc32 = _other.key_crc32; + this->value_crc32 = _other.value_crc32; + + return *this; +} + +std::size_t FileMetaData::IdxPair::Hash() const noexcept { /* mixes the hashes of m_index and m_offset; m_term does not contribute */ + auto h1 = std::hash{}(this->m_index); + auto h2 = std::hash{}(this->m_offset); + return h1 ^ (h2 << 8); +} + +FileMetaData::FileMetaData() noexcept {} + +FileMetaData::~FileMetaData() noexcept {} + +void FileMetaData::AddLogOffset(uint32_t term,uint64_t log_index, uint32_t file_offset, uint32_t key_crc32,uint32_t value_crc32) noexcept { + //The new record is owned by the shared_ptr, so no explicit release is needed. + std::shared_ptr _shp_new_record(new IdxPair(term,log_index,file_offset,key_crc32,value_crc32)); + this->m_meta_hash.Insert(_shp_new_record); +} + +void FileMetaData::AddLogOffset(const TypeOffsetList &_list) noexcept { + for (const auto &_item : _list) + this->m_meta_hash.Insert(_item); +} + +void FileMetaData::Delete(const IdxPair &_item) noexcept { + this->m_meta_hash.Delete(_item); +} + +TypeBufferInfo FileMetaData::GenerateBuffer() const noexcept { /* returns the malloc'ed buffer together with its size; NOTE(review): presumably the caller frees it - confirm */ + + std::list> _hash_entry_list; + this->m_meta_hash.GetOrderedByKey(_hash_entry_list); + + /* Meta data on-disk format,from low offset to high offset: + 1> meta data(tlv list) + 1) type : 1 byte + 2) length : 3 bytes (big endian) + 3) value : variable length (serialized LogOffset message) + 2> CRC32 checksum of part 1>. (4 bytes) + 3> length of whole meta data area. (4 bytes) + 4> a magic string to identify the end of meta-data area. 
(variadic bytes) */ + + ::raft::LogOffset _log_offset; + for (const auto &_ptr_entry : _hash_entry_list) { + if (_ptr_entry == nullptr) + continue; + + ::raft::LogOffsetItem* _p_item = _log_offset.add_mappings(); + _p_item->set_log_term(_ptr_entry->m_term); + _p_item->set_log_idx(_ptr_entry->m_index); + _p_item->set_offset(_ptr_entry->m_offset); + _p_item->set_key_crc32(_ptr_entry->key_crc32); + _p_item->set_value_crc32(_ptr_entry->value_crc32); + } + + std::string _log_offset_buf = ""; + _log_offset.SerializeToString(&_log_offset_buf); + + uint32_t entry_field_length = (uint32_t)_log_offset_buf.size(); + + //Value length cannot exceed the maximum a three-byte integer can represent. + assert(entry_field_length <= 0xFFFFFF); + + static uint32_t _footer_len = (uint32_t)std::strlen(_FILE_META_DATA_MAGIC_STR_); + uint32_t _tail_length = 4 * 2 + _footer_len; //4-byte CRC32 + 4-byte area length + magic string. + uint32_t _buf_size = 1 + 3 + entry_field_length + _tail_length; //1-byte type + 3-byte length + value + tail. + auto _ptr = (unsigned char*)malloc(_buf_size); + assert(_ptr!=nullptr); + + //Set meta data 1> 1). + unsigned char* _p_cur = (unsigned char*)_ptr; + _p_cur[0] = _TYPE_LOG_IDX_OFFSET_; + _p_cur++; + + //Set meta data 1> 2). + uint32_t _tmp = 0; + ::RaftCore::Tools::ConvertToBigEndian(entry_field_length, &_tmp); + unsigned char* px = (unsigned char*)&_tmp; + CHECK(*px == 0x0); //big endian and the value fits in 3 bytes, so the first (most significant) byte must be 0x0. + px++; + std::memcpy(_p_cur,px,_FOUR_BYTES_ - 1); + _p_cur += 3; + + //Set meta data 1> 3). + std::memcpy(_p_cur,_log_offset_buf.data(),entry_field_length); + _p_cur += entry_field_length; + + //Calculate crc32 checksum over the tlv part only (the tail is excluded). + uint32_t _crc32_value = ::RaftCore::Tools::CalculateCRC32(_ptr, _buf_size - _tail_length); + + //Set crc32 checksum. + ::RaftCore::Tools::ConvertToBigEndian(_crc32_value, &_tmp); + std::memcpy(_p_cur,&_tmp,_FOUR_BYTES_); + _p_cur += _FOUR_BYTES_; + + //Set meta data area length (the whole buffer, tail included). 
+ ::RaftCore::Tools::ConvertToBigEndian(_buf_size, &_tmp); + std::memcpy(_p_cur,&_tmp,_FOUR_BYTES_); + _p_cur += _FOUR_BYTES_; + + //Set meta data area magic string. + std::memcpy(_p_cur,_FILE_META_DATA_MAGIC_STR_,_footer_len); + _p_cur += _footer_len; + + return std::make_tuple(_ptr,_buf_size); +} + +void FileMetaData::ConstructMeta(const unsigned char* _buf, std::size_t _size) noexcept { + auto _cur_ptr = _buf; + //Examine header. + CHECK(*_cur_ptr == _TYPE_LOG_IDX_OFFSET_) << "meta header check fail"; + _cur_ptr++; + + //Read the 3-byte big-endian length of the value field. + uint32_t _length = 0x0; + std::memcpy((unsigned char*)&_length+1,_cur_ptr,_FOUR_BYTES_ - 1); + ::RaftCore::Tools::ConvertBigEndianToLocal(_length, &_length); + _cur_ptr += 3; + + //Parse the serialized LogOffset message. NOTE(review): neither _size nor the CRC32 trailer is checked in this function. + ::raft::LogOffset _log_offset; + CHECK(_log_offset.ParseFromArray(_cur_ptr, _length)) << "parse meta data buf fail.."; + + this->m_meta_hash.Clear(); + for (const auto &_item : _log_offset.mappings()) { + std::shared_ptr _shp_pair(new IdxPair(_item.log_term(),_item.log_idx(),_item.offset(),_item.key_crc32(),_item.value_crc32())); + this->m_meta_hash.Insert(_shp_pair); + } +} + +int FileMetaData::ConstructMeta(std::FILE* _handler) noexcept { + + //Offset of _handler should be set to 0 before calling this method. + + assert(_handler != nullptr); + + uint32_t _buf_size = ::RaftCore::Config::FLAGS_binlog_parse_buf_size * 1024 * 1024; //the flag value is scaled by 1024*1024, i.e. it is given in MB. + unsigned char *sz_buf = (unsigned char *)malloc(_buf_size); //NOTE(review): the malloc result is not checked before use. + + uint32_t _this_read = 0,_total_read = 0; + long _total_offset = 0; + + this->m_meta_hash.Clear(); + do{ + _this_read = (uint32_t)std::fread(sz_buf,1,_buf_size,_handler); + CHECK(_this_read > _FOUR_BYTES_) << "parsing:read raw file fail,file corruption found,actual read:" << _this_read; + _total_read += _this_read; //NOTE(review): bytes re-read after a rewind below are counted again here - confirm the final comparison allows for it. + + auto _p_cur = sz_buf; + + //Parsing this buffer. 
+ uint32_t _this_offset = 0; + do{ + if (_this_read - _this_offset < _FOUR_BYTES_) { + /*Each record must be read in full: if fewer than 4 bytes remain in the parsing buffer, + a partial read happened and the file position is rewound to the start of that record. */ + CHECK(std::fseek(_handler,_total_offset,SEEK_SET)==0) << "seeking binlog fail"; + break; + } + + uint32_t _cur_offset = _total_offset; /* file offset where this record (its 4-byte length prefix) starts */ + + uint32_t _item_buf_len = *(uint32_t*)_p_cur; + ::RaftCore::Tools::ConvertBigEndianToLocal(_item_buf_len, &_item_buf_len); + if ( _this_read - (_this_offset+_FOUR_BYTES_) < _item_buf_len ) { + /*Same as the previous branch: the record body is incomplete in this buffer, so rewind to the record start.*/ + CHECK(std::fseek(_handler,_total_offset,SEEK_SET)==0) << "seeking binlog fail"; + break; + } + + _p_cur += _FOUR_BYTES_; + _total_offset += _FOUR_BYTES_; + _this_offset += _FOUR_BYTES_; + + ::raft::BinlogItem _binlog_item; + + //Parse binlog_item buffer. + CHECK(_binlog_item.ParseFromArray(_p_cur, _item_buf_len)) << "parse file content fail.."; + _p_cur += _item_buf_len; + _total_offset += _item_buf_len; + _this_offset += _item_buf_len; + + const auto & _wop = _binlog_item.entity().write_op(); + uint32_t _key_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_wop.key().data(),(unsigned int)_wop.key().length()); + uint32_t _value_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_wop.value().data(),(unsigned int)_wop.value().length()); + + std::shared_ptr _shp_pair(new IdxPair(_binlog_item.entity().entity_id().term(), + _binlog_item.entity().entity_id().idx(), _cur_offset,_key_crc32,_value_crc32)); + this->m_meta_hash.Insert(_shp_pair); + + //NOTE(review): this is false when the buffer has been consumed exactly; otherwise the loop exits through one of the breaks above. + } while (_this_offset < _this_read); + + //If _this_read < _buf_size , means we've read to the end of file. + } while (_this_read >= _buf_size); + + free(sz_buf); + + return _total_offset == _total_read ? 
0 : _total_offset; +} + +void FileMetaData::GetOrderedMeta(TypeOffsetList &_output) const noexcept { + this->m_meta_hash.GetOrderedByKey(_output); +} + +void FileMetaData::BackOffset(int offset) noexcept{ + this->m_meta_hash.Map([&offset](std::shared_ptr &_one)->void{ _one->m_offset -= offset;}); +} + +std::string FileMetaData::IdxPair::ToString() const noexcept { + const static int _buf_size = 512; + char _sz_buf[_buf_size] = { 0 }; + std::snprintf(_sz_buf,_buf_size,"IdxPair term:%u,idx:%llu,offset:%u",this->m_term,this->m_index,this->m_offset); + + return std::string(_sz_buf); +} + +std::ostream& operator<<(std::ostream& os, const FileMetaData::IdxPair& obj) { + os << obj.ToString(); + return os; +} + +} \ No newline at end of file diff --git a/src/binlog/binlog_meta_data.h b/src/binlog/binlog_meta_data.h new file mode 100644 index 0000000..9c826d9 --- /dev/null +++ b/src/binlog/binlog_meta_data.h @@ -0,0 +1,149 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_BIN_LOG_META_DATA_H__ +#define __AURORA_BIN_LOG_META_DATA_H__ + +#include +#include +#include +#include + +#include "protocol/raft.pb.h" + +#include "common/comm_defs.h" +#include "common/log_identifier.h" +#include "tools/lock_free_hash.h" + +#define _FILE_META_DATA_MAGIC_STR_ "!@#$binlog$#@!" 
+ +namespace RaftCore::BinLog { + +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Common::TypeBufferInfo; +using ::RaftCore::DataStructure::HashTypeBase; + +class FileMetaData final{ + +public: + + struct IdxPair final : LogIdentifier , HashTypeBase{ + + uint32_t m_offset; + uint32_t key_crc32; + uint32_t value_crc32; + + IdxPair(uint32_t _a, uint64_t _b, uint32_t _c, uint32_t _d, uint32_t _e); + + virtual bool operator<(const IdxPair& _other)const noexcept override; + + bool operator==(const IdxPair &_other) const noexcept override; + + const IdxPair& operator=(const IdxPair &_other) noexcept override; + + std::string ToString()const noexcept; + + virtual std::size_t Hash() const noexcept override; + + virtual bool operator>(const IdxPair& _other)const noexcept; + + virtual bool operator>(const LogIdentifier& _other)const noexcept; + + virtual bool operator<(const LogIdentifier& _other)const noexcept; + + virtual bool operator<=(const LogIdentifier& _other)const noexcept; + + virtual bool operator>=(const LogIdentifier& _other)const noexcept; + + virtual bool operator<(const ::raft::EntityID &_other)const noexcept; + + virtual bool operator==(const ::raft::EntityID &_other)const noexcept; + + virtual bool operator!=(const ::raft::EntityID &_other)const noexcept; + + virtual bool operator>(const ::raft::EntityID &_other)const noexcept; + }; + + typedef std::list> TypeOffsetList; + +public: + + FileMetaData() noexcept; + + virtual ~FileMetaData() noexcept; + + /* This functions will be invoked each time we write a new log entry into the binlog file, + namely,in a multiple threading access environment. 
+ - Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + void AddLogOffset(uint32_t term,uint64_t log_index, uint32_t file_offset,uint32_t key_crc32,uint32_t value_crc32) noexcept; + + /*- Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + void AddLogOffset(const TypeOffsetList &_list) noexcept; + + /*- Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + void Delete(const IdxPair &_item) noexcept; + + /*This functions will be invoked only in the rotating file scenario,namely, + a single thread accessing environment.After return the pointer pointing + to the generated buffer, this function will no longer be responsible + for the memory it allocated, the caller must take care of(freeing) it. + + - Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + TypeBufferInfo GenerateBuffer() const noexcept; + + /* - Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + void ConstructMeta(const unsigned char* _buf, std::size_t _size) noexcept; + + /* - Support Multiple Thread invoking: False + - Will be invoked simultaneously: False */ + /*Return value: + 0 - parsed successfully. + >0 - parsed bytes.Remaining bytes are not parsable.Need to be truncated. 
*/ + int ConstructMeta(std::FILE* _handler) noexcept; + + /* - Support Multiple Thread invoking: True + - Will be invoked simultaneously: False */ + void GetOrderedMeta(TypeOffsetList &_output) const noexcept; + + void BackOffset(int offset) noexcept; + +private: + + ::RaftCore::DataStructure::LockFreeHash m_meta_hash; + +private: + + FileMetaData(const FileMetaData&) = delete; + + FileMetaData& operator=(const FileMetaData&) = delete; + +}; + +std::ostream& operator<<(std::ostream& os, const FileMetaData::IdxPair& obj); + +} //end namespace + + +#endif diff --git a/src/binlog/binlog_operator.cc b/src/binlog/binlog_operator.cc new file mode 100644 index 0000000..d75250b --- /dev/null +++ b/src/binlog/binlog_operator.cc @@ -0,0 +1,672 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "boost/crc.hpp" +#include "boost/filesystem.hpp" + +#include "config/config.h" +#include "tools/utilities.h" +#include "leader/leader_view.h" +#include "binlog/binlog_operator.h" +#include "storage/storage_singleton.h" + +namespace RaftCore::BinLog { + +//To avoid issues caused by including header files mutually. 
+namespace fs = ::boost::filesystem; +using ::raft::EntityID; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Common::ConvertID; +using ::RaftCore::Storage::StorageGlobal; + +BinLogOperator::BinLogOperator() {} + +BinLogOperator::~BinLogOperator()noexcept { + if (this->m_initialized) + this->UnInitialize(); +} + +void BinLogOperator::Initialize(const char* role, bool file_name) noexcept { + + CHECK(role != nullptr); + + if (file_name) + this->m_file_name = role; + else + this->m_file_name = _AURORA_BINLOG_NAME_ + std::string(".") + role; + + this->m_binlog_handler = std::fopen(this->m_file_name.c_str(),_AURORA_BINLOG_OP_MODE_); + CHECK(this->m_binlog_handler != nullptr) << "open binlog file " << this->m_file_name << "fail..,errno:" << errno; + + this->ParseFile(); + + std::list> _cur_file_meta; + m_p_meta.load()->GetOrderedMeta(_cur_file_meta); + + LogIdentifier _last; + _last.Set(0,0); + if (!_cur_file_meta.empty()) + _last.Set(_cur_file_meta.back()->m_term,_cur_file_meta.back()->m_index); + + this->m_last_logged.store(_last, std::memory_order_release); + this->m_binlog_status.store(BinlogStatus::NORMAL); + this->m_log_num = (uint32_t)_cur_file_meta.size(); + + this->m_precede_lcl_inuse.store(0); + + //this->m_term_changed.store(false); + + this->m_initialized = true; +} + +//void BinLogOperator::SetTermMatched() noexcept { +// //this->m_term_changed = true; +//} + +//bool BinLogOperator::IsTermMatched() noexcept { +// //return this->m_term_changed; +//} + +void BinLogOperator::AddPreLRLUseCount() noexcept { + this->m_precede_lcl_inuse.fetch_add(1); +} + +void BinLogOperator::SubPreLRLUseCount() noexcept { + this->m_precede_lcl_inuse.fetch_sub(1); +} + +void BinLogOperator::ParseFile() noexcept{ + + //Construct the meta data if not exist, otherwise just load it into RAM + m_p_meta.store(new FileMetaData()); + + CHECK(std::fseek(this->m_binlog_handler, 0, SEEK_END) == 0) << "seek binlog file " + << this->m_file_name << "fail..,errno:" << errno; + + long 
_file_size = std::ftell(this->m_binlog_handler); + CHECK(_file_size >= 0) << "tell binlog file " << this->m_file_name << "fail..,errno:" << errno; + + if (_file_size == 0) { + //Empty file,just skip the following parsing meta steps. + return; + } + + long _file_tail_len = _FOUR_BYTES_ * 3; + + int _minimal_pb_section_size = _FOUR_BYTES_; //Only a four-bytes length field . + int _minimal_meta_section_size = _FOUR_BYTES_ + _file_tail_len; //Only the tailing part and a 0-length tlv field. + + CHECK(_file_size >= _minimal_pb_section_size + _minimal_meta_section_size) << "binlog file size not correct:" << _file_size; + + //First read the last 12 bytes area. + CHECK(std::fseek(this->m_binlog_handler, (_file_size - _file_tail_len), SEEK_SET) == 0) << "seek binlog file " + << this->m_file_name << "fail..,errno:" << errno; + + char sz_last_zone[12] = {0}; + CHECK(std::fread(sz_last_zone, 1, 12, this->m_binlog_handler)==12) << "read binlog file " << this->m_file_name << "last zone fail..,errno:" << errno; + + //This uncompleted file ending is probably due to an unexpected crash. + static std::size_t _footer_len = std::strlen(_FILE_META_DATA_MAGIC_STR_); + if (strncmp((const char*)&sz_last_zone[8], _FILE_META_DATA_MAGIC_STR_, _footer_len) != 0) { + + CHECK(std::fseek(this->m_binlog_handler,0, SEEK_SET) == 0) << "seek binlog file " + << this->m_file_name << "fail..,errno:" << errno; + + //Since the file has no meta data , we have to construct it all over again. + int _parsed_bytes = m_p_meta.load()->ConstructMeta(this->m_binlog_handler); + if (_parsed_bytes > 0) { + LOG(WARNING) << "binlog incomplete data found , file corruption probably happened,now truncating it."; + this->Truncate(_parsed_bytes); + } + + return; + } + + //Meta data exists , just parse it. 
+ uint32_t _meta_data_len = *((uint32_t*)&sz_last_zone[4]); + ::RaftCore::Tools::ConvertBigEndianToLocal(_meta_data_len, &_meta_data_len); + + CHECK(std::fseek(this->m_binlog_handler,(_file_size - _meta_data_len), SEEK_SET) == 0) << "seek binlog file " + << this->m_file_name << "fail..,errno:" << errno; + + auto _ptr_meta_buf = (unsigned char*)malloc(_meta_data_len); + CHECK(std::fread(_ptr_meta_buf, 1,_meta_data_len, this->m_binlog_handler) == _meta_data_len) + << "read binlog meta data fail...,errno:" << errno; + + uint32_t crc_in_file = *(uint32_t*)&sz_last_zone[0]; + ::RaftCore::Tools::ConvertBigEndianToLocal(crc_in_file, &crc_in_file); + + //CRC32 only calculate the 1>meta data area. + uint32_t _crc32_value = ::RaftCore::Tools::CalculateCRC32(_ptr_meta_buf, _meta_data_len - 12); + + CHECK(crc_in_file == _crc32_value) << "binlog file meta check checksum failed"; + + m_p_meta.load()->ConstructMeta(_ptr_meta_buf,_meta_data_len); + free(_ptr_meta_buf); +} + +void BinLogOperator::UnInitialize() noexcept{ + + auto _p_tmp = m_p_meta.load(); + delete _p_tmp; + m_p_meta.store(nullptr); + + this->m_initialized = false; + + if (this->m_binlog_handler == nullptr) + return ; + + if (!std::fclose(this->m_binlog_handler)) + return; //normally return. + + //Something bad happened. + CHECK(false) << "close binlog file fail...,errno:"<< errno; +} + +void BinLogOperator::Truncate(uint32_t new_size) noexcept { + + CHECK(std::fclose(this->m_binlog_handler)==0) << "truncating file fclose failed"; + fs::resize_file(fs::path(this->m_file_name),new_size); + + //Re-open file. 
+ this->m_binlog_handler = std::fopen(this->m_file_name.c_str(),_AURORA_BINLOG_OP_MODE_); + CHECK(this->m_binlog_handler != nullptr) << "re-open binlog file after truncate fail.."; +} + +TypeBufferInfo BinLogOperator::PrepareBuffer(const TypeEntityList &input_entities, FileMetaData::TypeOffsetList &offset_list) noexcept { + + unsigned char* _p_buf = nullptr; + std::size_t _buf_size = 0; + + auto ms = std::chrono::duration_cast< std::chrono::milliseconds >( + std::chrono::system_clock::now().time_since_epoch() ); + + uint64_t ts = ms.count(); + + std::list _buf_list; + uint32_t _cur_offset = 0; //Offset relative to this append operation. + for (const auto & _shp_entity : input_entities) { + + auto _binlogitem = ::raft::BinlogItem(); + _binlogitem.set_allocated_entity(_shp_entity.get()); + _binlogitem.set_timestamp_ms(ts); + + std::string _output = ""; + CHECK(_binlogitem.SerializeToString(&_output)) << "SerializeToString fail...,log entity:" + << _shp_entity->entity_id().term() << "|" << _shp_entity->entity_id().idx(); + + _binlogitem.release_entity(); + + const auto & _wop = _shp_entity->write_op(); + uint32_t _key_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_wop.key().data(),(unsigned int)_wop.key().length()); + uint32_t _value_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_wop.value().data(),(unsigned int)_wop.value().length()); + + uint32_t _this_buf_size = (uint32_t)_output.size() + _FOUR_BYTES_; + _buf_size += _this_buf_size; + _buf_list.emplace_back(std::move(_output)); + + //Recording offset info. 
+ offset_list.emplace_back(new FileMetaData::IdxPair(_shp_entity->entity_id().term(),_shp_entity->entity_id().idx(), _cur_offset,_key_crc32,_value_crc32)); + _cur_offset += _this_buf_size; + } + + _p_buf = (unsigned char*)malloc(_buf_size); + auto _pcur = _p_buf; + + for (const auto& _buf : _buf_list) { + uint32_t _buf_len = (uint32_t)_buf.size(); + ::RaftCore::Tools::ConvertToBigEndian(uint32_t(_buf_len),&_buf_len); + std::memcpy(_pcur,(unsigned char*)&_buf_len,_FOUR_BYTES_); + _pcur += _FOUR_BYTES_; + std::memcpy(_pcur,_buf.data(),_buf.size()); + _pcur += _buf.size(); + } + + return std::make_tuple(_p_buf, _buf_size); +} + +void BinLogOperator::AppendBufferToBinlog(TypeBufferInfo &buffer_info, const FileMetaData::TypeOffsetList &offset_list) noexcept { + + //First,update this file's meta data. + long _offset = std::ftell(this->m_binlog_handler); + CHECK (_offset >= 0) << "ftell binlog file fail...,errno:"<< errno ; + + for (const auto &_item : offset_list) { + _item->m_offset += _offset; + } + + m_p_meta.load()->AddLogOffset(offset_list); + + //Second,append buffer to file. + unsigned char* _p_buf = nullptr; + int _buf_size = 0; + std::tie(_p_buf, _buf_size) = buffer_info; + + CHECK(std::fwrite((void*)_p_buf,1,_buf_size,this->m_binlog_handler) == _buf_size) << "fwrite binlog file fail...,errno:"<< errno ; + CHECK(std::fflush(this->m_binlog_handler) == 0) << "fflush binlog file fail...,errno:" << errno; + + free(_p_buf); +} + +void BinLogOperator::RotateFile() noexcept { + + /*Since file position indicator is always at the tail , + and file is opened in binary mode , ftell is exactly the file size. 
*/ + long file_len = std::ftell(this->m_binlog_handler); + CHECK(file_len >= 0) << "ftell binlog file fail...,errno:"<< errno ; + + bool _exceed_limits = ((uint32_t)file_len > ::RaftCore::Config::FLAGS_binlog_max_size) || + (this->m_log_num.load() > ::RaftCore::Config::FLAGS_binlog_max_log_num); + if (!_exceed_limits) + return; + + int _use_count = this->m_precede_lcl_inuse.load(); + if (_use_count > 0) { + LOG(INFO) << "somewhere using the pre-lcl part,use count:" << _use_count; + return; + } + + //Get the tail meta data need to move to the new binlog file. + uint32_t _reserve_counter = 0; //#logs before ID-LCL. + std::list> _cur_file_meta; + std::list> _new_file_meta; + FileMetaData* _p_new_meta = nullptr; + m_p_meta.load()->GetOrderedMeta(_cur_file_meta); + for (auto _iter = _cur_file_meta.crbegin(); _iter != _cur_file_meta.crend(); ) { + + /*We need to preserve more log entries . There are reasons for this: + + 1. Need to preserve the LCL in the new binlog, because it will be used in reverting + log scenario as the 'pre_log_id'. + 2. In some extreme cases, the resync log procedure may need log entries from leader's + binlog file that are even earlier than the LCL. E.g., a follower coming back to the + normal connected status of the cluster after a relatively long status of disconnected. + At which point , its logs falling a lot behind the leader's and need to resync a lot of entries. + */ + bool _less_than_LCL = (*_iter)->operator<(StorageGlobal::m_instance.GetLastCommitted()); + _reserve_counter += (_less_than_LCL) ? 1 : 0 ; + bool _overflow_reserve_num = _reserve_counter > ::RaftCore::Config::FLAGS_binlog_reserve_log_num; + + if ( _less_than_LCL && _overflow_reserve_num ) + break; + + _new_file_meta.emplace_front(*_iter); + + //Delete meta data that are smaller than (ID-LCL - FLAGS_reserve_log_num). + m_p_meta.load()->Delete(**_iter); + + //Insert current iterating meta-data item to the new binlog file's meta-data list. 
+ if (_p_new_meta == nullptr) + _p_new_meta = new FileMetaData(); + _p_new_meta->AddLogOffset((*_iter)->m_term,(*_iter)->m_index,(*_iter)->m_offset,(*_iter)->key_crc32,(*_iter)->value_crc32); + + _iter++; + } + + //If the trailing part actually exists , remove it. + unsigned char* _p_tail_logs_buf = nullptr; + long _tail_size = 0; + + //Note: _new_file_meta will contain log start from,i.e.,>= (ID-LCL - FLAGS_reserve_log_num). + std::shared_ptr _shp_front = _new_file_meta.front(); + + //Copy tail meta data to the new binlog file. + CHECK(std::fseek(this->m_binlog_handler,_shp_front->m_offset,SEEK_SET)==0); + + long _front_size = std::ftell(this->m_binlog_handler); + CHECK(_front_size >= 0) << "tell binlog file " << this->m_file_name << "fail..,errno:" << errno; + _tail_size = file_len - _front_size; + + _p_tail_logs_buf = (unsigned char *)malloc(_tail_size); + + CHECK(std::fread(_p_tail_logs_buf,1,_tail_size,this->m_binlog_handler)==_tail_size) << "read binlog file " + << this->m_file_name << " tail fail..,errno:" << errno; + + this->Truncate(_shp_front->m_offset); + + //Writing meta data to the end of binlog file, close it after that. + uint32_t _buf_size = 0; + unsigned char * _p_meta_buf = nullptr; + std::tie(_p_meta_buf, _buf_size) = m_p_meta.load()->GenerateBuffer(); + + CHECK(std::fwrite(_p_meta_buf, _buf_size, 1, this->m_binlog_handler) == 1) << "fwrite binlog file fail...,errno:"<< errno ; + free(_p_meta_buf); + CHECK (std::fflush(this->m_binlog_handler) == 0 ) << "fflush meta data to end of binlog file fail..."; + + //Rename & re-open. 
+ this->RenameOpenBinlogFile(); + + //Write the tail logs to the new binlog file + if (_p_tail_logs_buf != nullptr) { + CHECK(fwrite((void*)_p_tail_logs_buf,_tail_size,1,this->m_binlog_handler) ==1)<< "fwrite binlog file fail...,errno:"<< errno ; + CHECK (std::fflush(this->m_binlog_handler) == 0) << "fflush binlog file fail...,errno:" << errno; + } + + //New file meta offset are relative to the old binlog file,adjust it to the new binlog file. + _p_new_meta->BackOffset(_front_size); + + //Release and re-allocate meta data buf. + auto *_p_tmp = m_p_meta.load(); + delete _p_tmp; + m_p_meta.store(_p_new_meta); +} + +bool BinLogOperator::AppendEntry(const TypeEntityList &input_entities,bool force) noexcept{ + + //TODO: remove test code + //this->m_last_logged.store(ConvertID(input_entities.back()->entity_id()), std::memory_order_release); + //return true; + + /* Prerequisite: There is no way for two or more threads calling this method + parallel with the same pre_log(guaranteed by the previous generating + guid step). Otherwise, terrible things could happen. */ + + if (!force && this->m_binlog_status.load() != BinlogStatus::NORMAL) { + LOG(ERROR) << "binlog status wrong:" << int(this->m_binlog_status.load()); + return false; + } + + if (input_entities.empty()) + return true; + + //For some legacy reason , we represent pre_log like this... + const auto &pre_log = input_entities.front()->pre_log_id(); + + //Log offsets to update. + FileMetaData::TypeOffsetList _offset_list; + auto _buf_info = this->PrepareBuffer(input_entities,_offset_list); + + { + std::unique_lock _mutex_lock(m_cv_mutex); + + /* Having to be sure that the last log entry which has been written to the binlog file + is exactly the one prior to the log that we're currently going to write . + Meaning that we have to wait until the last log caught up with the pre_log. + + Figured out two approaches here: + A. use CAS weak version to wait. + B. use condition variable to wait. 
+ + Since approach A will consume a lot of CPU times ,I choose approach B. */ + while (!EntityIDEqual(pre_log, this->m_last_logged.load(std::memory_order_consume))) { + + auto wait_cond = [&]()->bool {return EntityIDEqual(pre_log,this->m_last_logged.load(std::memory_order_consume)); }; + + /* During high request load,many threads will be blocked here , and only one could + go further after some other threads called cv.notify_all */ + bool waiting_result = m_cv.wait_for(_mutex_lock, std::chrono::microseconds(::RaftCore::Config::FLAGS_binlog_append_file_timeo_us), wait_cond); + if (!waiting_result) { + LOG(WARNING) << "timeout during append ,current ID-LRL logID: " << this->m_last_logged.load(std::memory_order_consume) << ",waiting on previous id :" << ConvertID(pre_log) + << ", this shouldn't exist too much, and will resolve quickly."; + /*Just continuous waiting, no need to return false or something like that . Threads who got here must finish the appending process.*/ + continue; + } + break; + } + + //Note:Only one thread could reading here. + + //The following two steps can be merged into one . + this->AppendBufferToBinlog(_buf_info, _offset_list); + + //Rotating binlog file... + this->RotateFile(); + } + + this->m_last_logged.store(ConvertID(input_entities.back()->entity_id()), std::memory_order_release); + + { + /* Caution: modifying the conditions must be under the protect of mutex, + whether the conditions can be represented by an atomic object or not. + Reference:http://en.cppreference.com/w/cpp/thread/condition_variable */ + std::unique_lock _mutex_lock(m_cv_mutex); + //Notifying doesn't need to hold the mutex. 
+ m_cv.notify_all(); + } + + this->m_log_num.fetch_add((uint32_t)input_entities.size()); + + return true; +} + +LogIdentifier BinLogOperator::GetLastReplicated() noexcept{ + return this->m_last_logged.load(std::memory_order_consume); +} + +BinLogOperator::BinlogErrorCode BinLogOperator::RevertLog(TypeMemlogFollowerList &log_list, const LogIdentifier &boundary) noexcept { + + const auto & pre_entity_id = log_list.front()->GetEntity()->pre_log_id(); + + //Find the last entry that being consistent with the first element of log_list. + std::list> _ordered_meta; + m_p_meta.load()->GetOrderedMeta(_ordered_meta); + + auto _criter_meta = _ordered_meta.crbegin(); + for (; _criter_meta != _ordered_meta.crend(); _criter_meta++) { + if ((*_criter_meta)->operator!=(pre_entity_id)) { + continue; + } + break; + } + + CHECK(_criter_meta != _ordered_meta.crend()) << "Reverting log : cannot find pre_entity_id,something must be wrong"; + + auto _citer_meta = _criter_meta.base(); + bool _found_consistent_log = false; + + for (auto _citer_log = log_list.cbegin(); _citer_log != log_list.cend();) { + const std::string &_key = (*_citer_log)->GetEntity()->write_op().key(); + const std::string &_value = (*_citer_log)->GetEntity()->write_op().value(); + + uint32_t _key_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_key.data(), (unsigned int)_key.length()); + uint32_t _value_crc32 = ::RaftCore::Tools::CalculateCRC32((void*)_value.data(), (unsigned int)_value.length()); + + bool _item_equal = (*_citer_meta)->key_crc32 == _key_crc32 && (*_citer_meta)->value_crc32 == _value_crc32; + if ( !_item_equal ) + break; + + _found_consistent_log = true; + + //Advance input log iterator. + _citer_log = log_list.erase(_citer_log); + + //Advance binlog meta iterator. 
+ _citer_meta++; + if (_citer_meta == _ordered_meta.cend()) + break; + } + + /* There are several scenarios in reverting: + 1> log_list & binlog can find a consistent log entry: + assuming _log_listX is the sublist of log_list,with its first element is the first inconsistent + entry with binlog : + + 1) _log_listX is empty: entries in log_list are all the same as binlog, nothing to revert. + 2) _log_listX has no intersection with binlog: entries in log_list will be appended to binlog + without any erasing operations. + 3) _log_listX has intersection with binlog : the conflict entries in the binlog will be replaced + by _log_listX. + + 2> log_list & binlog can't find a consistent consistent log entry: + return a NO-CONSITENT error. + */ + if (!_found_consistent_log) + return BinlogErrorCode::NO_CONSISTENT_ERROR; + + //No need to revert anything. + if (log_list.empty()) + return BinlogErrorCode::SUCCEED_MERGED; + + const auto &_first_inconsistent_log = log_list.front(); + if (!EntityIDLarger(_first_inconsistent_log->GetEntity()->entity_id(), boundary)) + return BinlogErrorCode::OVER_BOUNDARY; + + BinlogStatus _cur_status = BinlogStatus::NORMAL; + if (!this->m_binlog_status.compare_exchange_strong(_cur_status,BinlogStatus::REVERTING)) { + //Other threads may have already modified the status variable. + return BinlogErrorCode::OTHER_ERROR; + } + + bool binlog_ended = (_citer_meta == _ordered_meta.cend()); + std::string _revert_point = binlog_ended ? "end of binlog file" : (*_citer_meta)->ToString(); + LOG(INFO) << "log reverting start...,detail:" << _revert_point; + + //Only in the following case should we do reverting. + if (!binlog_ended) { + + //Remove meta data. + auto _remove_iter = _citer_meta; + while (_remove_iter !=_ordered_meta.cend()) { + m_p_meta.load()->Delete(**_remove_iter); + _remove_iter++; + } + + //Update ID-LRL and notify the other threads who are waiting on it. 
+ std::unique_lock _mutex_lock(m_cv_mutex); + + this->Truncate((*_citer_meta)->m_offset); + + _citer_meta--; + LogIdentifier _id_lrl; + _id_lrl.Set((*_citer_meta)->m_term,(*_citer_meta)->m_index ); + this->m_last_logged.store(_id_lrl, std::memory_order_release); + m_cv.notify_all(); + } + + //Appending new entries. + std::list> _entities_list; + for (const auto& _item : log_list) + _entities_list.emplace_back(_item->GetEntity()); + + CHECK(AppendEntry(_entities_list,true)) << "AppendEntry to binlog fail,never should this happen,something terribly wrong."; + + //No exceptions could happened here. + _cur_status = BinlogStatus::REVERTING; + CHECK(this->m_binlog_status.compare_exchange_strong(_cur_status, BinlogStatus::NORMAL)) << "Binlog reverting : status CAS failed,something terribly wrong"; + + return BinlogErrorCode::SUCCEED_TRUNCATED; +} + +BinLogOperator::BinlogErrorCode BinLogOperator::SetHead(std::shared_ptr<::raft::Entity> _shp_entity) noexcept { + + BinlogStatus _cur_status = BinlogStatus::NORMAL; + if (!this->m_binlog_status.compare_exchange_strong(_cur_status,BinlogStatus::SETTING_HEAD)) { + //Other threads may have already modified the status variable. + return BinlogErrorCode::OTHER_ERROR; + } + + CHECK(std::fclose(this->m_binlog_handler)==0) << "truncating file fclose failed"; + if (fs::exists(fs::path(this->m_file_name))) + CHECK(std::remove(this->m_file_name.c_str())==0); + this->m_binlog_handler = std::fopen(this->m_file_name.c_str(), _AURORA_BINLOG_OP_MODE_); + CHECK(this->m_binlog_handler != nullptr) << "open binlog file " << this->m_file_name << "fail..,errno:" << errno; + + //Clear and set up new meta info. 
+ auto *_p_tmp = m_p_meta.load(); + delete _p_tmp; + m_p_meta.store(new FileMetaData()); + + m_zero_log_id.Set(0, 0); + this->m_last_logged.store(m_zero_log_id, std::memory_order_release); + + /*Since the binlog file has already just been reset,pre_log_id should be set to the initial id,only after that + the data could be written into the binlog file. */ + auto _p_pre_log_id = _shp_entity->mutable_pre_log_id(); + _p_pre_log_id->set_term(0); + _p_pre_log_id->set_idx(0); + + TypeEntityList _input_list; + _input_list.emplace_back(_shp_entity); + CHECK(AppendEntry(_input_list,true)) << "AppendEntry to binlog fail,never should this happen,something terribly wrong."; + + //No exceptions could happened here. + _cur_status = BinlogStatus::SETTING_HEAD; + CHECK(this->m_binlog_status.compare_exchange_strong(_cur_status, BinlogStatus::NORMAL)) << "Binlog setting head : status CAS failed,something terribly wrong"; + + return BinlogErrorCode::SUCCEED_TRUNCATED; +} + +bool BinLogOperator::Clear() noexcept { + if (this->m_binlog_status.load() != BinlogStatus::NORMAL) + return false; + + this->m_last_logged.store(m_zero_log_id, std::memory_order_release); + + this->DeleteOpenBinlogFile(); + + auto *_p_tmp = m_p_meta.load(); + delete _p_tmp; + m_p_meta.store(new FileMetaData()); + + return true; +} + +void BinLogOperator::DeleteOpenBinlogFile() noexcept { + + CHECK(std::fclose(this->m_binlog_handler)==0); + + LOG(INFO) << "deleting current running binlog:" << this->m_file_name; + + int _ret = std::remove(this->m_file_name.c_str()); + CHECK(_ret == 0) << ",delete fail,errno:" << errno; + + //Open & create new binlog file. + this->m_binlog_handler = std::fopen(this->m_file_name.c_str(),_AURORA_BINLOG_OP_MODE_); + CHECK(this->m_binlog_handler != nullptr) << "rotating, fopen binlog file fail...,errno:" << errno; +} + +void BinLogOperator::RenameOpenBinlogFile() noexcept{ + //Scan binlog files. 
+ fs::path _path("."); + CHECK (fs::is_directory(_path)) << "scan current directory fail,cannot save current file"; + + int max_suffix = 0; + for (auto&& x : fs::directory_iterator(_path)) { + std::string file_name = x.path().filename().string(); + std::string::size_type pos = file_name.find(this->m_file_name); + if (pos == std::string::npos) + continue ; + + int suffix = 0; + if (file_name != this->m_file_name) + suffix = std::atol(file_name.substr(pos + this->m_file_name.length() + 1).c_str()); + + max_suffix = std::max(suffix,max_suffix); + } + + CHECK(std::fclose(this->m_binlog_handler)==0); + + //Rename current file. + char sz_new_name[1024] = { 0 }; + std::snprintf(sz_new_name, sizeof(sz_new_name),"%s-%d",this->m_file_name.c_str() ,max_suffix + 1); + CHECK (std::rename(this->m_file_name.c_str(), sz_new_name) == 0) << "rename binlog file fail...,errno:" << errno; + + //Open & create new binlog file. + this->m_binlog_handler = std::fopen(this->m_file_name.c_str(),_AURORA_BINLOG_OP_MODE_); + CHECK (this->m_binlog_handler != nullptr) << "rotating, fopen binlog file fail...,errno:" << errno; +} + +std::string BinLogOperator::GetBinlogFileName() noexcept { + return this->m_file_name; +} + +void BinLogOperator::GetOrderedMeta(FileMetaData::TypeOffsetList &_output) noexcept { + m_p_meta.load()->GetOrderedMeta(_output); +} + +} \ No newline at end of file diff --git a/src/binlog/binlog_operator.h b/src/binlog/binlog_operator.h new file mode 100644 index 0000000..23ef9f4 --- /dev/null +++ b/src/binlog/binlog_operator.h @@ -0,0 +1,171 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_BIN_LOG_OPERATOR_H__ +#define __AURORA_BIN_LOG_OPERATOR_H__ + +#include +#include +#include + +#include "protocol/raft.pb.h" + +#include "common/comm_defs.h" +#include "common/log_identifier.h" +#include "follower/memory_log_follower.h" +#include "binlog/binlog_meta_data.h" + +#define _AURORA_BINLOG_NAME_ "raft.binlog" +#define _AURORA_BINLOG_NAME_REG_ "raft\\.binlog" +#define _AURORA_BINLOG_OP_MODE_ "ab+" +#define _AURORA_BINLOG_READ_MODE_ "rb" + +namespace RaftCore::BinLog { + +using ::raft::Entity; +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Follower::TypeMemlogFollowerList; +using ::RaftCore::Common::TypeEntityList; +using ::RaftCore::Common::TypeBufferInfo; +using ::RaftCore::BinLog::FileMetaData; + +class BinLogOperator final{ /* Owns the open binlog file handle together with its in-memory meta data and exposes the append / revert / set-head / clear operations on it. */ + +public: + + enum class BinlogErrorCode { + UNKNOWN = 0, + SUCCEED_TRUNCATED, + SUCCEED_MERGED, //All input logs already exist in the binlog. + + //------Separating line.------// + SUCCEED_MAX, + + OVER_BOUNDARY, + NO_CONSISTENT_ERROR, + OTHER_ERROR, + }; + + enum class BinlogStatus { + + //Normal status: new log entries can be written. + NORMAL, + + //Reverting due to a log conflict with the (probably new) leader's log. + REVERTING, + + //Set the first log entry in the binlog file, used in the SyncData scenario. 
+ SETTING_HEAD + }; + +public: + + BinLogOperator(); + + virtual ~BinLogOperator()noexcept; + + void Initialize(const char* role, bool file_name = false) noexcept; + + void UnInitialize() noexcept; + + //The 'input_entities' parameter must be ordered by log index in ascending order. 
+ bool AppendEntry(const TypeEntityList &input_entities,bool force=false) noexcept; + + /*Although concurrent execution is supported, RevertLog is never actually run simultaneously with AppendEntry, + which is guaranteed by the callers.*/ + BinlogErrorCode RevertLog(TypeMemlogFollowerList &log_list, const LogIdentifier &boundary) noexcept; + + /*Although concurrent execution is supported, SetHead is never actually run simultaneously with AppendEntry, + which is guaranteed by the callers.*/ + BinlogErrorCode SetHead(std::shared_ptr<::raft::Entity> _shp_entity) noexcept; /* Restarts the file with _shp_entity as its first record; OTHER_ERROR if the status is not NORMAL. */ + + bool Clear() noexcept; /* Returns false, changing nothing, unless the status is NORMAL. */ + + LogIdentifier GetLastReplicated() noexcept; + + std::string GetBinlogFileName() noexcept; + + void GetOrderedMeta(FileMetaData::TypeOffsetList &_output) noexcept; /* Forwards to the current FileMetaData object. */ + + //bool IsTermMatched() noexcept; + + //void SetTermMatched() noexcept; + + void AddPreLRLUseCount() noexcept; + + void SubPreLRLUseCount() noexcept; + +private: + + void ParseFile() noexcept; + + void DeleteOpenBinlogFile() noexcept; /* Close, remove, and reopen an empty file under the same name. */ + + void RenameOpenBinlogFile() noexcept; /* Close, rename to "<name>-<n>", and reopen an empty file under the original name. */ + + void Truncate(uint32_t new_size) noexcept; + + //The following three member functions are helpers of AppendEntry. 
+ TypeBufferInfo PrepareBuffer(const TypeEntityList &input_entities, FileMetaData::TypeOffsetList &offset_list) noexcept; + + void AppendBufferToBinlog(TypeBufferInfo &buffer_info, const FileMetaData::TypeOffsetList &offset_list) noexcept; + + void RotateFile() noexcept; + +private: + + LogIdentifier m_zero_log_id; /* Set to (0, 0) by SetHead(); used as the reset value for m_last_logged. */ + + bool m_initialized = false; + + //std::atomic m_term_changed = false; + + std::atomic m_last_logged; /* Reset to m_zero_log_id by SetHead() and Clear(). */ + + std::string m_file_name = ""; + + std::FILE *m_binlog_handler = nullptr; /* Handle of the currently open binlog file; closed and reopened by the delete / rename helpers. */ + + std::mutex m_cv_mutex; + + std::condition_variable m_cv; + + std::atomic m_p_meta; /* Heap-allocated meta data of the current file; replaced wholesale by SetHead() and Clear(). */ + + std::atomic m_binlog_status; /* SetHead() claims the operator by a compare-and-swap from NORMAL and restores NORMAL when done. */ + + std::atomic m_precede_lcl_inuse; + + std::atomic m_log_num; + +private: + + BinLogOperator(const BinLogOperator&) = delete; + + BinLogOperator& operator=(const BinLogOperator&) = delete; + +}; + + + +} //end namespace + + +#endif diff --git a/src/binlog/binlog_singleton.cc b/src/binlog/binlog_singleton.cc new file mode 100644 index 0000000..1dd684c --- /dev/null +++ b/src/binlog/binlog_singleton.cc @@ -0,0 +1,25 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+ +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "binlog/binlog_singleton.h" + +namespace RaftCore::BinLog { + +BinLogOperator BinLogGlobal::m_instance; /* Out-of-class definition of the static member declared in binlog_singleton.h. */ + +} \ No newline at end of file diff --git a/src/binlog/binlog_singleton.h b/src/binlog/binlog_singleton.h new file mode 100644 index 0000000..a2e40b3 --- /dev/null +++ b/src/binlog/binlog_singleton.h @@ -0,0 +1,51 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_BIN_LOG_SINGLETON_H__ +#define __AURORA_BIN_LOG_SINGLETON_H__ + +#include "binlog/binlog_operator.h" + +namespace RaftCore::BinLog { + +using ::RaftCore::BinLog::BinLogOperator; + +class BinLogGlobal final{ /* Non-instantiable holder (constructor, destructor and copy operations are all deleted) for the single shared BinLogOperator. */ + +public: + + static BinLogOperator m_instance; /* The one BinLogOperator object; defined in binlog_singleton.cc. 
*/ + +private: + + BinLogGlobal() = delete; + + virtual ~BinLogGlobal() = delete; + + BinLogGlobal(const BinLogGlobal&) = delete; + + BinLogGlobal& operator=(const BinLogGlobal&) = delete; + +}; + +} //end namespace + + +#endif diff --git a/src/candidate/candidate_request.cc b/src/candidate/candidate_request.cc new file mode 100644 index 0000000..65b0afa --- /dev/null +++ b/src/candidate/candidate_request.cc @@ -0,0 +1,31 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "candidate/candidate_request.h" + +namespace RaftCore::Candidate { + +template +CandidateUnaryRequest::CandidateUnaryRequest() noexcept {} /* Empty body: no work is done here beyond whatever the base class constructor does. */ + +template +CandidateUnaryRequest::~CandidateUnaryRequest() noexcept {} /* Empty body. */ + + +} + diff --git a/src/candidate/candidate_request.h b/src/candidate/candidate_request.h new file mode 100644 index 0000000..f7e8d39 --- /dev/null +++ b/src/candidate/candidate_request.h @@ -0,0 +1,57 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_CANDIDATE_REQUEST_H__ +#define __AURORA_CANDIDATE_REQUEST_H__ + +#include + +#include "protocol/raft.grpc.pb.h" +#include "protocol/raft.pb.h" + +#include "common/request_base.h" + +using ::raft::RaftService; +using ::grpc::ServerCompletionQueue; +using ::RaftCore::Common::UnaryRequest; /* NOTE(review): these three using-declarations are at global scope in a header, so every includer inherits them -- consider moving them inside the namespace below. */ + +namespace RaftCore::Candidate { + +template +class CandidateUnaryRequest : public UnaryRequest { /* Candidate-role request type; declares only a constructor and destructor on top of UnaryRequest and is non-copyable. */ + +public: + + CandidateUnaryRequest()noexcept; + + virtual ~CandidateUnaryRequest()noexcept; + +private: + + CandidateUnaryRequest(const CandidateUnaryRequest&) = delete; + + CandidateUnaryRequest& operator=(const CandidateUnaryRequest&) = delete; +}; + +} //end namespace + +#include "candidate_request.cc" + +#endif diff --git a/src/candidate/candidate_view.cc b/src/candidate/candidate_view.cc new file mode 100644 index 0000000..d390467 --- /dev/null +++ b/src/candidate/candidate_view.cc @@ -0,0 +1,32 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "candidate/candidate_view.h" + +namespace RaftCore::Candidate { + +void CandidateView::Initialize() noexcept{ /* Only forwards to CommonView::Initialize(). */ + CommonView::Initialize(); +} + +void CandidateView::UnInitialize() noexcept { /* Only forwards to CommonView::UnInitialize(). */ + CommonView::UnInitialize(); +} + +} + diff --git a/src/candidate/candidate_view.h b/src/candidate/candidate_view.h new file mode 100644 index 0000000..d46f641 --- /dev/null +++ b/src/candidate/candidate_view.h @@ -0,0 +1,56 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_CANDIDATE_VIEW_H__ +#define __AURORA_CANDIDATE_VIEW_H__ + +#include +#include + +#include "common/comm_view.h" + +namespace RaftCore::Candidate { + +using ::RaftCore::Common::CommonView; + +//CandidateView adds nothing of its own; all candidate logic lives in the `Election` module. 
+class CandidateView final: public CommonView{ /* Static-only: construction, destruction and copying are all deleted. */ + +public: + + static void Initialize() noexcept; + + static void UnInitialize() noexcept; + +private: + + CandidateView() = delete; + + virtual ~CandidateView() = delete; + + CandidateView(const CandidateView&) = delete; + + CandidateView& operator=(const CandidateView&) = delete; + +}; + +} //end namespace + +#endif diff --git a/src/client/client_base.cc b/src/client/client_base.cc new file mode 100644 index 0000000..c473ebb --- /dev/null +++ b/src/client/client_base.cc @@ -0,0 +1,35 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "client/client_base.h" + +namespace RaftCore::Client { + +ClientBase::ClientBase() {} + +ClientBase::~ClientBase() {} + +void ClientBase::PushCallBackArgs(void* cb_data) noexcept{ /* Appends cb_data to the argument list; the pointer is stored as-is and is neither dereferenced nor freed by this class. */ + this->m_callback_args.push_back(cb_data); +} + +void ClientBase::ClearCallBackArgs() noexcept{ /* Drops all stored pointers without freeing what they point to. */ + this->m_callback_args.clear(); +} + +} diff --git a/src/client/client_base.h b/src/client/client_base.h new file mode 100644 index 0000000..c0a482c --- /dev/null +++ b/src/client/client_base.h @@ -0,0 +1,54 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_CLIENT_BASE_H__ +#define __AURORA_CLIENT_BASE_H__ + +#include + +namespace RaftCore::Client { + +//Base for properties shared by all clients; currently just an ordered list of opaque callback arguments. 
+class ClientBase { + +public: + + ClientBase(); + + virtual ~ClientBase(); + + void PushCallBackArgs(void* cb_data)noexcept; + +protected: + + void ClearCallBackArgs()noexcept; + + std::vector m_callback_args; /* Filled by PushCallBackArgs() in call order; derived clients read entries back by index. */ + +private: + + ClientBase(const ClientBase&) = delete; + + ClientBase& operator=(const ClientBase&) = delete; +}; + +} + +#endif diff --git a/src/client/client_framework.cc b/src/client/client_framework.cc new file mode 100644 index 0000000..834c918 --- /dev/null +++ b/src/client/client_framework.cc @@ -0,0 +1,210 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "glog/logging.h" + +#include "client/client_framework.h" + +namespace RaftCore::Client { + +template +ClientFramework::ClientFramework(std::shared_ptr shp_channel) noexcept{ + this->m_stub = ::raft::RaftService::NewStub(shp_channel); + this->Reset(); +} + +template +void ClientFramework::Reset() noexcept { + this->m_client_context.reset(new ::grpc::ClientContext()); +} + +template +std::shared_ptr<::raft::RaftService::Stub> ClientFramework::GetStub() noexcept { + return this->m_stub; +} + +template +ClientFramework::~ClientFramework() noexcept{} + +template +ClientTpl::ClientTpl(std::shared_ptr shp_channel) noexcept : ClientFramework(shp_channel) { + this->m_shp_request.reset(new T()); +} + +template +ClientTpl::~ClientTpl() noexcept {} + +template +SyncClient::SyncClient(std::shared_ptr shp_channel) noexcept : ClientTpl(shp_channel) {} + +template +SyncClient::~SyncClient() noexcept {} + +template +AsyncClient::AsyncClient(std::shared_ptr shp_channel, + std::shared_ptr shp_cq)noexcept : ClientTpl(shp_channel), + m_server_cq(shp_cq) {} + +template +AsyncClient::~AsyncClient()noexcept {} + +template +UnarySyncClient::UnarySyncClient(std::shared_ptr shp_channel)noexcept : + SyncClient(shp_channel) {} + +template +const R& UnarySyncClient::DoRPC(std::function&)> req_setter, + std::function<::grpc::Status(::grpc::ClientContext*,const T&,R*)> rpc, uint32_t timeo_ms, + ::grpc::Status &ret_status)noexcept { + + req_setter(this->m_shp_request); + std::chrono::system_clock::time_point deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(timeo_ms); + this->m_client_context->set_deadline(deadline); + + ret_status = rpc(this->m_client_context.get(), *this->m_shp_request, &this->m_response); + return this->m_response; +} + +template +UnarySyncClient::~UnarySyncClient()noexcept {} + +template +BidirectionalSyncClient::BidirectionalSyncClient(std::shared_ptr shp_channel)noexcept : + SyncClient(shp_channel) {} + +template 
+BidirectionalSyncClient::~BidirectionalSyncClient()noexcept {} + +template +UnaryAsyncClient::UnaryAsyncClient(std::shared_ptr shp_channel, + std::shared_ptr shp_cq) noexcept : AsyncClient(shp_channel, shp_cq) { + static_assert(std::is_base_of::value, "Q is not a derived from UnaryAsyncClient."); +} + +template +void UnaryAsyncClient::React(bool cq_result) noexcept { + + if (!cq_result) { + LOG(ERROR) << "UnaryAsyncClient got false result from CQ."; + this->Release(); + return; + } + + this->Responder(this->m_final_status, this->m_response); + this->Release(); +} + +template +void UnaryAsyncClient::Release() noexcept { + delete dynamic_cast(this); +} + +template +void UnaryAsyncClient::EntrustRequest(const std::function&)> &req_setter, + const FPrepareAsync &f_prepare_async, uint32_t timeo_ms) noexcept { + req_setter(this->m_shp_request); + + std::chrono::time_point _deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(timeo_ms); + this->m_client_context->set_deadline(_deadline); + + this->m_reader = f_prepare_async(this->m_client_context.get(), *this->m_shp_request, + this->m_server_cq.get()); + this->m_reader->StartCall(); + this->m_reader->Finish(&this->m_response, &this->m_final_status, dynamic_cast(this)); +} + +template +UnaryAsyncClient::~UnaryAsyncClient() noexcept {} + +template +BidirectionalAsyncClient::BidirectionalAsyncClient(std::shared_ptr shp_channel, + std::shared_ptr shp_cq) noexcept : AsyncClient(shp_channel, shp_cq), + m_async_rw(this->m_client_context.get()), m_status(ProcessStage::CONNECT) { + static_assert(std::is_base_of::value, "Q is not a derived from BidirectionalAsyncClient."); +} + +template +BidirectionalAsyncClient::~BidirectionalAsyncClient() noexcept {} + +template +void BidirectionalAsyncClient::React(bool cq_result) noexcept { + + Q* _p_downcast = dynamic_cast(this); + + if (!cq_result && (this->m_status != ProcessStage::READ)) { + LOG(ERROR) << "BidirectionalAsyncClient got false result from CQ."; + delete 
_p_downcast; + return; + } + + switch (this->m_status) { + case ProcessStage::READ: + + //Meaning client said it wants to end the stream either by a 'WritesDone' or 'finish' call. + if (!cq_result) { + this->m_async_rw.Finish(this->m_final_status, _p_downcast); + this->m_status = ProcessStage::FINISH; + break; + } + + this->m_responder(this->m_final_status, this->m_response); + break; + + case ProcessStage::WRITE: + this->m_async_rw.Read(&this->m_response, _p_downcast); + this->m_status = ProcessStage::READ; + break; + + case ProcessStage::CONNECT: + break; + + case ProcessStage::WRITES_DONE: + this->m_async_rw.Finish(this->m_final_status, _p_downcast); + this->m_status = ProcessStage::FINISH; + break; + + case ProcessStage::FINISH: + if (this->m_final_status.error_code() != ::grpc::StatusCode::OK) { + LOG(ERROR) << "rpc fail,err code:" << this->m_final_status.error_code() + << ",err msg:" << this->m_final_status.error_message(); + } + delete _p_downcast; + break; + + default: + CHECK(false) << "Unexpected tag " << int(this->m_status); + } +} + +template +void BidirectionalAsyncClient::AsyncDo(std::function&)> req_setter) noexcept { + req_setter(this->m_shp_request); + this->m_async_rw.Write(this->m_shp_request,dynamic_cast(this)); + this->m_status = ProcessStage::WRITE; +} + +template +void BidirectionalAsyncClient::WriteDone() noexcept { + this->m_async_rw.WritesDone(dynamic_cast(this)); + this->m_status = ProcessStage::WRITES_DONE; +} + +} + diff --git a/src/client/client_framework.h b/src/client/client_framework.h new file mode 100644 index 0000000..08a3deb --- /dev/null +++ b/src/client/client_framework.h @@ -0,0 +1,239 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_CLIENT_FRAMEWORK_H__ +#define __AURORA_CLIENT_FRAMEWORK_H__ + +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" + +#include "protocol/raft.grpc.pb.h" +#include "common/react_base.h" + +namespace RaftCore::Client { + +using ::grpc::CompletionQueue; +//using ::grpc::ServerCompletionQueue; +using ::grpc::Channel; +using ::RaftCore::Common::ReactBase; + +template +using FPrepareAsync = std::function>( + ::grpc::ClientContext*,const T&, ::grpc::CompletionQueue*)>; + +template +class ClientFramework { /* Common base of all RPC clients: holds the RaftService stub, the per-call ClientContext and the final call status. */ + +public: + + ClientFramework(std::shared_ptr shp_channel)noexcept; + + virtual ~ClientFramework()noexcept; + + //Resets the client for reuse by installing a fresh ClientContext. 
+ virtual void Reset() noexcept; + + virtual std::shared_ptr<::raft::RaftService::Stub> GetStub() noexcept final; + +protected: + + std::shared_ptr<::grpc::ClientContext> m_client_context; /* Replaced by Reset(). */ + + std::shared_ptr<::raft::RaftService::Stub> m_stub; + + ::grpc::Status m_final_status; + +private: + + ClientFramework(const ClientFramework&) = delete; + + ClientFramework& operator=(const ClientFramework&) = delete; +}; + +template +class ClientTpl : public ClientFramework { /* Adds request and response storage to ClientFramework. */ + +public: + + typedef std::function TypeResponder; + + ClientTpl(std::shared_ptr shp_channel)noexcept; + + virtual ~ClientTpl()noexcept; + +protected: + + std::shared_ptr m_shp_request; /* Allocated in the constructor; handed to the caller's req_setter before each call. */ + + R m_response; + +private: + + ClientTpl(const ClientTpl&) = delete; + + ClientTpl& operator=(const ClientTpl&) = delete; +}; + +template +class SyncClient : public ClientTpl { + +public: + + SyncClient(std::shared_ptr shp_channel)noexcept; + + virtual ~SyncClient()noexcept; + +private: + + SyncClient(const SyncClient&) = delete; + + SyncClient& operator=(const SyncClient&) = delete; +}; + +template +class UnarySyncClient : public SyncClient { + +public: + + UnarySyncClient(std::shared_ptr shp_channel)noexcept; + + virtual ~UnarySyncClient()noexcept; + + const R& DoRPC(std::function&)> req_setter, + std::function<::grpc::Status(::grpc::ClientContext*,const T&,R*)> rpc, uint32_t timeo_ms, + ::grpc::Status &ret_status)noexcept; /* Blocking call with a deadline of now + timeo_ms; the returned reference points at this object's own response member. 
*/ + +private: + + UnarySyncClient(const UnarySyncClient&) = delete; + + UnarySyncClient& operator=(const UnarySyncClient&) = delete; +}; + +template +class BidirectionalSyncClient : public SyncClient { + +public: + + BidirectionalSyncClient(std::shared_ptr shp_channel)noexcept; + + virtual ~BidirectionalSyncClient()noexcept; + +protected: + + std::shared_ptr<::grpc::ClientReaderWriter> m_sync_rw; /* Not set by this class's constructor; derived classes assign it. */ + +private: + + BidirectionalSyncClient(const BidirectionalSyncClient&) = delete; + + BidirectionalSyncClient& operator=(const BidirectionalSyncClient&) = delete; +}; + +template +class AsyncClient : 
public ClientTpl, public ReactBase { + +public: + + AsyncClient(std::shared_ptr shp_channel,std::shared_ptr shp_cq)noexcept; + + virtual ~AsyncClient()noexcept; + + virtual void Responder(const ::grpc::Status& status, const R& rsp) noexcept = 0; /* Implemented by concrete clients to consume the status and response of a finished call. */ + +protected: + + std::shared_ptr m_server_cq; /* Completion queue passed to the prepare-async function in EntrustRequest(). */ + +private: + + AsyncClient(const AsyncClient&) = delete; + + AsyncClient& operator=(const AsyncClient&) = delete; +}; + +template +class UnaryAsyncClient : public AsyncClient { + +public: + + UnaryAsyncClient(std::shared_ptr shp_channel,std::shared_ptr shp_cq)noexcept; + + virtual ~UnaryAsyncClient()noexcept; + + void EntrustRequest(const std::function&)> &req_setter, + const FPrepareAsync &f_prepare_async, uint32_t timeo_ms) noexcept; + +protected: + + virtual void React(bool cq_result) noexcept override; + + virtual void Release() noexcept; /* The default implementation deletes the object; derived classes may override it. */ + +protected: + + std::unique_ptr<::grpc::ClientAsyncResponseReader> m_reader; + +private: + + UnaryAsyncClient(const UnaryAsyncClient&) = delete; + + UnaryAsyncClient& operator=(const UnaryAsyncClient&) = delete; +}; + +template +class BidirectionalAsyncClient : public AsyncClient { + +public: + + BidirectionalAsyncClient(std::shared_ptr shp_channel, std::shared_ptr shp_cq)noexcept; + + virtual ~BidirectionalAsyncClient()noexcept; + +protected: + + virtual void React(bool cq_result) noexcept override; + + void AsyncDo(std::function&)> req_setter) noexcept; + + void WriteDone() noexcept; + +protected: + + ::grpc::ClientAsyncReaderWriter m_async_rw; + + enum ProcessStage { READ = 1, WRITE = 2, CONNECT = 3, WRITES_DONE = 4, FINISH = 5 }; /* Selects the branch taken in React(); the initial stage is CONNECT. 
*/ + + ProcessStage m_status; + +private: + + BidirectionalAsyncClient(const BidirectionalAsyncClient&) = delete; + + BidirectionalAsyncClient& operator=(const BidirectionalAsyncClient&) = delete; +}; + +} //end namespace + +#include "client/client_framework.cc" + +#endif diff --git a/src/client/client_impl.cc b/src/client/client_impl.cc new file mode 100644 index 0000000..e2984fb --- /dev/null +++ 
b/src/client/client_impl.cc @@ -0,0 +1,194 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "service/service.h" +#include "leader/client_pool.h" +#include "member/member_manager.h" +#include "election/election.h" +#include "client/client_impl.h" + +namespace RaftCore::Client { + +using ::RaftCore::Common::VoteType; +using ::RaftCore::Service::Write; +using ::RaftCore::Leader::ClientPool; +using ::RaftCore::Member::MemberMgr; +using ::RaftCore::Election::ElectionMgr; + +AppendEntriesAsyncClient::AppendEntriesAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::AppendEntriesRequest, ::raft::AppendEntriesResponse, + AppendEntriesAsyncClient>(shp_channel, shp_cq) { + + //Give myself a long lived delegator. + this->OwnershipDelegator::ResetOwnership(this); +} + +AppendEntriesAsyncClient::~AppendEntriesAsyncClient() {} + +void AppendEntriesAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::AppendEntriesResponse& rsp) noexcept { /* Hands the reply to the delegated owner's ReplicateDoneCallBack. NOTE(review): m_callback_args[0] is read without a size check and cast to ClientPool* -- assumes PushCallBackArgs() was called with the pool first. 
*/ + + auto _shp_write = this->OwnershipDelegator::GetOwnership(); + auto *_p_conn_pool = (ClientPool*)this->m_callback_args[0]; + + _shp_write->ReplicateDoneCallBack(status, rsp, _p_conn_pool->GetParentFollower(), this); +} + +void AppendEntriesAsyncClient::Release() noexcept { + //Reset myself. 
+ this->Reset(); + + //Release associated write request. + this->OwnershipDelegator::ReleaseOwnership(); + + //Push myself back to the connection pool. + auto *_p_conn_pool = (ClientPool*)this->m_callback_args[0]; + auto _shp_copied = this->OwnershipDelegator::GetOwnership(); + _p_conn_pool->Back(_shp_copied); + + VLOG(90) << "AppendEntriesAsyncClient returned:" << _p_conn_pool->GetParentFollower()->my_addr; + + //Clear my args only now: the pool pointer above was read from them. + this->ClearCallBackArgs(); +} + +CommitEntriesAsyncClient::CommitEntriesAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::CommitEntryRequest, ::raft::CommitEntryResponse, + CommitEntriesAsyncClient> (shp_channel, shp_cq){ + + //Give myself a long lived delegator. + this->OwnershipDelegator::ResetOwnership(this); +} + +CommitEntriesAsyncClient::~CommitEntriesAsyncClient() {} + +void CommitEntriesAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::CommitEntryResponse& rsp) noexcept { /* Hands the reply to the delegated owner's CommitDoneCallBack; m_callback_args[0] is read as the ClientPool*. */ + + auto _shp_write = this->OwnershipDelegator::GetOwnership(); + auto *_p_conn_pool = (ClientPool*)this->m_callback_args[0]; + + _shp_write->CommitDoneCallBack(status, rsp, _p_conn_pool->GetParentFollower()); +} + +void CommitEntriesAsyncClient::Release() noexcept { + //Reset myself. + this->Reset(); + + //Release associated write request. + this->OwnershipDelegator::ReleaseOwnership(); + + //Push myself back to the connection pool. + auto *_p_conn_pool = (ClientPool*)this->m_callback_args[0]; + + auto _shp_copied = this->OwnershipDelegator::GetOwnership(); + _p_conn_pool->Back(_shp_copied); + + VLOG(90) << "CommitEntriesAsyncClient returned:" << _p_conn_pool->GetParentFollower()->my_addr; + + //Clear my args only now: the pool pointer above was read from them. 
+ this->ClearCallBackArgs(); +} + +HeartbeatSyncClient::HeartbeatSyncClient(std::shared_ptr<::grpc::Channel> shp_channel): + UnarySyncClient<::raft::HeartBeatRequest, ::raft::CommonResponse>(shp_channel) {} + +HeartbeatSyncClient::~HeartbeatSyncClient() {} + +WriteSyncClient::WriteSyncClient(std::shared_ptr<::grpc::Channel> shp_channel): + UnarySyncClient<::raft::ClientWriteRequest, ::raft::ClientWriteResponse>(shp_channel) {} + +WriteSyncClient::~WriteSyncClient() {} + +SyncDataSyncClient::SyncDataSyncClient(std::shared_ptr<::grpc::Channel> shp_channel): + BidirectionalSyncClient<::raft::SyncDataRequest, ::raft::SyncDataResponse>(shp_channel) { /* Opens the SyncData stream immediately, bound to the current ClientContext. */ + this->m_sync_rw = this->m_stub->SyncData(this->m_client_context.get()); +} + +SyncDataSyncClient::~SyncDataSyncClient() {} + +auto SyncDataSyncClient::GetInstantiatedReq()noexcept -> decltype(m_shp_request) { /* Returns the request object, allocating a new one first if the pointer is empty. */ + if (!this->m_shp_request) + this->m_shp_request.reset(new ::raft::SyncDataRequest()); + + return this->m_shp_request; +} + +auto SyncDataSyncClient::GetReaderWriter() noexcept -> decltype(m_sync_rw) { + return this->m_sync_rw; +} + +::raft::SyncDataResponse* SyncDataSyncClient::GetResponse() noexcept { /* Returns a pointer to this object's own response member. 
*/ + return &this->m_response; +} + +MemberChangePrepareAsyncClient::MemberChangePrepareAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::MemberChangeInnerRequest, ::raft::MemberChangeInnerResponse, + MemberChangePrepareAsyncClient,::grpc::CompletionQueue>(shp_channel, shp_cq){} + +MemberChangePrepareAsyncClient::~MemberChangePrepareAsyncClient() {} + +void MemberChangePrepareAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::MemberChangeInnerResponse& rsp) noexcept { /* m_callback_args[1] carries an integer index stored in a void*; m_callback_args[0] is passed through untouched. NOTE(review): both are read without a size check. */ + uint32_t _idx = static_cast(reinterpret_cast(m_callback_args[1])); + MemberMgr::MemberChangePrepareCallBack(status, rsp, m_callback_args[0], _idx); +} + +MemberChangeCommitAsyncClient::MemberChangeCommitAsyncClient(std::shared_ptr<::grpc::Channel> 
shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::MemberChangeInnerRequest, ::raft::MemberChangeInnerResponse, + MemberChangeCommitAsyncClient,::grpc::CompletionQueue>(shp_channel, shp_cq){} + +MemberChangeCommitAsyncClient::~MemberChangeCommitAsyncClient() {} + +void MemberChangeCommitAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::MemberChangeInnerResponse& rsp) noexcept { /* Same argument layout as the prepare client: [0] is passed through, [1] is an index stored in a void*. */ + uint32_t _idx = static_cast(reinterpret_cast(m_callback_args[1])); + MemberMgr::MemberChangeCommitCallBack(status, rsp, m_callback_args[0], _idx); +} + +PrevoteAsyncClient::PrevoteAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::VoteRequest, ::raft::VoteResponse, + PrevoteAsyncClient,::grpc::CompletionQueue>(shp_channel, shp_cq){} + +PrevoteAsyncClient::~PrevoteAsyncClient() {} + +void PrevoteAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::VoteResponse& rsp) noexcept { /* m_callback_args[0] is an index stored in a void*; the reply is reported as a PreVote. */ + uint32_t _idx = static_cast(reinterpret_cast(m_callback_args[0])); + ElectionMgr::CallBack(status, rsp, VoteType::PreVote,_idx); +} + +VoteAsyncClient::VoteAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq) + : UnaryAsyncClient<::raft::VoteRequest, ::raft::VoteResponse, + VoteAsyncClient,::grpc::CompletionQueue>(shp_channel, shp_cq){} + +VoteAsyncClient::~VoteAsyncClient() {} + +void VoteAsyncClient::Responder(const ::grpc::Status& status, + const ::raft::VoteResponse& rsp) noexcept { /* m_callback_args[0] is an index stored in a void*; the reply is reported as a Vote. 
*/ + uint32_t _idx = static_cast(reinterpret_cast(m_callback_args[0])); + ElectionMgr::CallBack(status, rsp, VoteType::Vote,_idx); +} + +} diff --git a/src/client/client_impl.h b/src/client/client_impl.h new file mode 100644 index 0000000..145496a --- /dev/null +++ b/src/client/client_impl.h @@ -0,0 +1,239 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it 
under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_CLIENT_IMPL_H__ +#define __AURORA_CLIENT_IMPL_H__ + +#include "protocol/raft.pb.h" +#include "protocol/raft.grpc.pb.h" + +#include "client/client_framework.h" +#include "client/client_base.h" +#include "service/ownership_delegator.h" + +namespace RaftCore { + namespace Service { + class Write; + } +} + +namespace RaftCore::Client { + +using ::RaftCore::Client::UnarySyncClient; +using ::RaftCore::Client::ClientBase; +using ::RaftCore::Service::Write; +using ::RaftCore::Service::OwnershipDelegator; + +class AppendEntriesAsyncClient : public UnaryAsyncClient<::raft::AppendEntriesRequest, + ::raft::AppendEntriesResponse, AppendEntriesAsyncClient>, + public OwnershipDelegator, public OwnershipDelegator, + public ClientBase { + +public: + + AppendEntriesAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~AppendEntriesAsyncClient(); + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::AppendEntriesResponse& rsp) noexcept override; + + virtual void Release() noexcept override; + +private: + + AppendEntriesAsyncClient(const AppendEntriesAsyncClient&) = delete; + + AppendEntriesAsyncClient& operator=(const AppendEntriesAsyncClient&) = delete; +}; + +class CommitEntriesAsyncClient : public UnaryAsyncClient<::raft::CommitEntryRequest, + ::raft::CommitEntryResponse, CommitEntriesAsyncClient>, + public 
OwnershipDelegator, public OwnershipDelegator, + public ClientBase { + +public: + + CommitEntriesAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~CommitEntriesAsyncClient(); + + virtual void Release() noexcept override; + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::CommitEntryResponse& rsp) noexcept override; + +private: + + CommitEntriesAsyncClient(const CommitEntriesAsyncClient&) = delete; + + CommitEntriesAsyncClient& operator=(const CommitEntriesAsyncClient&) = delete; +}; + +typedef std::shared_ptr TypePtrCommitAC; + +class HeartbeatSyncClient : public UnarySyncClient<::raft::HeartBeatRequest, ::raft::CommonResponse>, + public ClientBase { + +public: + + HeartbeatSyncClient(std::shared_ptr<::grpc::Channel> shp_channel); + + virtual ~HeartbeatSyncClient(); + +private: + + HeartbeatSyncClient(const HeartbeatSyncClient&) = delete; + + HeartbeatSyncClient& operator=(const HeartbeatSyncClient&) = delete; +}; + +class WriteSyncClient : public UnarySyncClient<::raft::ClientWriteRequest, ::raft::ClientWriteResponse>, + public ClientBase { + +public: + + WriteSyncClient(std::shared_ptr<::grpc::Channel> shp_channel); + + virtual ~WriteSyncClient(); + +private: + + WriteSyncClient(const WriteSyncClient&) = delete; + + WriteSyncClient& operator=(const WriteSyncClient&) = delete; +}; + +class SyncDataSyncClient : public BidirectionalSyncClient<::raft::SyncDataRequest, ::raft::SyncDataResponse>, + public ClientBase { + +public: + + SyncDataSyncClient(std::shared_ptr<::grpc::Channel> shp_channel); + + virtual ~SyncDataSyncClient(); + + auto GetInstantiatedReq() noexcept-> decltype(m_shp_request); + + auto GetReaderWriter() noexcept -> decltype(m_sync_rw); + + ::raft::SyncDataResponse* GetResponse() noexcept; + +private: + + SyncDataSyncClient(const SyncDataSyncClient&) = delete; + + SyncDataSyncClient& operator=(const SyncDataSyncClient&) = delete; +}; + +class 
MemberChangePrepareAsyncClient : public UnaryAsyncClient<::raft::MemberChangeInnerRequest, + ::raft::MemberChangeInnerResponse, MemberChangePrepareAsyncClient,::grpc::CompletionQueue>, public ClientBase { + +public: + + MemberChangePrepareAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~MemberChangePrepareAsyncClient(); + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::MemberChangeInnerResponse& rsp) noexcept override; + +private: + + MemberChangePrepareAsyncClient(const MemberChangePrepareAsyncClient&) = delete; + + MemberChangePrepareAsyncClient& operator=(const MemberChangePrepareAsyncClient&) = delete; +}; + +class MemberChangeCommitAsyncClient : public UnaryAsyncClient<::raft::MemberChangeInnerRequest, + ::raft::MemberChangeInnerResponse, MemberChangeCommitAsyncClient,::grpc::CompletionQueue>, public ClientBase { + +public: + + MemberChangeCommitAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~MemberChangeCommitAsyncClient(); + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::MemberChangeInnerResponse& rsp) noexcept override; + +private: + + MemberChangeCommitAsyncClient(const MemberChangeCommitAsyncClient&) = delete; + + MemberChangeCommitAsyncClient& operator=(const MemberChangeCommitAsyncClient&) = delete; + +}; + +class PrevoteAsyncClient : public UnaryAsyncClient<::raft::VoteRequest, + ::raft::VoteResponse, PrevoteAsyncClient,::grpc::CompletionQueue>, public ClientBase { + +public: + + PrevoteAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~PrevoteAsyncClient(); + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::VoteResponse& rsp) noexcept override; + +private: + + PrevoteAsyncClient(const PrevoteAsyncClient&) = delete; + + 
PrevoteAsyncClient& operator=(const PrevoteAsyncClient&) = delete; + +}; + +class VoteAsyncClient : public UnaryAsyncClient<::raft::VoteRequest, + ::raft::VoteResponse, VoteAsyncClient,::grpc::CompletionQueue>, public ClientBase { + +public: + + VoteAsyncClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq); + + virtual ~VoteAsyncClient(); + +protected: + + virtual void Responder(const ::grpc::Status& status, const ::raft::VoteResponse& rsp) noexcept override; + +private: + + VoteAsyncClient(const VoteAsyncClient&) = delete; + + VoteAsyncClient& operator=(const VoteAsyncClient&) = delete; + +}; + +} + +#endif diff --git a/src/common/comm_defs.cc b/src/common/comm_defs.cc new file mode 100644 index 0000000..a876281 --- /dev/null +++ b/src/common/comm_defs.cc @@ -0,0 +1,47 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#include "common/comm_defs.h"
+
+namespace RaftCore::Common {
+
+//Two entity ids are equal only when both the term and the index match.
+bool EntityIDEqual(const ::raft::EntityID &left, const ::raft::EntityID &right) {
+    if (left.term() != right.term())
+        return false;
+
+    return left.idx() == right.idx();
+}
+
+//Strict ordering: the term dominates, the index only breaks ties inside the same term.
+bool EntityIDSmaller(const ::raft::EntityID &left, const ::raft::EntityID &right) {
+    if (left.term() != right.term())
+        return left.term() < right.term();
+
+    return left.idx() < right.idx();
+}
+
+//Same ordering as EntityIDSmaller, except that identical ids also yield true.
+bool EntityIDSmallerEqual(const ::raft::EntityID &left, const ::raft::EntityID &right) {
+    if (left.term() != right.term())
+        return left.term() < right.term();
+
+    return left.idx() <= right.idx();
+}
+
+}
diff --git a/src/common/comm_defs.h b/src/common/comm_defs.h
new file mode 100644
index 0000000..68ade06
--- /dev/null
+++ b/src/common/comm_defs.h
@@ -0,0 +1,104 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/ + +#pragma once +#pragma warning( disable : 4290 ) + +#ifndef _AURORA_COMM_DEFS_H_ +#define _AURORA_COMM_DEFS_H_ + +#include +#include +#include +#include +#include + +#include "gflags/gflags.h" +#include "glog/logging.h" +#include "protocol/raft.pb.h" + +#include "common/macro_manager.h" + +namespace RaftCore::Common { + +#define _TWO_BYTES_ (2) +#define _FOUR_BYTES_ (4) +#define _EIGHT_BYTES_ (8) +#define _MAX_UINT16_ (0xFFFF) +#define _MAX_UINT32_ (0xFFFFFFFF) +#define _MAX_INT32_ (0x7FFFFFFF) +#define _MAX_UINT64_ (0xFFFFFFFFFFFFFFFF) + +#define _RING_BUF_EMPTY_POS_ (-1) +#define _RING_BUF_INVALID_POS_ (-2) + +#define _ROLE_STR_LEADER_ "leader" +#define _ROLE_STR_FOLLOWER_ "follower" +#define _ROLE_STR_CANDIDATE_ "candidate" +#define _ROLE_STR_UNKNOWN_ "unknown" +#define _ROLE_STR_TEST_ "test" + +#define _AURORA_LOCAL_IP_ "127.0.0.1" + +#ifdef _SVC_WRITE_TEST_ +#define _WRITE_VAL_TS_ "write_val_ts_" +#endif + +typedef std::shared_lock SharedLock; +typedef std::unique_lock UniqueLock; + +typedef SharedLock ReadLock; +typedef UniqueLock WriteLock; + +typedef std::list> TypeEntityList; +typedef std::tuple TypeBufferInfo; + +enum class FinishStatus { NEGATIVE_FINISHED = 0, UNFINISHED, POSITIVE_FINISHED }; + +enum class PhaseID { PhaseI = 0, PhaseII }; + +enum class VoteType { PreVote = 0, Vote }; + +bool EntityIDSmaller(const ::raft::EntityID &left, const ::raft::EntityID &right); + +bool EntityIDEqual(const ::raft::EntityID &left, const ::raft::EntityID &right); + +bool EntityIDSmallerEqual(const ::raft::EntityID &left, const ::raft::EntityID &right); + +template +struct TwoPhaseCommitBatchTask { + std::vector m_todo; + std::vector m_flags; +}; + +} + +/*Note:Additional definitions in other namespace of this project.These definitions may not be suitable to + be located in their original namespace since otherwise will cause header files recursively including issues.*/ +namespace RaftCore::Member { + + enum class EJointStatus { STABLE=0, JOINT_CONSENSUS }; + + 
enum class JointConsensusMask {
+        IN_OLD_CLUSTER = 1,
+        IN_NEW_CLUSTER = 2,
+    };
+
+}
+
+#endif
diff --git a/src/common/comm_view.cc b/src/common/comm_view.cc
new file mode 100644
index 0000000..fae9733
--- /dev/null
+++ b/src/common/comm_view.cc
@@ -0,0 +1,83 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#include
+
+#include "tools/timer.h"
+#include "storage/storage_singleton.h"
+#include "common/comm_view.h"
+
+namespace RaftCore::Common {
+
+int CommonView::m_cpu_cores;
+
+LockFreePriotityQueue CommonView::m_priority_queue;
+
+volatile bool CommonView::m_running_flag = false;
+
+LogIdentifier CommonView::m_zero_log_id;
+
+LogIdentifier CommonView::m_max_log_id;
+
+std::vector CommonView::m_vec_routine;
+
+using ::RaftCore::Timer::GlobalTimer;
+using ::RaftCore::Storage::StorageGlobal;
+
+//Sets up the shared static state: the zero/max log id sentinels, the detected core count,
+//the periodic storage GC task and the priority queue with its consumer threads.
+void CommonView::Initialize() noexcept {
+
+    m_zero_log_id.m_term = 0;
+    m_zero_log_id.m_index = 0;
+
+    m_max_log_id.m_term = 0xFFFFFFFF;
+    m_max_log_id.m_index = 0xFFFFFFFFFFFFFFFF;
+
+    //hardware_concurrency() is only a hint and returns 0 when the value is not computable,
+    //so fall back to a single core instead of aborting the whole process in that case.
+    const unsigned int _detected_cores = std::thread::hardware_concurrency();
+    if (_detected_cores == 0)
+        LOG(WARNING) << "hardware_concurrency() returned 0, assuming a single cpu core.";
+
+    m_cpu_cores = (_detected_cores > 0) ? static_cast<int>(_detected_cores) : 1;
+    CHECK(m_cpu_cores > 0);
+
+    //Register storage's GC.
+    auto *_p_storage = &StorageGlobal::m_instance;
+    auto _storage_gc = [_p_storage]()->bool {
+        _p_storage->PurgeGarbage();
+        return true;
+    };
+
+    //The GC task is always registered, unless a test build switches it off through the flag.
+    bool _enable_sstable_gc = true;
+#ifdef _COMMON_VIEW_TEST_
+    _enable_sstable_gc = ::RaftCore::Config::FLAGS_enable_sstable_gc;
+#endif
+    if (_enable_sstable_gc)
+        GlobalTimer::AddTask(::RaftCore::Config::FLAGS_sstable_purge_interval_second*1000,_storage_gc);
+
+    //A configured value of 0 means 'use one consumer thread per detected core'.
+    int consumer_threads_num = ::RaftCore::Config::FLAGS_lockfree_queue_consumer_threads_num;
+    if (consumer_threads_num == 0)
+        consumer_threads_num = m_cpu_cores;
+
+    //Start initializing the MCMP queue.
+    m_priority_queue.Initialize(consumer_threads_num);
+}
+
+void CommonView::UnInitialize() noexcept {
+
+    CommonView::m_vec_routine.clear();
+
+    m_priority_queue.UnInitialize();
+}
+
+}
+
diff --git a/src/common/comm_view.h b/src/common/comm_view.h
new file mode 100644
index 0000000..5d70f23
--- /dev/null
+++ b/src/common/comm_view.h
@@ -0,0 +1,100 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/ + +#pragma once + +#ifndef __AURORA_COMM_VIEW_H__ +#define __AURORA_COMM_VIEW_H__ + +#include "common/comm_defs.h" +#include "common/log_identifier.h" +#include "config/config.h" +#include "tools/lock_free_deque.h" +#include "tools/lock_free_unordered_single_list.h" +#include "tools/lock_free_priority_queue.h" +#include "tools/timer.h" + +namespace RaftCore::Common { + +using ::RaftCore::DataStructure::LockFreeUnorderedSingleList; +using ::RaftCore::DataStructure::LockFreePriotityQueue; +using ::RaftCore::Timer::GlobalTimer; +using ::RaftCore::Common::LogIdentifier; + +class CommonView { + +#ifdef _COMMON_VIEW_TEST_ +public: +#else +protected: +#endif + + static void Initialize() noexcept; + + static void UnInitialize() noexcept; + +public: + + static int m_cpu_cores; + + static LockFreePriotityQueue m_priority_queue; + + //TODO: find why m_garbage can't be instantiated. + //template + //static LockFreeDeque> m_garbage; + + //This is the running flag for leader&follower routine threads. 
+ static volatile bool m_running_flag; + + static LogIdentifier m_zero_log_id; + + static LogIdentifier m_max_log_id; + +protected: + + template typename W, template typename N,typename T> + static void InstallGC(LockFreeUnorderedSingleList> *p_ref_garbage) noexcept { + p_ref_garbage->SetDeleter(W::ReleaseCutHead); + + auto _pending_list_gc = [p_ref_garbage]()->bool { + p_ref_garbage->PurgeSingleList(::RaftCore::Config::FLAGS_retain_num_unordered_single_list); + return true; + }; + + GlobalTimer::AddTask(::RaftCore::Config::FLAGS_gc_interval_ms ,_pending_list_gc); + } + + static std::vector m_vec_routine; +private: + + CommonView() = delete; + + virtual ~CommonView() = delete; + + CommonView(const CommonView&) = delete; + + CommonView& operator=(const CommonView&) = delete; + +}; + +//template +//LockFreeDeque> CommonView::m_garbage; + +} //end namespace + +#endif diff --git a/src/common/error_code.h b/src/common/error_code.h new file mode 100644 index 0000000..3091e3d --- /dev/null +++ b/src/common/error_code.h @@ -0,0 +1,36 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_ERROR_CODE_H__ +#define __AURORA_ERROR_CODE_H__ + +/*--------------Produce & consume operation return values--------------*/ +#define QUEUE_ERROR (-1) +#define QUEUE_SUCC (0) +#define QUEUE_FULL (1) +#define QUEUE_EMPTY (2) + + +/*--------------storage error codes--------------*/ +#define SUCC (0) +#define LEFT_BEHIND (-1) + + +#endif diff --git a/src/common/log_identifier.cc b/src/common/log_identifier.cc new file mode 100644 index 0000000..de90688 --- /dev/null +++ b/src/common/log_identifier.cc @@ -0,0 +1,134 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#include "common/log_identifier.h"
+
+namespace RaftCore::Common {
+
+void LogIdentifier::Set(const LogIdentifier &_other) noexcept{
+    this->m_term = _other.m_term;
+    this->m_index = _other.m_index;
+}
+
+void LogIdentifier::Set(uint32_t term,uint64_t index) noexcept{
+    this->m_term = term;
+    this->m_index = index;
+}
+
+//Tells how far this id is ahead of _other:
+//  - (uint32_t)-1 when this id is smaller than _other,
+//  - 0x7FFFFFFF when this id belongs to a larger term,
+//  - otherwise the index gap inside the same term, saturated at 0x7FFFFFFF.
+uint32_t LogIdentifier::GreaterThan(const LogIdentifier& _other) const noexcept {
+    //'Smaller' is reported as -1, which the unsigned return type turns into 0xFFFFFFFF.
+    if (this->operator<(_other))
+        return static_cast<uint32_t>(-1);
+
+    if (this->m_term > _other.m_term)
+        return 0x7FFFFFFF;
+
+    //Same term and m_index >= _other.m_index at this point. Saturate the 64-bit gap rather
+    //than truncating it: a plain cast would let a gap >= 2^32 wrap around to a small value
+    //(even 0), and a gap of 0xFFFFFFFF would be indistinguishable from the 'smaller' result.
+    const uint64_t _gap = this->m_index - _other.m_index;
+    if (_gap > 0x7FFFFFFF)
+        return 0x7FFFFFFF;
+
+    return static_cast<uint32_t>(_gap);
+}
+
+bool LogIdentifier::operator==(const LogIdentifier& _other) const noexcept{
+    return (_other.m_term == this->m_term && _other.m_index == this->m_index);
+}
+
+bool LogIdentifier::operator!=(const LogIdentifier& _other) const noexcept{
+    return !(_other == *this);
+}
+
+bool LogIdentifier::operator< (const LogIdentifier &_other) const noexcept{
+    if (this->m_term < _other.m_term)
+        return true;
+
+    if (this->m_term > _other.m_term)
+        return false;
+
+    return this->m_index < _other.m_index;
+}
+
+bool LogIdentifier::operator<= (const LogIdentifier &_other) const noexcept{
+    return (this->operator <(_other) || this->operator ==(_other));
+}
+
+bool LogIdentifier::operator> (const LogIdentifier &_other) const noexcept{
+    if (this->m_term > _other.m_term)
+        return true;
+
+    if (this->m_term < _other.m_term)
+        return false;
+
+    return this->m_index > _other.m_index;
+}
+
+bool LogIdentifier::operator>= (const LogIdentifier &_other) const noexcept{
+    return (this->operator >(_other) || this->operator ==(_other));
+}
+
+std::string LogIdentifier::ToString() const noexcept{
+    return "LogIdentifier term:" + std::to_string(this->m_term) + ",idx:" + std::to_string(this->m_index);
+}
+
+LogIdentifier ConvertID(const ::raft::EntityID &entity_id) {
+    LogIdentifier _id;
+    _id.Set(entity_id.term(),entity_id.idx());
+    return _id;
+}
+
+std::ostream& operator<<(std::ostream& os, const LogIdentifier& obj) {
+    os <<
"LogIdentifier term:" << obj.m_term << ",idx:" << obj.m_index; + return os; +} + +bool EntityIDEqual(const ::raft::EntityID &left, const LogIdentifier &right) { + return (left.term() == right.m_term && left.idx() == right.m_index); +} + +bool EntityIDLarger(const ::raft::EntityID &left, const LogIdentifier &right) { + if (left.term() > right.m_term) { + return true; + } + + if (left.term() < right.m_term) { + return false; + } + + return left.idx() > right.m_index; +} + +bool EntityIDLargerEqual(const ::raft::EntityID &left, const LogIdentifier &right) { + if (left.term() > right.m_term) { + return true; + } + + if (left.term() < right.m_term) { + return false; + } + + return left.idx() >= right.m_index; +} + +bool EntityIDSmaller(const ::raft::EntityID &left, const LogIdentifier &right) { + if (left.term() < right.m_term) { + return true; + } + + if (left.term() > right.m_term) { + return false; + } + + return left.idx() < right.m_index; +} + +} diff --git a/src/common/log_identifier.h b/src/common/log_identifier.h new file mode 100644 index 0000000..34135d8 --- /dev/null +++ b/src/common/log_identifier.h @@ -0,0 +1,79 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#pragma once
+
+#ifndef _AURORA_LOG_INDENTIFIER_H_
+#define _AURORA_LOG_INDENTIFIER_H_
+
+#include
+
+#include "protocol/raft.pb.h"
+
+namespace RaftCore::Common {
+
+/*This struct is the value type stored inside the template class std::atomic, where we
+cannot use ::raft::EntityID directly because it is not TRIVIALLY COPYABLE. */
+struct LogIdentifier {
+
+    /* Can't give this struct a user-defined constructor, otherwise the compiler would
+    complain 'the default constructor of "std::atomic" cannot be referenced -- it is a deleted function',
+    even though the LogIdentifier struct itself is TRIVIALLY COPYABLE. This is a compiler issue
+    (Microsoft (R) C/C++ Optimizing Compiler Version 19.00.24215.1 for x86); by contrast, there is
+    no such problem under clang: Apple LLVM version 7.0.2 (clang-700.1.81).
+    */
+
+    uint32_t m_term = 0; //Election term
+    uint64_t m_index = 0; //The index under current election term
+
+    //The following setters emulate a copy-constructor, a workaround for the problem described above.
+    void Set(const LogIdentifier &_other)noexcept;
+
+    //Overwrites both fields with the given term and index.
+    void Set(uint32_t term, uint64_t index)noexcept;
+
+    //Tells how far this id is ahead of _other: (uint32_t)-1 when this id is smaller,
+    //0x7FFFFFFF when its term is larger, otherwise the index difference inside the same term.
+    uint32_t GreaterThan(const LogIdentifier& _other) const noexcept;
+
+    //Equality requires both the term and the index to match.
+    bool operator==(const LogIdentifier& _other) const noexcept;
+
+    bool operator!=(const LogIdentifier& _other) const noexcept;
+
+    //Ordering compares the term first; the index only breaks ties inside the same term.
+    bool operator< (const LogIdentifier &_other) const noexcept;
+
+    bool operator<= (const LogIdentifier &_other) const noexcept;
+
+    bool operator> (const LogIdentifier &_other) const noexcept;
+
+    bool operator>= (const LogIdentifier &_other) const noexcept;
+
+    //Renders the id as "LogIdentifier term:<term>,idx:<index>".
+    std::string ToString() const noexcept;
+};
+
+//Streams the same text that ToString() produces.
+std::ostream& operator<<(std::ostream& os, const LogIdentifier& obj);
+
+//Builds a LogIdentifier from the term and idx of a protobuf EntityID.
+LogIdentifier ConvertID(const ::raft::EntityID &entity_id);
+
+//Mixed comparisons between a protobuf EntityID (left) and a LogIdentifier (right),
+//using the same term-first, index-second ordering as the member operators.
+bool EntityIDEqual(const ::raft::EntityID &left, const LogIdentifier &right);
+
+bool EntityIDLarger(const ::raft::EntityID &left, const LogIdentifier &right);
+
+bool EntityIDLargerEqual(const ::raft::EntityID &left, const LogIdentifier &right);
+
+bool EntityIDSmaller(const ::raft::EntityID &left, const LogIdentifier &right);
+
+}
+
+#endif
diff --git a/src/common/macro_manager.h b/src/common/macro_manager.h
new file mode 100644
index 0000000..e340de9
--- /dev/null
+++ b/src/common/macro_manager.h
@@ -0,0 +1,43 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/ + +#pragma once + +#ifndef _AURORA_MACRO_MGR_H_ +#define _AURORA_MACRO_MGR_H_ + +#ifdef _RAFT_UNIT_TEST_ + +#define _DEQUE_TEST_ +#define _SVC_WRITE_TEST_ +#define _SVC_APPEND_ENTRIES_TEST_ +#define _COMMON_VIEW_TEST_ +#define _LEADER_VIEW_TEST_ +#define _FOLLOWER_VIEW_TEST_ +#define _MEMBER_MANAGEMENT_TEST_ +#define _UNORDERED_SINGLE_LIST_TEST_ +#define _SINGLE_LIST_TEST_ +#define _ELECTION_TEST_ +#define _TRIIAL_DOUBLE_LIST_TEST_ +#define _STORAGE_TEST_ +#define _GLOBAL_TEST_ + +#endif + + +#endif diff --git a/src/common/memory_log_base.cc b/src/common/memory_log_base.cc new file mode 100644 index 0000000..1727f1e --- /dev/null +++ b/src/common/memory_log_base.cc @@ -0,0 +1,86 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "common/memory_log_base.h" + +namespace RaftCore::Common { + +MemoryLogItemBase::MemoryLogItemBase(uint32_t _term, uint64_t _index) noexcept{ + auto p_obj = new ::raft::Entity(); + auto _p_entity_id = p_obj->mutable_entity_id(); + _p_entity_id->set_term(_term); + _p_entity_id->set_idx(_index); + + this->m_entity.reset(p_obj); +} + +MemoryLogItemBase::MemoryLogItemBase(const ::raft::Entity &_entity)noexcept { + /*This is where memory copy overhead occurs.Because the content of AppendEntriesRequest + object need to be retained until next CommitEntries RPC call. 
*/ + this->m_entity.reset(new ::raft::Entity(_entity)); +} + +MemoryLogItemBase::~MemoryLogItemBase() noexcept{} + +std::shared_ptr<::raft::Entity> MemoryLogItemBase::GetEntity()const noexcept { + return m_entity; +} + +bool MemoryLogItemBase::operator<(const MemoryLogItemBase &_other) const noexcept{ + + if (this->m_entity->entity_id().term() < _other.m_entity->entity_id().term()) + return true; + + if (this->m_entity->entity_id().term() > _other.m_entity->entity_id().term()) + return false; + + return this->m_entity->entity_id().idx() < _other.m_entity->entity_id().idx(); +} + +bool MemoryLogItemBase::operator==(const MemoryLogItemBase& _other)const noexcept { + + return (this->m_entity->entity_id().term() == _other.m_entity->entity_id().term() && + this->m_entity->entity_id().idx() == _other.m_entity->entity_id().idx()); +} + +bool MemoryLogItemBase::operator!=(const MemoryLogItemBase& _other)const noexcept { + return !this->operator==(_other); +} + +bool MemoryLogItemBase::operator>(const MemoryLogItemBase& _other)const noexcept { + + if (this->m_entity->entity_id().term() > _other.m_entity->entity_id().term()) + return true; + + if (this->m_entity->entity_id().term() < _other.m_entity->entity_id().term()) + return false; + + return this->m_entity->entity_id().idx() > _other.m_entity->entity_id().idx(); +} + +bool MemoryLogItemBase::AfterOf(const MemoryLogItemBase& _other)const noexcept { + return (this->m_entity->pre_log_id().term() == _other.m_entity->entity_id().term() && + this->m_entity->pre_log_id().idx() == _other.m_entity->entity_id().idx()); +} + +bool CmpMemoryLog(const MemoryLogItemBase *left, const MemoryLogItemBase *right) noexcept { + return left->GetEntity()->entity_id().term() == right->GetEntity()->pre_log_id().term() && + left->GetEntity()->entity_id().idx() == right->GetEntity()->pre_log_id().idx() ; +} + +} diff --git a/src/common/memory_log_base.h b/src/common/memory_log_base.h new file mode 100644 index 0000000..b5ffa80 --- /dev/null +++ 
b/src/common/memory_log_base.h @@ -0,0 +1,67 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef _AURORA_MEMORY_LOG_BASE_H_ +#define _AURORA_MEMORY_LOG_BASE_H_ + +#include + +#include "protocol/raft.pb.h" + +namespace RaftCore::Common { + +class MemoryLogItemBase { + +public: + + MemoryLogItemBase(uint32_t _term, uint64_t _index)noexcept; + + MemoryLogItemBase(const ::raft::Entity &_entity)noexcept; + + virtual ~MemoryLogItemBase()noexcept; + + bool operator<(const MemoryLogItemBase &_other)const noexcept; + + bool operator>(const MemoryLogItemBase &_other)const noexcept; + + virtual bool operator==(const MemoryLogItemBase& _other)const noexcept; + + virtual bool operator!=(const MemoryLogItemBase& _other)const noexcept; + + bool AfterOf(const MemoryLogItemBase& _other)const noexcept; + + std::shared_ptr<::raft::Entity> GetEntity()const noexcept; + +protected: + + //Prevent the base class from being instantiated + virtual void NotImplemented() noexcept = 0 ; + +protected: + + //Note: doesn't take the ownership of the original object + std::shared_ptr<::raft::Entity> m_entity; +}; + +bool CmpMemoryLog(const MemoryLogItemBase *left, const MemoryLogItemBase *right) noexcept; + +} + +#endif diff --git a/src/common/react_base.cc b/src/common/react_base.cc new file mode 100644 index 0000000..85d56fd --- /dev/null +++ b/src/common/react_base.cc @@ -0,0 
+1,47 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "glog/logging.h" + +#include "common/react_base.h" + +namespace RaftCore::Common { + +ReactInfo::ReactInfo() noexcept {} + +void ReactInfo::Set(bool cq_result, void* tag) noexcept { + this->m_cq_result = cq_result; + this->m_tag = tag; +} + +ReactInfo::ReactInfo(const ReactInfo &other) noexcept { + this->m_cq_result = other.m_cq_result; + this->m_tag = other.m_tag; +} + +ReactBase::ReactBase() noexcept{} + +ReactBase::~ReactBase() noexcept{} + +void ReactBase::GeneralReacting(const ReactInfo &info)noexcept { + ::RaftCore::Common::ReactBase* _p_ins = static_cast<::RaftCore::Common::ReactBase*>(info.m_tag); + _p_ins->React(info.m_cq_result); +} + +} + diff --git a/src/common/react_base.h b/src/common/react_base.h new file mode 100644 index 0000000..06e779b --- /dev/null +++ b/src/common/react_base.h @@ -0,0 +1,68 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_REACT_BASE_H__ +#define __AURORA_REACT_BASE_H__ + +#include +#include +#include + +#include "grpc++/completion_queue.h" + +namespace RaftCore::Common { + +struct ReactInfo { + + ReactInfo()noexcept; + + void Set(bool cq_result, void* tag)noexcept; + + ReactInfo(const ReactInfo &other)noexcept; + + bool m_cq_result = false; + void* m_tag; +}; + +typedef std::function TypeReactorFunc; + +//An empty wrapper for all the subclasses which need to implement 'React' method. +class ReactBase { + +public: + + ReactBase()noexcept; + + virtual ~ReactBase()noexcept; + + virtual void React(bool cq_result = true) noexcept = 0; + + static void GeneralReacting(const ReactInfo &info)noexcept; + +private: + + ReactBase(const ReactBase&) = delete; + + ReactBase& operator=(const ReactBase&) = delete; +}; + +} //end namespace + +#endif diff --git a/src/common/react_group.cc b/src/common/react_group.cc new file mode 100644 index 0000000..f21f8d0 --- /dev/null +++ b/src/common/react_group.cc @@ -0,0 +1,74 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "glog/logging.h" + +#include "common/react_group.h" + +namespace RaftCore::Common { + +template +ReactWorkGroup::ReactWorkGroup(TypePtrCQ shp_cq, TypeReactorFunc reactor, int therad_num) noexcept { + this->m_shp_cq = shp_cq; + this->m_reactor = reactor; + this->m_polling_threads_num = therad_num; +} + +template +ReactWorkGroup::~ReactWorkGroup() {} + +template +void ReactWorkGroup::StartPolling() noexcept { + for (int i = 0; i < this->m_polling_threads_num; ++i) { + std::thread *_p_thread = new std::thread(&ReactWorkGroup::GrpcPollingThread, this); + this->m_vec_threads.emplace_back(_p_thread); + LOG(INFO) << "polling thread:" << _p_thread->get_id() << " for cq :" << this->m_shp_cq.get() + << " started."; + } +} + +template +void ReactWorkGroup::GrpcPollingThread() noexcept { + void* tag; + bool ok; + ::RaftCore::Common::ReactInfo _info; + + while (this->m_shp_cq->Next(&tag, &ok)) { + _info.Set(ok, tag); + this->m_reactor(_info); + } +} + +template +void ReactWorkGroup::WaitPolling() noexcept { + for (auto& _thread : this->m_vec_threads) + _thread->join(); +} + +template +TypePtrCQ ReactWorkGroup::GetCQ() noexcept { + return this->m_shp_cq; +} + +template +void ReactWorkGroup::ShutDownCQ() noexcept { + this->m_shp_cq->Shutdown(); +} + +} + diff --git a/src/common/react_group.h b/src/common/react_group.h new file mode 100644 index 0000000..2ddcd3c --- /dev/null +++ b/src/common/react_group.h @@ -0,0 +1,79 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef __AURORA_REACT_GROUP_H__
+#define __AURORA_REACT_GROUP_H__
+
+#include
+#include
+#include
+
+#include "grpc++/completion_queue.h"
+
+#include "common/react_base.h"
+
+namespace RaftCore::Common {
+
+using ::grpc::CompletionQueue;
+using ::grpc::ServerCompletionQueue;
+
+//Shared pointer alias for the completion queue a work group polls.
+template
+using TypePtrCQ = std::shared_ptr;
+
+//On a one CQ <---> multiple threads basis.
+template
+class ReactWorkGroup {
+
+public:
+
+    enum class CQType { ServerCQ = 2, GENERAL_CQ };
+
+public:
+
+    //Stores the queue, the reactor callback and the thread count; starts nothing.
+    ReactWorkGroup(TypePtrCQ shp_cq, TypeReactorFunc reactor, int therad_num)noexcept;
+
+    virtual ~ReactWorkGroup();
+
+    //Spawns the polling threads.
+    void StartPolling() noexcept;
+
+    //Joins every thread spawned by StartPolling().
+    void WaitPolling() noexcept;
+
+    TypePtrCQ GetCQ() noexcept;
+
+    //Calls Shutdown() on the completion queue.
+    void ShutDownCQ() noexcept;
+
+private:
+
+    //Thread body: feeds each event returned by the queue's Next() to m_reactor.
+    void GrpcPollingThread() noexcept;
+
+    TypePtrCQ m_shp_cq;
+
+    //Filled by StartPolling() with heap-allocated threads.
+    //NOTE(review): nothing visible in the implementation file releases them - confirm
+    //the element type and the intended ownership.
+    std::vector m_vec_threads;
+
+    TypeReactorFunc m_reactor;
+
+    int m_polling_threads_num = 0;
+};
+
+} //end namespace
+
+//The member templates are defined in the .cc file, which is pulled in here.
+#include "common/react_group.cc"
+
+#endif
diff --git a/src/common/request_base.cc b/src/common/request_base.cc
new file mode 100644
index 0000000..acc983d
--- /dev/null
+++ b/src/common/request_base.cc
@@ -0,0 +1,179 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see . 
+*/ + +#include "glog/logging.h" + +#include "common/request_base.h" + +namespace RaftCore::Common { + +template +const char* BidirectionalRequest::m_status_macro_names[] = { "READ","WRITE","CONNECT","DONE","FINISH" }; + +template +RequestBase::RequestBase() noexcept {} + +template +void RequestBase::Initialize(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->m_async_service = shp_svc; + this->m_server_notify_cq = shp_notify_cq; + this->m_server_call_cq = shp_call_cq; +} + +template +RequestBase::~RequestBase() noexcept{} + +template +RequestTpl::RequestTpl() noexcept {} + +template +RequestTpl::~RequestTpl() noexcept {} + +template +UnaryRequest::UnaryRequest() noexcept : m_responder(&this->m_server_context) { + static_assert(std::is_base_of::value, "Q is not a derived from UnaryRequest."); + this->m_stage = ProcessStage::CREATE; +} + +template +void UnaryRequest::React(bool cq_result) noexcept { + + Q* _p_downcast = dynamic_cast(this); + + if (!cq_result) { + LOG(ERROR) << "UnaryRequest got false result from CQ."; + delete _p_downcast; + return; + } + + auto _status = ::grpc::Status::OK; + switch (this->m_stage) { + case ProcessStage::CREATE: + /* Spawn a new subclass instance to serve new clients while we process + the one for this . The instance will deallocate itself as + part of its FINISH state.*/ + new Q(this->m_async_service,this->m_server_notify_cq,this->m_server_call_cq); + + // The actual processing. + _status = this->Process(); + + /* And we are done! 
Let the gRPC runtime know we've finished, using the + memory address of this instance as the uniquely identifying tag for + the event.*/ + this->m_stage = ProcessStage::FINISH; + this->m_responder.Finish(this->m_response, _status, _p_downcast); + break; + + case ProcessStage::FINISH: + delete _p_downcast; + break; + + default: + CHECK(false) << "Unexpected tag " << int(this->m_stage); + break; + } +} + +template +UnaryRequest::~UnaryRequest() noexcept {} + +template +BidirectionalRequest::BidirectionalRequest() noexcept : m_reader_writer(&this->m_server_context) { + this->m_stage = ProcessStage::CONNECT; + this->m_server_context.AsyncNotifyWhenDone(this); +} + +template +BidirectionalRequest::~BidirectionalRequest() noexcept {} + +template +void BidirectionalRequest::React(bool cq_result) noexcept { + + Q* _p_downcast = dynamic_cast(this); + + if (!cq_result && (this->m_stage != ProcessStage::READ)) { + LOG(ERROR) << "BidirectionalRequest got false result from CQ, state:" << this->GetStageName(); + delete _p_downcast; + return; + } + + /*The `ServerAsyncReaderWriter::Finish()` call will resulting into two notifications for a single + request. Processing those two notifications simultaneously will causing problems. So we + need a synchronization here. */ + std::unique_lock _wlock(this->m_mutex); + + auto _status = ::grpc::Status::OK; + switch (this->m_stage) { + case ProcessStage::READ: + + //Meaning client said it wants to end the stream either by a 'WritesDone' or 'finish' call. 
+ if (!cq_result) { + this->m_reader_writer.Finish(::grpc::Status::OK, _p_downcast); + this->m_stage = ProcessStage::DONE; + break; + } + + _status = this->Process(); + if (!_status.ok()) { + LOG(ERROR) << "bidirectional request going to return a non-success result:" + << _status.error_code() << ",msg:" << _status.error_message(); + this->m_reader_writer.Finish(::grpc::Status::OK, _p_downcast); + this->m_stage = ProcessStage::DONE; + break; + } + + this->m_reader_writer.Write(this->m_response, _p_downcast); + this->m_stage = ProcessStage::WRITE; + break; + + case ProcessStage::WRITE: + this->m_reader_writer.Read(&this->m_request, _p_downcast); + this->m_stage = ProcessStage::READ; + break; + + case ProcessStage::CONNECT: + //Spawn a new instance to serve further incoming request. + new Q(this->m_async_service,this->m_server_notify_cq,this->m_server_call_cq); + + this->m_reader_writer.Read(&this->m_request, _p_downcast); + this->m_stage = ProcessStage::READ; + break; + + case ProcessStage::DONE: + this->m_stage = ProcessStage::FINISH; + break; + + case ProcessStage::FINISH: + _wlock.unlock(); + delete _p_downcast; + break; + + default: + CHECK(false) << "Unexpected tag " << int(this->m_stage); + } +} + +template +const char* BidirectionalRequest::GetStageName()const noexcept { + return m_status_macro_names[(int)this->m_stage]; +} + +} + diff --git a/src/common/request_base.h b/src/common/request_base.h new file mode 100644 index 0000000..35bbb0f --- /dev/null +++ b/src/common/request_base.h @@ -0,0 +1,161 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef __AURORA_REQUEST_BASE_H__
+#define __AURORA_REQUEST_BASE_H__
+
+#include
+
+#include "protocol/raft.grpc.pb.h"
+#include "protocol/raft.pb.h"
+
+#include "common/react_base.h"
+
+using ::raft::RaftService;
+using ::grpc::ServerCompletionQueue;
+
+namespace RaftCore::Common {
+
+using ::RaftCore::Common::ReactBase;
+
+//Common state of every server side request: the server context, the async service
+//and the two completion queues.
+template
+class RequestBase : public ReactBase {
+
+public:
+
+    RequestBase()noexcept;
+
+    //Stores the service and the two completion queues in the members below.
+    void Initialize(std::shared_ptr shp_svc,
+        std::shared_ptr &shp_notify_cq,
+        std::shared_ptr &shp_call_cq)noexcept;
+
+    virtual ~RequestBase()noexcept;
+
+    //The request specific work. The React() implementations call it once the request is ready.
+    virtual ::grpc::Status Process() noexcept = 0;
+
+protected:
+
+    //Server context cannot be reused across rpcs.
+    ::grpc::ServerContext m_server_context;
+
+    std::shared_ptr m_async_service;
+
+    std::shared_ptr m_server_notify_cq;
+
+    std::shared_ptr m_server_call_cq;
+
+private:
+
+    RequestBase(const RequestBase&) = delete;
+
+    RequestBase& operator=(const RequestBase&) = delete;
+};
+
+//Adds the request message (T) and the response message (R) to RequestBase.
+template
+class RequestTpl : public RequestBase {
+
+public:
+
+    RequestTpl()noexcept;
+
+    virtual ~RequestTpl()noexcept;
+
+protected:
+
+    T m_request;
+
+    R m_response;
+
+private:
+
+    RequestTpl(const RequestTpl&) = delete;
+
+    RequestTpl& operator=(const RequestTpl&) = delete;
+};
+
+//State machine of a unary rpc. Q is the concrete subclass: React() creates further Q
+//instances and deletes itself through a Q pointer.
+template
+class UnaryRequest : public RequestTpl {
+
+public:
+
+    UnaryRequest()noexcept;
+
+    virtual ~UnaryRequest()noexcept;
+
+protected:
+
+    //CREATE: spawn the next instance, run Process() and issue Finish().
+    //FINISH, or a false cq_result: delete this instance.
+    virtual void React(bool cq_result) noexcept override;
+
+protected:
+
+    ::grpc::ServerAsyncResponseWriter m_responder;
+
+    enum class ProcessStage { CREATE = 0, FINISH };
+
+    ProcessStage m_stage;
+
+private:
+
+    UnaryRequest(const UnaryRequest&) = delete;
+
+    UnaryRequest& operator=(const UnaryRequest&) = delete;
+};
+
+//State machine of a bidirectional streaming rpc. Q plays the same role as in UnaryRequest.
+template
+class BidirectionalRequest : public RequestTpl {
+
+public:
+
+    BidirectionalRequest()noexcept;
+
+    virtual ~BidirectionalRequest()noexcept;
+
+protected:
+
+    //CONNECT -> READ <-> WRITE while the stream is alive, then DONE -> FINISH after Finish().
+    virtual void React(bool cq_result) noexcept override;
+
+    //Printable name of m_stage, looked up in m_status_macro_names.
+    const char* GetStageName()const noexcept;
+
+protected:
+
+    ::grpc::ServerAsyncReaderWriter m_reader_writer;
+
+    //The enumerator values index m_status_macro_names, so both must keep the same order.
+    enum class ProcessStage { READ = 0, WRITE, CONNECT, DONE, FINISH };
+
+    static const char* m_status_macro_names[];
+
+    ProcessStage m_stage;
+
+private:
+
+    //Serializes React(): one request can receive two notifications at the same time.
+    std::mutex m_mutex;
+
+private:
+
+    BidirectionalRequest(const BidirectionalRequest&) = delete;
+
+    BidirectionalRequest& operator=(const BidirectionalRequest&) = delete;
+};
+
+} //end namespace
+
+//The member templates are defined in the .cc file, which is pulled in here.
+#include "common/request_base.cc"
+
+#endif
diff --git a/src/config/config.cc b/src/config/config.cc
new file mode 100644
index 0000000..d0ecdb7
--- /dev/null
+++ b/src/config/config.cc
@@ -0,0 +1,129 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free 
software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "config/config.h" + +namespace RaftCore::Config { + + DEFINE_uint32(notify_cq_num, 2, "#notify CQ the server use."); + DEFINE_uint32(notify_cq_threads, 4, "#threads polling on each notify CQ."); + DEFINE_uint32(call_cq_num, 2, "#call CQ the server use."); + DEFINE_uint32(call_cq_threads, 2, "#threads polling on each call CQ."); + DEFINE_uint32(client_cq_num, 2, "#completion queues dedicated for process backend RPCs in the leader."); + DEFINE_uint32(client_thread_num, 2, "#threads for each client CQ."); + DEFINE_uint32(request_pool_size, 100, "#call data instance each thread hold."); + + DEFINE_uint32(binlog_append_file_timeo_us, 1500, "append binlog cv wait timeout in microseconds ."); + DEFINE_uint32(binlog_meta_hash_buf_size, 4, "binlog's meta data hash buf size in MB, each server instance has only one of such buffer."); + + DEFINE_uint32(binlog_max_size, 1024 * 1024 * 128, "max size(meta data not included) in bytes for each individual binlog file,used together with binlog_max_log_num."); + DEFINE_uint32(binlog_max_log_num, 1024 * 1024 * 4, "max #logs for each individual binlog file,used together with binlog_max_size."); + DEFINE_uint32(binlog_parse_buf_size, 64, "buf size(in MB) used for parsing binlog on server startup."); + + DEFINE_string(ip, "0.0.0.0", "svr listening ipv4 address."); + DEFINE_uint32(port, 10010, "svr listening port."); + DEFINE_uint32(guid_step_len, 
200, "guid increase step length."); + DEFINE_uint32(guid_disk_random_write_hint_ms, 20, "non SSD disk random write latency hint in milliseconds"); + + DEFINE_uint32(leader_heartbeat_rpc_timeo_ms, 50, "heartbeat timeout in millisecond."); + DEFINE_uint32(leader_append_entries_rpc_timeo_ms, 100, "AppendEntries rpc timeout in millisecond."); + DEFINE_uint32(leader_commit_entries_rpc_timeo_ms, 100, "CommitEntries rpc timeout in millisecond."); + DEFINE_uint32(leader_resync_log_rpc_timeo_ms, 3000, "resync log rpc timeout in millisecond."); + + DEFINE_uint32(leader_heartbeat_interval_ms, 500, "leader dedicated thread sending heartbeat intervals in ms."); + DEFINE_uint32(leader_last_log_resolve_additional_wait_ms, 100, "additional wait ms when resolving the last log."); + + DEFINE_uint32(lockfree_queue_resync_log_elements,2 * 1024,"#elements in the resync log queue,round up to nearest 2^n."); + DEFINE_uint32(lockfree_queue_resync_data_elements, 1 * 1024, "#elements in the resync data queue,round up to nearest 2^n."); + DEFINE_uint32(lockfree_queue_client_react_elements, 1024 * 1024, "#elements in the client react queue,round up to nearest 2^n."); + + DEFINE_uint32(lockfree_queue_consumer_wait_ms, 800, "the waiting time in us that waiting on the queue's CV."); + DEFINE_uint32(lockfree_queue_consumer_threads_num, 16, "#consuemr thread,0 means spawninig them by #CPU cores."); + + DEFINE_uint32(list_op_tracker_hash_slot_num, 64, "#slots a operation tracker will use."); + + DEFINE_uint32(timer_precision_ms, 10, "timer check intervals in milliseconds."); + DEFINE_uint32(thread_stop_waiting_us, 3, "the interval in us to check if the thread if stopped as intended."); + DEFINE_uint32(gc_interval_ms, 30, "the interval in ms for garbage collecting."); + DEFINE_uint32(garbage_deque_retain_num, 5000, "the #(garbage deque node) retaining when doing GC in deque."); + + DEFINE_uint32(conn_per_link, 64, "#tcp connections between each leader<-->follower link."); + 
DEFINE_uint32(channel_pool_size, 10, "#channels for each tcp connection."); + DEFINE_uint32(client_pool_size, 10000, "#clients the client pool maintained for each follower."); + + DEFINE_uint32(resync_log_reverse_step_len, 20, "the reversing step len when the leader try to find the lastest consistent log entry with a follower."); + + DEFINE_uint32(resync_data_item_num_each_rpc, 1024, "the #data items in a single sending of a stream RPC call."); + DEFINE_uint32(resync_data_log_num_each_rpc, 1024, "the #replicated logs in a single sending of a stream RPC call."); + DEFINE_uint32(resync_data_task_max_time_ms, 200, "the max time in millisecond a resync data task can hold in a single execution."); + DEFINE_uint32(binlog_reserve_log_num, 100, "the #log reserved before the ID-LCL when rotating binlog file."); + + DEFINE_uint32(group_commit_count, 500, "#previous appending requests that a commit request at least represents."); + DEFINE_uint32(cut_empty_timeos_ms, 500, "In leader, if a replicated msg cannot be successfully processed within this time, an error will be returned."); + + DEFINE_uint32(iterating_threads, 4, "#threads for iterating the unfinished requests."); + DEFINE_uint32(iterating_wait_timeo_us, 50 * 1000, "follower disorder threads wait on CV timeout in microseconds."); + + DEFINE_uint32(follower_check_heartbeat_interval_ms, 10, "interval in milliseconds of follower's checking leader's heartbea behavior,must be."); + DEFINE_uint32(disorder_msg_timeo_ms, 1000, "In follower, if a disorder msg cannot be successfully processed within this time, an error will be returned."); + + DEFINE_uint32(cgg_wait_for_last_released_guid_finish_us, 50, "there is a time windows one thread can still generating guids even server status already been \ + set to HALT.This is the us waiting to it elapses."); + + DEFINE_uint32(election_heartbeat_timeo_ms, 3000, "this is the duraion after it elapsed the follower will start to turn into candidate role."); + 
DEFINE_uint32(election_term_interval_min_ms, 150, "the lower bound of sleeping interval before incease term and start a new election."); + DEFINE_uint32(election_term_interval_max_ms, 300, "the upper bound of sleeping interval before incease term and start a new election."); + DEFINE_uint32(election_vote_rpc_timeo_ms, 2000, "vote rpc timeout in millisecond."); + DEFINE_uint32(election_non_op_timeo_ms, 500, "timeo value in ms of the submitting non-op log entry operation after new leader elected."); + DEFINE_uint32(election_wait_non_op_finish_ms, 200, "time in ms waiting for non-op finished."); + + DEFINE_uint32(memchg_sync_data_wait_seconds, 1, "leader will wait for the newly joined nodes to finish sync all the data,this is how long it will wait during each round of waiting."); + DEFINE_uint32(memchg_rpc_timeo_ms, 50, "membership change RPC timeout in milliseconds."); + + DEFINE_uint32(memory_table_max_item, 1024 * 1024 * 2, "max #records a memory can hold."); + DEFINE_uint32(memory_table_hash_slot_num, 10 * 1000, "#slots a memory table object's inner hash object can hold."); + DEFINE_uint32(sstable_table_hash_slot_num, 10 * 1000, "#slots a sstable table object's inner hash object can hold."); + DEFINE_uint32(sstable_purge_interval_second, 10, "Interval in seconds of merging and purging sstabls."); + + DEFINE_uint32(child_glog_v, 90, "the GLOG_v environment variable used for child processes in gtest."); + DEFINE_uint32(election_thread_wait_us, 1000, "the waiting time between each check of election thread exiting."); + DEFINE_bool(do_heartbeat, true, "whether leader sending heartbeat message to followers or not."); + DEFINE_bool(heartbeat_oneshot, false, "sending heartbeat message just once."); + DEFINE_bool(member_leader_gone, false, "whether the old leader will exist in the new cluster or not."); + DEFINE_uint32(concurrent_client_thread_num, 0, "#thread client using when doing benchmark."); + DEFINE_bool(enable_sstable_gc, true, "whether enable sstable purging or 
not."); + DEFINE_bool(checking_heartbeat, true, "whether follower checking heartbeat or not."); + DEFINE_uint32(append_entries_start_idx, 8057, "#log start index for the AppendEntries interface."); + DEFINE_bool(clear_existing_sstable_files, true, "whether delete all existing sstable files or not."); + DEFINE_uint32(hash_slot_num, 500, "#slots in lockfree hash."); + DEFINE_uint32(resync_log_start_idx, 8057, "#log start index for the LeaderView::ResyncLog interface."); + DEFINE_uint32(deque_push_count, 100000, "#elements pushed before testing."); + DEFINE_uint32(meta_count, 80000, "#meta items for testing memory useage."); + DEFINE_uint32(follower_svc_benchmark_req_round, 10000, "#rounds(phaseI+phaseII) of requests sent during follower service benchmarking."); + DEFINE_uint32(leader_svc_benchmark_req_count, 10000, "#requests of requests sent during leader service benchmarking."); + DEFINE_uint32(benchmark_client_cq_num, 2, "#CQ client used to trigger the requests."); + DEFINE_uint32(benchmark_client_thread_num_per_cq, 4, "#threads client per CQ used to trigger the requests."); + DEFINE_uint32(client_write_timo_ms, 50, "timeout value(ms) for client writing."); + DEFINE_bool(benchmark_client_split_entrusting, true, "whether to split the benchmark client entrusing process."); + DEFINE_string(target_ip, "default_none", "the target ip for a new benchmark server."); + DEFINE_string(my_ip, "default_none", "the ip addr to indicate myself in client req."); + DEFINE_uint32(storage_get_slice_count, 10, "#elements get from get_slice()."); + DEFINE_uint32(retain_num_unordered_single_list, 100, "retain num for unordered_single_list unit test."); + DEFINE_bool(do_commit, false, "whether issue the commit request or not after appenedEntries."); + DEFINE_uint32(value_len, 2, "value length in unite test."); +} diff --git a/src/config/config.h b/src/config/config.h new file mode 100644 index 0000000..c39baab --- /dev/null +++ b/src/config/config.h @@ -0,0 +1,152 @@ +/* +* +* Copyright 
(C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + + +#ifndef _AURORA_CONFIG_H_ +#define _AURORA_CONFIG_H_ + +#include "gflags/gflags.h" + +namespace RaftCore::Config { + + //Service config. + DECLARE_uint32(notify_cq_num); + DECLARE_uint32(notify_cq_threads); + DECLARE_uint32(call_cq_num); + DECLARE_uint32(call_cq_threads); + DECLARE_uint32(client_cq_num); + DECLARE_uint32(client_thread_num); + DECLARE_uint32(request_pool_size); + + //Common config + DECLARE_string(ip); + DECLARE_uint32(port); + + DECLARE_uint32(lockfree_queue_resync_log_elements); + DECLARE_uint32(lockfree_queue_resync_data_elements); + DECLARE_uint32(lockfree_queue_client_react_elements); + DECLARE_uint32(lockfree_queue_consumer_wait_ms); + DECLARE_uint32(lockfree_queue_consumer_threads_num); + + DECLARE_uint32(list_op_tracker_hash_slot_num); + + DECLARE_uint32(timer_precision_ms); + DECLARE_uint32(thread_stop_waiting_us); + DECLARE_uint32(gc_interval_ms); + DECLARE_uint32(garbage_deque_retain_num); + + DECLARE_uint32(iterating_threads); + DECLARE_uint32(iterating_wait_timeo_us); + + //Guid + DECLARE_uint32(guid_step_len); + DECLARE_uint32(guid_disk_random_write_hint_ms); + + //Binlog config + DECLARE_uint32(binlog_meta_hash_buf_size); + DECLARE_uint32(binlog_max_size); + DECLARE_uint32(binlog_max_log_num); + DECLARE_uint32(binlog_parse_buf_size); + DECLARE_uint32(binlog_append_file_timeo_us); + 
DECLARE_uint32(binlog_reserve_log_num); + + //leader config --> follower entity + DECLARE_uint32(leader_heartbeat_rpc_timeo_ms); + DECLARE_uint32(leader_append_entries_rpc_timeo_ms); + DECLARE_uint32(leader_commit_entries_rpc_timeo_ms); + DECLARE_uint32(leader_resync_log_rpc_timeo_ms); + DECLARE_uint32(leader_heartbeat_interval_ms); + DECLARE_uint32(leader_last_log_resolve_additional_wait_ms); + + //leader config --> connection pool + DECLARE_uint32(conn_per_link); + DECLARE_uint32(channel_pool_size); + DECLARE_uint32(client_pool_size); + + //leader config --> resync log + DECLARE_uint32(resync_log_reverse_step_len); + + //leader config --> resync data + DECLARE_uint32(resync_data_item_num_each_rpc); + DECLARE_uint32(resync_data_log_num_each_rpc); + DECLARE_uint32(resync_data_task_max_time_ms); + + //leader config --> optimization + DECLARE_uint32(group_commit_count); + + //leader config --> cutempty + DECLARE_uint32(cut_empty_timeos_ms); + + //follower config + DECLARE_uint32(follower_check_heartbeat_interval_ms); + DECLARE_uint32(disorder_msg_timeo_ms); + + //For CGG problem + DECLARE_uint32(cgg_wait_for_last_released_guid_finish_us); + + //For election. + DECLARE_uint32(election_heartbeat_timeo_ms); + DECLARE_uint32(election_term_interval_min_ms); + DECLARE_uint32(election_term_interval_max_ms); + DECLARE_uint32(election_vote_rpc_timeo_ms); + DECLARE_uint32(election_non_op_timeo_ms); + DECLARE_uint32(election_wait_non_op_finish_ms); + + //For membership change. + DECLARE_uint32(memchg_sync_data_wait_seconds); + DECLARE_uint32(memchg_rpc_timeo_ms); + + //For Storage. + DECLARE_uint32(memory_table_max_item); + DECLARE_uint32(memory_table_hash_slot_num); + DECLARE_uint32(sstable_table_hash_slot_num); + DECLARE_uint32(sstable_purge_interval_second); + + //For unit test. 
+ DECLARE_uint32(child_glog_v); + DECLARE_uint32(election_thread_wait_us); + DECLARE_bool(do_heartbeat); + DECLARE_bool(heartbeat_oneshot); + DECLARE_bool(member_leader_gone); + DECLARE_uint32(concurrent_client_thread_num); + DECLARE_bool(enable_sstable_gc); + DECLARE_bool(checking_heartbeat); + DECLARE_uint32(append_entries_start_idx); + DECLARE_bool(clear_existing_sstable_files); + DECLARE_uint32(hash_slot_num); + DECLARE_uint32(resync_log_start_idx); + DECLARE_uint32(deque_push_count); + DECLARE_uint32(meta_count); + DECLARE_uint32(follower_svc_benchmark_req_round); + DECLARE_uint32(leader_svc_benchmark_req_count); + DECLARE_uint32(benchmark_client_cq_num); + DECLARE_uint32(benchmark_client_thread_num_per_cq); + DECLARE_uint32(client_write_timo_ms); + DECLARE_bool(benchmark_client_split_entrusting); + DECLARE_string(target_ip); + DECLARE_string(my_ip); + DECLARE_uint32(storage_get_slice_count); + DECLARE_uint32(retain_num_unordered_single_list); + DECLARE_bool(do_commit); + DECLARE_uint32(value_len); + +} + +#endif diff --git a/src/election/election.cc b/src/election/election.cc new file mode 100644 index 0000000..7251fc2 --- /dev/null +++ b/src/election/election.cc @@ -0,0 +1,696 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include +#include +#include +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" +#include "boost/filesystem.hpp" + +#include "protocol/raft.grpc.pb.h" + +#include "config/config.h" +#include "binlog/binlog_singleton.h" +#include "member/member_manager.h" +#include "global/global_env.h" +#include "client/client_impl.h" +#include "storage/storage.h" +#include "tools/timer.h" +#include "leader/leader_view.h" +#include "election/election.h" + +#define _AURORA_ELECTION_TERM_PREFIX_ "current term:" +#define _AURORA_ELECTION_VOTEFOR_PREFIX_ "I [tried] voted for:" +#define _AURORA_ELECTION_KNOWN_VOTING_PREFIX_ "known others [tried] voted terms:" + +namespace RaftCore::Election { + +namespace fs = ::boost::filesystem; +using ::RaftCore::BinLog::BinLogGlobal; +using ::raft::VoteResponse; +using ::raft::ErrorCode; +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Common::WriteLock; +using ::RaftCore::State::StateMgr; +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::Timer::GlobalTimer; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Member::MemberMgr; +using ::RaftCore::Member::JointConsensusMask; +using ::RaftCore::Member::EJointStatus; +using ::RaftCore::Client::PrevoteAsyncClient; +using ::RaftCore::Client::VoteAsyncClient; +using ::RaftCore::Client::WriteSyncClient; +using ::RaftCore::Storage::StorageMgr; + +std::atomic ElectionMgr::m_cur_term; + +std::map ElectionMgr::m_voted; + +std::shared_timed_mutex ElectionMgr::m_voted_mutex; + +ElectionMgr::NewLeaderEvent ElectionMgr::m_new_leader_event; + +uint32_t ElectionMgr::m_cur_cluster_size; + +std::shared_timed_mutex ElectionMgr::m_election_mutex; + +std::map> ElectionMgr::m_known_voting; + +std::shared_timed_mutex ElectionMgr::m_known_voting_mutex; + +std::thread* ElectionMgr::m_p_thread = nullptr; + +#ifdef _ELECTION_TEST_ +volatile bool ElectionMgr::m_candidate_routine_running = false; +#endif + +MemberMgr::JointSummary ElectionMgr::m_joint_snapshot; + +uint32_t 
ElectionMgr::m_cur_cluster_vote_counter; + +uint32_t ElectionMgr::m_new_cluster_vote_counter; + +volatile bool ElectionMgr::m_leader_debut = false; + +LogIdentifier ElectionMgr::m_pre_term_lrl; + +TwoPhaseCommitBatchTask ElectionMgr::m_phaseI_task; + +TwoPhaseCommitBatchTask ElectionMgr::m_phaseII_task; + +void ElectionMgr::Initialize() noexcept { + LoadFile(); +} + +void ElectionMgr::UnInitialize() noexcept { + SaveFile(); +} + +void ElectionMgr::SaveFile() noexcept{ + + //Destroy contents if file exists, and since the content is human readable , no need to open in binary mode. + std::FILE* f_handler = std::fopen(_AURORA_ELECTION_CONFIG_FILE_, "w+"); + CHECK(f_handler != nullptr) << "open BaseState file " << _AURORA_ELECTION_CONFIG_FILE_ << "fail..,errno:" << errno; + + std::string _voted_info = ""; + { + ReadLock _r_lock(m_voted_mutex); + for (auto iter = m_voted.crbegin(); iter != m_voted.crend(); ++iter) + _voted_info += (std::to_string(iter->first) + "|" + iter->second + ","); + } + + std::string _known_voting = ""; + { + ReadLock _r_lock(m_known_voting_mutex); + for (auto &_pair_kv : m_known_voting) { + std::string _votings = ""; + for (auto& _item : _pair_kv.second) + _votings += _item + "%"; + _known_voting += (std::to_string(_pair_kv.first) + "|" + _votings + ","); + } + } + + std::string buf = _AURORA_ELECTION_TERM_PREFIX_ + std::to_string(m_cur_term.load()) + "\n" + + _AURORA_ELECTION_VOTEFOR_PREFIX_ + _voted_info + "\n" + + _AURORA_ELECTION_KNOWN_VOTING_PREFIX_ + _known_voting + "\n"; + + std::size_t written = fwrite(buf.data(), 1, buf.size(), f_handler); + CHECK(written == buf.size()) << "fwrite BaseState file fail...,errno:" << errno << ",written:" << written << ",expected:" << buf.size(); + + CHECK(!std::fclose(f_handler)) << "close BaseState file fail...,errno:" << errno; +} + +void ElectionMgr::Reset() noexcept { + m_new_leader_event.m_notify_flag = false; +} + +void ElectionMgr::LoadFile() noexcept{ + + m_cur_term.store(_MAX_UINT32_); + + { + 
WriteLock _w_lock(m_voted_mutex); + m_voted.clear(); + } + + { + WriteLock _w_lock(m_known_voting_mutex); + m_known_voting.clear(); + } + + //The local scope is to release the handle by std::ifstream. + std::ifstream f_input(_AURORA_ELECTION_CONFIG_FILE_); + + for (std::string _ori_line; std::getline(f_input, _ori_line); ) { + std::string _line = ""; + _line.reserve(_ori_line.length()); + std::copy_if(_ori_line.begin(), _ori_line.end(), std::back_inserter(_line), [](char c) { return c != '\r' && c != '\n'; }); + + if (_line.find(_AURORA_ELECTION_TERM_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in state file, _line:" << _line; + m_cur_term.store(std::atol(_line.substr(pos + 1).c_str())); + continue; + } + + if (_line.find(_AURORA_ELECTION_VOTEFOR_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in state file, _line:" << _line; + + std::list _output; + ::RaftCore::Tools::StringSplit(_line.substr(pos + 1),',',_output); + for (const auto &_item : _output) { + std::list _inner_output; + ::RaftCore::Tools::StringSplit(_item,'|',_inner_output); + CHECK(_inner_output.size() == 2); + + auto _iter = _inner_output.cbegin(); + uint32_t _term = std::atol((*_iter++).c_str()); + + WriteLock _w_lock(m_voted_mutex); + m_voted[_term] = *_iter; + } + + continue; + } + + if (_line.find(_AURORA_ELECTION_KNOWN_VOTING_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in state file, _line:" << _line; + + std::list _term_list; + ::RaftCore::Tools::StringSplit(_line.substr(pos + 1),',',_term_list); + for (const auto &_item : _term_list) { + std::list _voting_list; + ::RaftCore::Tools::StringSplit(_item,'|', _voting_list); + CHECK(_voting_list.size() == 2); + + auto _iter = _voting_list.cbegin(); + uint32_t _term = 
std::atol((*_iter++).c_str()); + + std::list _votings; + ::RaftCore::Tools::StringSplit(*_iter,'%',_votings); + + WriteLock _w_lock(m_known_voting_mutex); + if (m_known_voting.find(_term) == m_known_voting.cend()) + m_known_voting[_term] = std::set(); + + for (const auto& _item : _votings) + m_known_voting[_term].emplace(_item); + } + + continue; + } + } + + f_input.close(); + + //Give default values if not found in the state file + bool give_default = false; + if (m_cur_term.load() == _MAX_UINT32_) { + m_cur_term.store(0); //term started from 0 + give_default = true; + } + + if (give_default) + ElectionMgr::SaveFile(); +} + +void ElectionMgr::ElectionThread() noexcept{ + + //Reset election environment before doing it. + Reset(); + + auto _entrance = [&]() ->void{ + +#ifdef _ELECTION_TEST_ + m_candidate_routine_running = true; +#endif + + LOG(INFO) << "start electing,switch role:[Follower --> Candidate]"; + SwitchRole(RaftRole::CANDIDATE); + CandidateRoutine(); + +#ifdef _ELECTION_TEST_ + m_candidate_routine_running = false; +#endif + }; + + if (m_p_thread) + delete m_p_thread; + + m_p_thread = new std::thread(_entrance); + m_p_thread->detach(); +} + +#ifdef _ELECTION_TEST_ +void ElectionMgr::WaitElectionThread()noexcept { + while (m_candidate_routine_running) + std::this_thread::sleep_for(std::chrono::microseconds(::RaftCore::Config::FLAGS_election_thread_wait_us)); +} +#endif + +std::string ElectionMgr::TryVote(uint32_t term, const std::string &addr)noexcept { + + { + WriteLock _w_lock(m_voted_mutex); + if (m_voted.find(term) != m_voted.end()) { + LOG(WARNING) << "voting in a term that is already been voted before:" + << term << ",original voted addr:" << m_voted[term] << ",current trying to vote addr:" << addr; + return m_voted[term]; + } + m_voted[term] = addr; + } + + SaveFile(); + + return ""; +} + +void ElectionMgr::RenameBinlogNames(RaftRole old_role, RaftRole target_role) noexcept { + const char* _old_role_str = StateMgr::GetRoleStr(old_role); + const 
char* _target_role_str = StateMgr::GetRoleStr(target_role); + + std::list _binlog_files; + StorageMgr::FindRoleBinlogFiles(_old_role_str, _binlog_files); + + for (const auto&file_name : _binlog_files) { + std::string _target_file_name = file_name; + + auto _pos = _target_file_name.find(_old_role_str); + CHECK(_pos != std::string::npos) << "rename binlog fail when switching role, old_file_name:" << file_name << ",old_role:" << _old_role_str; + + _target_file_name.replace(_pos, std::strlen(_old_role_str), _target_role_str); + + fs::path _target_file(_target_file_name); + if (fs::exists(_target_file)){ + LOG(WARNING) << "target binlog exist, delete it:" << _target_file_name; + fs::remove(_target_file); + } + + CHECK(std::rename(file_name.c_str(), _target_file_name.c_str()) == 0) << "rename binlog file fail...,errno:" << errno; + + LOG(INFO) << "Switching role, rename binlog name from :" << file_name << " to " << _target_file_name; + } +} + +void ElectionMgr::SwitchRole(RaftRole target_role, const std::string &new_leader) noexcept { + + auto _old_role = StateMgr::GetRole(); + if (_old_role == target_role) { + LOG(WARNING) << "same role switching detected ,from " << StateMgr::GetRoleStr(_old_role) + << " to " << StateMgr::GetRoleStr(target_role); + return; + } + + StateMgr::SwitchTo(target_role,new_leader); + + //Re-initialize global env. + GlobalEnv::UnInitialEnv(_old_role); + + //Switch role also needs to rename binlog file names. + RenameBinlogNames(_old_role, target_role); + + /*Only after `GlobalEnv::UnInitialEnv` could StateMgr::SwitchTo be called since otherwise + `GlobalEnv::UnInitialEnv` would read the modified current state. 
*/
+    GlobalEnv::InitialEnv(true);
+}
+
+/*Body of the election thread. It snapshots the joint-consensus summary and the topology, builds the
+  pre-vote task list and the vote request, then loops:
+    1. sleep a random interval between the two configured bounds;
+    2. move to the next votable term (IncreaseToMaxterm); stop if a new leader was announced meanwhile;
+    3. broadcast the pre-vote; if rejected, restore the starting term, switch back to follower and stop;
+    4. broadcast the vote; if rejected, restore the starting term and start another round;
+    5. on success switch to leader, schedule a heartbeat and submit two non-op writes, then stop. */
+void ElectionMgr::CandidateRoutine() noexcept{
+
+    std::random_device rd;
+    std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd()
+
+    //Bounds of the random sleep (milliseconds) taken at the beginning of every round.
+    int _sleep_min = ::RaftCore::Config::FLAGS_election_term_interval_min_ms;
+    int _sleep_max = ::RaftCore::Config::FLAGS_election_term_interval_max_ms;
+    std::uniform_int_distribution<> dis(_sleep_min, _sleep_max);
+
+    //For a consistent read, we need a snapshot.
+    m_joint_snapshot.Reset();
+    {
+        ReadLock _r_lock(MemberMgr::m_mutex);
+        m_joint_snapshot = MemberMgr::m_joint_summary;
+    }
+
+    Topology _topo;
+    CTopologyMgr::Read(&_topo);
+
+    m_cur_cluster_size = _topo.GetClusterSize();
+    PreparePrevoteTask(_topo);
+
+    //The same request object is reused for the pre-vote and the vote of every round; only its term is refreshed.
+    std::shared_ptr _shp_req(new VoteRequest());
+
+    _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr());
+    _shp_req->mutable_base()->set_term(m_cur_term.load());
+
+    auto _lrl = BinLogGlobal::m_instance.GetLastReplicated();
+    _shp_req->mutable_last_log_entity()->set_term(_lrl.m_term);
+    _shp_req->mutable_last_log_entity()->set_idx(_lrl.m_index);
+
+    _shp_req->set_member_version(MemberMgr::GetVersion());
+
+    /*IMPORTANT: this is the term before the first round of election. We need a term-reverting mechanism,
+      otherwise the candidate doesn't have a chance to be instructed by the leader, which is either
+      newly elected or the old, spuriously failed one. */
+    auto _start_term = m_cur_term.load();
+
+    while (true) {
+        int _sleep_random = dis(gen);
+        std::this_thread::sleep_for(std::chrono::milliseconds(_sleep_random));
+
+        LOG(INFO) << "[Candidate] Just slept " << _sleep_random << " milliseconds under term :" << m_cur_term.load();
+
+        //A `true` result means a new leader was announced and the role has already been switched to follower.
+        if (IncreaseToMaxterm())
+            break;
+
+        LOG(INFO) << "[Candidate] I'm successfully increased to term:" << m_cur_term.load() << ",start issue pre-voting requests to the other nodes.";
+
+        //Update term in the request, too.
+        _shp_req->mutable_base()->set_term(m_cur_term.load());
+
+        if (!BroadcastVoting(_shp_req, _topo, VoteType::PreVote)) {
+            LOG(INFO) << "[Candidate] pre-vote rejected term: " << m_cur_term.load() << ",now back term to " << _start_term
+                    << " ,may because the current node temporarily losing heartbeat messages from leader, "
+                    << "yet the leader is still alive to the other nodes,switch role:[Candidate --> Follwoer]";
+            //Switching from Candidate to Follower must also revert the term, to avoid endlessly starting new election rounds.
+            m_cur_term.store(_start_term);
+            SwitchRole(RaftRole::FOLLOWER);
+            break;
+        }
+
+        LOG(INFO) << "[Candidate] pre-voting succeed under term: " << m_cur_term.load()
+                << ",start issue voting requests to the other nodes.";
+
+        if (!BroadcastVoting(_shp_req, _topo, VoteType::Vote)) {
+            LOG(INFO) << "[Candidate] vote rejected term: " << m_cur_term.load() << ",starting a new round, "
+                    <<"and back my term to the starting term:" <<_start_term;
+            /*Note: the term is reverted here to prevent a candidate from always holding a term greater than
+              the newly elected leader's, which would result in endlessly starting new election rounds. */
+            m_cur_term.store(_start_term);
+            continue;
+        }
+
+        LOG(INFO) << "[Candidate] voting success! Become the new leader of term: " << m_cur_term.load()
+                << ",switch role:[Candidate --> Leader]" ;
+
+        //Record the snapshot of LRL for being used in leader's new term.
+        m_pre_term_lrl.Set(_lrl);
+        m_leader_debut = true;
+
+        SwitchRole(RaftRole::LEADER);
+
+        /*After being elected as leader, we submit a non-op log to make the logs consistent
+          across the new topology in advance. To keep it simple, we just send a write request like a usual
+          client; this operation could be time consuming due to the possible log-resync process.
+          The non-op isn't strictly necessary in aurora's design (normal subsequent requests will
+          also trigger the resync process if it's required); it is only here to finish the resync job
+          ASAP after a new leader is elected. */
+
+        auto _heartbeat = []()->bool {
+            LeaderView::BroadcastHeatBeat();
+            return false; //One shot.
+        };
+        GlobalTimer::AddTask(0, _heartbeat); //Intend to execute immediately.
+
+        //Wait some time to ensure the heartbeat has been sent & acknowledged by the followers.
+        std::this_thread::sleep_for(std::chrono::milliseconds(::RaftCore::Config::FLAGS_election_wait_non_op_finish_ms));
+
+        LOG(INFO) << "[Leader] start issue non-op request";
+
+        //The first one is to make the lagging followers catch up.
+        std::string _tag = std::to_string(_sleep_random) + "_prepare";
+        SentNonOP(_tag);
+
+        //Wait some time to ensure the followers have caught up.
+        std::this_thread::sleep_for(std::chrono::milliseconds(::RaftCore::Config::FLAGS_election_wait_non_op_finish_ms));
+
+        //The second one is to make the overstepped followers truncate their additional logs.
+        _tag = std::to_string(_sleep_random) + "_commit";
+        SentNonOP(_tag);
+
+        break;
+    }
+}
+
+/*Submit one reserved key/value pair ("aurora-reserved-non-op-key_<tag>" / "...-value_<tag>") to the
+  local server through a synchronous write client, exactly as an ordinary client would.
+  A failed RPC or a non-SUCCESS result is only logged; nothing is returned to the caller. */
+void ElectionMgr::SentNonOP(const std::string &tag) noexcept {
+
+    std::string _local_add = _AURORA_LOCAL_IP_ + std::string(":") + std::to_string(::RaftCore::Config::FLAGS_port);
+    std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(_local_add, grpc::InsecureChannelCredentials());
+
+    WriteSyncClient _write_client(_channel);
+
+    auto _setter = [&](std::shared_ptr<::raft::ClientWriteRequest>& req) {
+        req->mutable_req()->set_key("aurora-reserved-non-op-key_" + tag);
+        req->mutable_req()->set_value("aurora-reserved-non-op-value_" + tag);
+    };
+
+    auto _rpc = std::bind(&::raft::RaftService::Stub::Write, _write_client.GetStub().get(),
+        std::placeholders::_1, std::placeholders::_2, std::placeholders::_3);
+
+    ::grpc::Status  _status;
+    auto &_rsp = _write_client.DoRPC(_setter, _rpc, ::RaftCore::Config::FLAGS_election_non_op_timeo_ms, _status);
+
+    if (!_status.ok()) {
+        LOG(ERROR) << "submit non-op fail,err msg:" << _status.error_message();
+        return;
+    }
+
+    if (_rsp.client_comm_rsp().result() != ::raft::ErrorCode::SUCCESS) {
+        LOG(ERROR) << "submit non-op fail,err msg:" << _rsp.client_comm_rsp().err_msg();
+        return;
+    }
+
+    LOG(INFO) << "submit non-op succeed.";
+}
+
+/*Pick the term to campaign in and vote for myself in it.
+  Starting from the current term, each attempt adds one, then jumps past the largest term recorded in
+  m_known_voting, and finally tries to register a self-vote with TryVote(); a term already voted in
+  is skipped and the search goes on. The chosen term is stored into m_cur_term.
+  Return: true if a new-leader notification was seen (term adopted, role switched to follower),
+          false once a votable term has been taken. */
+bool ElectionMgr::IncreaseToMaxterm() noexcept{
+
+    auto _cur_term = m_cur_term.load();
+
+    //Quickly find the next votable term.
+    while (true) {
+
+        //The flag is raised by NotifyNewLeaderEvent(); it is re-checked on every attempt.
+        if (m_new_leader_event.m_notify_flag) {
+            LOG(INFO) << "[Candidate] a higher term found , switch role:[Candidate --> Follwoer]. ";
+            m_cur_term.store(m_new_leader_event.m_new_leader_term);
+            SwitchRole(RaftRole::FOLLOWER,m_new_leader_event.m_new_leader_addr);
+            return true;
+        }
+
+        auto _old_term = _cur_term;
+        _cur_term = _cur_term + 1;
+
+        LOG(INFO) << "[Candidate] term increased normally : " << _old_term << " --> " << _cur_term;
+
+        auto _find_to_max_term = [&]()->void {
+            ReadLock _r_lock(m_known_voting_mutex);
+            auto _last_iter = m_known_voting.crbegin();
+            if (_last_iter == m_known_voting.crend())
+                return;
+
+            /*Make sure the term to be used is greater than the largest one known at present, to avoid
+              term conflicts as far as possible.*/
+            if (_cur_term <= _last_iter->first) {
+                _old_term = _cur_term;
+                _cur_term = _last_iter->first + 1;
+                LOG(INFO) << "[Candidate] term increased jumping: " << _old_term << " --> " << _cur_term;
+            }
+        };
+        _find_to_max_term();
+
+        //TryVote() returns an empty string when the self-vote was recorded, otherwise the address already voted for.
+        auto _voted_addr = TryVote(_cur_term, StateMgr::GetMyAddr());
+        if (!_voted_addr.empty()) {
+            LOG(INFO) << "[Candidate] I'm candidate,voting myself at term " << _cur_term
+                    << " fail,found I've voted some other nodes under this term ,that is:" << _voted_addr;
+            continue;
+        }
+
+        break;
+    }
+
+    m_cur_term.store(_cur_term);
+
+    return false;
+}
+
+/*Publish the term and address of a newly seen leader; the candidate thread picks them up in IncreaseToMaxterm().*/
+void ElectionMgr::NotifyNewLeaderEvent(uint32_t term,const std::string addr)noexcept {
+    //The field updating order is important.
+    m_new_leader_event.m_new_leader_term = term;
+    m_new_leader_event.m_new_leader_addr = addr;
+    m_new_leader_event.m_notify_flag = true;
+}
+
+/*Build the phase-I (pre-vote) task list from scratch: every follower and candidate of `topo` except
+  this node and, while in joint consensus, the nodes added by the new configuration.
+  m_todo holds the peer addresses; m_flags holds, at the same index, the JointConsensusMask bits
+  telling for which cluster(s) the peer's vote counts. */
+void ElectionMgr::PreparePrevoteTask(const Topology &topo)noexcept {
+
+    /*m_phaseI_task is a static member and outlives a single election. Drop what a previous election
+      left behind, otherwise each peer would be listed, contacted and counted more than once. */
+    m_phaseI_task.m_todo.clear();
+    m_phaseI_task.m_flags.clear();
+
+    auto _add = [&](const std::string & node_addr) ->void{
+        if (node_addr == StateMgr::GetMyAddr())
+            return;
+
+        m_phaseI_task.m_todo.emplace_back(node_addr);
+
+        uint32_t _flag = int(JointConsensusMask::IN_OLD_CLUSTER);
+
+        if (m_joint_snapshot.m_joint_status == EJointStatus::JOINT_CONSENSUS) {
+            const auto& _new_cluster = m_joint_snapshot.m_joint_topology.m_new_cluster;
+            if (_new_cluster.find(node_addr) != _new_cluster.cend())
+                _flag |= int(JointConsensusMask::IN_NEW_CLUSTER);
+        }
+
+        m_phaseI_task.m_flags.emplace_back(_flag);
+    };
+    std::for_each(topo.m_followers.cbegin(), topo.m_followers.cend(), _add);
+    std::for_each(topo.m_candidates.cbegin(), topo.m_candidates.cend(), _add);
+
+    //Nodes that only exist in the new configuration vote for the new cluster alone.
+    if (m_joint_snapshot.m_joint_status == EJointStatus::JOINT_CONSENSUS) {
+        const auto& _new_nodes = m_joint_snapshot.m_joint_topology.m_added_nodes;
+        for (auto _iter = _new_nodes.cbegin(); _iter != _new_nodes.cend(); ++_iter) {
+            m_phaseI_task.m_todo.emplace_back(_iter->first);
+            m_phaseI_task.m_flags.emplace_back(uint32_t(JointConsensusMask::IN_NEW_CLUSTER));
+        }
+    }
+}
+
+/*Send `shp_req` as a pre-vote (phase-I list) or a vote (phase-II list) to every peer of the matching
+  task list, wait for the completions and report whether the round was won.
+  Both counters start at 1 for this node's own vote; CallBack() increments them per approval.
+  Return: true when the current cluster reached a majority and, in joint consensus, the new cluster
+          did as well. `topo` is not used by this function. */
+bool ElectionMgr::BroadcastVoting(std::shared_ptr shp_req, const Topology &topo,
+                                VoteType vote_type) noexcept{
+
+    /*The phase-II list is rebuilt by CallBack() from the peers approving the pre-vote. Start every
+      pre-vote round from an empty list, so that a retried round cannot leave duplicated peers behind
+      whose single approval would then be counted several times. */
+    if (vote_type == VoteType::PreVote) {
+        m_phaseII_task.m_todo.clear();
+        m_phaseII_task.m_flags.clear();
+    }
+
+    TwoPhaseCommitBatchTask* _p_task_list = &m_phaseI_task;
+    if (vote_type == VoteType::Vote)
+        _p_task_list = &m_phaseII_task;
+
+    std::shared_ptr<::grpc::CompletionQueue> _shp_cq(new ::grpc::CompletionQueue());
+
+    //Every client shares the caller's request object instead of filling a fresh one.
+    auto _req_setter = [&shp_req](std::shared_ptr<::raft::VoteRequest>& _target)->void {
+        _target = shp_req;
+    };
+
+    //The task index is handed to the client as its callback argument; it comes back as `idx` in CallBack().
+    auto _entrust_prevote_client = [&](auto &_shp_channel,std::size_t idx){
+        auto _shp_client = new PrevoteAsyncClient(_shp_channel, _shp_cq);
+        _shp_client->PushCallBackArgs(reinterpret_cast(idx));
+        auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncPreVote,
+                                    _shp_client->GetStub().get(), std::placeholders::_1,
+                                    std::placeholders::_2, std::placeholders::_3);
+        _shp_client->EntrustRequest(_req_setter, _f_prepare, ::RaftCore::Config::FLAGS_election_vote_rpc_timeo_ms);
+    };
+
+    auto _entrust_vote_client = [&](auto &_shp_channel,std::size_t idx){
+        auto _shp_client = new VoteAsyncClient(_shp_channel, _shp_cq);
+        _shp_client->PushCallBackArgs(reinterpret_cast(idx));
+        auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncVote,
+                                    _shp_client->GetStub().get(), std::placeholders::_1,
+                                    std::placeholders::_2, std::placeholders::_3);
+        _shp_client->EntrustRequest(_req_setter, _f_prepare, ::RaftCore::Config::FLAGS_election_vote_rpc_timeo_ms);
+    };
+
+    int _entrust_total_num = 0;
+    for (std::size_t i = 0; i < _p_task_list->m_todo.size(); ++i) {
+        auto _shp_channel = ::grpc::CreateChannel(_p_task_list->m_todo[i], ::grpc::InsecureChannelCredentials());
+        if (vote_type == VoteType::PreVote)
+            _entrust_prevote_client(_shp_channel,i);
+        else
+            _entrust_vote_client(_shp_channel,i);
+        _entrust_total_num++;
+    }
+
+    //Initialized to 1, which accounts for this node's own vote.
+    m_cur_cluster_vote_counter = 1;
+    m_new_cluster_vote_counter = 1;
+
+    PollingCQ(_shp_cq,_entrust_total_num);
+
+    uint32_t _cur_cluster_majority = m_cur_cluster_size / 2 + 1;
+
+    bool _succeed = m_cur_cluster_vote_counter >= _cur_cluster_majority;
+
+    if (m_joint_snapshot.m_joint_status == EJointStatus::JOINT_CONSENSUS) {
+        std::size_t _new_cluster_total_nodes = m_joint_snapshot.m_joint_topology.m_new_cluster.size();
+        std::size_t _new_cluster_majority = _new_cluster_total_nodes / 2 + 1;
+        _succeed &= (m_new_cluster_vote_counter >= _new_cluster_majority);
+    }
+
+    return _succeed;
+}
+
+/*Take up to `entrust_num` completions from `shp_cq`, treating each tag as a ReactBase and calling its
+  React() with the completion status. Stops early when Next() returns false. */
+void ElectionMgr::PollingCQ(std::shared_ptr<::grpc::CompletionQueue> shp_cq,int entrust_num)noexcept {
+    void* tag;
+    bool ok;
+
+    int _counter = 0;
+    while (_counter < entrust_num) {
+        if (!shp_cq->Next(&tag, &ok))
+            break;
+
+        ::RaftCore::Common::ReactBase* _p_ins = (::RaftCore::Common::ReactBase*)tag;
+        _p_ins->React(ok);
+        _counter++;
+    }
+}
+
+/*Record, under the write lock, that `addr` is known to be voting in `term`.*/
+void ElectionMgr::AddVotingTerm(uint32_t term,const std::string &addr) noexcept{
+
+    WriteLock _w_lock(m_known_voting_mutex);
+
+    LOG(INFO) << "add known voting term " << term << " from " << addr;
+
+    if (m_known_voting.find(term) == m_known_voting.cend())
+        m_known_voting[term] = std::set();
+
+    m_known_voting[term].emplace(addr);
+}
+
+/*Handle the response of one (pre)vote RPC. `idx` is the position of the peer in the task list of the
+  phase given by `vote_type`. A failed RPC is logged and contributes no vote. */
+void ElectionMgr::CallBack(const ::grpc::Status &status, const ::raft::VoteResponse& rsp,
+                            VoteType vote_type,uint32_t idx) noexcept {
+
+    TwoPhaseCommitBatchTask* _p_task_list = &m_phaseI_task;
+    if (vote_type == VoteType::Vote)
+        _p_task_list = &m_phaseII_task;
+
+    if (!status.ok()){
+        LOG(ERROR) << "rpc status fail,idx:" << idx << ",addr:" << _p_task_list->m_todo[idx]
+            << ",error code:" << status.error_code() << ",error msg:" << status.error_message() ;
+        return;
+    }
+
+    std::string _vote_type_str = (vote_type == VoteType::Vote) ?"vote" : "prevote";
+
+    ErrorCode  _err_code = (vote_type == VoteType::Vote) ?
ErrorCode::VOTE_YES : ErrorCode::PREVOTE_YES; + if (rsp.comm_rsp().result() != _err_code) { + LOG(INFO) << "peer " << _p_task_list->m_todo[idx] << " rejected " << _vote_type_str + << ",error message:" << rsp.comm_rsp().err_msg(); + return; + } + + LOG(INFO) << "peer " << _p_task_list->m_todo[idx] << " approved,vote type: " << _vote_type_str; + + if (_p_task_list->m_flags[idx] & int(JointConsensusMask::IN_OLD_CLUSTER)) + m_cur_cluster_vote_counter++; + + if (_p_task_list->m_flags[idx] & int(JointConsensusMask::IN_NEW_CLUSTER)) + m_new_cluster_vote_counter++; + + if (vote_type == VoteType::PreVote) { + m_phaseII_task.m_todo.emplace_back(_p_task_list->m_todo[idx]); + m_phaseII_task.m_flags.emplace_back(_p_task_list->m_flags[idx]); + } +} + +} + diff --git a/src/election/election.h b/src/election/election.h new file mode 100644 index 0000000..5825b60 --- /dev/null +++ b/src/election/election.h @@ -0,0 +1,168 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#pragma once
+
+#ifndef __AURORA_ELECTION_H__
+#define __AURORA_ELECTION_H__
+
+#include
+#include
+#include
+#include
+#include
+
+#include "protocol/raft.pb.h"
+
+#include "common/comm_defs.h"
+#include "common/log_identifier.h"
+#include "tools/utilities.h"
+#include "state/state_mgr.h"
+#include "member/member_manager.h"
+#include "topology/topology_mgr.h"
+
+//File the election state (current term, own votes, known voting terms) is saved to and loaded from.
+#define _AURORA_ELECTION_CONFIG_FILE_ "election.config"
+
+namespace RaftCore::Election {
+
+using ::RaftCore::Common::VoteType;
+using ::RaftCore::Common::TwoPhaseCommitBatchTask;
+using ::RaftCore::Common::LogIdentifier;
+using ::RaftCore::State::RaftRole;
+using ::RaftCore::Topology;
+using ::RaftCore::Member::MemberMgr;
+using ::raft::VoteRequest;
+
+/*Static-only manager of the leader election: it owns the current term, the per-term voting records,
+  the role switching and the candidate thread. It cannot be instantiated. */
+class ElectionMgr {
+
+public:
+
+    //Load the election state from _AURORA_ELECTION_CONFIG_FILE_.
+    static void Initialize() noexcept;
+
+    //Save the election state to _AURORA_ELECTION_CONFIG_FILE_.
+    static void UnInitialize() noexcept;
+
+    /*Record a vote for `addr` in `term` and persist it.
+      Return: an empty string on success, otherwise the address already voted for in that term. */
+    static std::string TryVote(uint32_t term,const std::string &addr)noexcept;
+
+    //Remember that `addr` is known to be voting in `term`.
+    static void AddVotingTerm(uint32_t term,const std::string &addr) noexcept;
+
+    //Start a detached thread that switches this node to candidate and runs the election.
+    static void ElectionThread()noexcept;
+
+    //Tell a running election that a leader with the given term and address showed up.
+    static void NotifyNewLeaderEvent(uint32_t term, const std::string addr)noexcept;
+
+    //SwitchRole is not idempotent.
+    static void SwitchRole(RaftRole target_role, const std::string &new_leader = "") noexcept;
+
+    //Completion handler of one (pre)vote RPC; `idx` is the peer's index in the task list of that phase.
+    static void CallBack(const ::grpc::Status &status, const ::raft::VoteResponse& rsp, VoteType vote_type,uint32_t idx) noexcept;
+
+#ifdef _ELECTION_TEST_
+    //Poll until the candidate routine has finished (test builds only).
+    static void WaitElectionThread()noexcept;
+#endif
+
+public:
+
+    /*To keep the election process simple, clear and less error prone, avoid operating on it from
+      multiple threads as much as possible. */
+    static std::shared_timed_mutex    m_election_mutex;
+
+    //Persistent state on all servers:
+    static std::atomic     m_cur_term; //current term
+
+    //This is a special variable living as long as the server does.
+    static volatile bool   m_leader_debut;
+
+    static LogIdentifier   m_pre_term_lrl;
+
+private:
+
+    static void RenameBinlogNames(RaftRole old_role, RaftRole target_role) noexcept;
+
+    static void CandidateRoutine()noexcept;
+
+    static void LoadFile()noexcept;
+
+    static void SaveFile() noexcept;
+
+    static void Reset() noexcept;
+
+    static bool BroadcastVoting(std::shared_ptr shp_req, const Topology &topo,
+                            VoteType vote_type) noexcept;
+
+    //Return: true if a higher term was found during the increasing process.
+    static bool IncreaseToMaxterm()noexcept;
+
+    static void PollingCQ(std::shared_ptr<::grpc::CompletionQueue> shp_cq,int entrust_num)noexcept;
+
+    static void PreparePrevoteTask(const Topology &topo)noexcept;
+
+    static void SentNonOP(const std::string &tag) noexcept;
+
+private:
+
+    static MemberMgr::JointSummary     m_joint_snapshot;
+
+    static std::map    m_voted;
+
+    static std::shared_timed_mutex    m_voted_mutex;
+
+    static std::map>    m_known_voting;
+
+    static std::shared_timed_mutex    m_known_voting_mutex;
+
+    static std::thread    *m_p_thread;
+
+#ifdef _ELECTION_TEST_
+    static volatile bool   m_candidate_routine_running;
+#endif
+
+    /*Mailbox filled by NotifyNewLeaderEvent() and polled by the candidate routine in
+      IncreaseToMaxterm(). NOTE(review): the writer presumably runs on another thread than the
+      detached candidate thread -- confirm against the callers of NotifyNewLeaderEvent(). */
+    struct NewLeaderEvent {
+
+        std::string     m_new_leader_addr;
+
+        uint32_t        m_new_leader_term;
+
+        /*Set last by the writer and tested first by the reader. It is a std::atomic rather than a
+          `volatile bool`: volatile provides neither atomicity nor inter-thread ordering in C++,
+          whereas the (sequentially consistent) atomic store/load makes the two fields above,
+          written before the flag is raised, visible to the thread that sees the flag as true. */
+        std::atomic<bool>   m_notify_flag;
+    };
+
+    static NewLeaderEvent   m_new_leader_event;
+
+    //Size of the current cluster, taken from the topology at the start of an election.
+    static uint32_t    m_cur_cluster_size;
+
+    //Approvals collected in the running (pre)vote round, this node included.
+    static uint32_t    m_cur_cluster_vote_counter;
+
+    static uint32_t    m_new_cluster_vote_counter;
+
+    //Peers to ask for a pre-vote.
+    static TwoPhaseCommitBatchTask    m_phaseI_task;
+
+    //Peers to ask for a vote: the ones that approved the pre-vote.
+    static TwoPhaseCommitBatchTask    m_phaseII_task;
+
+private:
+
+    ElectionMgr() = delete;
+
+    virtual ~ElectionMgr() = delete;
+
+    ElectionMgr(const ElectionMgr&) = delete;
+
+    ElectionMgr& operator=(const ElectionMgr&) = delete;
+
+};
+
+} //end namespace
+
+#endif
diff --git a/src/follower/follower_bg_task.cc b/src/follower/follower_bg_task.cc
new file mode 100644
index 0000000..84d2084
--- /dev/null
+++ b/src/follower/follower_bg_task.cc
@@ -0,0 +1,77 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/ + +#include "service/service.h" +#include "member/member_manager.h" +#include "follower/follower_bg_task.h" + +namespace RaftCore::Follower::BackGroundTask { + +DisorderMessageContext::DisorderMessageContext(int value_flag)noexcept{ + this->m_value_flag = value_flag; + this->m_generation_tp = std::chrono::system_clock::now(); + + this->m_processed_flag.store(false); + //VLOG(89) << "DisorderMessageContext constructed"; +} + +DisorderMessageContext::~DisorderMessageContext()noexcept{ + //VLOG(89) << "DisorderMessageContext destructed"; +} + +bool DisorderMessageContext::operator<(const DisorderMessageContext& other)const noexcept { + + if (this->m_value_flag < 0 || other.m_value_flag > 0) + return true; + + if (this->m_value_flag > 0 || other.m_value_flag < 0) + return false; + + auto &_shp_req = this->m_append_request; + auto &_shp_req_other = other.m_append_request; + + return _shp_req->GetLastLogID() < _shp_req_other->GetLastLogID(); +} + +bool DisorderMessageContext::operator>(const DisorderMessageContext& other)const noexcept { + + if (this->m_value_flag < 0 || other.m_value_flag > 0) + return false; + + if (this->m_value_flag > 0 || other.m_value_flag < 0) + return true; + + auto &_shp_req = this->m_append_request; + auto &_shp_req_other = other.m_append_request; + + return _shp_req->GetLastLogID() > _shp_req_other->GetLastLogID(); +} + +bool DisorderMessageContext::operator==(const DisorderMessageContext& other)const noexcept { + + if (other.m_value_flag != 0 || this->m_value_flag != 0) + return false; + + auto &_shp_req = this->m_append_request; + auto &_shp_req_other = other.m_append_request; + + return _shp_req->GetLastLogID() == _shp_req_other->GetLastLogID(); +} + +} + diff --git a/src/follower/follower_bg_task.h b/src/follower/follower_bg_task.h new file mode 100644 index 0000000..f3e6f2e --- /dev/null +++ b/src/follower/follower_bg_task.h @@ -0,0 +1,67 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it 
and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_FOLLOWER_BG_TASK_H__ +#define __AURORA_FOLLOWER_BG_TASK_H__ + +#include "tools/utilities.h" +#include "tools/trivial_lock_double_list.h" + +namespace RaftCore { + namespace Service { + class AppendEntries; + } +} + +namespace RaftCore::Follower::BackGroundTask { + +using ::RaftCore::DataStructure::OrderedTypeBase; +using ::RaftCore::Tools::TypeSysTimePoint; +using ::RaftCore::Service::AppendEntries; + +class DisorderMessageContext final : public OrderedTypeBase { + + public: + + DisorderMessageContext(int value_flag = 0)noexcept; + + virtual ~DisorderMessageContext()noexcept; + + virtual bool operator<(const DisorderMessageContext& other)const noexcept override; + + virtual bool operator>(const DisorderMessageContext& other)const noexcept override; + + virtual bool operator==(const DisorderMessageContext& other)const noexcept override; + + std::shared_ptr m_append_request; + + TypeSysTimePoint m_generation_tp; + + /*<0: minimal value; + >0:max value; + ==0:comparable value. 
*/ + int m_value_flag = 0; + + std::atomic m_processed_flag; +}; + +} //end namespace + +#endif diff --git a/src/follower/follower_request.cc b/src/follower/follower_request.cc new file mode 100644 index 0000000..5ffe623 --- /dev/null +++ b/src/follower/follower_request.cc @@ -0,0 +1,36 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "follower/follower_request.h" + +namespace RaftCore::Follower { + +template +FollowerUnaryRequest::FollowerUnaryRequest() noexcept {} + +template +FollowerUnaryRequest::~FollowerUnaryRequest() noexcept {} + +template +FollowerBidirectionalRequest::FollowerBidirectionalRequest() noexcept {} + +template +FollowerBidirectionalRequest::~FollowerBidirectionalRequest() noexcept {} + +} + diff --git a/src/follower/follower_request.h b/src/follower/follower_request.h new file mode 100644 index 0000000..ce020a4 --- /dev/null +++ b/src/follower/follower_request.h @@ -0,0 +1,77 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_FOLLOWER_REQUEST_H__ +#define __AURORA_FOLLOWER_REQUEST_H__ + +#include + +#include "protocol/raft.grpc.pb.h" +#include "protocol/raft.pb.h" + +#include "common/request_base.h" + +using ::raft::RaftService; +using ::grpc::ServerCompletionQueue; +using ::RaftCore::Common::UnaryRequest; +using ::RaftCore::Common::BidirectionalRequest; + +namespace RaftCore::Follower { + +//Just a thin wrapper for differentiate rpcs. +template +class FollowerUnaryRequest : public UnaryRequest{ + +public: + + FollowerUnaryRequest()noexcept; + + virtual ~FollowerUnaryRequest()noexcept; + +private: + + FollowerUnaryRequest(const FollowerUnaryRequest&) = delete; + + FollowerUnaryRequest& operator=(const FollowerUnaryRequest&) = delete; + +}; + +template +class FollowerBidirectionalRequest : public BidirectionalRequest{ + +public: + + FollowerBidirectionalRequest()noexcept; + + virtual ~FollowerBidirectionalRequest()noexcept; + +private: + + FollowerBidirectionalRequest(const FollowerBidirectionalRequest&) = delete; + + FollowerBidirectionalRequest& operator=(const FollowerBidirectionalRequest&) = delete; + +}; + +} //end namespace + +#include "follower_request.cc" + +#endif diff --git a/src/follower/follower_view.cc b/src/follower/follower_view.cc new file mode 100644 index 0000000..e1f62f3 --- /dev/null +++ b/src/follower/follower_view.cc @@ -0,0 +1,139 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, 
or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "config/config.h"
+#include "election/election.h"
+#include "tools/timer.h"
+#include "tools/lock_free_priority_queue.h"
+#include "service/service.h"
+#include "follower/follower_view.h"
+
+namespace RaftCore::Follower {
+
+using ::RaftCore::Service::AppendEntries;
+
+//Definitions of FollowerView's static members.
+//NOTE(review): most template arguments in this file are missing in this copy of the patch
+//(e.g. the time_point / shared_ptr / list element types) - confirm against the repository.
+std::condition_variable FollowerView::m_cv;
+
+std::mutex FollowerView::m_cv_mutex;
+
+std::chrono::time_point FollowerView::m_last_heartbeat;
+
+std::shared_timed_mutex FollowerView::m_last_heartbeat_lock;
+
+/* Note: each follower has exactly one pending_list , the head and tail nodes were
+constant during the lifetime of this pending list, so it's okay to 'new' an object without
+caring about when to 'delete' . */
+//Both pending lists are built from two sentinel items: (0x0, 0x0) and (_MAX_UINT32_, _MAX_UINT64_).
+TrivialLockDoubleList FollowerView::m_phaseI_pending_list(std::shared_ptr(new MemoryLogItemFollower(0x0, 0x0)),
+    std::shared_ptr(new MemoryLogItemFollower(_MAX_UINT32_, _MAX_UINT64_)));
+
+TrivialLockDoubleList FollowerView::m_phaseII_pending_list(std::shared_ptr(new MemoryLogItemFollower(0x0, 0x0)),
+    std::shared_ptr(new MemoryLogItemFollower(_MAX_UINT32_, _MAX_UINT64_)));
+
+//The disorder list is built from two sentinel contexts created with -1 and 1.
+TrivialLockSingleList FollowerView::m_disorder_list(std::shared_ptr(new DisorderMessageContext(-1)),
+    std::shared_ptr(new DisorderMessageContext(1)));
+
+LockFreeUnorderedSingleList> FollowerView::m_garbage;
+
+LockFreeUnorderedSingleList> FollowerView::m_disorder_garbage;
+
+using ::RaftCore::Common::ReadLock;
+using ::RaftCore::Timer::GlobalTimer;
+using ::RaftCore::Election::ElectionMgr;
+
+/*Brings the follower view up.
+  In order: initializes the common view, installs GC for both garbage lists, resets the
+  last-heartbeat timestamp, registers the periodic heartbeat-timeout check with the global
+  timer, raises the running flag and spawns FLAGS_iterating_threads routine threads.
+  switching_role : true when the node just changed role; the timestamp then starts at
+  'now' so the timeout check is armed immediately but cannot fire immediately. */
+void FollowerView::Initialize(bool switching_role) noexcept{
+
+    CommonView::Initialize();
+
+    //Register GC task to the global timer.
+    CommonView::InstallGC(&m_garbage);
+    CommonView::InstallGC(&m_disorder_garbage);
+
+#ifdef _FOLLOWER_VIEW_TEST_
+    auto _test = []()->bool {
+        //VLOG(89) << "I'm alive to debug the stuck issue...";
+        return true;
+    };
+    GlobalTimer::AddTask(1000, _test);
+#endif
+
+    //Initial to time epoch.
+    m_last_heartbeat = std::chrono::time_point();
+
+    //Avoiding immediately checking heartbeat timeout after a switching role event happened.
+    if (switching_role)
+        m_last_heartbeat = std::chrono::steady_clock::now();
+
+    decltype(m_last_heartbeat) * _p_last_heartbeat = &m_last_heartbeat;
+    /*Periodic heartbeat-timeout check. It returns true on every path except the one that
+      starts an election, which returns false. Presumably the timer drops a task once it
+      returns false - confirm in GlobalTimer. */
+    auto _check_heartbeat = [_p_last_heartbeat,switching_role]()->bool {
+
+        //Just for unit test.
+        if (!::RaftCore::Config::FLAGS_checking_heartbeat)
+            return true;
+
+        ReadLock _r_lock(FollowerView::m_last_heartbeat_lock);
+        /*In a general startup case(non switching-role) , Only after receiving the 1st heartbeat msg from server,
+        could the checking mechanism really getting started.*/
+        if (!switching_role && (std::chrono::duration_cast(_p_last_heartbeat->time_since_epoch()).count() == 0))
+            return true;
+
+        //Snapshot the elapsed time under the read lock, then release it before logging/electing.
+        auto _diff = std::chrono::steady_clock::now() - (*_p_last_heartbeat);
+        _r_lock.unlock();
+
+        auto _diff_ms = std::chrono::duration_cast(_diff).count();
+        if (_diff_ms <= ::RaftCore::Config::FLAGS_election_heartbeat_timeo_ms)
+            return true;
+
+        LOG(INFO) << "leader heartbeat timeout line reached,start electing,diff_ms:" << _diff_ms;
+
+        /*Election's heartbeat timeout happened ,starting to turn into candidate state.It will do this
+        by creating a new thread to IMMEDIATELY re-initialize the global env , which will terminate
+        the current timer thread the other way round. */
+        ElectionMgr::ElectionThread();
+        return false;
+    };
+    GlobalTimer::AddTask(::RaftCore::Config::FLAGS_follower_check_heartbeat_interval_ms,_check_heartbeat);
+
+    CommonView::m_running_flag = true;
+
+    //Start follower routine thread.
+    for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_iterating_threads; ++i)
+        CommonView::m_vec_routine.emplace_back(new std::thread(AppendEntries::DisorderLogRoutine));
+}
+
+/*Tears the follower view down: lowers the running flag, joins and frees every routine
+  thread spawned by Initialize(), clears the pending lists and un-initializes the common view. */
+void FollowerView::UnInitialize() noexcept {
+
+    //Waiting for routine thread exit.
+    CommonView::m_running_flag = false;
+
+    for (auto* p_thread : CommonView::m_vec_routine) {
+        p_thread->join();
+        delete p_thread;
+    }
+
+    /*The pointers above are dangling now. Drop them so that a later Initialize()/UnInitialize()
+      cycle (e.g. after a role switch) never joins or deletes an already freed thread object.
+      Harmless if CommonView::UnInitialize() clears the container as well. */
+    CommonView::m_vec_routine.clear();
+
+    Clear();
+    CommonView::UnInitialize();
+}
+
+//Empties the two pending lists and the disorder list.
+void FollowerView::Clear() noexcept {
+    m_phaseI_pending_list.Clear();
+    m_phaseII_pending_list.Clear();
+    m_disorder_list.Clear();
+}
+
+}
+
+diff --git a/src/follower/follower_view.h b/src/follower/follower_view.h
new file mode 100644
index 0000000..7f40868
--- /dev/null
+++ b/src/follower/follower_view.h
@@ -0,0 +1,95 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#ifndef __AURORA_FOLLOWER_VIEW_H__
+#define __AURORA_FOLLOWER_VIEW_H__
+
+//NOTE(review): the header names after the next two #include directives are missing in this
+//copy of the patch - confirm against the repository.
+#include
+#include
+
+#include "common/comm_defs.h"
+#include "common/comm_view.h"
+#include "follower/memory_log_follower.h"
+#include "follower/follower_bg_task.h"
+#include "tools/trivial_lock_double_list.h"
+#include "tools/trivial_lock_single_list.h"
+
+namespace RaftCore::Follower {
+
+using ::RaftCore::Common::CommonView;
+using ::RaftCore::Follower::MemoryLogItemFollower;
+using ::RaftCore::Follower::BackGroundTask::DisorderMessageContext;
+using ::RaftCore::DataStructure::LockFreeUnorderedSingleList;
+using ::RaftCore::DataStructure::SingleListNode;
+using ::RaftCore::DataStructure::TrivialLockSingleList;
+using ::RaftCore::DataStructure::DoubleListNode;
+using ::RaftCore::DataStructure::TrivialLockDoubleList;
+
+/*Note: This is the class for representing follower's state in follower's own view.
+  Everything is static and the constructors/destructor are deleted, so the class is never
+  instantiated; it is used as a holder of process-wide follower state.
+  NOTE(review): the template arguments of the member types below are missing in this copy
+  of the patch - confirm against the repository. */
+class FollowerView final: public CommonView{
+
+public:
+
+    //Pending log entries waiting to be written into binlog file.
+    static TrivialLockDoubleList m_phaseI_pending_list;
+
+    //Pending log entries waiting to be stored.
+    static TrivialLockDoubleList m_phaseII_pending_list;
+
+    //Garbage list registered with CommonView::InstallGC() by Initialize().
+    static LockFreeUnorderedSingleList> m_garbage;
+
+    //Pending log entries waiting to be returned.
+    static TrivialLockSingleList m_disorder_list;
+
+    //Second garbage list registered with CommonView::InstallGC() by Initialize().
+    static LockFreeUnorderedSingleList> m_disorder_garbage;
+
+    //Threads whose log entries haven't been written are waiting on this CV
+    static std::condition_variable m_cv;
+
+    //Used together with the above CV
+    static std::mutex m_cv_mutex;
+
+    //Timestamp used by the periodic heartbeat-timeout check. Initialize() resets it to the
+    //epoch, or to steady_clock::now() when switching role.
+    //Presumably refreshed by the heartbeat handler - confirm against the service code.
+    static std::chrono::time_point m_last_heartbeat;
+
+    //Guards m_last_heartbeat; the heartbeat-timeout check takes it as a ReadLock.
+    static std::shared_timed_mutex m_last_heartbeat_lock;
+
+public:
+
+    //Brings the follower view up. Pass switching_role=true when the node just changed role.
+    static void Initialize(bool switching_role=false) noexcept;
+
+    //Stops and joins the routine threads, clears the lists and un-initializes the common view.
+    static void UnInitialize() noexcept;
+
+    //Empties m_phaseI_pending_list, m_phaseII_pending_list and m_disorder_list.
+    static void Clear() noexcept;
+
+private:
+
+    //Static-only class: no instances, no copies.
+    FollowerView() = delete;
+
+    virtual ~FollowerView() = delete;
+
+    FollowerView(const FollowerView&) = delete;
+
+    FollowerView& operator=(const FollowerView&) = delete;
+
+};
+
+} //end namespace
+
+#endif
diff --git a/src/follower/memory_log_follower.cc b/src/follower/memory_log_follower.cc
new file mode 100644
index 0000000..38750ae
--- /dev/null
+++ b/src/follower/memory_log_follower.cc
@@ -0,0 +1,74 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#include "common/comm_defs.h"
+#include "common/log_identifier.h"
+#include "follower/memory_log_follower.h"
+
+namespace RaftCore::Follower {
+
+//Nothing to release here; the body only keeps a disabled trace statement.
+MemoryLogItemFollower::~MemoryLogItemFollower() noexcept{
+    //VLOG(89) << "MemoryLogItemFollower destructed " << ::RaftCore::Common::ConvertID(this->m_entity->entity_id());
+}
+
+//Builds an item from a bare (term, index) pair by forwarding both to MemoryLogItemBase.
+MemoryLogItemFollower::MemoryLogItemFollower(uint32_t _term, uint64_t _index) noexcept:MemoryLogItemBase(_term, _index) {
+    //VLOG(89) << "MemoryLogItemFollower constructed pos1";
+}
+
+//Builds an item from a protobuf entity by forwarding it to MemoryLogItemBase.
+MemoryLogItemFollower::MemoryLogItemFollower(const ::raft::Entity &_entity) noexcept: MemoryLogItemBase(_entity) {
+    //VLOG(89) << "MemoryLogItemFollower constructed pos2";
+}
+
+/*Two follower items are equal when the base class considers them equal AND their
+  pre_log_id fields match. The extra field is compared because the item lives in a
+  TrivialLockDoubleList where entries sharing the base identity may still differ.
+  The payload (key/value of write_op) is deliberately NOT compared: an item must be
+  rewritable by term & idx alone. */
+bool MemoryLogItemFollower::operator==(const MemoryLogItemFollower& _other)const noexcept {
+    const bool _base_equal = MemoryLogItemBase::operator==(_other);
+
+    //Short-circuits exactly like the two-step form: pre_log_id is only read when the base part matches.
+    return _base_equal && EntityIDEqual(this->m_entity->pre_log_id(), _other.GetEntity()->pre_log_id());
+}
+
+//Plain negation of operator==.
+bool MemoryLogItemFollower::operator!=(const MemoryLogItemFollower& _other)const noexcept {
+    return !operator==(_other);
+}
+
+//Orders by entity_id only, through EntityIDSmallerEqual.
+bool MemoryLogItemFollower::operator<=(const MemoryLogItemFollower& _other)const noexcept {
+    const auto &_my_id = this->m_entity->entity_id();
+    const auto &_its_id = _other.m_entity->entity_id();
+    return EntityIDSmallerEqual(_my_id, _its_id);
+}
+
+//Delegates to the base class' ordering.
+bool MemoryLogItemFollower::operator<(const MemoryLogItemFollower& _other)const noexcept {
+    return MemoryLogItemBase::operator<(_other);
+}
+
+//Delegates to the base class' ordering.
+bool MemoryLogItemFollower::operator>(const MemoryLogItemFollower& _other)const noexcept {
+    return MemoryLogItemBase::operator>(_other);
+}
+
+//Free-function comparator taking references; forwards the addresses of both items to CmpMemoryLog.
+bool CmpMemoryLogFollower(const MemoryLogItemFollower& left, const MemoryLogItemFollower& right) noexcept {
+    return CmpMemoryLog(&left,&right);
+}
+
+
+}
diff --git a/src/follower/memory_log_follower.h b/src/follower/memory_log_follower.h
new file mode 100644
index 0000000..a5f5cf6
--- /dev/null
+++ b/src/follower/memory_log_follower.h
@@ -0,0 +1,69 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef _AURORA_MEMORY_LOG_FOLLOWER_H_
+#define _AURORA_MEMORY_LOG_FOLLOWER_H_
+
+//NOTE(review): the header name after this #include is missing in this copy of the patch - confirm against the repository.
+#include
+
+#include "protocol/raft.pb.h"
+
+#include "tools/trivial_lock_double_list.h"
+#include "common/memory_log_base.h"
+
+using ::RaftCore::Common::MemoryLogItemBase;
+using ::RaftCore::Common::EntityIDEqual;
+using ::RaftCore::Common::EntityIDSmallerEqual;
+
+namespace RaftCore::Follower {
+
+/*Follower-side in-memory log item.
+  It combines MemoryLogItemBase (the log entity) with OrderedTypeBase so that it can be
+  stored in the ordered lists from tools/trivial_lock_double_list.h.
+  NOTE(review): the template argument of OrderedTypeBase is missing in this copy of the
+  patch - confirm against the repository. */
+class MemoryLogItemFollower final : public ::RaftCore::DataStructure::OrderedTypeBase , public MemoryLogItemBase {
+
+public:
+
+    virtual ~MemoryLogItemFollower() noexcept;
+
+    //Builds an item from a bare (term, index) pair.
+    MemoryLogItemFollower(uint32_t _term, uint64_t _index) noexcept;
+
+    //Builds an item from a protobuf entity.
+    MemoryLogItemFollower(const ::raft::Entity &_entity) noexcept;
+
+    //Compares entity ids only (EntityIDSmallerEqual).
+    bool operator<=(const MemoryLogItemFollower& _other)const noexcept;
+
+    //Delegates to MemoryLogItemBase::operator<.
+    virtual bool operator<(const MemoryLogItemFollower& _other)const noexcept;
+
+    //Delegates to MemoryLogItemBase::operator>.
+    virtual bool operator>(const MemoryLogItemFollower& _other)const noexcept;
+
+    //Base-class equality plus equality of pre_log_id; the payload is not compared.
+    virtual bool operator==(const MemoryLogItemFollower& _other)const noexcept;
+
+    //Negation of operator==.
+    virtual bool operator!=(const MemoryLogItemFollower& _other)const noexcept;
+
+protected:
+
+    //Empty override; presumably satisfies a pure virtual of a base class - confirm against MemoryLogItemBase.
+    virtual void NotImplemented() noexcept{}
+
+};
+
+//NOTE(review): the element type of this list is missing in this copy of the patch - confirm against the repository.
+typedef std::list> TypeMemlogFollowerList;
+
+//Comparator over references; implemented by forwarding both addresses to CmpMemoryLog.
+bool CmpMemoryLogFollower(const MemoryLogItemFollower& left, const MemoryLogItemFollower& right) noexcept;
+
+}
+
+#endif
diff --git a/src/global/global_env.cc b/src/global/global_env.cc
new file mode 100644
index 0000000..a4c5b35
--- /dev/null
+++ b/src/global/global_env.cc
@@ -0,0 +1,342 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+//NOTE(review): the header name after this #include is missing in this copy of the patch - confirm against the repository.
+#include
+
+#include "grpc/grpc.h"
+#include "grpc++/server_context.h"
+#include "grpc++/security/server_credentials.h"
+
+#include "common/request_base.h"
+#include "binlog/binlog_singleton.h"
+#include "storage/storage_singleton.h"
+#include "service/service.h"
+#include "topology/topology_mgr.h"
+#include "leader/leader_view.h"
+#include "follower/follower_view.h"
+#include "candidate/candidate_view.h"
+#include "guid/guid_generator.h"
+#include "election/election.h"
+#include "tools/timer.h"
+#include "tools/lock_free_priority_queue.h"
+#include "global/global_env.h"
+
+namespace RaftCore::Global {
+
+//Definitions of GlobalEnv's static members.
+//NOTE(review): several template arguments (vector element types, the atomic's value type)
+//are missing in this copy of the patch - confirm against the repository.
+std::unique_ptr<::grpc::Server> GlobalEnv::m_pserver;
+
+volatile bool GlobalEnv::m_running = false;
+
+std::vector> GlobalEnv::m_vec_notify_cq_workgroup;
+
+std::vector> GlobalEnv::m_vec_call_cq_workgroup;
+
+std::vector> GlobalEnv::m_vec_client_cq_workgroup;
+
+std::shared_ptr<::raft::RaftService::AsyncService> GlobalEnv::m_async_service;
+
+std::atomic GlobalEnv::m_released_cq_idx;
+
+volatile bool GlobalEnv::m_cq_fully_shutdown = false;
+
+std::string GlobalEnv::m_server_addr;
+
+::grpc::ServerBuilder GlobalEnv::m_builder;
+
+
+//True between the moment StartGrpcService() has started all polling threads and the end of StopGrpcService().
+bool GlobalEnv::IsRunning() noexcept {
+    return m_running;
+}
+
+/*Hands out the client completion queues in round-robin order.
+  Client CQs only exist when InitGrpcEnv() ran while the role was LEADER; asking for one
+  otherwise is a usage error and aborts with a clear message instead of dividing by zero. */
+TypePtrCQ GlobalEnv::GetClientCQInstance() noexcept {
+
+#ifdef _CONN_TEST_
+    //Just return a casual CQ.
+    return m_vec_notify_cq_workgroup[0].GetCQ();
+#endif
+
+    CHECK(!m_vec_client_cq_workgroup.empty()) << "no client CQ available, client CQs are only created when InitGrpcEnv runs as leader.";
+
+    //m_released_cq_idx needn't to be atomic, what we need is just a approximately accurate idx.
+    int _before_val = m_released_cq_idx.fetch_add(1);
+
+    //Reduce in the unsigned domain (what the mixed int/size_t expression did implicitly) so the
+    //index is always in range, even after the counter wraps.
+    std::size_t _idx = static_cast<std::size_t>(_before_val) % m_vec_client_cq_workgroup.size();
+    return m_vec_client_cq_workgroup[_idx].GetCQ();
+}
+
+/*Configures the gRPC server builder: listening address, the async service and the notify/call
+  completion queues (plus the client CQs when the current role is LEADER).
+  Nothing is started here; see StartGrpcService(). */
+void GlobalEnv::InitGrpcEnv() noexcept {
+    m_server_addr = std::string(::RaftCore::Config::FLAGS_ip) + ":" + std::to_string(::RaftCore::Config::FLAGS_port);
+    m_builder.AddListeningPort(m_server_addr, ::grpc::InsecureServerCredentials());
+
+    m_async_service.reset(new ::raft::RaftService::AsyncService());
+    m_builder.RegisterService(m_async_service.get());
+
+    m_vec_notify_cq_workgroup.clear();
+    m_vec_call_cq_workgroup.clear();
+
+    TypeReactorFunc _reactor = ::RaftCore::Common::ReactBase::GeneralReacting;
+
+    for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_notify_cq_num; ++i) {
+        uint32_t _notify_cq_thread_num = ::RaftCore::Config::FLAGS_notify_cq_threads;
+        m_vec_notify_cq_workgroup.emplace_back(m_builder.AddCompletionQueue(), _reactor, _notify_cq_thread_num);
+    }
+
+    for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_call_cq_num; ++i) {
+        uint32_t _call_cq_thread_num = ::RaftCore::Config::FLAGS_call_cq_threads;
+        m_vec_call_cq_workgroup.emplace_back(m_builder.AddCompletionQueue(), _reactor, _call_cq_thread_num);
+    }
+
+    //The additional CQ is for the response processing dedicated CQ.
+    if (::RaftCore::State::StateMgr::GetRole() == State::RaftRole::LEADER) {
+        m_vec_client_cq_workgroup.clear();
+        for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_client_cq_num; ++i) {
+            uint32_t _client_cq_thread_num = ::RaftCore::Config::FLAGS_client_thread_num;
+            TypeReactorFunc _client_reactor = ::RaftCore::Leader::LeaderView::ClientThreadReacting;
+
+            //Client CQs are plain CompletionQueues, not server CQs obtained from the builder.
+            TypePtrCQ _shp_cq(new CompletionQueue());
+            m_vec_client_cq_workgroup.emplace_back(_shp_cq, _client_reactor, _client_cq_thread_num);
+        }
+    }
+
+    m_released_cq_idx.store(0);
+}
+
+/*Builds and starts the server, spawns the request objects, starts every polling thread and
+  then BLOCKS until all polling threads have returned. m_cq_fully_shutdown is raised last,
+  which is what StopGrpcService() waits for. */
+void GlobalEnv::StartGrpcService() noexcept{
+
+    m_pserver = m_builder.BuildAndStart();
+
+    //BuildAndStart() yields a null server on failure (e.g. the port cannot be bound); stop here
+    //with a clear message rather than crashing later on a null dereference.
+    CHECK(m_pserver != nullptr) << "failed to build and start the gRPC server on " << m_server_addr;
+
+    //Spawning request pool instances must be after server successfully built.
+    for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_notify_cq_num; ++i) {
+        for (std::size_t j = 0; j < ::RaftCore::Config::FLAGS_request_pool_size; ++j)
+            SpawnFamilyBucket(m_async_service, i);
+    }
+
+    LOG(INFO) << "Server listening on " << m_server_addr;
+
+    auto _start_workgroup_threads = [&](auto &work_group) {
+        for (auto &_item : work_group)
+            _item.StartPolling();
+    };
+
+    LOG(INFO) << "spawning notify_cq polling threads.";
+    _start_workgroup_threads(m_vec_notify_cq_workgroup);
+
+    LOG(INFO) << "spawning call_cq polling threads.";
+    _start_workgroup_threads(m_vec_call_cq_workgroup);
+
+    LOG(INFO) << "spawning client_cq polling threads.";
+    _start_workgroup_threads(m_vec_client_cq_workgroup);
+
+    m_running = true;
+
+    auto _wait_workgroup_threads = [&](auto &work_group) {
+        for (auto &_item : work_group)
+            _item.WaitPolling();
+    };
+
+    LOG(INFO) << "waiting notify_cq polling threads to exist";
+    _wait_workgroup_threads(m_vec_notify_cq_workgroup);
+
+    LOG(INFO) << "waiting call_cq polling threads to exist";
+    _wait_workgroup_threads(m_vec_call_cq_workgroup);
+
+    LOG(INFO) << "waiting client_cq polling threads to exist";
+    _wait_workgroup_threads(m_vec_client_cq_workgroup);
+
+    m_cq_fully_shutdown = true;
+
+    VLOG(89) << "fully shutdown set to true";
+}
+
+/*Creates one request object of every rpc kind for the notify CQ at cq_idx, paired with a call
+  CQ picked by cq_idx modulo the number of call CQs.
+  The objects are allocated with 'new' and intentionally not stored here; presumably each one
+  registers itself with the service and manages its own lifetime - confirm in service.h. */
+void GlobalEnv::SpawnFamilyBucket(std::shared_ptr<::raft::RaftService::AsyncService> shp_svc, std::size_t cq_idx) noexcept {
+
+    auto _notify_cq = m_vec_notify_cq_workgroup[cq_idx].GetCQ();
+
+    //A zero FLAGS_call_cq_num would make the modulo below divide by zero.
+    CHECK(!m_vec_call_cq_workgroup.empty()) << "at least one call CQ is required.";
+
+    std::size_t _call_cq_idx = cq_idx % m_vec_call_cq_workgroup.size();
+    auto _call_cq = m_vec_call_cq_workgroup[_call_cq_idx].GetCQ();
+
+    //Entrusting a complete set of request instances to each CQ.
+    new ::RaftCore::Service::Write(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::Read(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::MembershipChange(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::AppendEntries(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::CommitEntries(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::SyncData(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::MemberChangePrepare(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::MemberChangeCommit(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::PreVote(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::Vote(shp_svc, _notify_cq, _call_cq);
+    new ::RaftCore::Service::HeartBeat(shp_svc, _notify_cq, _call_cq);
+}
+
+/*Shuts the server down first, then every completion queue, and finally waits until
+  StartGrpcService() (running in another thread) reports that all polling threads returned.
+  NOTE(review): the wait is a busy spin on a volatile bool, which burns a core and is not a
+  synchronization primitive; consider std::atomic plus a condition variable. It also never
+  returns if StartGrpcService() was not running. */
+void GlobalEnv::StopGrpcService() noexcept {
+
+    m_pserver->Shutdown();
+    m_pserver->Wait();
+
+    auto _shutdown_workgroup_cqs = [&](auto &work_group) {
+        for (auto &_item : work_group)
+            _item.ShutDownCQ();
+    };
+
+    LOG(INFO) << "shutting down notify_cq.";
+    _shutdown_workgroup_cqs(m_vec_notify_cq_workgroup);
+
+    LOG(INFO) << "shutting down call_cq.";
+    _shutdown_workgroup_cqs(m_vec_call_cq_workgroup);
+
+    LOG(INFO) << "shutting down client_cq.";
+    _shutdown_workgroup_cqs(m_vec_client_cq_workgroup);
+
+    //Need to wait for all CQs shutdown.
+    while (!m_cq_fully_shutdown);
+
+    m_running = false;
+}
+
+/*Initializes every global component in dependency order after validating the configuration.
+  switching_role : true when called because the node changed role at runtime; the gRPC
+  environment is then left untouched and the flag is forwarded to FollowerView::Initialize(). */
+void GlobalEnv::InitialEnv(bool switching_role) noexcept {
+
+    LOG(INFO) << "start initialing global env.";
+
+    //Check config validity first.
+    CHECK(::RaftCore::Config::FLAGS_follower_check_heartbeat_interval_ms < ::RaftCore::Config::FLAGS_leader_heartbeat_interval_ms);
+    CHECK(::RaftCore::Config::FLAGS_leader_heartbeat_interval_ms < ::RaftCore::Config::FLAGS_election_heartbeat_timeo_ms);
+    CHECK(::RaftCore::Config::FLAGS_memory_table_max_item < ::RaftCore::Config::FLAGS_binlog_max_log_num);
+    CHECK(::RaftCore::Config::FLAGS_garbage_deque_retain_num >= 1);
+
+    //TODO: check #threads doesn't exceeds m_step_len
+
+    //#-------------------------------Init topology-------------------------------#//
+    ::RaftCore::CTopologyMgr::Initialize();
+    ::RaftCore::Topology global_topo;
+    ::RaftCore::CTopologyMgr::Read(&global_topo);
+
+    //#-------------------------------Init State Manager-------------------------------#//
+    ::RaftCore::State::StateMgr::Initialize(global_topo);
+
+    /*
+    if (::RaftCore::State::StateMgr::AddressUndetermined()) {
+        const auto &_nic_addrs = ::RaftCore::State::StateMgr::GetNICAddrs();
+        auto _address = ::RaftCore::Member::MemberMgr::FindPossibleAddress(_nic_addrs);
+        CHECK(!_address.empty()) << "can't find my address in both topology and membership config files.";
+        ::RaftCore::State::StateMgr::SetMyAddr(_address);
+    }*/
+
+    //#-------------------------------Init Global Timer-------------------------------#//
+    ::RaftCore::Timer::GlobalTimer::Initialize();
+
+    const char* _p_role = ::RaftCore::State::StateMgr::GetRoleStr();
+    LOG(INFO) << "--------------------started as " << _p_role << "--------------------";
+
+    //#-------------------------------Init Storage-------------------------------#//
+    ::RaftCore::Storage::StorageGlobal::m_instance.Initialize(_p_role);
+
+    /*BinLogGlobal must be initialized after StorageGlobal to avoid opening the binlog file for
+    multiple times. */
+
+    //#-------------------------------Init Binlog Operator-------------------------------#//
+    ::RaftCore::BinLog::BinLogGlobal::m_instance.Initialize(_p_role);
+
+    //#-------------------------------Init Guid File-------------------------------#//
+    auto _lrl = ::RaftCore::BinLog::BinLogGlobal::m_instance.GetLastReplicated();
+    ::RaftCore::Guid::GuidGenerator::Initialize(_lrl.m_index);
+
+    //#-------------------------------Init grpc env-------------------------------#//
+    auto _current_role = ::RaftCore::State::StateMgr::GetRole();
+    if (!switching_role)
+        InitGrpcEnv();
+
+    //#-------------------------------Init leader-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::LEADER)
+        ::RaftCore::Leader::LeaderView::Initialize(global_topo);
+
+    //#-------------------------------Init Follower-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::FOLLOWER)
+        ::RaftCore::Follower::FollowerView::Initialize(switching_role);
+
+    //#-------------------------------Init Candidate-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::CANDIDATE)
+        ::RaftCore::Candidate::CandidateView::Initialize();
+
+    //#-------------------------------Init Election Manager.-------------------------------#//
+    ::RaftCore::Election::ElectionMgr::Initialize();
+
+    //#-------------------------------Init Membership Manager.-------------------------------#//
+    ::RaftCore::Member::MemberMgr::Initialize();
+
+    LOG(INFO) << "finish initialing global env.";
+}
+
+/*Un-initializes every global component.
+  state : the role whose view must be torn down; when left as UNKNOWN the role currently
+  reported by StateMgr is used instead.
+  NOTE(review): the role views are torn down after the timer, state manager, binlog and
+  storage; confirm that no routine thread of a view still uses those at that point. */
+void GlobalEnv::UnInitialEnv(::RaftCore::State::RaftRole state) noexcept {
+
+    //Read the role before StateMgr is un-initialized below.
+    auto _current_role = ::RaftCore::State::StateMgr::GetRole();
+    bool _from_old_state = (state != ::RaftCore::State::RaftRole::UNKNOWN);
+    if (_from_old_state)
+        _current_role = state;
+
+    //#-------------------------------UnInit Global Timer-------------------------------#//
+    /*Note : This should firstly be done before state manager ,which is the dependee.*/
+    ::RaftCore::Timer::GlobalTimer::UnInitialize();
+
+    //#----------------------------UnInit Server State.-----------------------------#//
+    /*Note : This should firstly be done to prevent server from serving newly coming requests.*/
+    ::RaftCore::State::StateMgr::UnInitialize();
+
+    ::RaftCore::CTopologyMgr::UnInitialize();
+
+    //#-------------------------------UnInit guid file-------------------------------#//
+    ::RaftCore::Guid::GuidGenerator::UnInitialize();
+
+    //#-------------------------------UnInit binlog operator-------------------------------#//
+    ::RaftCore::BinLog::BinLogGlobal::m_instance.UnInitialize();
+
+    //#-------------------------------UnInit Storage-------------------------------#//
+    ::RaftCore::Storage::StorageGlobal::m_instance.UnInitialize();
+
+    //#-------------------------------UnInit leader-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::LEADER)
+        ::RaftCore::Leader::LeaderView::UnInitialize();
+
+    //#-------------------------------UnInit Follower-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::FOLLOWER)
+        ::RaftCore::Follower::FollowerView::UnInitialize();
+
+    //#-------------------------------UnInit Candidate-------------------------------#//
+    if (_current_role == ::RaftCore::State::RaftRole::CANDIDATE)
+        ::RaftCore::Candidate::CandidateView::UnInitialize();
+
+    //#-------------------------------UnInit Election Manager.-------------------------------#//
+    ::RaftCore::Election::ElectionMgr::UnInitialize();
+
+    //#-------------------------------UnInit Membership Manager.-------------------------------#//
+    ::RaftCore::Member::MemberMgr::UnInitialize();
+}
+
+//Public entry point: starts the gRPC service and blocks until it has fully shut down.
+void GlobalEnv::RunServer() noexcept{
+    //#-------------------------------Start server-------------------------------#//
+    StartGrpcService();
+}
+
+//Public entry point: stops the gRPC service (see StopGrpcService()).
+void GlobalEnv::StopServer() noexcept {
+    StopGrpcService();
+}
+
+//Stops the server, then un-initializes the environment for the current role.
+void GlobalEnv::ShutDown() noexcept {
+    StopServer();
+    UnInitialEnv();
+}
+
+}
+
+diff --git
a/src/global/global_env.h b/src/global/global_env.h
new file mode 100644
index 0000000..be792cc
--- /dev/null
+++ b/src/global/global_env.h
@@ -0,0 +1,109 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#ifndef __AURORA_GLOBAL_ENV_H__
+#define __AURORA_GLOBAL_ENV_H__
+
+//NOTE(review): the header name after this #include is missing in this copy of the patch - confirm against the repository.
+#include
+
+#include "grpc++/server_builder.h"
+#include "grpc++/server.h"
+#include "grpc++/completion_queue.h"
+
+#include "protocol/raft.grpc.pb.h"
+
+#include "state/state_mgr.h"
+#include "common/react_base.h"
+#include "common/react_group.h"
+
+namespace RaftCore::Global {
+
+using ::grpc::CompletionQueue;
+using ::RaftCore::Common::TypePtrCQ;
+using ::RaftCore::Common::TypeReactorFunc;
+using ::RaftCore::Common::ReactWorkGroup;
+
+/*Note: This is the class owning the process-wide environment: the gRPC server, its completion
+  queue work groups and the init/un-init sequence of all the global components.
+  Everything is static and the constructors/destructor are deleted, so it is never instantiated. */
+class GlobalEnv final {
+
+public:
+
+    //Initializes all global components. Pass switching_role=true on a runtime role change;
+    //the gRPC environment is then not re-initialized.
+    static void InitialEnv(bool switching_role=false) noexcept;
+
+    //Starts the gRPC service; blocks until every polling thread has returned.
+    static void RunServer() noexcept;
+
+    //Stops the gRPC service.
+    static void StopServer() noexcept;
+
+    //Un-initializes all global components. 'state' selects which role view to tear down;
+    //UNKNOWN means the role currently reported by the state manager.
+    static void UnInitialEnv(::RaftCore::State::RaftRole state=::RaftCore::State::RaftRole::UNKNOWN) noexcept;
+
+    //Note : Must called in other threads , not in any gRPC threads.
+    //Equivalent to StopServer() followed by UnInitialEnv().
+    static void ShutDown() noexcept;
+
+    static bool IsRunning() noexcept;
+
+    //Returns one of the client completion queues, chosen round-robin.
+    static TypePtrCQ GetClientCQInstance() noexcept;
+
+private:
+
+    //Configures the server builder: address, service and completion queues.
+    static void InitGrpcEnv() noexcept;
+
+    static void StartGrpcService() noexcept;
+
+    //Allocates one request object per rpc kind for the notify CQ at cq_idx.
+    static void SpawnFamilyBucket(std::shared_ptr<::raft::RaftService::AsyncService> shp_svc, std::size_t cq_idx) noexcept;
+
+    static void StopGrpcService() noexcept;
+
+private:
+
+    static std::unique_ptr<::grpc::Server> m_pserver;
+
+    //Set once all polling threads are started; cleared at the end of StopGrpcService().
+    static volatile bool m_running;
+
+    //Set by StartGrpcService() after every polling thread returned; StopGrpcService() spins on it.
+    static volatile bool m_cq_fully_shutdown;
+
+    //NOTE(review): the element types of the three vectors and the value type of the atomic
+    //below are missing in this copy of the patch - confirm against the repository.
+    static std::vector> m_vec_notify_cq_workgroup;
+
+    static std::vector> m_vec_call_cq_workgroup;
+
+    //Only populated when InitGrpcEnv() runs while the role is LEADER.
+    static std::vector> m_vec_client_cq_workgroup;
+
+    static std::shared_ptr<::raft::RaftService::AsyncService> m_async_service;
+
+    //Round-robin counter used by GetClientCQInstance().
+    static std::atomic m_released_cq_idx;
+
+    //"<ip>:<port>" built from FLAGS_ip and FLAGS_port.
+    static std::string m_server_addr;
+
+    static ::grpc::ServerBuilder m_builder;
+
+private:
+
+    //Static-only class: no instances, no copies.
+    GlobalEnv() = delete;
+
+    virtual ~GlobalEnv() noexcept = delete;
+
+    GlobalEnv(const GlobalEnv&) = delete;
+
+    GlobalEnv& operator=(const GlobalEnv&) = delete;
+
+};
+
+} //end namespace
+
+#endif
diff --git a/src/gtest/binlog/test_all.h b/src/gtest/binlog/test_all.h
new file mode 100644
index 0000000..dbe9d43
--- /dev/null
+++ b/src/gtest/binlog/test_all.h
@@ -0,0 +1,27 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/ + +#pragma once + +#ifndef __GTEST_ALL_BINLOG_H__ +#define __GTEST_ALL_BINLOG_H__ + +#include "gtest/binlog/test_meta.h" +#include "gtest/binlog/test_binlog.h" + +#endif diff --git a/src/gtest/binlog/test_binlog.h b/src/gtest/binlog/test_binlog.h new file mode 100644 index 0000000..fd5a7aa --- /dev/null +++ b/src/gtest/binlog/test_binlog.h @@ -0,0 +1,385 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_BINLOG_H__ +#define __GTEST_BINLOG_H__ + +#include +#include +#include + +#include "boost/filesystem.hpp" + +#include "gtest/test_base.h" +#include "common/comm_defs.h" +#include "binlog/binlog_singleton.h" +#include "storage/storage_singleton.h" +#include "leader/leader_view.h" + +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::BinLog::BinLogOperator; +using ::RaftCore::Storage::StorageGlobal; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::DataStructure::HashNode; + +namespace fs = ::boost::filesystem; + +class TestBinlog : public TestBase { + + public: + + TestBinlog() {} + + protected: + + virtual void SetUp() override { + m_zero.Set(0, 0); + } + + virtual void TearDown() override { + } + + auto RevertPreviousLogs(int cur_idx,int thread_idx) { + + //test reverting. 
+ std::list> _log_list; + + int _pre_count = 3; + + for (int i = cur_idx - _pre_count; i <= cur_idx; ++i) { + ::raft::Entity _tmp; + auto _p_id = _tmp.mutable_entity_id(); + _p_id->set_term(0); + _p_id->set_idx(i); + + auto _p_pre_id = _tmp.mutable_pre_log_id(); + _p_pre_id->set_term(0); + _p_pre_id->set_idx(i-1); + + auto _p_wop = _tmp.mutable_write_op(); + + char sz_val[1024] = { 0 }; + //std::snprintf(sz_val,sizeof(sz_val),"val_%d tid:%d",i,thread_idx); + std::snprintf(sz_val,sizeof(sz_val),"val_%d",i); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value(sz_val); + + _log_list.emplace_back(new MemoryLogItemFollower(_tmp)); + } + + return BinLogGlobal::m_instance.RevertLog(_log_list, this->m_zero); + } + + LogIdentifier m_zero; +}; + +TEST_F(TestBinlog, GeneralOperation) { + + //Remove existing binlog file first. + const char *_role = "test"; + std::string _binlog_file = _AURORA_BINLOG_NAME_ + std::string(".") + _role; + if (fs::exists(fs::path(_binlog_file))) + ASSERT_TRUE(std::remove(_binlog_file.c_str())==0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + //Construct binlog file. 
+ std::list > _input; + for (int i = 0; i < 10;++i) { + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + + _input.emplace_back(_shp_entity); + } + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + + std::list> _output; + BinLogGlobal::m_instance.GetOrderedMeta(_output); + + ASSERT_TRUE(BinLogGlobal::m_instance.GetLastReplicated() == LogIdentifier(*_output.back())); + + ASSERT_TRUE(BinLogGlobal::m_instance.GetBinlogFileName() == _binlog_file); + + //test reverting. + std::list> _log_list; + + /*This is test case is for 1> 3) of the scenarios mentioned in the implementation + of BinLogGlobal::m_instance.RevertLog function.Others too less error prone to test. */ + for (int i = 6; i <= 15; ++i) { + ::raft::Entity _tmp; + auto _p_id = _tmp.mutable_entity_id(); + _p_id->set_term(0); + _p_id->set_idx(i); + + auto _p_pre_id = _tmp.mutable_pre_log_id(); + _p_pre_id->set_term(0); + _p_pre_id->set_idx(i-1); + + auto _p_wop = _tmp.mutable_write_op(); + + int idx = i <= 7 ? i : i+1; + std::string _cur_idx = std::to_string(idx); + _p_wop->set_key("key_" + _cur_idx); + _p_wop->set_value("val_" + _cur_idx); + + _log_list.emplace_back(new MemoryLogItemFollower(_tmp)); + } + + BinLogGlobal::m_instance.RevertLog(_log_list, this->m_zero); + + //Setting Head. 
+ int _head_idx = 13317; + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(_head_idx); + + auto _p_wop = _shp_entity->mutable_write_op(); + + std::string _cur_idx = std::to_string(_head_idx); + _p_wop->set_key("key_head_" + _cur_idx); + _p_wop->set_value("val_head_" + _cur_idx); + + BinLogGlobal::m_instance.SetHead(_shp_entity); + + BinLogGlobal::m_instance.UnInitialize(); +} + +//TODO: figure out why binlog consume so much memory: 8w ~25MB. +TEST_F(TestBinlog, ConcurrentOperation) { + + //Remove existing binlog file first. + const char *_role = "test"; + std::string _binlog_file = _AURORA_BINLOG_NAME_ + std::string(".") + _role; + + if (fs::exists(fs::path(_binlog_file))) + ASSERT_TRUE(std::remove(_binlog_file.c_str())==0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + int _sum = 10000; + auto _op = [&](int thread_idx) { + + int _revert_each = 100; + int _revert_counter = 0; + + for (int i=thread_idx; i < _sum*this->m_cpu_cores;i+=this->m_cpu_cores) { + + std::list > _input; + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _shp_entity->mutable_write_op(); + std::string _cur_idx = std::to_string(i); + + char sz_val[1024] = { 0 }; + //std::snprintf(sz_val,sizeof(sz_val),"val_%d tid:%d",i,thread_idx); + std::snprintf(sz_val,sizeof(sz_val),"val_%d",i); + _p_wop->set_key("key_" + _cur_idx); + _p_wop->set_value(sz_val); + + _input.emplace_back(_shp_entity); + + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + + if (_revert_counter >= _revert_each) { + auto _revert_code = this->RevertPreviousLogs(i,thread_idx); + 
//ASSERT_TRUE(_revert_code == BinLogOperator::BinlogErrorCode::SUCCEED_TRUNCATED || _revert_code == BinLogOperator::BinlogErrorCode::OTHER_ERROR) << "error code:" << _revert_code; + if (_revert_code == BinLogOperator::BinlogErrorCode::SUCCEED_TRUNCATED) { + std::cout << "!!!!!!!! revert succeed,cur_idx:" << i << ",thread_idx:" << thread_idx << std::endl; + } + + if(_revert_code != BinLogOperator::BinlogErrorCode::SUCCEED_TRUNCATED && _revert_code != BinLogOperator::BinlogErrorCode::OTHER_ERROR) + std::cout << "----------error code:" << int(_revert_code) << ",cur_idx:" << i + << ",thread_idx:" << thread_idx << std::endl; + + _revert_counter = 0; + } + + std::cout << BinLogGlobal::m_instance.GetLastReplicated() << ",thread_idx:" << thread_idx << std::endl; + + _revert_counter++; + } + + ASSERT_TRUE(BinLogGlobal::m_instance.GetBinlogFileName() == _binlog_file); + }; + + this->LaunchMultipleThread(_op); + + LogIdentifier _end_log_id; + _end_log_id.Set(0,_sum * this->m_cpu_cores - 1); + ASSERT_TRUE(BinLogGlobal::m_instance.GetLastReplicated()==_end_log_id); + + BinLogGlobal::m_instance.UnInitialize(); +} + +TEST_F(TestBinlog, SetHead) { + + //Remove existing binlog file first. 
+ const char *_role = "setHead"; + std::string _binlog_file = _AURORA_BINLOG_NAME_ + std::string(".") + _role; + if (fs::exists(fs::path(_binlog_file))) + ASSERT_TRUE(std::remove(_binlog_file.c_str())==0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + int i = 17; + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + BinLogGlobal::m_instance.SetHead(_shp_entity); + + i = 19; + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + + BinLogGlobal::m_instance.SetHead(_shp_entity); + + BinLogGlobal::m_instance.UnInitialize(); +} + +TEST_F(TestBinlog, RotateFile) { + + ::RaftCore::Config::FLAGS_binlog_max_size = 100; + ::RaftCore::Config::FLAGS_binlog_reserve_log_num = 2; + + //Remove existing binlog file first. + const char *_role = "test"; + std::string _binlog_file = _AURORA_BINLOG_NAME_ + std::string(".") + _role; + if (fs::exists(fs::path(_binlog_file))) + ASSERT_TRUE(std::remove(_binlog_file.c_str())==0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + //Set storage first. + ASSERT_TRUE(StorageGlobal::m_instance.Initialize(_ROLE_STR_TEST_)); + + LogIdentifier _lcl_id; + _lcl_id.Set(0, 5); + ASSERT_TRUE(StorageGlobal::m_instance.Set(_lcl_id,"k","v")); + + //Construct binlog file. 
+ std::list > _input; + for (int i = 0; i < 10;++i) { + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + + _input.emplace_back(_shp_entity); + } + + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + + BinLogGlobal::m_instance.UnInitialize(); +} + +TEST_F(TestBinlog, Perf) { + + //Remove existing binlog file first. + const char *_role = "test"; + std::string _binlog_file = _AURORA_BINLOG_NAME_ + std::string(".") + _role; + if (fs::exists(fs::path(_binlog_file))) + ASSERT_TRUE(std::remove(_binlog_file.c_str())==0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + auto _start = std::chrono::steady_clock::now(); + + int _write_times = 10000; + int _count_each_time = 20; + + uint32_t _start_idx = 0; + + for (int i = 0; i < _write_times; ++i) { + std::list > _input; + + _start_idx = i * _count_each_time; + + for (int j = _start_idx; j < _count_each_time;++j) { + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(j==0?0:j-1); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(j); + + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(j)); + _p_wop->set_value("val_" + std::to_string(j)); + + _input.emplace_back(_shp_entity); + } + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + } + + auto _end = std::chrono::steady_clock::now(); + auto _ms = std::chrono::duration_cast(_end - 
_start).count(); + + std::cout << "time cost:" << _ms << " us with write " << _write_times << " times," + << _count_each_time << " items for each write, avg " << _ms/float(_write_times) << " us for each write."; + + BinLogGlobal::m_instance.UnInitialize(); +} + +#endif diff --git a/src/gtest/binlog/test_meta.h b/src/gtest/binlog/test_meta.h new file mode 100644 index 0000000..dbbb77a --- /dev/null +++ b/src/gtest/binlog/test_meta.h @@ -0,0 +1,330 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_META_H__ +#define __GTEST_META_H__ + +#include +#include +#include +#include + +#include "gtest/test_base.h" +#include "binlog/binlog_meta_data.h" +#include "binlog/binlog_singleton.h" + +/* +#define _CRTDBG_MAP_ALLOC +#include +#include */ + +using ::RaftCore::BinLog::FileMetaData; +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::DataStructure::HashNode; + +class TestMeta : public TestBase { + + public: + + TestMeta() {} + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + + protected: + + void GenerateTestMeta(int _sum,FileMetaData &_meta){ + int _counter = 0; + bool _add_batch = true; + std::list> _list; + for (int i = 0; i < _sum; ++i) { + + _counter++; + if (_counter <= 5) { + std::shared_ptr _shp_pair(new FileMetaData::IdxPair(10, i, i, 79, 79)); + _list.emplace_back(_shp_pair); + if (i == _sum - 1) + _meta.AddLogOffset(_list); + continue; + } + + if (_add_batch) { + std::shared_ptr _shp_pair(new FileMetaData::IdxPair(10, i, i, 79, 79)); + _list.emplace_back(_shp_pair); + _meta.AddLogOffset(_list); + _add_batch = false; + _list.clear(); + continue; + } + + //add single + _meta.AddLogOffset(10,i,i,79,79); + _add_batch = true; + _counter = 0; + } + } + + void CheckMetaListEqual(const std::list> &_output1, + const std::list> &_output2) { + ASSERT_EQ(_output1.size(),_output2.size()); + + auto _iter2 = _output2.cbegin(); + for (auto _iter = _output1.cbegin(); _iter != _output1.cend();++_iter,++_iter2) { + ASSERT_TRUE(**_iter == **_iter2); + } + } + + void CheckMetaEqual(const FileMetaData &meta1,const FileMetaData &meta2) { + + std::list> _output1; + meta1.GetOrderedMeta(_output1); + + std::list> _output2; + meta2.GetOrderedMeta(_output2); + + this->CheckMetaListEqual(_output1, _output2); + } + +}; + +TEST_F(TestMeta, GeneralOperation) { + + //testing IdxPair. 
+ FileMetaData::IdxPair _pair_1(10,20,30,40,50); + + FileMetaData::IdxPair _pair_2(10,21,30,40,50); + ASSERT_TRUE(_pair_1 < _pair_2); + + FileMetaData::IdxPair _pair_3(10,20,30,40,50); + ASSERT_TRUE(_pair_1 == _pair_3); + + std::cout << _pair_3.Hash() << std::endl; + + FileMetaData::IdxPair _pair_4(10,19,30,40,50); + ASSERT_TRUE(_pair_1 > _pair_4); + + LogIdentifier _log_id1; + _log_id1.Set(10,19); + ASSERT_TRUE(_pair_1 > _log_id1); + ASSERT_TRUE(_pair_1 >= _log_id1); + + LogIdentifier _log_id2; + _log_id2.Set(10,21); + ASSERT_TRUE(_pair_1<_log_id2); + ASSERT_TRUE(_pair_1<=_log_id2); + + ::raft::EntityID _entity_id; + _entity_id.set_term(10); + _entity_id.set_idx(21); + ASSERT_TRUE(_pair_1<_entity_id); + + _entity_id.set_idx(19); + ASSERT_TRUE(_pair_1>_entity_id); + ASSERT_TRUE(_pair_1!=_entity_id); + + _entity_id.set_idx(20); + ASSERT_TRUE(_pair_1==_entity_id); + + //testing FileMetaData. + int _sum = 10000; + FileMetaData _meta; + this->GenerateTestMeta(_sum,_meta); + + std::list> _output; + _meta.GetOrderedMeta(_output); + ASSERT_EQ(_sum, _output.size()); + + int _delete_point1 = _sum / 3; + FileMetaData::IdxPair _pair_d1(10,_delete_point1 , _delete_point1, 79, 79); + _meta.Delete(_pair_d1); + + int _delete_point2 = _sum / 3*2; + FileMetaData::IdxPair _pair_d2(10, _delete_point2, _delete_point2, 79, 79); + _meta.Delete(_pair_d2); + + _meta.GetOrderedMeta(_output); + + ASSERT_EQ(_sum - 2, _output.size()); + + int _cur_val = 0; + auto _iter = _output.cbegin(); + for (int i = 0; i < _sum; ++i) { + + if (_cur_val == _delete_point1 || _cur_val == _delete_point2) + continue; + + ASSERT_TRUE(*(*_iter) == FileMetaData::IdxPair(10, i, 0, 0, 0)); + + _iter++; + _cur_val++; + } + + //testing buf. 
+ uint32_t _buf_size = 0;; + unsigned char* _pbuf = nullptr; + std::tie(_pbuf,_buf_size) = _meta.GenerateBuffer(); + + std::cout << "generated buf size:" << _buf_size << std::endl; + + FileMetaData _meta_2; + _meta_2.ConstructMeta(_pbuf,_buf_size); + + this->CheckMetaEqual(_meta,_meta_2); + + //Remove existing binlog file first. + const char *_role = "test"; + ASSERT_EQ(std::remove(std::string(_AURORA_BINLOG_NAME_ + std::string(".") + _role).c_str()),0); + + //testing file + BinLogGlobal::m_instance.Initialize(_role); + + //Construct binlog file. + std::list > _input; + for (int i = 0; i < 10;++i) { + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + + _input.emplace_back(_shp_entity); + } + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + + BinLogGlobal::m_instance.GetOrderedMeta(_output); + + BinLogGlobal::m_instance.UnInitialize(); + + //starting real test. 
+ std::FILE* _hfile = std::fopen(BinLogGlobal::m_instance.GetBinlogFileName().c_str(),_AURORA_BINLOG_OP_MODE_); + + ASSERT_EQ(std::fseek(_hfile, 0, SEEK_SET), 0); + FileMetaData _meta_3; + _meta_3.ConstructMeta(_hfile); + + std::list> _output2; + _meta_3.GetOrderedMeta(_output2); + + this->CheckMetaListEqual(_output,_output2); + +} + +TEST_F(TestMeta, MetaAllocate) { + + auto _start = this->StartTimeing(); + LogIdentifier _log_id; + this->EndTiming(_start, "new id"); + + _start = this->StartTimeing(); + ::RaftCore::DataStructure::LockFreeHash m_meta_hash; + this->EndTiming(_start, "new LockFreeHash"); + + _start = this->StartTimeing(); + FileMetaData _meta; + this->EndTiming(_start, "new meta"); + + std::cout << "sizeof meta:" << sizeof(_meta) << std::endl; +} + +template +class HashNodeTest final { + +public: + + HashNodeTest(const std::shared_ptr &key, const std::shared_ptr &val) noexcept { + this->m_shp_key = key; + this->m_shp_val = val; + this->m_next = nullptr; + } + +private: + + std::shared_ptr m_shp_key; + + std::shared_ptr m_shp_val; + + HashNodeTest* m_next = nullptr; + + uint32_t m_iterating_tag = 0; + +private: + + HashNodeTest(const HashNodeTest&) = delete; + + HashNodeTest& operator=(const HashNodeTest&) = delete; + +}; + +TEST_F(TestMeta, MetaLeak) { + + std::cout << "sizeof(FileMetaData) :" << sizeof(FileMetaData) << std::endl;; + + std::cout << "sizeof(::RaftCore::DataStructure::LockFreeHash) :" + << sizeof(::RaftCore::DataStructure::LockFreeHash) << std::endl; + + std::cout << "sizeof(HashNodeTest) :" << sizeof(HashNodeTest) << std::endl; + + std::cout << "sizeof(std::atomic*>) :" + << sizeof(std::atomic*>) << std::endl; + + std::cout << "sizeof(FileMetaData::IdxPair) :" << sizeof(FileMetaData::IdxPair) << std::endl; + + { + uint32_t _count = ::RaftCore::Config::FLAGS_meta_count; + + //FileMetaData is not the cause. 
+ /* FileMetaData _meta; + for (std::size_t i = 0; i < _count; ++i) + _meta.AddLogOffset(0, i, 7, 1, 1); */ + + ::RaftCore::DataStructure::LockFreeHash m_meta_hash; + for (std::size_t i = 0; i < _count; ++i) { + //std::shared_ptr _shp_new_record(new FileMetaData::IdxPair(0, i, 7, 1, 1)); + //m_meta_hash.Insert(_shp_new_record); + //HashNodeTest* p_new_node = new HashNodeTest(_shp_new_record, nullptr); + + //std::shared_ptr _shp_new_record(new uint64_t(i), [](auto *P) {}); + + std::shared_ptr _shp_new_record(new uint64_t(i)); + HashNodeTest* p_new_node = new HashNodeTest(_shp_new_record, nullptr); + } + + std::cout << "check buf size now." << sizeof(HashNodeTest) << std::endl; + } + + std::cout << "clear now." << std::endl;; + + //_CrtSetReportMode(_CRT_ERROR, _CRTDBG_MODE_DEBUG); + //_CrtDumpMemoryLeaks(); +} + +#endif diff --git a/src/gtest/candidate/test_all.h b/src/gtest/candidate/test_all.h new file mode 100644 index 0000000..f2dde43 --- /dev/null +++ b/src/gtest/candidate/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_ALL_CANDIDATE_H__ +#define __GTEST_ALL_CANDIDATE_H__ + +#include "gtest/candidate/test_candidate.h" + +#endif diff --git a/src/gtest/candidate/test_candidate.h b/src/gtest/candidate/test_candidate.h new file mode 100644 index 0000000..cade29f --- /dev/null +++ b/src/gtest/candidate/test_candidate.h @@ -0,0 +1,59 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_CANDIDATE_H__ +#define __GTEST_CANDIDATE_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "candidate/candidate_view.h" + +using ::RaftCore::Candidate::CandidateView; + +class TestCandidate : public TestBase { + + public: + + TestCandidate() {} + + ~TestCandidate() {} + + protected: + + virtual void SetUp() override {} + + virtual void TearDown() override {} + +}; + +TEST_F(TestCandidate, GeneralOperation) { + + CandidateView::Initialize(); + CandidateView::UnInitialize(); + + std::cout << "test candidate end." 
<< std::endl; + +} + + +#endif diff --git a/src/gtest/common/test_all.h b/src/gtest/common/test_all.h new file mode 100644 index 0000000..3ebabf5 --- /dev/null +++ b/src/gtest/common/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_COMMON_H__ +#define __GTEST_ALL_COMMON_H__ + +#include "gtest/common/test_comm.h" + +#endif diff --git a/src/gtest/common/test_comm.h b/src/gtest/common/test_comm.h new file mode 100644 index 0000000..6db1714 --- /dev/null +++ b/src/gtest/common/test_comm.h @@ -0,0 +1,123 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_COMMON_H__ +#define __GTEST_COMMON_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "common/comm_view.h" +#include "common/comm_defs.h" +#include "common/log_identifier.h" +#include "leader/memory_log_leader.h" +#include "follower/memory_log_follower.h" + +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Leader::MemoryLogItemLeader; +using ::RaftCore::Follower::MemoryLogItemFollower; +using ::RaftCore::Common::CommonView; +using ::raft::EntityID; + +class TestComm : public TestBase { + + public: + + TestComm() {} + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + +}; + +TEST_F(TestComm, GeneralOperation) { + + LogIdentifier _obj1; + _obj1.Set(3, 17); + + LogIdentifier _obj2; + _obj2.Set(_obj1); + ASSERT_TRUE(_obj1==_obj2); + + _obj2.m_index = 19; + ASSERT_TRUE(_obj1!=_obj2); + + _obj2.m_index = 17; + ASSERT_TRUE(_obj1<=_obj2); + + _obj2.m_index = 13; + ASSERT_TRUE(_obj1>_obj2); + + _obj2.m_index = 17; + ASSERT_TRUE(_obj1>=_obj2); + + std::cout << _obj1.ToString() << std::endl; + std::cout << _obj1 << std::endl; + + EntityID _entity_id; + _entity_id.set_term(3); + _entity_id.set_idx(17); + + ASSERT_TRUE(::RaftCore::Common::ConvertID(_entity_id)== _obj2); + ASSERT_TRUE(::RaftCore::Common::EntityIDEqual(_entity_id, _obj2)); + + _obj2.m_index = 19; + //ASSERT_TRUE(::RaftCore::Common::EntityIDNotEqual(_entity_id, _obj2)); + + _obj2.m_index = 13; + ASSERT_TRUE(::RaftCore::Common::EntityIDLarger(_entity_id, _obj2)); + + _obj2.m_index = 19; + ASSERT_TRUE(::RaftCore::Common::EntityIDSmaller(_entity_id, _obj2)); + + _obj2.m_index = 17; + //ASSERT_TRUE(::RaftCore::Common::EntityIDSmallerEqual(_entity_id, _obj2)); + + MemoryLogItemLeader _ldr1(3,17); + MemoryLogItemLeader _ldr2(3,19); + MemoryLogItemLeader _ldr3(*_ldr1.GetEntity()); + + ASSERT_TRUE(_ldr1<_ldr2); + ASSERT_TRUE(_ldr2>_ldr1); + ASSERT_TRUE(_ldr1==_ldr3); + + MemoryLogItemFollower _f1(3,17); + + 
MemoryLogItemFollower _f2(*_f1.GetEntity()); + MemoryLogItemFollower _f3(3,19); + + ASSERT_TRUE(_f1<=_f2); + ASSERT_TRUE(_f1<_f3); + ASSERT_TRUE(_f3>_f1); + ASSERT_TRUE(_f1==_f2); + ASSERT_TRUE(_f1!=_f3); + + CommonView::Initialize(); + + CommonView::UnInitialize(); +} + + +#endif diff --git a/src/gtest/election/test_all.h b/src/gtest/election/test_all.h new file mode 100644 index 0000000..63ad054 --- /dev/null +++ b/src/gtest/election/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_ELECTION_H__ +#define __GTEST_ALL_ELECTION_H__ + +#include "gtest/election/test_election.h" + +#endif diff --git a/src/gtest/election/test_election.h b/src/gtest/election/test_election.h new file mode 100644 index 0000000..6662844 --- /dev/null +++ b/src/gtest/election/test_election.h @@ -0,0 +1,210 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+ +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ELECTION_H__ +#define __GTEST_ELECTION_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "global/global_env.h" +#include "election/election.h" + +using ::RaftCore::Election::ElectionMgr; +using ::RaftCore::Election::RaftRole; + +class TestElection : public TestMultipleBackendFollower { + + public: + + TestElection() {} + + protected: + + virtual void SetUp() override { + this->PrepareBinlogFiles(); + + this->StartFollowersFunc(TestElection::GenerateFileName); + + ::RaftCore::Global::GlobalEnv::InitialEnv(); + } + + virtual void TearDown() override { + ::RaftCore::Global::GlobalEnv::UnInitialEnv(); + this->EndFollowers(); + } + + private: + + void PrepareBinlogFiles()noexcept { + int _log_entry_base_num = 5; + for (int idx = 0; idx < _TEST_FOLLOWER_NUM_;++idx) { + auto _role = this->GenerateFileRole(idx); + auto _binlog_file = this->GenerateFileName(idx); + + this->ConstructBinlogFile(_binlog_file,_role , _log_entry_base_num++); + } + + //Construct current binlog. + this->ConstructBinlogFile(_TEST_LEADER_BINLOG_,"leader", _log_entry_base_num++); + } + + void ConstructBinlogFile(const char* binlog_file,const char* role, + int entry_num) noexcept { + + if (fs::exists(fs::path(binlog_file))) + ASSERT_TRUE(std::remove(binlog_file)==0); + + //testing file + BinLogGlobal::m_instance.Initialize(role); + + //Construct binlog file. 
+ std::list > _input; + for (int i = 0; i < entry_num;++i) { + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _shp_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _shp_entity->mutable_write_op(); + _p_wop->set_key("key_" + std::to_string(i)); + _p_wop->set_value("val_" + std::to_string(i)); + + _input.emplace_back(_shp_entity); + } + ASSERT_TRUE(BinLogGlobal::m_instance.AppendEntry(_input)); + + std::list> _output; + BinLogGlobal::m_instance.GetOrderedMeta(_output); + + //A routine test for binlog operator. + ASSERT_TRUE(BinLogGlobal::m_instance.GetLastReplicated() == LogIdentifier(*_output.back())); + ASSERT_STREQ(BinLogGlobal::m_instance.GetBinlogFileName().c_str(),binlog_file); + + BinLogGlobal::m_instance.UnInitialize(); + } + + static const char* GenerateFileName(int idx) noexcept { + auto _role = GenerateFileRole(idx); + + static char _binlog_file[100] = {}; + std::snprintf(_binlog_file,sizeof(_binlog_file),"%s.%s",_AURORA_BINLOG_NAME_,_role); + + return _binlog_file; + } + + static const char* GenerateFileRole(int idx) noexcept { + static char _role[100] = {}; + std::snprintf(_role,sizeof(_role),"election-%d",idx); + return _role; + } +}; + +TEST_F(TestElection, GeneralOperation) { + + //ElectionMgr::Initialize(); + + /*First make sure term 3 is not in the 'election.config' config file.Only + after that can we start the unit test. 
*/ + + ASSERT_TRUE(ElectionMgr::TryVote(3, "12.34.56.78:100")==""); + + ASSERT_TRUE(ElectionMgr::TryVote(3, "12.34.56.78:101")=="12.34.56.78:100"); + + ElectionMgr::AddVotingTerm(7, "12.34.56.78:200"); + + ElectionMgr::AddVotingTerm(7, "12.34.56.78:201"); + + ASSERT_TRUE(ElectionMgr::TryVote(4, "12.34.56.78:200")==""); + + ElectionMgr::SwitchRole(RaftRole::FOLLOWER,"12.34.56.78:300"); + + ElectionMgr::SwitchRole(RaftRole::CANDIDATE); + + ElectionMgr::SwitchRole(RaftRole::LEADER); + + ElectionMgr::SwitchRole(RaftRole::FOLLOWER,"12.34.56.78:300"); + + ElectionMgr::ElectionThread(); + + //Waiting above thread to get fully started. + std::this_thread::sleep_for(std::chrono::seconds(3)); + + ElectionMgr::NotifyNewLeaderEvent(4,"12.34.56.78:300"); + + ElectionMgr::WaitElectionThread(); + + //ElectionMgr::UnInitialize(); + + std::cout << "test election end." << std::endl; +} + +TEST_F(TestElection, Election) { + + /*Waiting current leader sending heartbeat msg to followers thus triggering theirs heartbeat + checking mechanism. */ + std::this_thread::sleep_for(std::chrono::seconds(2)); + + //Start grpc service. + std::thread* _th = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + //Wait for server get fully started + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + ElectionMgr::SwitchRole(RaftRole::FOLLOWER,"12.34.56.78:300"); + + //Waiting for detecting the fake leader has gone. + std::cout << "Waiting for detecting the fake leader has gone..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(3)); + + //Waiting for a new leader being elected out. + std::cout << "Waiting for a new leader being elected out..." 
<< std::endl; + std::this_thread::sleep_for(std::chrono::seconds(3)); + + Topology _topo; + CTopologyMgr::Read(&_topo); + + std::list _valid_new_leaders{"127.0.0.1:10022","127.0.0.1:10010"}; + + ASSERT_TRUE(std::find(_valid_new_leaders.cbegin(), _valid_new_leaders.cend(), _topo.m_leader) != _valid_new_leaders.cend()) + << "new leader invalid: " << _topo.m_leader; + + std::cout << "new leader elected :" << _topo.m_leader << " under term:" << ElectionMgr::m_cur_term.load() << std::endl; + + //Waiting for the new leader finish syncing logs with its followers. + std::cout << "Waiting for the new leader finish syncing logs with its followers..." << std::endl; + std::this_thread::sleep_for(std::chrono::seconds(5)); + + ::RaftCore::Global::GlobalEnv::StopServer(); + + //Waiting for the above spawned thread exist. + std::this_thread::sleep_for(std::chrono::seconds(1)); +} + + +#endif diff --git a/src/gtest/follower/test_all.h b/src/gtest/follower/test_all.h new file mode 100644 index 0000000..9c8aad8 --- /dev/null +++ b/src/gtest/follower/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_ALL_FOLLOWER_H__ +#define __GTEST_ALL_FOLLOWER_H__ + +#include "gtest/follower/test_follower.h" + +#endif diff --git a/src/gtest/follower/test_follower.h b/src/gtest/follower/test_follower.h new file mode 100644 index 0000000..be7d3f3 --- /dev/null +++ b/src/gtest/follower/test_follower.h @@ -0,0 +1,60 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_FOLLOWER_H__ +#define __GTEST_FOLLOWER_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "follower/follower_view.h" + +using ::RaftCore::Follower::FollowerView; + +class TestFollower : public TestBase { + + public: + + TestFollower() {} + + protected: + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + +}; + +TEST_F(TestFollower, GeneralOperation) { + + FollowerView::Initialize(); + FollowerView::Clear(); + FollowerView::UnInitialize(); + + std::cout << "test follower end." 
<< std::endl; + +} + + +#endif diff --git a/src/gtest/global/test_all.h b/src/gtest/global/test_all.h new file mode 100644 index 0000000..fbf3807 --- /dev/null +++ b/src/gtest/global/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_GLOBAL_H__ +#define __GTEST_ALL_GLOBAL_H__ + +#include "gtest/global/test_global.h" + +#endif diff --git a/src/gtest/global/test_global.h b/src/gtest/global/test_global.h new file mode 100644 index 0000000..85dcea4 --- /dev/null +++ b/src/gtest/global/test_global.h @@ -0,0 +1,99 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_GLOBAL_H__ +#define __GTEST_GLOBAL_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "global/global_env.h" +#include "state/state_mgr.h" + +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::State::RaftRole; + +class TestGlobalEnv : public TestMultipleBackendFollower { + + public: + + TestGlobalEnv() {} + + virtual void SetUp() override { + this->StartFollowers(); + } + + virtual void TearDown() override { + this->EndFollowers(); + } + +}; + +TEST_F(TestGlobalEnv, GeneralOperation) { + + ::RaftCore::Global::GlobalEnv::InitialEnv(); + + std::cout << "server is going to run 1st time..." << std::endl; + + std::thread* _th = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + + std::this_thread::sleep_for(std::chrono::seconds(3)); + ::RaftCore::Global::GlobalEnv::StopServer(); + ::RaftCore::Global::GlobalEnv::UnInitialEnv(RaftRole::LEADER); + + _th->join(); + + std::cout << "server is stopped now 1st time..." << std::endl; + + ::RaftCore::Global::GlobalEnv::InitialEnv(true); + + std::cout << "server is going to run 2nd time..." << std::endl; + + _th = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + + std::this_thread::sleep_for(std::chrono::seconds(3)); + ::RaftCore::Global::GlobalEnv::StopServer(); + ::RaftCore::Global::GlobalEnv::UnInitialEnv(RaftRole::LEADER); + _th->join(); + + std::cout << "server is stopped now 2nd time..." << std::endl; + + + ::RaftCore::Global::GlobalEnv::InitialEnv(true); + + std::cout << "server is going to run 3rd time..." << std::endl; + + _th = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + + ::RaftCore::Global::GlobalEnv::ShutDown(); + + std::cout << "server is stopped now 3rd time..." 
<< std::endl; +} + + +#endif diff --git a/src/gtest/gtest_main.cc b/src/gtest/gtest_main.cc new file mode 100644 index 0000000..398c0e7 --- /dev/null +++ b/src/gtest/gtest_main.cc @@ -0,0 +1,36 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include + +#include "gtest/test_all.h" + +int main(int argc, char **argv) { + + ::testing::InitGoogleTest(&argc, argv); + + google::ParseCommandLineFlags(&argc, &argv, true); + google::InitGoogleLogging(argv[0]); + + FLAGS_log_dir = "."; + FLAGS_logbuflevel = -1; + + return RUN_ALL_TESTS(); +} + + diff --git a/src/gtest/guid/test_all.h b/src/gtest/guid/test_all.h new file mode 100644 index 0000000..76e2c69 --- /dev/null +++ b/src/gtest/guid/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_ALL_GUID_H__ +#define __GTEST_ALL_GUID_H__ + +#include "gtest/guid/test_guid.h" + +#endif diff --git a/src/gtest/guid/test_guid.h b/src/gtest/guid/test_guid.h new file mode 100644 index 0000000..003d13b --- /dev/null +++ b/src/gtest/guid/test_guid.h @@ -0,0 +1,133 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_GUID_H__ +#define __GTEST_GUID_H__ + +#include +#include +#include + +#include "boost/filesystem.hpp" + +#include "gtest/test_base.h" +#include "guid/guid_generator.h" + +using ::RaftCore::Guid::GuidGenerator; +namespace fs = ::boost::filesystem; + +class TestGuid : public TestBase { + + public: + + TestGuid() {} + + virtual void SetUp() override { + m_i.store(0); + } + + virtual void TearDown() override { + } + + protected: + + std::vector *m_vec_output = new std::vector[this->m_cpu_cores]; + + std::atomic m_i; +}; + +TEST_F(TestGuid, GeneralOperation) { + + uint64_t _base = 100; + GuidGenerator::Initialize(_base); + + uint64_t _pre = _base; + uint64_t _last_release = _base; + + for (int i = 1; i <= 50 ; ++i) { + GuidGenerator::GUIDPair _pair = GuidGenerator::GenerateGuid(); + + uint64_t _cur = _last_release + 1; + + ASSERT_EQ(_pair.m_pre_guid,_pre); + ASSERT_EQ(_pair.m_cur_guid,_cur); + + _pre = _cur; + _last_release = _cur; + } + + _base = 300; + _last_release = _base; + 
GuidGenerator::SetNextBasePoint(_base); + GuidGenerator::GUIDPair _pair = GuidGenerator::GenerateGuid(); + + ASSERT_EQ(_pair.m_pre_guid, _base); + ASSERT_EQ(_pair.m_cur_guid, _last_release + 1); + + ASSERT_EQ(GuidGenerator::GetLastReleasedGuid(), _last_release + 1); + + GuidGenerator::UnInitialize(); +} + +TEST_F(TestGuid, ConcurrentOperation) { + + GuidGenerator::Initialize(); + + auto _op = [&](int idx) { + + int _counter = 0; + + for (int i = 1; i <= 100 ; ++i) { + GuidGenerator::GUIDPair _pair = GuidGenerator::GenerateGuid(); + + std::cout << std::this_thread::get_id() << " generate: " << _pair.m_pre_guid << "|" + << _pair.m_cur_guid << std::endl; + + _counter++; + + if (_counter > 20) { + std::cout << std::this_thread::get_id() << " last guid: " << GuidGenerator::GetLastReleasedGuid() << std::endl; + GuidGenerator::SetNextBasePoint(_pair.m_cur_guid - 5); + _counter = 0; + } + + m_vec_output[idx].push_back(_pair.m_cur_guid); + + } + }; + + this->LaunchMultipleThread(_op); + + //merge vectors + std::vector _total_vec; + for (int i = 0; i < this->m_cpu_cores; ++i) + _total_vec.insert(_total_vec.cend(),m_vec_output[i].cbegin(),m_vec_output[i].cend()); + + std::sort(_total_vec.begin(), _total_vec.end()); + + //for (const auto & _item : _total_vec) + // std::cout << "final :" << _item << std::endl; + + GuidGenerator::UnInitialize(); +} + + + +#endif diff --git a/src/gtest/leader/test_all.h b/src/gtest/leader/test_all.h new file mode 100644 index 0000000..9b50f3f --- /dev/null +++ b/src/gtest/leader/test_all.h @@ -0,0 +1,28 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_LEADER_H__ +#define __GTEST_ALL_LEADER_H__ + +#include "gtest/leader/test_conn_pool.h" +#include "gtest/leader/test_follower_entity.h" +#include "gtest/leader/test_leader_view.h" + +#endif diff --git a/src/gtest/leader/test_conn_pool.h b/src/gtest/leader/test_conn_pool.h new file mode 100644 index 0000000..908aef0 --- /dev/null +++ b/src/gtest/leader/test_conn_pool.h @@ -0,0 +1,107 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_CONNECTION_POOL_H__ +#define __GTEST_CONNECTION_POOL_H__ + +#include +#include +#include + +#ifndef _CONN_TEST_ +#define _CONN_TEST_ +#endif + +#include "gtest/test_base.h" +#include "client/client_impl.h" +#include "leader/channel_pool.h" +#include "leader/client_pool.h" + +using ::raft::HeartBeatRequest; +using ::RaftCore::Leader::ChannelPool; +using ::RaftCore::Leader::ClientPool; +using ::RaftCore::Client::AppendEntriesAsyncClient; + +class TestConnPool : public TestSingleBackendFollower { + + public: + + TestConnPool() {} + + protected: + + virtual void SetUp() override { + this->m_shp_channel_pool.reset(new ChannelPool(this->m_follower_svc_addr,::RaftCore::Config::FLAGS_channel_pool_size)); + auto _channel = this->m_shp_channel_pool->GetOneChannel(); + + for (int i = 0; i < this->m_cpu_cores; ++i) { + std::shared_ptr _shp_client( + new AppendEntriesAsyncClient(_channel, GlobalEnv::GetClientCQInstance())); + this->m_obj_pool.Back(_shp_client); + } + } + + virtual void TearDown() override { } + + std::shared_ptr m_shp_channel_pool; + + ClientPool m_obj_pool; +}; + +TEST_F(TestConnPool, GeneralOperation) { + + std::cout << "start.." << std::endl; + + auto _shp_client = m_obj_pool.Fetch(); + m_obj_pool.Back(_shp_client); + + auto _shp_channel = this->m_shp_channel_pool->GetOneChannel(); + + //Test 0 term is okay. + this->m_shp_channel_pool->HeartBeat(0,this->m_leader_addr); + + ASSERT_EQ(m_obj_pool.GetParentFollower(),nullptr); + + std::cout << "end.." 
<< std::endl; +} + +TEST_F(TestConnPool, ConcurrentOperation) { + + auto _op = [&](int thread_idx) { + + int _run_times = 5000; + for (int i = 0; i < _run_times; ++i) { + auto _shp_client = m_obj_pool.Fetch(); + ASSERT_TRUE(_shp_client); + _shp_client->PushCallBackArgs(nullptr); + _shp_client->PushCallBackArgs(nullptr); + _shp_client->Reset(); + + m_obj_pool.Back(_shp_client); + + this->m_shp_channel_pool->HeartBeat(0,this->m_leader_addr); + } + }; + + this->LaunchMultipleThread(_op); +} + + +#endif diff --git a/src/gtest/leader/test_follower_entity.h b/src/gtest/leader/test_follower_entity.h new file mode 100644 index 0000000..d62b827 --- /dev/null +++ b/src/gtest/leader/test_follower_entity.h @@ -0,0 +1,54 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_FOLLOWER_ENTITY_H__ +#define __GTEST_FOLLOWER_ENTITY_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "leader/follower_entity.h" + +using ::RaftCore::Leader::FollowerEntity; + +class TestFollowerEntity : public TestSingleBackendFollower { + + public: + + TestFollowerEntity() {} + + protected: + + virtual void SetUp() override { } + + virtual void TearDown() override { } + +}; + +TEST_F(TestFollowerEntity, GeneralOperation) { + + FollowerEntity _obj_entity(this->m_follower_svc_addr); + +} + + +#endif diff --git a/src/gtest/leader/test_leader_view.h b/src/gtest/leader/test_leader_view.h new file mode 100644 index 0000000..d4dbc83 --- /dev/null +++ b/src/gtest/leader/test_leader_view.h @@ -0,0 +1,93 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_LEADER_VIEW_H__ +#define __GTEST_LEADER_VIEW_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "election/election.h" +#include "storage/storage_singleton.h" +#include "leader/memory_log_leader.h" + +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Leader::BackGroundTask::LogReplicationContext; +using ::RaftCore::Leader::BackGroundTask::ReSyncLogContext; +using ::RaftCore::Leader::BackGroundTask::SyncDataContenxt; +using ::RaftCore::Leader::TypePtrFollowerEntity; +using ::RaftCore::Leader::MemoryLogItemLeader; +using ::RaftCore::Leader::FollowerStatus; +using ::RaftCore::Election::ElectionMgr; +using ::RaftCore::Storage::StorageGlobal; + +namespace fs = boost::filesystem; + +class TestLeaderView : public TestMultipleBackendFollower { + + public: + + TestLeaderView() {} + + protected: + + virtual void SetUp() override { + this->StartFollowers(); + + //-----------------------Initializing server.-----------------------// + ::RaftCore::Global::GlobalEnv::InitialEnv(); + } + + virtual void TearDown() override { + std::cout << "destructor of TestMultipleBackendFollower called" << std::endl; + + ::RaftCore::Global::GlobalEnv::UnInitialEnv(); + + this->EndFollowers(); + } +}; + +TEST_F(TestLeaderView, GeneralOperation) { + + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + + LogIdentifier _lcl; + _lcl.Set(0,_lrl.m_index - 10); + + StorageGlobal::m_instance.Set(_lcl, "lcl_key", "lcl_val"); + + LogIdentifier _start_point; + _start_point.Set(ElectionMgr::m_cur_term.load(), _lcl.m_index + 1); + + std::string _follower_addr = this->m_local_ip + ":" + std::to_string(this->m_follower_port); + TypePtrFollowerEntity _shp_follower(new FollowerEntity(_follower_addr,FollowerStatus::RESYNC_LOG)); + + std::shared_ptr _shp_task(new ReSyncLogContext()); + _shp_task->m_last_sync_point.Set(_start_point); + _shp_task->m_follower = _shp_follower; + LeaderView::ReSyncLogCB(_shp_task); + + std::shared_ptr 
_shp_sync_data_ctx(new SyncDataContenxt(_shp_follower)); + LeaderView::SyncDataCB(_shp_sync_data_ctx); +} + +#endif diff --git a/src/gtest/member/test_all.h b/src/gtest/member/test_all.h new file mode 100644 index 0000000..eab7212 --- /dev/null +++ b/src/gtest/member/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_MEMBER_H__ +#define __GTEST_ALL_MEMBER_H__ + +#include "gtest/member/test_member.h" + +#endif diff --git a/src/gtest/member/test_member.h b/src/gtest/member/test_member.h new file mode 100644 index 0000000..680eff8 --- /dev/null +++ b/src/gtest/member/test_member.h @@ -0,0 +1,193 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_MEMBER_H__ +#define __GTEST_MEMBER_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "global/global_env.h" +#include "common/comm_defs.h" +#include "binlog/binlog_singleton.h" +#include "storage/storage_singleton.h" +#include "member/member_manager.h" + +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Member::MemberMgr; +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::Storage::StorageGlobal; +using ::RaftCore::Leader::BackGroundTask::TwoPhaseCommitContext; + +class TestMember : public TestCluster { + + public: + + //Start several normal servers and 2 empty servers. + TestMember() : TestCluster(2) {} + + protected: + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + virtual void SetStoragePoint(int backoff = 10) { + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + LogIdentifier _lcl; + + _lcl.Set(0, _lrl.m_index - backoff); + StorageGlobal::m_instance.Set(_lcl,"lcl_key","lcl_val"); + } + + virtual void IssueMemberChangeRequest() { + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_leader_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + ::raft::MemberChangeRequest _memchg_req; + ::raft::MemberChangeResponse _memchg_rsp; + + std::set _new_cluster = this->m_cluster_leader_not_gone; + if (::RaftCore::Config::FLAGS_member_leader_gone) + _new_cluster = this->m_cluster_leader_gone; + + for (const auto &_item : _new_cluster) { + auto *_node = _memchg_req.add_node_list(); + *_node = _item; + } + + ::grpc::ClientContext _contextX; + ::grpc::Status _status = _stub->MembershipChange(&_contextX, _memchg_req, &_memchg_rsp); + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_memchg_rsp.client_comm_rsp().result()==::raft::ErrorCode::SUCCESS) << "ClientWrite fail,detail:" << _memchg_rsp.DebugString(); + } + + protected: + + std::set 
m_cluster_leader_not_gone{"127.0.0.1:10010","127.0.0.1:10022", // old nodes. + "127.0.0.1:10030","127.0.0.1:10031"}; // new empty nodes. + + std::set m_cluster_leader_gone{"127.0.0.1:10022", // old nodes. + "127.0.0.1:10030","127.0.0.1:10031"}; // new empty nodes. + +}; + +TEST_F(TestMember, GeneralOperation) { + + std::cout << "member version:" << MemberMgr::GetVersion(); + + this->SetStoragePoint(); + + std::set _new_cluster = this->m_cluster_leader_not_gone; + if (::RaftCore::Config::FLAGS_member_leader_gone) + _new_cluster = this->m_cluster_leader_gone; + + MemberMgr::PullTrigger(_new_cluster); + MemberMgr::ContinueExecution(); + + auto _old_version = MemberMgr::GetVersion(); + + //Give enough time to wait for the membership change finish. + while(true) { + ReadLock _r_lock(MemberMgr::m_mutex); + auto _new_version = MemberMgr::m_joint_summary.m_version; + //Note:A two phase membership replication will eventually causing version increased by 2. + if (_new_version == (_old_version + 2)) + break; + std::this_thread::sleep_for(std::chrono::seconds(1)); + } + + //Wait routine to finish. + std::this_thread::sleep_for(std::chrono::seconds(2)); + + //Wait for the new cluster to elect out a new leader. + if (::RaftCore::Config::FLAGS_member_leader_gone) + std::this_thread::sleep_for(std::chrono::seconds(5)); +} + +TEST_F(TestMember, WriteData) { + + this->SetStoragePoint(); + + //Step 1. start membership changing but just finish phaseI. + this->IssueMemberChangeRequest(); + + //Waiting for sync-data process to finish and leader switched to joint consensus state. + std::this_thread::sleep_for(std::chrono::seconds(20)); + + //Step 2. start writing data through server's public rpc interface. + this->ClientWrite("memberchg_key","memberchg_val"); + + std::this_thread::sleep_for(std::chrono::seconds(3)); + + //Step 3. manually check whether the appending log requests are propagated to both C-old and C-new,and judge by it. + + //Step 4. 
resume execution of the pending routine thread. + MemberMgr::ContinueExecution(); + + //Waiting for continue to complete. + std::this_thread::sleep_for(std::chrono::seconds(2)); + + //Step 5. write again + this->ClientWrite("memberchg_second_key","memberchg_second_val"); + + //Step 6. manually check whether the appending log requests are propagated to only C-new,and judge by it. +} + +TEST_F(TestMember, Election) { + + //Use a short binlog. + this->SetStoragePoint(5); + + //Step 1. start membership changing but just finish phaseI. + this->IssueMemberChangeRequest(); + + //Waiting for sync-data process to finish and leader switched to joint consensus state. + std::this_thread::sleep_for(std::chrono::seconds(5)); + + //Step 2. start election by start and then stop the heartbeat. + ::RaftCore::Config::FLAGS_do_heartbeat = true; + + LOG(INFO) << "starting heartbeat..." << std::endl;; + + //Waiting for the heartbeat message to be sent out. + std::this_thread::sleep_for(std::chrono::seconds(3)); + + LOG(INFO) << "stopping heartbeat..." << std::endl;; + + //Stop sending heartbeat. + ::RaftCore::Config::FLAGS_heartbeat_oneshot = true; + + LOG(INFO) << "wait heartbeat timeout..." << std::endl;; + + //Waiting for heartbeat timeout + std::this_thread::sleep_for(std::chrono::seconds(3)); + + //Waiting for election finished. + std::this_thread::sleep_for(std::chrono::seconds(3)); + + //Step 3. manually check whether the election requests are propagated to both C-old and C-new,and judge by it. 
+ +} + + +#endif diff --git a/src/gtest/other/Makefile b/src/gtest/other/Makefile new file mode 100644 index 0000000..814dee6 --- /dev/null +++ b/src/gtest/other/Makefile @@ -0,0 +1,76 @@ + + +CXX = g++ +CXXFLAGS = -std=c++17 -O3 + +SRCDIR = ../../../src +BINDIR = bin +OBJDIR = $(BINDIR)/object + +THIRD_PARTY_DIR=./third_party + +INC = -I$(THIRD_PARTY_DIR)/grpc/include\ + +LIB = -L/usr/local/lib \ + -lprotobuf -pthread -lgrpc++ -lgrpc -lgrpc++_reflection\ + -lz -ldl \ + +PROTOS_PATH = $(SRCDIR)/protocol + +vpath %.proto $(PROTOS_PATH) + +PROTO_FLAG=$(BINDIR)/compile_proto + +IDIOT_SVR=$(BINDIR)/idiot_svr +IDIOT_CLIENT=$(BINDIR)/idiot_client + +PROTO_FILE_PREFIX=raft + +-include prepare $(PROTO_FLAG) + +.PHONY: all +all: prepare $(IDIOT_SVR) $(IDIOT_CLIENT) + +.PHONY: prepare +prepare: + mkdir -p $(OBJDIR) $(BINDIR) + +ALL_SRC_FILES=$(wildcard *.cc) + +OBJ = $(patsubst %.cc, $(OBJDIR)/%.o, $(ALL_SRC_FILES)) + +SVR_CC_OBJ=%/idiot_server.o +CLIENT_CC_OBJ=%idiot_client.o + +SVR_OBJ = $(filter-out $(CLIENT_CC_OBJ), $(OBJ) ) +CLIENT_OBJ = $(filter-out $(SVR_CC_OBJ), $(OBJ) ) + +.PHONY:test +test:$(PROTO_FLAG) + @echo "src:" $(ALL_SRC_FILES) + +$(OBJDIR)/%.o: %.cc + @mkdir -p $(OBJDIR)/$(dir $<) + $(CXX) $(CXXFLAGS) $(INC) -c $< -o $@ + +$(IDIOT_SVR): $(SVR_OBJ) + $(CXX) $(CXXFLAGS) $^ $(LIB) -o $@ + +$(IDIOT_CLIENT): $(CLIENT_OBJ) + $(CXX) $(CXXFLAGS) $^ $(LIB) -o $@ + +PROTOC = protoc +GRPC_CPP_PLUGIN = grpc_cpp_plugin +GRPC_CPP_PLUGIN_PATH ?= `which $(GRPC_CPP_PLUGIN)` + +$(PROTO_FLAG): $(PROTOS_PATH)/$(PROTO_FILE_PREFIX).proto + $(PROTOC) -I $(PROTOS_PATH) --cpp_out=./ $< + $(PROTOC) -I $(PROTOS_PATH) --grpc_out=./ --plugin=protoc-gen-grpc=$(GRPC_CPP_PLUGIN_PATH) $< + mkdir -p `dirname $(PROTO_FLAG)` + touch $@ + +.PHONY: clean +clean: + rm -rf $(BINDIR) raft*.h raft*.cc + + diff --git a/src/gtest/other/idiot_client.cc b/src/gtest/other/idiot_client.cc new file mode 100644 index 0000000..08f921d --- /dev/null +++ b/src/gtest/other/idiot_client.cc @@ -0,0 +1,305 @@ +/* 
+ * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + */ + +#include +#include +#include +#include +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" +#include "grpc/support/log.h" +#include "grpc++/server_context.h" +#include "grpc++/security/server_credentials.h" +#include "grpc++/completion_queue.h" + +#include "raft.pb.h" +#include "raft.grpc.pb.h" + +using ::grpc::Server; +using ::grpc::ServerAsyncResponseWriter; +using ::grpc::ServerBuilder; +using ::grpc::ServerContext; +using ::grpc::CompletionQueue; +using ::grpc::ServerCompletionQueue; +using ::grpc::Status; + +class ChannelMgr { + +public: + + static void Initialize(int conn_size,std::string addr)noexcept { + + for (int i = 0; i < conn_size; ++i) { + auto _channel_args = ::grpc::ChannelArguments(); + std::string _key = "key_" + std::to_string(i); + std::string _val = "val_" + std::to_string(i); + _channel_args.SetString(_key,_val); + + auto shp_channel = ::grpc::CreateCustomChannel(addr, grpc::InsecureChannelCredentials(), _channel_args); + + m_channel_pool.emplace_back(shp_channel); + } + } + + static std::shared_ptr<::grpc::Channel> GetOneChannel()noexcept { + + static std::atomic _idx; + + uint32_t _old_val = _idx.fetch_add(1); + + uint32_t _pool_idx = _old_val % m_channel_pool.size(); + + return m_channel_pool[_pool_idx]; + } + + static std::vector> m_channel_pool; +}; + +std::vector> ChannelMgr::m_channel_pool; + +uint32_t g_count = 50000; + 
+std::string g_my_addr = "127.0.0.1:10010"; + +struct AsyncClientCall { + ::raft::AppendEntriesResponse reply; + + ::grpc::ClientContext context; + + Status status; + + std::unique_ptr<::grpc::ClientAsyncResponseReader<::raft::AppendEntriesResponse>> response_reader; +}; + +void AsyncCompleteRpc(CompletionQueue* polling_cq) { + void* got_tag; + bool ok = false; + + uint32_t _counter = 0; + + auto _start = std::chrono::steady_clock::now(); + + //std::cout << "thread " << std::this_thread::get_id() << " start timer" << std::endl;; + + while (polling_cq->Next(&got_tag, &ok)) { + + //std::cout << "before counter:" << _counter << std::endl; + + //std::this_thread::sleep_for(std::chrono::seconds(2)); + + AsyncClientCall* call = static_cast(got_tag); + GPR_ASSERT(ok); + if (!call->status.ok()) { + std::cout << call->status.error_code() << ",msg:" << call->status.error_message(); + GPR_ASSERT(false); + } + + delete call; + if (++_counter >= g_count) + break; + } + + auto _end = std::chrono::steady_clock::now(); + auto _ms = std::chrono::duration_cast(_end - _start); + + std::cout << "thread " << std::this_thread::get_id() << " inner time cost:" << _ms.count() << std::endl; + + uint32_t _throughput = g_count / float(_ms.count()) * 1000; + + std::cout << "thread " << std::this_thread::get_id() << " inner throughput : " << _throughput << std::endl; +} + +class GreeterClient { + public: + explicit GreeterClient(std::shared_ptr<::grpc::Channel> shp_channel,CompletionQueue* in_cq) { + stub_ = ::raft::RaftService::NewStub(shp_channel); + this->cq_ = in_cq; + //this->cq_ = new CompletionQueue(); + } + + void EntrustSayHello(int idx) { + //Shouldn't start with 0 when doing appendEntries. 
+ idx += 1; + + ::raft::AppendEntriesRequest request; + + request.mutable_base()->set_addr(g_my_addr); + request.mutable_base()->set_term(0); + + auto _p_entry = request.add_replicate_entity(); + auto _p_entity_id = _p_entry->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(idx); + + auto _p_pre_entity_id = _p_entry->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(idx - 1); + + auto _p_wop = _p_entry->mutable_write_op(); + + _p_wop->set_key("follower_benchmark_key_" + std::to_string(idx)); + _p_wop->set_value("follower_benchmark_val_" + std::to_string(idx)); + + AsyncClientCall* call = new AsyncClientCall; + + std::chrono::time_point _deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(3100); + //call->context.set_deadline(_deadline); + + call->response_reader = stub_->PrepareAsyncAppendEntries(&call->context, request, cq_); + call->response_reader->StartCall(); + call->response_reader->Finish(&call->reply, &call->status, (void*)call); + } + + private: + + std::unique_ptr<::raft::RaftService::Stub> stub_; + + CompletionQueue* cq_; +}; + + +int main(int argc, char** argv) { + + if (argc != 7) { + std::cout << "Usage:./program --count_per_thread=xx --thread_per_cq=xx --cq=xx --addr=xx --conn=xx --my_addr=xx"; + return 0; + } + + const char * target_str = "--count_per_thread="; + auto p_target = std::strstr(argv[1],target_str); + if (p_target == nullptr) { + printf("para error argv[1] should be --count_per_thread=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + g_count = std::atoi(p_target); + + uint32_t thread_num = 1; + target_str = "--thread_per_cq="; + p_target = std::strstr(argv[2],target_str); + if (p_target == nullptr) { + printf("para error argv[2] should be --thread_per_cq=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + thread_num = std::atoi(p_target); + + target_str = "--cq="; + p_target = std::strstr(argv[3],target_str); + if (p_target == 
nullptr) { + printf("para error argv[3] should be --cq=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + int _cq_num = std::atoi(p_target); + + std::string _addr = "localhost:50051"; + target_str = "--addr="; + p_target = std::strstr(argv[4],target_str); + if (p_target == nullptr) { + printf("para error argv[4] should be --addr=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + _addr = p_target; + + target_str = "--conn="; + p_target = std::strstr(argv[5],target_str); + if (p_target == nullptr) { + printf("para error argv[5] should be --conn=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + int _conn_size = std::atoi(p_target); + + target_str = "--my_addr="; + p_target = std::strstr(argv[6],target_str); + if (p_target == nullptr) { + printf("para error argv[6] should be --my_addr=xx \n"); + return 0; + } + p_target += std::strlen(target_str); + g_my_addr = p_target; + + ChannelMgr::Initialize(_conn_size, _addr); + + //std::cout << "req for each thread:" << g_count << std::endl; + + //start the polling thread on CQ first. + std::vector _vec_t; + std::vector _vec_cq; + + for (int i = 0; i < _cq_num; ++i) { + auto * _p_cq = new CompletionQueue; + _vec_cq.push_back(_p_cq); + + for (uint32_t i = 0; i < thread_num; i++) + _vec_t.push_back(new std::thread(AsyncCompleteRpc,_p_cq)); + } + + std::vector _vec_entrusting_threads; + + auto _entrust_reqs = [&](int cq_idx, int thread_idx) { + GreeterClient _greeter_client(ChannelMgr::GetOneChannel(), _vec_cq[cq_idx]); + + int _total_thread_num = thread_num * _cq_num; + int _total_thread_idx = thread_num * cq_idx + thread_idx; + + for (int i = 0; i < g_count; i++) { + int req_idx = i * _total_thread_num + _total_thread_idx; + _greeter_client.EntrustSayHello(req_idx); // The actual RPC call! + } + }; + + auto _start = std::chrono::steady_clock::now(); + + //start entrusting the requests. 
+ for (int i = 0; i < _cq_num; ++i) { + for (int m = 0; m < thread_num; ++m) { + std::thread* _p_t = new std::thread(_entrust_reqs, i, m); + _vec_entrusting_threads.emplace_back(_p_t); + } + } + + //Waiting entrusting thread to finish. + for (uint32_t i = 0; i < _vec_entrusting_threads.size(); i++) + _vec_entrusting_threads[i]->join(); + + std::cout << "entrusting done." << std::endl << std::flush; + + //Waiting polling thread to finish. + for (uint32_t i = 0; i < _vec_t.size(); i++) + _vec_t[i]->join(); + + int _total = _cq_num * thread_num * g_count; + std::cout << "g_count:" << _total << std::endl; + + auto _end = std::chrono::steady_clock::now(); + auto _ms = std::chrono::duration_cast(_end - _start); + + std::cout << "time cost:" << _ms.count() << std::endl; + + uint32_t _throughput = _total / float(_ms.count()) * 1000; + + std::cout << "final throughput : " << _throughput << std::endl; + + return 0; +} diff --git a/src/gtest/other/idiot_server.cc b/src/gtest/other/idiot_server.cc new file mode 100644 index 0000000..532126e --- /dev/null +++ b/src/gtest/other/idiot_server.cc @@ -0,0 +1,193 @@ +/* + * + * Copyright 2015 gRPC authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + */ + +#include +#include +#include +#include +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" +#include "grpc/support/log.h" +#include "grpc++/server_context.h" +#include "grpc++/security/server_credentials.h" +#include "grpc++/completion_queue.h" + +#include "raft.pb.h" +#include "raft.grpc.pb.h" + +using ::grpc::Server; +using ::grpc::ServerAsyncResponseWriter; +using ::grpc::ServerBuilder; +using ::grpc::ServerContext; +using ::grpc::CompletionQueue; +using ::grpc::ServerCompletionQueue; +using ::grpc::Status; + +int g_thread_pair_num = 1; +int g_cq_pair_num = 1; +int g_pool = 1; + +class ServerImpl final { + public: + ~ServerImpl() { + server_->Shutdown(); + // Always shutdown the completion queue after the server. + for (const auto& _cq : m_notify_cq) + _cq->Shutdown(); + + //for (const auto& _cq : m_call_cq) + // _cq->Shutdown(); + + } + + // There is no shutdown handling in this code. + void Run() { + std::string server_address("0.0.0.0:60051"); + + ServerBuilder builder; + // Listen on the given address without any authentication mechanism. + builder.AddListeningPort(server_address, grpc::InsecureServerCredentials()); + // Register "service_" as the instance through which we'll communicate with + // clients. In this case it corresponds to an *asynchronous* service. + builder.RegisterService(&service_); + // Get hold of the completion queue used for the asynchronous communication + // with the gRPC runtime. + + for (int i = 0; i < g_cq_pair_num; ++i) { + //cq_ = builder.AddCompletionQueue(); + + m_notify_cq.emplace_back(builder.AddCompletionQueue()); + std::cout << "notify_cq:" << m_notify_cq[m_notify_cq.size() - 1].get() << " added." << std::endl; + + //m_call_cq.emplace_back(builder.AddCompletionQueue()); + //std::cout <<"call_cq:" << m_call_cq[m_call_cq.size() - 1].get() << " added." << std::endl; + } + + // Finally assemble the server. 
+ server_ = builder.BuildAndStart(); + std::cout << "Server listening on " << server_address << std::endl; + + // Proceed to the server's main loop. + std::vector _vec_threads; + + for (int i = 0; i < g_thread_pair_num ; ++i) { + int _cq_idx = i % g_cq_pair_num; + for (int j = 0; j < g_pool; ++j) + new CallData(&service_,m_notify_cq[_cq_idx].get()); + + _vec_threads.emplace_back(new std::thread(&ServerImpl::HandleRpcs, this, m_notify_cq[_cq_idx].get())); + } + + std::cout << g_thread_pair_num << " working aysnc threads spawned" << std::endl; + + for (const auto& _t : _vec_threads) + _t->join(); + } + + private: + // Class encompassing the state and logic needed to serve a request. + class CallData { + public: + CallData(::raft::RaftService::AsyncService* service, ::grpc::ServerCompletionQueue* notify_cq) + : service_(service), notify_cq_(notify_cq), responder_(&ctx_), status_(CREATE) { + Proceed(); + } + + void Proceed() { + if (status_ == CREATE) { + status_ = PROCESS; + + service_->RequestAppendEntries(&ctx_, &request_, &responder_, notify_cq_, notify_cq_, this); + } else if (status_ == PROCESS) { + new CallData(service_, notify_cq_); + + reply_.mutable_comm_rsp()->set_result(::raft::ErrorCode::SUCCESS); + + //std::cout << "i'm here" << std::endl; + + status_ = FINISH; + responder_.Finish(reply_, ::grpc::Status::OK, this); + } else { + delete this; + } + } + + private: + ::raft::RaftService::AsyncService* service_; + ::grpc::ServerCompletionQueue* notify_cq_; + + ::grpc::ServerContext ctx_; + + ::raft::AppendEntriesRequest request_; + ::raft::AppendEntriesResponse reply_; + + ::grpc::ServerAsyncResponseWriter<::raft::AppendEntriesResponse> responder_; + + enum CallStatus { CREATE, PROCESS, FINISH }; + CallStatus status_; // The current serving state. 
+ }; + + void HandleRpcs(ServerCompletionQueue *poll_cq) { + uint32_t _counter = 0; + void* tag; + bool ok; + while (true) { + + GPR_ASSERT(poll_cq->Next(&tag, &ok)); + GPR_ASSERT(ok); + + static_cast(tag)->Proceed(); + } + } + + std::vector> m_notify_cq; + + //std::vector> m_call_cq; + + ::raft::RaftService::AsyncService service_; + std::unique_ptr<::grpc::Server> server_; + }; + +const char* ParseCmdPara( char* argv,const char* para) { + auto p_target = std::strstr(argv,para); + if (p_target == nullptr) { + printf("para error argv[%s] should be %s \n",argv,para); + return nullptr; + } + p_target += std::strlen(para); + return p_target; +} + +int main(int argc, char** argv) { + + if (argc != 4) { + std::cout << "Usage:./program --thread_pair=xx --cq_pair=xx --pool=xx"; + return 0; + } + + g_thread_pair_num = std::atoi(ParseCmdPara(argv[1],"--thread_pair=")); + g_cq_pair_num = std::atoi(ParseCmdPara(argv[2],"--cq_pair=")); + g_pool = std::atoi(ParseCmdPara(argv[3],"--pool=")); + + ServerImpl server; + server.Run(); + + return 0; +} diff --git a/src/gtest/service/test_all.h b/src/gtest/service/test_all.h new file mode 100644 index 0000000..6bce8cc --- /dev/null +++ b/src/gtest/service/test_all.h @@ -0,0 +1,27 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_ALL_SERVICE_H__ +#define __GTEST_ALL_SERVICE_H__ + +#include "gtest/service/test_follower_service.h" +#include "gtest/service/test_leader_service.h" + +#endif diff --git a/src/gtest/service/test_benchmark.h b/src/gtest/service/test_benchmark.h new file mode 100644 index 0000000..39cc05a --- /dev/null +++ b/src/gtest/service/test_benchmark.h @@ -0,0 +1,483 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_SERVICE_H__ +#define __GTEST_SERVICE_H__ + +#include +#include +#include +#include + +#include "gtest/test_base.h" + +using grpc::Channel; +using grpc::ClientAsyncResponseReader; +using grpc::ClientContext; +using grpc::CompletionQueue; +using grpc::Status; + +using ::raft::ClientWriteRequest; +using ::raft::ClientWriteResponse; +using ::raft::AppendEntriesRequest; +using ::raft::AppendEntriesResponse; +using ::raft::CommitEntryRequest; +using ::raft::CommitEntryResponse; +using ::raft::ErrorCode; + +template +using FPrepareAsync = std::function>( + ::grpc::ClientContext*,const T&, CompletionQueue*)>; + +class BenchmarkTime { + +public: + + BenchmarkTime() { + this->m_start_tp = std::chrono::system_clock::from_time_t(std::mktime(&this->m_start_tm)); + } + +protected: + + static uint64_t GetAvgUSLantency() { + return m_total_latency.load(); + } + +protected: + + std::chrono::time_point m_start_tp; + + static std::atomic m_total_latency; + +private: + + //2019-09-26 + std::tm m_start_tm = { 0, 0, 0, 26, 9 - 1, 2019 - 1900 }; +}; + +std::atomic BenchmarkTime::m_total_latency = 0; + +class BenchmarkReact : public BenchmarkTime { +public: + virtual void React(bool cq_result) noexcept = 0; +}; + +template +class BenchmarkClient : public BenchmarkReact { + +public: + + BenchmarkClient(std::shared_ptr shp_channel, std::shared_ptr shp_cq) { + this->m_channel = shp_channel; + this->m_cq = shp_cq; + this->m_stub = ::raft::RaftService::NewStub(shp_channel); + this->m_client_context.reset(new ::grpc::ClientContext()); + } + + virtual ~BenchmarkClient() {} + + virtual void React(bool cq_result) noexcept override { + + if (!cq_result) { + LOG(ERROR) << "UnaryBenchmarkClient got false result from CQ."; + this->Release(); + return; + } + + this->Responder(this->m_final_status, this->m_response); + this->Release(); + } + + void EntrustRequest(std::function&)> req_setter, + const FPrepareAsync &f_prepare_async, uint32_t timeo_ms, uint64_t idx = 
0) noexcept { + + req_setter(this->m_shp_request); + + std::chrono::time_point _deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(timeo_ms); + + //VLOG(89) << "idx:" << idx << ",start set timeout deadline,plus100"; + + this->m_client_context->set_deadline(_deadline); + + //std::time_t _now = std::chrono::system_clock::to_time_t(_deadline); + //VLOG(89) << "idx:" << idx << ",time deadline set to:" << std::put_time(std::localtime(&_now),"%H:%M:%S"); + + this->m_reader = f_prepare_async(this->m_client_context.get(), *this->m_shp_request, this->m_cq.get()); + this->m_reader->StartCall(); + this->m_reader->Finish(&this->m_response, &m_final_status, this); + } + + std::shared_ptr<::raft::RaftService::Stub> GetStub() noexcept { + return this->m_stub; + } + + uint64_t LogLatency(uint64_t start_us, uint64_t idx) const { + auto _now_us = (std::chrono::duration_cast(std::chrono::system_clock::now() - this->m_start_tp)).count(); + auto _latency = _now_us - start_us; + VLOG(2) << "single req latency(us):" << _latency << ",idx:" << idx; + return _latency; + } + +protected: + + virtual void Release() noexcept = 0; + + virtual void Responder(const ::grpc::Status& status, const R& rsp) noexcept = 0; + +protected: + + std::shared_ptr<::grpc::Channel> m_channel; + + std::shared_ptr m_cq; + + std::unique_ptr<::grpc::ClientAsyncResponseReader> m_reader; + + std::shared_ptr m_shp_request; + + R m_response; + + std::shared_ptr<::grpc::ClientContext> m_client_context; + + std::shared_ptr<::raft::RaftService::Stub> m_stub; + + ::grpc::Status m_final_status; + +private: + + BenchmarkClient(const BenchmarkClient&) = delete; + + BenchmarkClient& operator=(const BenchmarkClient&) = delete; +}; + +class CommitEntrieBenchmarkClient : public BenchmarkClient { + +public: + + CommitEntrieBenchmarkClient(std::shared_ptr<::grpc::Channel> shp_channel, std::shared_ptr<::grpc::CompletionQueue> shp_cq) : + BenchmarkClient(shp_channel,shp_cq) {} + + virtual 
~CommitEntrieBenchmarkClient() {} + + virtual void Responder(const ::grpc::Status& status, + const ::raft::CommitEntryResponse& rsp) noexcept override { + + const auto &_idx = this->m_shp_request->entity_id().idx(); + + VLOG(89) << "Commit got index:" << _idx; + + CHECK(status.ok()) << "error_code:" << status.error_code() << ",err msg" + << status.error_message() << ",idx:" << _idx; + + const ::raft::CommonResponse& comm_rsp = rsp.comm_rsp(); + auto _error_code = comm_rsp.result(); + ASSERT_TRUE(_error_code == ErrorCode::SUCCESS || _error_code == ErrorCode::ALREADY_COMMITTED) + << int(_error_code); + } + + virtual void Release() noexcept override { + delete this; + } +}; + +class AppendEntrieBenchmarkClient : public BenchmarkClient { + +public: + + AppendEntrieBenchmarkClient(std::shared_ptr<::grpc::Channel> shp_channel, std::shared_ptr<::grpc::CompletionQueue> shp_cq) : + BenchmarkClient(shp_channel,shp_cq) {} + + virtual ~AppendEntrieBenchmarkClient() {} + + virtual void Responder(const ::grpc::Status& status, + const AppendEntriesResponse& rsp) noexcept override { + + int _lst_idx = this->m_shp_request->replicate_entity().size() - 1; + uint64_t _lst_log_idx = this->m_shp_request->replicate_entity(_lst_idx).entity_id().idx(); + + VLOG(89) << "appendEntries got index:" << _lst_log_idx; + + CHECK(status.ok()) << "error_code:" << status.error_code() << ",err msg:" + << status.error_message() << ",idx:" << _lst_log_idx; + + const ::raft::CommonResponse& comm_rsp = rsp.comm_rsp(); + auto _error_code = comm_rsp.result(); + ASSERT_TRUE(_error_code == ErrorCode::SUCCESS || _error_code == ErrorCode::SUCCESS_MERGED) + << int(_error_code); + + auto _start_us = std::atoll(comm_rsp.err_msg().c_str()); + auto _lantency_us = this->LogLatency(_start_us, _lst_log_idx); + m_total_latency.fetch_add(_lantency_us); + + if (!::RaftCore::Config::FLAGS_do_commit) + return; + + //Entrust commit request. 
+ std::shared_ptr _shp_commit_req(new CommitEntryRequest()); + std::string _local_addr = std::string(_AURORA_LOCAL_IP_) + ":" + + std::to_string(_RAFT_UNIT_TEST_LEADER_PORT_); + _shp_commit_req->mutable_base()->set_addr(_local_addr); + _shp_commit_req->mutable_base()->set_term(0); + + auto _p_entity_id = _shp_commit_req->mutable_entity_id(); + _p_entity_id->set_term(0); + + _p_entity_id->set_idx(_lst_log_idx); + + auto * _p_commit_client = new CommitEntrieBenchmarkClient(this->m_channel, this->m_cq); + + auto _req_setter = [&](std::shared_ptr<::raft::CommitEntryRequest>& _target)->void { + _target = _shp_commit_req; + }; + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncCommitEntries, + _p_commit_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + _p_commit_client->EntrustRequest(_req_setter, _f_prepare, + ::RaftCore::Config::FLAGS_leader_commit_entries_rpc_timeo_ms); + + VLOG(89) << "client entrust commit of idx:" << _lst_log_idx; + } + + virtual void Release() noexcept override { + delete this; + } +}; + +class WriteBenchmarkClient : public BenchmarkClient{ + +public: + + WriteBenchmarkClient(std::shared_ptr<::grpc::Channel> shp_channel, + std::shared_ptr<::grpc::CompletionQueue> shp_cq, int idx) : + BenchmarkClient(shp_channel, shp_cq), m_idx(idx) {} + + virtual ~WriteBenchmarkClient() {} + + virtual void Responder(const ::grpc::Status& status, + const ClientWriteResponse& rsp) noexcept override { + + VLOG(89) << "fetch,idx:" << this->m_idx; + + if (!status.ok()) { + LOG(ERROR) << "error_code:" << status.error_code() << ",err msg" + << status.error_message() << ", idx:" << m_idx; + return; + } + + const ::raft::ClientCommonResponse& _client_comm_rsp = rsp.client_comm_rsp(); + auto _error_code = _client_comm_rsp.result(); + ASSERT_TRUE(_error_code == ErrorCode::SUCCESS) << "err code:" << int(_error_code) + << ",err msg:" << _client_comm_rsp.err_msg() << ",idx:" << this->m_idx; + + auto _start_us = 
std::atoll(_client_comm_rsp.err_msg().c_str()); + auto _lantency_us = this->LogLatency(_start_us, this->m_idx); + m_total_latency.fetch_add(_lantency_us); + } + + virtual void Release() noexcept override { + delete this; + } + +private: + + uint64_t m_idx; +}; + +class BenchmarkBase : public BenchmarkTime { + +public: + + BenchmarkBase(bool leader_svc = true) { + + std::string _leader_addr = std::string(_AURORA_LOCAL_IP_) + ":" + std::to_string(_RAFT_UNIT_TEST_LEADER_PORT_); + std::string _follower_addr = std::string(_AURORA_LOCAL_IP_) + ":" + std::to_string(_RAFT_UNIT_TEST_FOLLWER_PORT_); + + this->m_leader_svc = leader_svc; + this->m_target_addr = this->m_leader_svc ? _leader_addr : _follower_addr; + + std::string _target_ip = ::RaftCore::Config::FLAGS_target_ip; + if (_target_ip != "default_none") + this->m_target_addr = _target_ip; + + this->m_thread_num_per_cq = ::RaftCore::Config::FLAGS_benchmark_client_thread_num_per_cq; + this->m_cq_num = ::RaftCore::Config::FLAGS_benchmark_client_cq_num; + + this->m_req_num_per_thread = ::RaftCore::Config::FLAGS_follower_svc_benchmark_req_round; + if (::RaftCore::Config::FLAGS_do_commit) + this->m_req_num_per_thread *= 2;; + + if (this->m_leader_svc) + this->m_req_num_per_thread = ::RaftCore::Config::FLAGS_leader_svc_benchmark_req_count; + + this->m_total_req_num = this->m_req_num_per_thread * this->m_thread_num_per_cq * this->m_cq_num; + } + + virtual ~BenchmarkBase() { + for (auto &_cq : this->m_vec_cq) + _cq->Shutdown(); + } + + virtual void EntrustClient2CQ(std::shared_ptr shp_channel, + std::shared_ptr shp_cq, int idx)noexcept = 0; + + void DoBenchmark(bool pure_client = true)noexcept { + + std::vector> _vec_channel; + + for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_conn_per_link; ++i) { + auto _channel_args = ::grpc::ChannelArguments(); + + std::string _key = "key_" + std::to_string(i); + std::string _val = "val_" + std::to_string(i); + _channel_args.SetString(_key,_val); + + 
_vec_channel.emplace_back(::grpc::CreateCustomChannel(this->m_target_addr, grpc::InsecureChannelCredentials(), _channel_args)); + } + + for (std::size_t i = 0; i < this->m_cq_num; ++i) + this->m_vec_cq.emplace_back(new CompletionQueue()); + + auto _thread_func = [&](int cq_idx) { + void* tag; + bool ok; + + auto _start = std::chrono::steady_clock::now(); + + uint32_t _cur_got_num = 0; + + auto _shp_cq = this->m_vec_cq[cq_idx]; + + while (true) { + + if (_cur_got_num >= this->m_req_num_per_thread) + break; + + _shp_cq->Next(&tag, &ok); + + BenchmarkReact* _p_ins = static_cast(tag); + _p_ins->React(ok); + + _cur_got_num++; + + continue; + } + + auto _end = std::chrono::steady_clock::now(); + auto _ms = std::chrono::duration_cast(_end - _start); + + std::cout << "thread " << std::this_thread::get_id() << " inner time cost:" << _ms.count() << std::endl; + + uint32_t _throughput = (uint32_t)(_cur_got_num / float(_ms.count()) * 1000); + + std::cout << "thread " << std::this_thread::get_id() << " inner throughput : " << _throughput << std::endl; + }; + + //start the polling thread on CQ first. + std::vector _polling_threads; + for (std::size_t i = 0; i < this->m_cq_num; ++i) { + for (std::size_t j = 0; j < this->m_thread_num_per_cq; ++j) { + std::thread *_pthread = new std::thread(_thread_func, i); + _polling_threads.push_back(_pthread); + VLOG(89) << "clientCQ thread : " << _pthread->get_id() << " for CQ:" << i << " started."; + } + } + + std::cout << "set timeout value begin" << std::endl << std::flush; + + //start entrusting the requests. 
+ auto _entrust_reqs = [&](int cq_idx, int thread_idx) { + + int _channel_num = _vec_channel.size(); + + int _total_thread_num = this->m_thread_num_per_cq * this->m_cq_num; + int _total_thread_idx = this->m_thread_num_per_cq * cq_idx + thread_idx; + + auto &_shp_channel = _vec_channel[_total_thread_idx % _channel_num]; + + for (std::size_t i = 0; i < this->m_req_num_per_thread; ++i) { + int req_idx = i * _total_thread_num + _total_thread_idx; + this->EntrustClient2CQ(_shp_channel, this->m_vec_cq[cq_idx], req_idx); + VLOG(89) << "entrusted idx:" << req_idx; + } + }; + + auto _start = std::chrono::steady_clock::now(); + + std::vector _entrusting_threads; + + for (std::size_t n = 0; n < this->m_cq_num; ++n) { + for (std::size_t j = 0; j < this->m_thread_num_per_cq; ++j) { + auto* _p_thread = new std::thread(_entrust_reqs, n, j); + _entrusting_threads.push_back(_p_thread); + } + } + + for (auto &_item : _polling_threads) + _item->join(); + + auto _end = std::chrono::steady_clock::now(); + auto _ms = std::chrono::duration_cast(_end - _start); + + std::cout << "req number " << m_total_req_num << " total cost(ms):" << _ms.count() << std::endl; + + float _throughput = m_total_req_num / float(_ms.count()) * 1000; + + uint64_t _total_latency = GetAvgUSLantency(); + + uint64_t _avg_latenct_us = (uint64_t)(_total_latency / float(m_total_req_num)); + + std::cout << " inner throughput : " << _throughput << ",avg latency(us):" << _avg_latenct_us << std::endl; + + //Entrusting threads are the least to wait. 
+ for (auto &_item : _entrusting_threads) + _item->join(); + + if (pure_client) + return; + + int _waiting_finish_seconds = 5; + std::cout << "waiting for ongoing remote processing to be finished for " << _waiting_finish_seconds << " seconds."; + std::this_thread::sleep_for(std::chrono::seconds(_waiting_finish_seconds)); + } + +private: + + std::string m_target_addr = ""; + + uint32_t m_req_num_per_thread = 0; + + uint32_t m_cq_num = 0; + + uint32_t m_thread_num_per_cq = 0; + + uint32_t m_total_req_num = 0; + + std::vector> m_vec_cq; + + bool m_leader_svc = false; + +private: + + BenchmarkBase(const BenchmarkBase&) = delete; + + BenchmarkBase& operator=(const BenchmarkBase&) = delete; +}; + +#endif diff --git a/src/gtest/service/test_follower_service.h b/src/gtest/service/test_follower_service.h new file mode 100644 index 0000000..82bc2cc --- /dev/null +++ b/src/gtest/service/test_follower_service.h @@ -0,0 +1,439 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_FOLLOWER_SERVICE_H__ +#define __GTEST_FOLLOWER_SERVICE_H__ + +#include +#include +#include + +#include "gtest/service/test_benchmark.h" +#include "follower/follower_view.h" + +class TestFollowerServiceBanchmark : public BenchmarkBase { + +public: + + TestFollowerServiceBanchmark() : BenchmarkBase(false) {} + + virtual ~TestFollowerServiceBanchmark() {} + + virtual std::string GetLeaderAddr()const noexcept = 0; + + virtual void EntrustClient2CQ(std::shared_ptr shp_channel, + std::shared_ptr shp_cq, int idx)noexcept override { + + //Shouldn't start with 0 when doing appendEntries. + idx += 1; + + auto *_p_append_client = new AppendEntrieBenchmarkClient(shp_channel, shp_cq); + + std::shared_ptr _shp_req(new AppendEntriesRequest()); + + std::string _my_addr = this->GetLeaderAddr(); + if (::RaftCore::Config::FLAGS_my_ip != std::string("default_none")) + _my_addr = ::RaftCore::Config::FLAGS_my_ip; + + _shp_req->mutable_base()->set_addr(_my_addr); + _shp_req->mutable_base()->set_term(0); + + auto _p_entry = _shp_req->add_replicate_entity(); + auto _p_entity_id = _p_entry->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(idx); + + auto _p_pre_entity_id = _p_entry->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(idx - 1); + + auto _p_wop = _p_entry->mutable_write_op(); + + _p_wop->set_key("follower_benchmark_key_" + std::to_string(idx)); + _p_wop->set_value("follower_benchmark_val_" + std::to_string(idx)); + + static std::tm m_start_tm = { 0, 0, 0, 26, 9 - 1, 2019 - 1900 }; + static auto m_start_tp = std::chrono::system_clock::from_time_t(std::mktime(&m_start_tm)); + auto us = std::chrono::duration_cast(std::chrono::system_clock::now() - m_start_tp); + _shp_req->set_debug_info(std::to_string(us.count())); + + auto _req_setter = [&_shp_req](std::shared_ptr<::raft::AppendEntriesRequest>& _target)->void { + _target = _shp_req; + }; + auto _f_prepare = 
std::bind(&::raft::RaftService::Stub::PrepareAsyncAppendEntries, + _p_append_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + _p_append_client->EntrustRequest(_req_setter, _f_prepare, + ::RaftCore::Config::FLAGS_leader_append_entries_rpc_timeo_ms); + } + +}; + +class TestFollowerService : public TestSingleBackendFollower, public TestFollowerServiceBanchmark { + + public: + + TestFollowerService() {} + + virtual void SetUp() override { + //::RaftCore::Config::FLAGS_memory_table_max_item = 10; + ::RaftCore::Config::FLAGS_checking_heartbeat = false; + } + + virtual void TearDown() override {} + + protected: + + std::string GetLeaderAddr()const noexcept override { + return this->m_leader_addr; + } + + void DoHeartBeat() { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_follower_svc_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + ::grpc::ClientContext _context; + std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + //_context.set_deadline(_deadline); + + ::raft::HeartBeatRequest _req; + ::raft::CommonResponse _rsp; + + _req.mutable_base()->set_addr(this->m_leader_addr); + _req.mutable_base()->set_term(0); + + ::grpc::Status _status = _stub->HeartBeat(&_context, _req, &_rsp); + + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_rsp.result()==ErrorCode::SUCCESS) << "DoHeartBeat fail,detail:" << _rsp.DebugString(); + } + + void DoAppendEntriesCommit() { + + std::string _target_ip = ::RaftCore::Config::FLAGS_target_ip ; + std::string _real_ip = this->m_follower_svc_addr; + + if (_target_ip != "default_none") + _real_ip = _target_ip; + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(_real_ip, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + 
AppendEntriesRequest _req; + AppendEntriesResponse _rsp; + + _req.mutable_base()->set_addr(this->m_leader_addr); + _req.mutable_base()->set_term(0); + + //8057:overlap, 8062:exact match,8063:disorder. + int _start = ::RaftCore::Config::FLAGS_append_entries_start_idx; + int _sum = 10; + for (int i = _start; i < _start + _sum; ++i) { + auto _p_entity = _req.add_replicate_entity(); + + auto _p_entity_id = _p_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + auto _p_pre_entity_id = _p_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(i==0?0:i-1); + + auto _p_wop = _p_entity->mutable_write_op(); + std::string _idx = std::to_string(i); + _p_wop->set_key("key_" + _idx); + _p_wop->set_value("val_" + _idx); + } + + ::grpc::ClientContext _contextX; + std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(::RaftCore::Config::FLAGS_leader_append_entries_rpc_timeo_ms); + _contextX.set_deadline(_deadline); + + ::grpc::Status _status = _stub->AppendEntries(&_contextX, _req, &_rsp); + ASSERT_TRUE(_status.ok()) << ", error_code:" << _status.error_code() << ",err msg:" << _status.error_message(); + ASSERT_TRUE(_rsp.comm_rsp().result()==ErrorCode::SUCCESS || + _rsp.comm_rsp().result()==ErrorCode::SUCCESS_MERGED) << "DoAppendEntriesCommit fail,detail:" << _rsp.DebugString(); + + //Committing + CommitEntryRequest _commit_req; + CommitEntryResponse _commit_rsp; + + _commit_req.mutable_base()->set_addr(this->m_leader_addr); + _commit_req.mutable_base()->set_term(0); + + auto _p_entity_id = _commit_req.mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(_start + _sum - 1); + + ::grpc::ClientContext _context; + _deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(::RaftCore::Config::FLAGS_leader_commit_entries_rpc_timeo_ms); + _context.set_deadline(_deadline); + + _status = _stub->CommitEntries(&_context, 
_commit_req, &_commit_rsp); + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_commit_rsp.comm_rsp().result()==ErrorCode::SUCCESS || + _commit_rsp.comm_rsp().result()==ErrorCode::ALREADY_COMMITTED) << "DoAppendEntriesCommit fail,detail:" << _commit_rsp.DebugString(); + } + + void DoSyncData() { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_follower_svc_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + ::grpc::ClientContext _context; + + std::shared_ptr<::grpc::ClientReaderWriter<::raft::SyncDataRequest,::raft::SyncDataResponse>> _stream = _stub->SyncData(&_context); + + ::raft::SyncDataRequest _req; + _req.mutable_base()->set_term(0); + _req.mutable_base()->set_addr(this->m_leader_addr); + _req.set_msg_type(::raft::SyncDataMsgType::PREPARE); + _stream->Write(_req); + + ::raft::SyncDataResponse _rsp; + + _rsp.Clear(); + CHECK(_stream->Read(&_rsp)); + CHECK_EQ(_rsp.comm_rsp().result(), ErrorCode::PREPARE_CONFRIMED) << "sync data fail,msg:" << _rsp.DebugString(); + + //Sync data. 
+ int _write_times = 5; + int _counter = 10; + + for (int j = 0; j < _write_times; ++j) { + + _req.clear_entity(); + _req.set_msg_type(::raft::SyncDataMsgType::SYNC_DATA); + + int _start = j * _counter; + int _end = _start + _counter; + + for (int i = _start; i < _end; ++i) { + auto _p_entity = _req.add_entity(); + + auto _p_pre_log_id = _p_entity->mutable_pre_log_id(); + _p_pre_log_id->set_term(0); + _p_pre_log_id->set_idx(i-1); + + auto _p_entity_id = _p_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + std::string _idx = std::to_string(i); + + auto _p_wop = _p_entity->mutable_write_op(); + _p_wop->set_key("resync_data_key_" + _idx); + _p_wop->set_value("resync_data_val_" + _idx); + } + + _stream->Write(_req); + + _rsp.Clear(); + _stream->Read(&_rsp); + + CHECK_EQ(_rsp.comm_rsp().result(), ErrorCode::SYNC_DATA_CONFRIMED) << "sync data fail,msg:" << _rsp.DebugString(); + } + + //Sync log. + int _log_write_times = 5; + int _log_counter = 10; + int _log_start = _log_write_times * _log_counter; + + for (int j = 0; j < _log_write_times; ++j) { + + _req.clear_entity(); + _req.set_msg_type(::raft::SyncDataMsgType::SYNC_LOG); + + int _start = _log_start + j * _log_counter; + int _end = _start + _log_counter; + + for (int i = _start; i < _end; ++i) { + auto _p_entity = _req.add_entity(); + + auto _p_pre_log_id = _p_entity->mutable_pre_log_id(); + _p_pre_log_id->set_term(0); + _p_pre_log_id->set_idx(i-1); + + auto _p_entity_id = _p_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(i); + + std::string _idx = std::to_string(i); + + auto _p_wop = _p_entity->mutable_write_op(); + _p_wop->set_key("resync_log_key_" + _idx); + _p_wop->set_value("resync_log_val_" + _idx); + } + + _stream->Write(_req); + + _rsp.Clear(); + _stream->Read(&_rsp); + + CHECK_EQ(_rsp.comm_rsp().result(), ErrorCode::SYNC_LOG_CONFRIMED) << "sync data fail,msg:" << _rsp.DebugString(); + } + + CHECK(_stream->WritesDone()) << "client 
writes done fail."; + ::grpc::Status _status = _stream->Finish(); + + CHECK(_status.ok()) << "error_code:" << _status.error_code() << ",err msg:" + << _status.error_message(); + } +}; + +class TestFollowerServiceClient : public TestBase, public TestFollowerServiceBanchmark { + + public: + + TestFollowerServiceClient() {} + + virtual ~TestFollowerServiceClient() {} + + protected: + + std::string GetLeaderAddr()const noexcept override { + return this->m_leader_addr; + } +}; + +TEST_F(TestFollowerService, GeneralOperation) { + + this->DoAppendEntriesCommit(); + this->DoSyncData(); + this->DoHeartBeat(); +} + +TEST_F(TestFollowerService, ContinuousWorking) { + this->DoAppendEntriesCommit(); +} + +TEST_F(TestFollowerService, ConcurrentOperation) { + + auto _tp = this->StartTimeing(); + + //Be sure to remove binlog file before this test. + + int _test_thread_num = ::RaftCore::Config::FLAGS_concurrent_client_thread_num; + if (_test_thread_num <= 0) + _test_thread_num = this->m_cpu_cores; + + std::mutex _mutex; + + std::atomic _counter; + _counter.store(0); + + int _sum = 1000; + auto _op = [&](int thread_idx) { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_follower_svc_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + //_context.set_deadline(_deadline); + + AppendEntriesRequest _req; + AppendEntriesResponse _rsp; + + CommitEntryRequest _commit_req; + CommitEntryResponse _commit_rsp; + + int _run_times = 1000; + for (int k = 0; k < _run_times; ++k) { + + _req.Clear(); + _req.mutable_base()->set_term(0); + _req.mutable_base()->set_addr(this->m_leader_addr); + + int _write_num = 10; + for (int i = 0; i < _write_num; ++i) { + + int _cur_idx = k * _write_num * _test_thread_num + thread_idx * _write_num + i; + + auto _p_entity = 
_req.add_replicate_entity(); + + auto _p_entity_id = _p_entity->mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(_cur_idx); + + auto _p_pre_entity_id = _p_entity->mutable_pre_log_id(); + _p_pre_entity_id->set_term(0); + _p_pre_entity_id->set_idx(_cur_idx==0?0:_cur_idx-1); + + auto _p_wop = _p_entity->mutable_write_op(); + std::string _idx = std::to_string(_cur_idx); + _p_wop->set_key("key_" + _idx); + _p_wop->set_value("val_" + _idx); + } + + int _start = k * _write_num * _test_thread_num + thread_idx * _write_num; + int _end = k * _write_num * _test_thread_num + thread_idx * _write_num + _write_num - 1; + + _mutex.lock(); + std::cout << "thread " << thread_idx << " write log from " << _start << " to " << _end << std::endl; + _mutex.unlock(); + + ::grpc::ClientContext _contextX; + ::grpc::Status _status = _stub->AppendEntries(&_contextX, _req, &_rsp); + + _counter.fetch_add(1); + + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_rsp.comm_rsp().result()==ErrorCode::SUCCESS || _rsp.comm_rsp().result()==ErrorCode::SUCCESS_MERGED) << "DoAppendEntriesCommit fail,detail:" << _rsp.DebugString(); + + //Committing + _commit_req.Clear(); + _commit_req.mutable_base()->set_addr(this->m_leader_addr); + _commit_req.mutable_base()->set_term(0); + + auto _p_entity_id = _commit_req.mutable_entity_id(); + _p_entity_id->set_term(0); + _p_entity_id->set_idx(_end); + + ::grpc::ClientContext _context; + _status = _stub->CommitEntries(&_context, _commit_req, &_commit_rsp); + ASSERT_TRUE(_status.ok()); + //Check the commit response itself, not the earlier append response. ALREADY_COMMITTED is accepted + //as well (same as DoAppendEntriesCommit): another thread may have committed a later index first. + ASSERT_TRUE(_commit_rsp.comm_rsp().result()==ErrorCode::SUCCESS || + _commit_rsp.comm_rsp().result()==ErrorCode::ALREADY_COMMITTED) << "DoAppendEntriesCommit fail,detail:" << _commit_rsp.DebugString(); + } + + }; + + this->LaunchMultipleThread(_op,_test_thread_num); + + std::cout << "total req send&recv:" << _counter.load() << std::endl; + + this->EndTiming(_tp, "Follower service benchmark cost"); + + std::cout << "sleeping... 
CHECK if the memory cost is decreasing...???"; + std::this_thread::sleep_for(std::chrono::seconds(5)); +} + +TEST_F(TestFollowerService, Benchmark) { + + this->DoBenchmark(false); +} + +TEST_F(TestFollowerServiceClient, Benchmark) { + + this->DoBenchmark(); +} + +#endif diff --git a/src/gtest/service/test_leader_service.h b/src/gtest/service/test_leader_service.h new file mode 100644 index 0000000..80ec91b --- /dev/null +++ b/src/gtest/service/test_leader_service.h @@ -0,0 +1,222 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_LEADER_SERVICE_H__ +#define __GTEST_LEADER_SERVICE_H__ + +#include +#include +#include + +#include "gtest/service/test_benchmark.h" +#include "leader/leader_view.h" + +class TestLeaderServiceBanchmark : public BenchmarkBase { + +public: + + TestLeaderServiceBanchmark() { + uint32_t _buf_len = ::RaftCore::Config::FLAGS_value_len + 1; + this->m_val_buf = (char*)malloc(_buf_len); + std::memset(this->m_val_buf, 'a', _buf_len); + this->m_val_buf[_buf_len - 1] = '\0'; + } + + virtual ~TestLeaderServiceBanchmark() { + free(this->m_val_buf); + } + + //Builds one write request for client idx and entrusts it to a new WriteBenchmarkClient created on shp_channel/shp_cq. + virtual void EntrustClient2CQ(std::shared_ptr shp_channel, + std::shared_ptr shp_cq, int idx)noexcept override { + + auto *_p_append_client = new WriteBenchmarkClient(shp_channel, shp_cq, idx); + + auto us = std::chrono::duration_cast(std::chrono::system_clock::now() - this->m_start_tp); + + std::shared_ptr _shp_req(new ClientWriteRequest()); + auto * _req = _shp_req->mutable_req(); + _req->set_key("leader_benchmark_key_" + std::to_string(idx)); + + //Value = timestamp marker + elapsed-time count since m_start_tp + the pre-filled padding in m_val_buf. + //(The former per-call malloc'ed scratch buffer was never used nor freed, so it has been dropped.) + std::string _val = std::string(_WRITE_VAL_TS_) + std::to_string(us.count()) + std::string(this->m_val_buf); + + _req->set_value(_val); + + auto _req_setter = [&_shp_req](std::shared_ptr& _target)->void { + _target = _shp_req; + }; + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncWrite, + _p_append_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + _p_append_client->EntrustRequest(_req_setter, _f_prepare, + ::RaftCore::Config::FLAGS_client_write_timo_ms, idx); + } + +private: + + char* m_val_buf = nullptr; + +}; + +class TestLeaderService : public TestCluster , public TestLeaderServiceBanchmark { + + public: + + TestLeaderService() { + + //Give a long enough timeout value to facilitate unit test in debug mode l. 
+ //::RaftCore::Config::FLAGS_leader_append_entries_rpc_timeo_ms = 5 *1000; + //::RaftCore::Config::FLAGS_leader_commit_entries_rpc_timeo_ms = 100 *1000; + //::RaftCore::Config::FLAGS_leader_resync_log_rpc_timeo_ms = 100 *1000; + //::RaftCore::Config::FLAGS_leader_heartbeat_rpc_timeo_ms = 100 *1000; + } + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + protected: + + void ClientRead() { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_leader_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + //std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + //_context.set_deadline(_deadline); + + ::raft::ClientReadRequest _r_req; + ::raft::ClientReadResponse _r_rsp; + + _r_req.set_key("client_key_test"); + + ::grpc::ClientContext _contextX; + ::grpc::Status _status = _stub->Read(&_contextX, _r_req, &_r_rsp); + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_r_rsp.client_comm_rsp().result()==::raft::ErrorCode::SUCCESS) << "ClientRead fail,detail:" << _r_rsp.DebugString(); + //ASSERT_STREQ(_r_rsp.value().c_str(),"client_val_test") << "ClientRead value not correct:" << _r_rsp.DebugString(); + } +}; + +class TestLeaderServiceClient : public TestBase, public TestLeaderServiceBanchmark { + + public: + + TestLeaderServiceClient() {} + + virtual ~TestLeaderServiceClient() {} + + protected: + +}; + +TEST_F(TestLeaderService, GeneralOperation) { + + std::cout << "start general test.." << std::endl; + + this->ClientWrite(); + + this->ClientRead(); + + std::cout << "end general test.." << std::endl; +} + +TEST_F(TestLeaderService, ConcurrentOperation) { + + //Be sure to remove binlog file before this test. 
+ + auto _tp = this->StartTimeing(); + + auto _op = [&](int thread_idx) { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_leader_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + int _read_counter = 0; + + int _start = 0; + int _run_times = 1000; + for (int k = 0; k < _run_times; ++k) { + + int _write_num = 10; + for (int i = 0; i < _write_num; ++i) { + int _cur_idx = _start + k * _write_num * this->m_cpu_cores + thread_idx * _write_num + i; + + ClientWriteRequest _w_req; + ClientWriteResponse _w_rsp; + + std::string _key = "client_key_no_order_" + std::to_string(_cur_idx); + std::string _val = "client_val_no_order_" + std::to_string(_cur_idx); + + auto *_p_wop = _w_req.mutable_req(); + _p_wop->set_key(_key); + _p_wop->set_value(_val); + + ::grpc::ClientContext _contextX; + std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + + std::chrono::milliseconds(::RaftCore::Config::FLAGS_client_write_timo_ms); + //_contextX.set_deadline(_deadline); + + ::grpc::Status _status = _stub->Write(&_contextX, _w_req, &_w_rsp); + ASSERT_TRUE(_status.ok()) << "status wrong:" << _status.error_message(); + ASSERT_TRUE(_w_rsp.client_comm_rsp().result()==::raft::ErrorCode::SUCCESS) << "ClientWrite fail,detail:" << _w_rsp.DebugString(); + + if (_read_counter++ > 20) { + ::raft::ClientReadRequest _r_req; + ::raft::ClientReadResponse _r_rsp; + _r_req.set_key(_key); + + ::grpc::ClientContext _contextY; + std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + std::chrono::seconds(3); + //_contextY.set_deadline(_deadline); + ::grpc::Status _status = _stub->Read(&_contextY, _r_req, &_r_rsp); + ASSERT_TRUE(_status.ok()) << "status wrong:" << _status.error_message(); + ASSERT_TRUE(_r_rsp.client_comm_rsp().result()==::raft::ErrorCode::SUCCESS) << "ClientRead fail,detail:" << _r_rsp.DebugString(); + + std::cout << "read 
key:" << _key << ",val:" << _r_rsp.value() << std::endl; + _read_counter = 0; + } + + //std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + } + }; + + this->LaunchMultipleThread(_op); + + this->EndTiming(_tp, "leader service benchmark cost"); + + std::cout << "sleeping... CHECK if the memory cost is decreasing?????"; + std::this_thread::sleep_for(std::chrono::seconds(20)); +} + +TEST_F(TestLeaderService, Benchmark) { + + this->DoBenchmark(false); +} + +TEST_F(TestLeaderServiceClient, Benchmark) { + + this->DoBenchmark(); +} + + +#endif diff --git a/src/gtest/state/test_all.h b/src/gtest/state/test_all.h new file mode 100644 index 0000000..06b9ace --- /dev/null +++ b/src/gtest/state/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_STATE_H__ +#define __GTEST_ALL_STATE_H__ + +#include "gtest/state/test_state.h" + +#endif diff --git a/src/gtest/state/test_state.h b/src/gtest/state/test_state.h new file mode 100644 index 0000000..d9a98ba --- /dev/null +++ b/src/gtest/state/test_state.h @@ -0,0 +1,86 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_STATE_H__ +#define __GTEST_STATE_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "state/state_mgr.h" +#include "topology/topology_mgr.h" + +using ::RaftCore::State::RaftRole; +using ::RaftCore::State::StateMgr; +using ::RaftCore::Topology; +using ::RaftCore::CTopologyMgr; + +class TestState : public TestBase { + + public: + + TestState() {} + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + +}; + +TEST_F(TestState, GeneralOperation) { + + CTopologyMgr::Initialize(); + + Topology _topo; + CTopologyMgr::Read(&_topo); + + StateMgr::Initialize(_topo); + + auto _state = StateMgr::GetRole(); + ASSERT_EQ(_state, RaftRole::LEADER); + ASSERT_STREQ(StateMgr::GetRoleStr(), "leader"); + ASSERT_STREQ(StateMgr::GetMyAddr().c_str(),this->m_leader_addr.c_str()); + + //---------------Leader --> Follower----------------// +#define _NEW_LEADER_ADDRESS_ "192.168.0.100:10077" + int _old_follower_size = _topo.m_followers.size(); + StateMgr::SwitchTo(RaftRole::FOLLOWER,_NEW_LEADER_ADDRESS_); + + _state = StateMgr::GetRole(); + ASSERT_EQ(_state, RaftRole::FOLLOWER); + ASSERT_STREQ(StateMgr::GetRoleStr(), "follower"); + + CTopologyMgr::Read(&_topo); + ASSERT_STREQ(_topo.m_leader.c_str(),_NEW_LEADER_ADDRESS_); + + int _new_follower_size = _topo.m_followers.size(); + ASSERT_EQ(_old_follower_size + 1, _new_follower_size ); + + ASSERT_TRUE(std::find(_topo.m_followers.cbegin(),_topo.m_followers.cend(),this->m_leader_addr) != _topo.m_followers.cend()); + + StateMgr::UnInitialize(); +} + + +#endif diff --git a/src/gtest/storage/test_all.h 
b/src/gtest/storage/test_all.h new file mode 100644 index 0000000..0f485b1 --- /dev/null +++ b/src/gtest/storage/test_all.h @@ -0,0 +1,29 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_STORAGE_H__ +#define __GTEST_ALL_STORAGE_H__ + +#include "gtest/storage/test_storage.h" +#include "gtest/storage/test_hashable_string.h" +#include "gtest/storage/test_memory_table.h" +#include "gtest/storage/test_sstable.h" + +#endif diff --git a/src/gtest/storage/test_hashable_string.h b/src/gtest/storage/test_hashable_string.h new file mode 100644 index 0000000..851660a --- /dev/null +++ b/src/gtest/storage/test_hashable_string.h @@ -0,0 +1,78 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_HASHABLE_STRING_H__ +#define __GTEST_HASHABLE_STRING_H__ + +#include +#include + +#include "gtest/test_base.h" +#include "storage/hashable_string.h" + +using ::RaftCore::Storage::HashableString; + +class TestHashableString : public TestBase { + + public: + + TestHashableString() {} + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + protected: + +}; + +TEST_F(TestHashableString, GeneralOperation) { + + HashableString _obj1("abc1"); + + HashableString _obj2("abc2"); + + HashableString _obj3("abc2"); + + ASSERT_TRUE(_obj2.Hash() == _obj3.Hash()); + + ASSERT_TRUE(_obj1 < _obj2); + + ASSERT_TRUE(_obj2 == _obj3); + + ASSERT_TRUE(_obj2 == "abc2"); + + _obj3 = _obj1; + + ASSERT_TRUE(_obj3 == "abc1"); + + ASSERT_TRUE(_obj3.GetStr() == "abc1"); + + std::string _tmp = "on_the_fly"; + + HashableString _obj4(_tmp, false); + + ASSERT_TRUE(_obj4.GetStr()==_tmp); + +} + + + +#endif diff --git a/src/gtest/storage/test_memory_table.h b/src/gtest/storage/test_memory_table.h new file mode 100644 index 0000000..1654c09 --- /dev/null +++ b/src/gtest/storage/test_memory_table.h @@ -0,0 +1,90 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_MEMORY_TABLE_H__ +#define __GTEST_MEMORY_TABLE_H__ + +#include +#include + +#include "gtest/test_base.h" +#include "storage/memory_table.h" + +using ::RaftCore::Storage::MemoryTable; +using ::RaftCore::Storage::TypePtrHashableString; +using ::RaftCore::Storage::TypePtrHashValue; + +class TestMemoryTable : public TestBase { + + public: + + TestMemoryTable() {} + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + protected: + + void GeneralOperation() { + + this->m_obj.Insert("k1", "v1", 1, 1); + this->m_obj.Insert("k2", "v2", 1, 2); + this->m_obj.Insert("k3", "v3", 1, 3); + + auto _iterator = [](const TypePtrHashableString &k,const TypePtrHashValue &v) { + //std::cout << "k:" << k->GetStr() << ",v:" << v->m_term << "|" << v->m_index << "|" << v->m_val << std::endl;; + return true; + }; + + this->m_obj.IterateByKey(_iterator); + + std::string _val = ""; + ASSERT_TRUE(this->m_obj.GetData("k1", _val)); + + ASSERT_TRUE(_val == "v1"); + + ASSERT_TRUE(this->m_obj.Size() == 3) << ",actual size:" << this->m_obj.Size(); + } + + protected: + + MemoryTable m_obj; +}; + +TEST_F(TestMemoryTable, GeneralOperation) { + this->GeneralOperation(); +} + +TEST_F(TestMemoryTable, ConcurrentOperation) { + + auto _op = [&](int idx) { + int _run_times = 1000; + for (int i = 0; i < _run_times; ++i) { + this->GeneralOperation(); + std::cout << "thread:" << std::this_thread::get_id() << " finish round:" << i << std::endl; + } + }; + + this->LaunchMultipleThread(_op); +} + + +#endif diff --git a/src/gtest/storage/test_sstable.h b/src/gtest/storage/test_sstable.h new file mode 100644 index 0000000..1b02dfa --- /dev/null +++ b/src/gtest/storage/test_sstable.h @@ -0,0 +1,132 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your 
option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_SSTABLE_H__ +#define __GTEST_SSTABLE_H__ + +#include +#include + +#include "gtest/test_base.h" +#include "storage/sstable.h" + +using ::RaftCore::Storage::MemoryTable; +using ::RaftCore::Storage::SSTAble; + +class TestSSTable : public TestBase { + + public: + + TestSSTable() {} + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + protected: + + void GeneralOperation() { + + } + +}; + +TEST_F(TestSSTable, GeneralOperation) { + MemoryTable _memtable_1; + _memtable_1.Insert("k1", "v1", 1, 1); + _memtable_1.Insert("k2", "v2", 1, 2); + _memtable_1.Insert("k3", "v3", 1, 3); + + SSTAble _sstable_1(_memtable_1); + + MemoryTable _memtable_2; + _memtable_2.Insert("k2", "v2", 1, 2); + _memtable_2.Insert("k3", "v3", 1, 3); + _memtable_2.Insert("k4", "v4", 1, 4); + + SSTAble _sstable_2(_memtable_2); + + SSTAble _sstable_3(_sstable_2.GetFilename().c_str()); + + //Older file merged into newer file. 
+ SSTAble _sstable_merged(_sstable_1,_sstable_2); + + std::string _val = ""; + ASSERT_TRUE(_sstable_merged.Read("k4", _val)); + ASSERT_TRUE(_val == "v4"); + + auto _max_id = _sstable_merged.GetMaxLogID(); + ASSERT_TRUE(_max_id.m_term == 1 && _max_id.m_index == 4); + + ASSERT_TRUE(_sstable_merged.GetFilename() == (_sstable_2.GetFilename() + _AURORA_SSTABLE_MERGE_SUFFIX_)); + + auto _traverse = [](const SSTAble::Meta &meta,const HashableString &key) ->bool{ + std::cout << "traverse term:" << meta.m_term << ",index:" << meta.m_index << std::endl;; + return true; + }; + + _sstable_merged.IterateByVal(_traverse); +} + +TEST_F(TestSSTable, Performance) { + + MemoryTable _memtable; + + std::string _key = "", _val=""; + + for (int i = 0; i < 20000; ++i) { + _key = std::to_string(i); + _val = std::to_string(i); + + _memtable.Insert(_key, _val, 1, 1); + } + + std::cout << "........." << std::endl; + + SSTAble _sstable(_memtable); +} + +TEST_F(TestSSTable, ConcurrentOperation) { + + MemoryTable _memtable_1; + _memtable_1.Insert("k1", "v1", 1, 1); + _memtable_1.Insert("k2", "v2", 1, 2); + _memtable_1.Insert("k3", "v3", 1, 3); + _memtable_1.Insert("k4", "v4", 1, 4); + + SSTAble _sstable_1(_memtable_1); + + auto _op = [&](int idx) { + for (int i = 1; i <= 4; ++i) { + std::string _key = "k" + std::to_string(i); + std::string _expected_val = "v" + std::to_string(i); + + std::string _val = ""; + ASSERT_TRUE(_sstable_1.Read(_key, _val)) << "key:" << _key; + ASSERT_EQ(_val, _expected_val); + } + + }; + + this->LaunchMultipleThread(_op); +} + + +#endif diff --git a/src/gtest/storage/test_storage.h b/src/gtest/storage/test_storage.h new file mode 100644 index 0000000..caaadc7 --- /dev/null +++ b/src/gtest/storage/test_storage.h @@ -0,0 +1,176 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, 
or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_STORAGE_H__ +#define __GTEST_STORAGE_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "common/log_identifier.h" +#include "binlog/binlog_singleton.h" +#include "storage/storage.h" + +using ::RaftCore::Storage::StorageMgr; +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::Common::LogIdentifier; + +class TestStorage : public TestBase { + + public: + + TestStorage() { + //::RaftCore::Config::FLAGS_memory_table_max_item = 20; + } + + virtual ~TestStorage() {} + + virtual void SetUp() override { + + //Clear all existing files. + if (::RaftCore::Config::FLAGS_clear_existing_sstable_files) { + fs::path _data_path("data"); + fs::remove_all(_data_path); + } + + //Using the test binlog file. 
+ BinLogGlobal::m_instance.Initialize(_ROLE_STR_TEST_); + + this->m_lrl = BinLogGlobal::m_instance.GetLastReplicated(); + this->m_const_term = this->m_lrl.m_term; + this->m_base_index = this->m_lrl.m_index; + + this->m_obj.Initialize(_ROLE_STR_TEST_); + } + + virtual void TearDown() override { + this->m_obj.UnInitialize(); + } + + protected: + + void DoRW(int thread_idx = -1) { + for (int i = 1; i <= this->m_counter;++i) { + + uint64_t _idx = this->m_base_index + i; + if (thread_idx >= 0) + _idx = this->m_base_index + thread_idx*this->m_counter + i; + + //VLOG(89) << "writing idx:" << _idx; + + LogIdentifier _log_id; + _log_id.Set(this->m_const_term,_idx); + std::string _i = std::to_string(_idx); + std::string _key = "key_" + _i; + std::string _val = "val_" + _i; + + ASSERT_TRUE(this->m_obj.Set(_log_id,_key,_val)); + + std::string _val_2 = "val_" + _i; + ASSERT_TRUE(this->m_obj.Get(_key, _val_2)); + if (_val != _val_2) + ASSERT_TRUE(false) << "_key:" << _key << ",expect _val:" << _val << ",actual _val:" << _val_2; + } + } + + void GeneralOperation() { + + this->DoRW(); + + LogIdentifier _max_log_id; + _max_log_id.Set(this->m_const_term, this->m_counter + this->m_base_index); + ASSERT_EQ(this->m_obj.GetLastCommitted(), _max_log_id); + + LogIdentifier _log_id; + int _start_idx = this->m_counter / 2; + _log_id.Set(this->m_const_term,_start_idx); + std::list output_list; + + int _get_count = ::RaftCore::Config::FLAGS_storage_get_slice_count; + this->m_obj.GetSlice(_log_id,_get_count,output_list); + + ASSERT_EQ(output_list.size(), _get_count); + + _start_idx++; + for (const auto &_item : output_list) { + LogIdentifier _id; + _id.Set(this->m_const_term,_start_idx); + ASSERT_EQ(_item.m_log_id,_id); + + std::string _key = "key_" + std::to_string(_start_idx); + ASSERT_EQ(*_item.m_key,_key); + + std::string _val = "val_" + std::to_string(_start_idx); + ASSERT_EQ(*_item.m_value,_val); + + _start_idx++; + } + } + + StorageMgr m_obj; + + LogIdentifier m_lrl; + + uint32_t 
m_const_term; + + uint64_t m_base_index; + + int m_counter = 100; +}; + +TEST_F(TestStorage, GeneralOperation) { + this->GeneralOperation(); + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); + + this->m_obj.PurgeGarbage(); + + std::this_thread::sleep_for(std::chrono::milliseconds(500)); +} + +TEST_F(TestStorage, Reset) { + for (int i = 0; i < 1000; ++i) + this->m_obj.Reset(); +} + +TEST_F(TestStorage, ConcurrentOperation) { + + //Note:More rounds more memory will be consumed. + //Set the round count once, before any worker starts: assigning it inside every worker + //raced with the other workers reading m_counter in DoRW. + this->m_counter = 200; + + auto _op = [&](int idx) { + this->DoRW(idx); + }; + + this->LaunchMultipleThread(_op); + + std::cout << "start GC ....." << std::endl; + + //Check memory reclaiming status. + this->m_obj.PurgeGarbage(); + + std::cout << "start GC done, check memory usage now." << std::endl; + + std::this_thread::sleep_for(std::chrono::seconds(10)); +} + +#endif diff --git a/src/gtest/test_all.h b/src/gtest/test_all.h new file mode 100644 index 0000000..f40f631 --- /dev/null +++ b/src/gtest/test_all.h @@ -0,0 +1,39 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_H__ +#define __GTEST_H__ + +#include "gtest/tools/test_all.h" +#include "gtest/topology/test_all.h" +#include "gtest/storage/test_all.h" +#include "gtest/state/test_all.h" +#include "gtest/guid/test_all.h" +#include "gtest/common/test_all.h" +#include "gtest/binlog/test_all.h" +#include "gtest/follower/test_all.h" +#include "gtest/service/test_all.h" +#include "gtest/global/test_all.h" +#include "gtest/leader/test_all.h" +#include "gtest/candidate/test_all.h" +#include "gtest/election/test_all.h" +#include "gtest/member/test_all.h" + +#endif diff --git a/src/gtest/test_base.h b/src/gtest/test_base.h new file mode 100644 index 0000000..c8ed974 --- /dev/null +++ b/src/gtest/test_base.h @@ -0,0 +1,353 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_BASE_H__ +#define __GTEST_BASE_H__ + +#ifndef _RAFT_UNIT_TEST_ +#error You must define the '_RAFT_UNIT_TEST_' macro to start unit testing ,otherwise some functions wont running correctly. 
+#endif + +#include "grpc++/channel.h" +#include "grpc++/create_channel.h" + +#include "boost/process.hpp" +#include "boost/process/environment.hpp" +#include "boost/filesystem.hpp" + +#include "gtest/gtest.h" + +#include "config/config.h" +#include "global/global_env.h" +#include "topology/topology_mgr.h" +#include "tools/utilities.h" +#include "guid/guid_generator.h" +#include "election/election.h" +#include "member/member_manager.h" + +#define _RAFT_UNIT_TEST_LEADER_PORT_ (10010) +#define _RAFT_UNIT_TEST_FOLLWER_PORT_ (10020) +#define _RAFT_UNIT_TEST_NEW_NODES_PORT_ (10030) + +#define _TEST_LEADER_BINLOG_ "raft.binlog.leader" +#define _TEST_FOLLOWER_BINLOG_ "raft.binlog.follower" +#define _TEST_TOPOLOGY_MEMCHG_FILE_ "topology.config.memchg" + +#define _TEST_FOLLOWER_NUM_ (3) + +namespace fs = boost::filesystem; +namespace bp = boost::process; + +class TestBase : public ::testing::Test { + + public: + + TestBase() { + this->m_leader_addr = this->m_local_ip + ":" + std::to_string(_RAFT_UNIT_TEST_LEADER_PORT_); + } + + virtual ~TestBase() {} + + protected: + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + auto StartTimeing() { + return ::RaftCore::Tools::StartTimeing(); + } + + void EndTiming(const std::chrono::time_point &tp_start,const char* operation_name) { + ::RaftCore::Tools::EndTiming(tp_start,operation_name); + } + + //Runs fn(i) for i = 0.._thread_num-1 on parallel threads, waits for all of them and reports the total elapsed time. + //working_thread > 0 overrides the default of one thread per detected cpu core. + void LaunchMultipleThread(std::function fn,int working_thread=0) { + + auto _tp = this->StartTimeing(); + + std::vector _vec; + + int _thread_num = this->m_cpu_cores; + if (working_thread > 0) + _thread_num = working_thread; + + //hardware_concurrency() may report 0 when the core count is unknown; still run at least one worker. + if (_thread_num <= 0) + _thread_num = 1; + + for (int i = 0; i < _thread_num;++i) { + std::thread* _p_thread = new std::thread(fn,i); + _vec.push_back(_p_thread); + } + + for (auto &_p_thread : _vec) { + _p_thread->join(); + //Release the heap-allocated thread object once it has finished. + delete _p_thread; + } + + this->EndTiming(_tp, "total thread"); + } + + protected: + + const int m_cpu_cores = std::thread::hardware_concurrency(); + + const std::string m_local_ip = _AURORA_LOCAL_IP_; + + std::string m_leader_addr = ""; 
+ + const int m_follower_port = _RAFT_UNIT_TEST_FOLLWER_PORT_; +}; + +class TestSingleBackendFollower : public TestBase { + + public: + + TestSingleBackendFollower() { + + this->m_follower_svc_addr = this->m_local_ip + ":" + std::to_string(this->m_follower_port); + + //Force the instance to listen on the specific port. + ::RaftCore::Config::FLAGS_port = _RAFT_UNIT_TEST_FOLLWER_PORT_; + + //Init is a time consuming operation. + ::RaftCore::Global::GlobalEnv::InitialEnv(); + + m_thread = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + + //Waiting for server to get fully start. + std::this_thread::sleep_for(std::chrono::seconds(3)); + } + + virtual ~TestSingleBackendFollower() { + ::RaftCore::Global::GlobalEnv::StopServer(); + ::RaftCore::Global::GlobalEnv::UnInitialEnv(); + m_thread->join(); + } + + protected: + + std::string m_follower_svc_addr ; + + std::thread * m_thread = nullptr; +}; + +class TestMultipleBackendFollower : public TestBase { + + public: + + TestMultipleBackendFollower() {} + + virtual ~TestMultipleBackendFollower() {} + + protected: + + virtual void StartFollowers(int empty_follower_num=0) final { + auto _generator = [](int idx) ->const char*{ return _TEST_LEADER_BINLOG_; }; + this->StartFollowersFunc(_generator,empty_follower_num); + } + + virtual void StartFollowersFunc(std::function generator, + int empty_follower_num=0,int follower_num = _TEST_FOLLOWER_NUM_) final{ + + //-----------------------Unifying config of leader and followers.-----------------------// + ::RaftCore::CTopologyMgr::Initialize(); + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + + _topo.m_leader = this->m_local_ip + ":" + std::to_string(::RaftCore::Config::FLAGS_port); + + int port_base = _RAFT_UNIT_TEST_FOLLWER_PORT_; + + _topo.m_followers.clear(); + for (int i = 0; i < follower_num; ++i) { + int _port = port_base + i; + _topo.m_followers.emplace(this->m_local_ip + ":" + std::to_string(_port)); + } + 
::RaftCore::CTopologyMgr::Update(_topo); + ::RaftCore::CTopologyMgr::UnInitialize(); + + this->m_main_path = fs::current_path(); + + //-----------------------Starting server.-----------------------// + std::unordered_map _copies; + + _copies.emplace(_AURORA_ELECTION_CONFIG_FILE_,_AURORA_ELECTION_CONFIG_FILE_); + _copies.emplace(_AURORA_MEMBER_CONFIG_FILE_,_AURORA_MEMBER_CONFIG_FILE_); + _copies.emplace(_AURORA_TOPOLOGY_CONFFIG_FILE_, _AURORA_TOPOLOGY_CONFFIG_FILE_); + + for (int i = 0; i < follower_num; ++i) { + int _port = port_base + i; + auto src = generator(i); + _copies.emplace(src, _TEST_FOLLOWER_BINLOG_); + this->StartOneFollower(i,_port,_copies); + _copies.erase(src); + } + + //Empty followers use different topology config file. + _copies.erase(_AURORA_TOPOLOGY_CONFFIG_FILE_); + _copies.emplace(_TEST_TOPOLOGY_MEMCHG_FILE_,_AURORA_TOPOLOGY_CONFFIG_FILE_); + + //Start empty followers for member change testing. + int _empty_follower_base = (follower_num / 10) * 10 + 10; + for (int i = 0; i < empty_follower_num; ++i) { + int _port = _RAFT_UNIT_TEST_NEW_NODES_PORT_ + i; + int _follower_dir_idx = _empty_follower_base + i; + this->StartOneFollower(_follower_dir_idx,_port,_copies); + } + + //Working directory already changed,go back to the initial path. + fs::current_path(this->m_main_path); + + //Waiting for the followers to get fully started. + std::this_thread::sleep_for(std::chrono::seconds(3)); + } + + virtual void EndFollowers() final{ + for (auto& p_child : this->m_p_children) + p_child->terminate(); + } + + protected: + + virtual void StartOneFollower(int follower_idx,int follower_port, + const std::unordered_map &copies) final{ + + //Set up followers. + + char sz_path[256] = {0}; + std::snprintf(sz_path,sizeof(sz_path),"follower_%d",follower_idx); + + fs::current_path(this->m_main_path); + + //Initial followers' working directory. 
+ fs::path _dst_dir = fs::path(sz_path); + fs::remove_all(_dst_dir); + + //This reassignment is trying to solve the 'access denied' problem. + //_dst_dir = fs::path(sz_path); + fs::create_directory(_dst_dir); + + //Copy server dependent files. + for (auto &_pair_kv : copies) + fs::copy_file(fs::path(_pair_kv.first),_dst_dir/_pair_kv.second); + + //Starting follower instances. + fs::current_path(this->m_main_path / fs::path(sz_path)); + +#define _PARA_BUF_SIZE_ (64) + char* _p_port = (char*)malloc(_PARA_BUF_SIZE_); + std::snprintf(_p_port,_PARA_BUF_SIZE_,"--port=%d", follower_port); + + char* _p_a = (char*)malloc(_PARA_BUF_SIZE_); + std::snprintf(_p_a,_PARA_BUF_SIZE_,"--iterating_wait_timeo_us=%d",::RaftCore::Config::FLAGS_iterating_wait_timeo_us); + + char* _p_b = (char*)malloc(_PARA_BUF_SIZE_); + std::snprintf(_p_b,_PARA_BUF_SIZE_,"--election_heartbeat_timeo_ms=%d",::RaftCore::Config::FLAGS_election_heartbeat_timeo_ms); + + char* _p_c = (char*)malloc(_PARA_BUF_SIZE_); + std::snprintf(_p_c,_PARA_BUF_SIZE_,"--checking_heartbeat=%d",::RaftCore::Config::FLAGS_checking_heartbeat); + + auto _env = boost::this_process::environment(); + _env["GLOG_v"] = std::to_string(::RaftCore::Config::FLAGS_child_glog_v).c_str(); + +#define _CMD_BUF_SIZE_ (2048) + +#ifdef _WIN32 + const char *_p_path = "C:\\Users\\95\\Documents\\Visual Studio 2015\\Projects\\apollo\\%s\\raft_svr.exe"; + char*_p_exe_file = (char*)malloc(_CMD_BUF_SIZE_); + #ifdef _DEBUG + std::snprintf(_p_exe_file,_CMD_BUF_SIZE_,_p_path,"Debug"); + #else + std::snprintf(_p_exe_file,_CMD_BUF_SIZE_,_p_path,"Release"); + #endif +#elif __APPLE__ + const char *_p_exe_file = "/Users/arthur/git/aurora/bin/debug/aurora"; +#elif __linux__ + const char *_p_exe_file = "/root/git/aurora/bin/debug/aurora"; +#else + CHECK(false) << "unknown platform"; +#endif + + bp::child *_p_child = new bp::child(_p_exe_file, _env,_p_port,_p_a,_p_b,_p_c); + this->m_p_children.emplace_back(_p_child); + std::cout << "started child process:" << 
_p_child->id() << std::endl; + } + + std::vector m_p_children; + + fs::path m_main_path; +}; + +class TestCluster : public TestMultipleBackendFollower { + +public: + + TestCluster(int empty_followers=0) { + + this->StartFollowers(empty_followers); + + ::RaftCore::Config::FLAGS_port = _RAFT_UNIT_TEST_LEADER_PORT_; + + //Init is a time consuming operation. + ::RaftCore::Global::GlobalEnv::InitialEnv(); + + std::thread *_thread = new std::thread([]() { + ::RaftCore::Global::GlobalEnv::RunServer(); + }); + + _thread->detach(); + + //Waiting for server to get fully start. + std::this_thread::sleep_for(std::chrono::seconds(3)); + } + + virtual ~TestCluster() { + + //Serve could be shutdown if old leader no long exist in the new cluster. + if (::RaftCore::Global::GlobalEnv::IsRunning()) { + ::RaftCore::Global::GlobalEnv::StopServer(); + ::RaftCore::Global::GlobalEnv::UnInitialEnv(); + } + + this->EndFollowers(); + } + + virtual void ClientWrite(const std::string &key="client_key_test",const std::string &val="client_val_test") { + + std::shared_ptr<::grpc::Channel> _channel = grpc::CreateChannel(this->m_leader_addr, grpc::InsecureChannelCredentials()); + std::unique_ptr<::raft::RaftService::Stub> _stub = ::raft::RaftService::NewStub(_channel); + + //std::chrono::system_clock::time_point _deadline = std::chrono::system_clock::now() + std::chrono::seconds(1); + //_context.set_deadline(_deadline); + + ::raft::ClientWriteRequest _w_req; + ::raft::ClientWriteResponse _w_rsp; + + auto *_p_wop = _w_req.mutable_req(); + _p_wop->set_key(key); + _p_wop->set_value(val); + + ::grpc::ClientContext _contextX; + ::grpc::Status _status = _stub->Write(&_contextX, _w_req, &_w_rsp); + ASSERT_TRUE(_status.ok()); + ASSERT_TRUE(_w_rsp.client_comm_rsp().result()==::raft::ErrorCode::SUCCESS) << "ClientWrite fail,detail:" << _w_rsp.DebugString(); + } + +}; + +#endif diff --git a/src/gtest/tools/test_all.h b/src/gtest/tools/test_all.h new file mode 100644 index 0000000..6bc180c --- /dev/null +++ 
b/src/gtest/tools/test_all.h @@ -0,0 +1,34 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_ALL_TOOLS_H__ +#define __GTEST_ALL_TOOLS_H__ + +#include "gtest/tools/test_lock_free_deque.h" +#include "gtest/tools/test_lock_free_hash.h" +#include "gtest/tools/test_lock_free_queue.h" +#include "gtest/tools/test_lock_free_unordered_single_list.h" +#include "gtest/tools/test_lock_free_priority_queue.h" +#include "gtest/tools/test_trivial_double_list.h" +#include "gtest/tools/test_trivial_single_list.h" +#include "gtest/tools/test_utilities.h" +#include "gtest/tools/test_timer.h" + +#endif diff --git a/src/gtest/tools/test_data_structure_base.h b/src/gtest/tools/test_data_structure_base.h new file mode 100644 index 0000000..3d87ac7 --- /dev/null +++ b/src/gtest/tools/test_data_structure_base.h @@ -0,0 +1,48 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. 
+ +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_DATA_STRUCTURE_BASE_H__ +#define __GTEST_DATA_STRUCTURE_BASE_H__ + +#include +#include +#include + +#include "gtest/test_base.h" + +template typename T , typename S> +class DataStructureBase : public TestBase { + + public: + + template + DataStructureBase(Args&&... args) : m_ds(std::forward(args)...) { } + + virtual ~DataStructureBase() {} + + protected: + + T m_ds; + + virtual void Dump() = 0; +}; + + +#endif diff --git a/src/gtest/tools/test_lock_free_deque.h b/src/gtest/tools/test_lock_free_deque.h new file mode 100644 index 0000000..066025c --- /dev/null +++ b/src/gtest/tools/test_lock_free_deque.h @@ -0,0 +1,165 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_DEQUE_H__ +#define __GTEST_LOCK_FREE_DEQUE_H__ + + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/lock_free_deque.h" + +using ::RaftCore::DataStructure::LockFreeDeque; + +class TestLockFreeDeque : public DataStructureBase { + + protected: + + virtual void SetUp() override { + + //Install GC. 
+ std::thread _t([&]() { + while (true) { + if (!this->m_running) + break; + + LockFreeDeque::GC(); + } + }); + _t.detach(); + } + + virtual void TearDown() override { + this->m_running = false; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + virtual void Dump() override { + while (auto shp = this->m_ds.Pop()) { + std::cout << *shp << " "; + } + } + + bool m_running = true; +}; + +TEST_F(TestLockFreeDeque, GeneralOperation) { + + int val = 7; + std::shared_ptr _shp(new int(val)); + this->m_ds.Push(_shp); + + ASSERT_EQ(this->m_ds.Size(), 1); + + decltype(_shp) _out = this->m_ds.Pop(); + ASSERT_EQ(*_out, val); + + _out = this->m_ds.Pop(); + ASSERT_TRUE(!_out); + + //simulate a bug scenario. + int _count = 2; + for (int i = 0; i < _count;++i) + this->m_ds.Push(std::make_shared(i)); + + //simulate a bug scenario. + while (auto shp = this->m_ds.Pop()); + ASSERT_EQ(this->m_ds.Size(),0); + + for (int i = 0; i < _count;++i) + this->m_ds.Push(std::make_shared(i)); + + while (auto shp = this->m_ds.Pop()); + ASSERT_EQ(this->m_ds.Size(),0); + +} + +TEST_F(TestLockFreeDeque, ConcurrentPop) { + + int _count = ::RaftCore::Config::FLAGS_deque_push_count; + + for (int i = 0; i < _count;++i) + this->m_ds.Push(std::make_shared(i)); + + //std::cout << "push done,sleeping..." 
<< std::endl;; + + //std::this_thread::sleep_for(std::chrono::seconds(5)); + + auto _pop_it = [&](int idx){ + while (auto shp = this->m_ds.Pop()); + //std::cout << "got "; + std::cout << "thread pop end " << std::this_thread::get_id() << std::endl; + }; + + this->LaunchMultipleThread(_pop_it); + + std::cout << "pop done,sleeping --debug size:" << this->m_ds.GetSizeByIterating() << std::endl; + + std::this_thread::sleep_for(std::chrono::seconds(10)); + + ASSERT_EQ(this->m_ds.GetLogicalSize(),0); +} + +TEST_F(TestLockFreeDeque, ConcurrentPush) { + + int _count = 10000; + std::shared_ptr _shp(new int(7)); + auto _push_it = [&](int idx){ + for (int i = 0; i < _count;++i) { + //std::cout << "thread : " << std::this_thread::get_id() << " is pushing" << std::endl; + this->m_ds.Push(_shp); + } + }; + + this->LaunchMultipleThread(_push_it); + + ASSERT_EQ(this->m_ds.Size(),this->m_cpu_cores * _count); +} + +TEST_F(TestLockFreeDeque, ConcurrentPushPop) { + + int _count = 100; + for (int i = 0; i < _count;++i) + this->m_ds.Push(std::make_shared(i)); + + auto _do_it = [&](int idx){ + int _round = 100000; + for (int i = 0; i < _round;++i) { + auto shp = this->m_ds.Pop(); + if (!shp) { + std::cout << "thread:" << std::this_thread::get_id() << " pop empty" + << ",size:" << this->m_ds.Size() << ",i:" << i << std::endl; + continue; + } + + auto x = *shp; + *shp = _count + x + 1; + this->m_ds.Push(shp); + } + }; + + this->LaunchMultipleThread(_do_it); + + std::cout << "--debug size:" << this->m_ds.GetSizeByIterating() << std::endl; + + ASSERT_EQ(this->m_ds.Size(),_count); +} + + +#endif diff --git a/src/gtest/tools/test_lock_free_hash.h b/src/gtest/tools/test_lock_free_hash.h new file mode 100644 index 0000000..b1fc7be --- /dev/null +++ b/src/gtest/tools/test_lock_free_hash.h @@ -0,0 +1,295 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free 
Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_HASH_H__ +#define __GTEST_LOCK_FREE_HASH_H__ + +#include +#include +#include + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/lock_free_hash.h" +#include "tools/lock_free_hash_specific.h" + +using ::RaftCore::DataStructure::LockFreeHash; +using ::RaftCore::DataStructure::HashNodeAtomic; +using ::RaftCore::DataStructure::HashTypeBase; +using ::RaftCore::DataStructure::LockFreeHashAtomic; + +template +class DataStructureHash { + + public: + + template + DataStructureHash(Args&&... args) : m_ds(std::forward(args)...) 
{ } + + virtual ~DataStructureHash() {} + + protected: + + T m_ds; + + virtual void Dump() = 0; +}; + + +class TestHash final : public HashTypeBase{ + + public: + + TestHash(int i) :m_i(i) {} + + virtual bool operator<(const TestHash& _other)const noexcept override { + return m_i < _other.m_i; + } + + virtual bool operator==(const TestHash& _other)const noexcept override { + return m_i == _other.m_i; + } + + virtual std::size_t Hash() const noexcept override { + return this->m_i; + } + + int m_i; +}; + +typedef std::shared_ptr TypePtrTestHash; + +template +class TestLockFreeHashBase : public TestBase, public DataStructureHash { + + protected: + + TestLockFreeHashBase() :DataStructureHash(500) {} + + virtual ~TestLockFreeHashBase() noexcept{} + + protected: + + virtual void SetUp() override {} + + virtual void TearDown() override {} + + virtual void Dump() override { + std::list> _output; + this->m_ds.GetOrderedByKey(_output); + + for (auto &_item : _output) + std::cout << _item->m_i << " "; + } + + protected: + + int m_sum = 10000; +}; + +class TestLockFreeHash : public TestLockFreeHashBase> { + + protected: + + TestLockFreeHash() {} + + virtual ~TestLockFreeHash() noexcept{} + + void GeneralOperation()noexcept { + + int val = 7; + + auto _tp = this->StartTimeing(); + + for (int i = 0; i < this->m_sum;++i) { + std::shared_ptr _shp_key(new TestHash(val+i)); + std::shared_ptr _shp_val(new int(val+i)); + this->m_ds.Insert(_shp_key,_shp_val); + } + + //Do insert again ,should overwrite all the previously inserted values. 
+ for (int i = 0; i < this->m_sum;++i) { + std::shared_ptr _shp_key(new TestHash(val+i)); + std::shared_ptr _shp_val(new int(val+i)); + this->m_ds.Insert(_shp_key,_shp_val); + } + + ASSERT_TRUE(this->m_ds.Size() == this->m_sum); + + std::shared_ptr _shp_val; + this->m_ds.Read(TestHash(this->m_sum),_shp_val); + ASSERT_TRUE(*_shp_val == this->m_sum); + + /* + int _new_val = 17; + std::shared_ptr _shp_val_2(new int(_new_val)); + TestHash *_p_obj = new TestHash(this->m_sum); + if (!this->m_ds.Upsert(_p_obj, _shp_val_2)) + delete _p_obj; + + _shp_val.reset(); + this->m_ds.Read(TestHash(this->m_sum),_shp_val); + ASSERT_TRUE(*_shp_val == _new_val); + */ + + auto _traverse = [](const std::shared_ptr &k,const std::shared_ptr &v)->bool { + //std::cout << "k:" << k->Hash() << ",v:" << *v << std::endl; + return true; + }; + + this->m_ds.Iterate(_traverse); + + auto _cond = [&](const TestHash &x) ->bool{ + return x.m_i >= val; + }; + + ASSERT_TRUE(this->m_ds.CheckCond(_cond)); + this->EndTiming(_tp, "CheckCond"); + + std::list> _output; + this->m_ds.GetOrderedByKey(_output); + + LockFreeHash::ValueComparator _func = [](const std::shared_ptr &left, const std::shared_ptr &right) { + return *left < *right; + }; + + std::map,TypePtrTestHash,decltype(_func)> _output_val(_func); + + this->m_ds.GetOrderedByValue(_output_val); + + ASSERT_EQ(_output_val.size(), this->m_sum); + ASSERT_EQ(*_output_val.cbegin()->first, val); + + this->EndTiming(_tp, "GetOrder"); + + _tp = this->StartTimeing(); + + ASSERT_EQ(_output.size(), this->m_sum); + ASSERT_EQ(_output.front()->m_i, val); + + ASSERT_TRUE(this->m_ds.Find(*_output.front())); + ASSERT_FALSE(this->m_ds.Find(TestHash(6))); + + this->m_ds.Delete(*_output.front()); + + this->EndTiming(_tp, "find & delete"); + + _tp = this->StartTimeing(); + + this->m_ds.GetOrderedByKey(_output); + ASSERT_EQ(_output.size(), this->m_sum-1); + this->EndTiming(_tp, "GetOrderedByKey"); + + int _adder = 10; + auto _modifier = [&](std::shared_ptr &x) ->void{ + 
x->m_i += _adder; + }; + this->m_ds.Map(_modifier); + this->m_ds.GetOrderedByKey(_output); + ASSERT_EQ(_output.front()->m_i, val + _adder + 1); + + this->m_ds.Clear(); + this->m_ds.GetOrderedByKey(_output); + ASSERT_EQ(_output.size(), 0); + } + + void ConcurrentOperation() noexcept { + + //The program will consume more memory as _counter increasing due to its inner mechanism. + int _counter = 10000; + auto _insert = [&](int idx) { + auto _tp = this->StartTimeing(); + bool _flg = true; + for (int i = 0; i < _counter; ++i) { + + int _val = idx * _counter + i; + std::shared_ptr _shp_key(new TestHash(_val)); + std::shared_ptr _shp_val(new int(_val)); + this->m_ds.Insert(_shp_key,_shp_val); + ASSERT_TRUE(this->m_ds.Find(*_shp_key)); + + this->m_ds.Read(TestHash(_val),_shp_val); + ASSERT_TRUE(*_shp_val==_val); + + if (_flg) { + this->m_ds.Delete(*_shp_key); + ASSERT_FALSE(this->m_ds.Find(*_shp_key)); + } + _flg = !_flg; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_insert); + + auto _tp = this->StartTimeing(); + + std::list> _output; + this->m_ds.GetOrderedByKey(_output); + + this->EndTiming(_tp, "get ordered"); + + ASSERT_EQ(_output.size(), this->m_cpu_cores * _counter/2 ); + } +}; + +class TestLockFreeHashAtomic : public TestLockFreeHashBase> { + + protected: + + TestLockFreeHashAtomic() {} + + virtual ~TestLockFreeHashAtomic() noexcept{} + +}; + +TEST_F(TestLockFreeHash, GeneralOperation) { + + this->GeneralOperation(); +} + +TEST_F(TestLockFreeHash, ConcurrentOperation) { + + this->ConcurrentOperation(); +} + +TEST_F(TestLockFreeHash, Allocation) { + for (int i = 0; i < 1000; ++i) + LockFreeHash obj; +} + +TEST_F(TestLockFreeHashAtomic, Specifics) { + + //this->GeneralOperation(); + + int _new_val = 17; + + int* _p_val = new int(_new_val); + + TestHash *_p_obj = new TestHash(this->m_sum); + if (!this->m_ds.Upsert(_p_obj, _p_val)) + delete _p_obj; + + std::shared_ptr _shp_val; + 
this->m_ds.Read(TestHash(this->m_sum),_shp_val); + ASSERT_EQ(*_shp_val, _new_val); +} + +#endif diff --git a/src/gtest/tools/test_lock_free_priority_queue.h b/src/gtest/tools/test_lock_free_priority_queue.h new file mode 100644 index 0000000..7e86feb --- /dev/null +++ b/src/gtest/tools/test_lock_free_priority_queue.h @@ -0,0 +1,189 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_PRIORITY_QUEUE_H__ +#define __GTEST_LOCK_FREE_PRIORITY_QUEUE_H__ + +#include +#include +#include + +#include "tools/lock_free_priority_queue.h" + +using ::RaftCore::DataStructure::LockFreeQueue; +using ::RaftCore::DataStructure::LockFreeQueueBase; +using ::RaftCore::DataStructure::LockFreePriotityQueue; + +class TestLockFreePriorityQueue : public DataStructureBase { + + public: + + struct STask1 { + STask1(int i):m_i(i) {} + + static bool Func1(std::shared_ptr ptr_element) { + //std::cout << "Task1 got:" << ptr_element->m_i << std::endl; + return true; + } + + int m_i; + }; + + struct STask2 { + STask2(int i):m_i(i) {} + + static bool Func2(std::shared_ptr ptr_element) { + //std::cout << "Task2 got:" << ptr_element->m_i << std::endl; + return true; + } + + int m_i; + }; + + struct STask3 { + STask3(int i):m_i(i) {} + + static bool Func3(std::shared_ptr ptr_element) { + //std::cout << "Task3 got:" << ptr_element->m_i << std::endl; + return true; + } + + 
int m_i; + }; + + public: + + TestLockFreePriorityQueue() {} + virtual ~TestLockFreePriorityQueue() {} + + virtual void SetUp() override { + + this->m_pri_queue.Initialize(this->m_cpu_cores * 2); + //this->m_pri_queue.Initialize(1); + + auto _p_queue_2 = new LockFreeQueue(); + _p_queue_2->Initilize(STask2::Func2,::RaftCore::Config::FLAGS_lockfree_queue_resync_data_elements); + this->m_pri_queue.AddTask(LockFreePriotityQueue::TaskType::RESYNC_DATA,(LockFreeQueueBase*)_p_queue_2); + + auto _p_queue_3 = new LockFreeQueue(); + _p_queue_3->Initilize(STask3::Func3,::RaftCore::Config::FLAGS_lockfree_queue_resync_log_elements); + this->m_pri_queue.AddTask(LockFreePriotityQueue::TaskType::RESYNC_LOG,(LockFreeQueueBase*)_p_queue_3); + + } + + virtual void TearDown() override {} + + virtual void Dump() override {} + + protected: + + LockFreePriotityQueue m_pri_queue; +}; + +TEST_F(TestLockFreePriorityQueue, GeneralOperation) { + + this->m_pri_queue.Launch(); + + std::this_thread::sleep_for(std::chrono::milliseconds(600)); + + std::shared_ptr _shp_t3_1(new STask3(1)); + int _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG,&_shp_t3_1); + if (_rst_val==QUEUE_SUCC) + std::cout << "T3 Push fail ,result:" << _rst_val << std::endl; + + std::shared_ptr _shp_t3_2(new STask3(2)); + _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG,&_shp_t3_2); + if (_rst_val==QUEUE_SUCC) + std::cout << "T3 Push fail ,result:" << _rst_val << std::endl; + + std::shared_ptr _shp_t2_1(new STask2(10)); + _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG,&_shp_t2_1); + if (_rst_val==QUEUE_SUCC) + std::cout << "T2 Push fail ,result:" << _rst_val << std::endl; + + std::shared_ptr _shp_t2_2(new STask2(11)); + _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG,&_shp_t2_2); + if (_rst_val==QUEUE_SUCC) + std::cout << "T2 Push fail ,result:" << _rst_val << std::endl; + + std::cout << "wait a little 
while"; + std::this_thread::sleep_for(std::chrono::milliseconds(200)); + + this->m_pri_queue.UnInitialize(); +} + +TEST_F(TestLockFreePriorityQueue, ConcurrentOperation) { + + this->m_pri_queue.Launch(); + + //Pushing. + auto _push = [&](int idx) { + auto _tp = this->StartTimeing(); + + int i = 0; + bool _process_result = true; + int _counter = 0, _run_times = 50000; + int _rst_val = 0; + while (_process_result && _run_times>=0) { + + _counter++; + _run_times--; + + if (_counter>=30) { + _counter = 0; + continue; + } + + std::shared_ptr _shp_t3(new STask3(i++)); + _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG,&_shp_t3); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) { + std::cout << "T3 Push fail ,result:" << _rst_val << std::endl; + continue; + } + + if (_counter>=20) + continue; + + std::shared_ptr _shp_t2(new STask2(i++)); + _rst_val = this->m_pri_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_DATA,&_shp_t2); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) { + std::cout << "T2 Push fail ,result:" << _rst_val << std::endl; + continue; + } + + this->m_pri_queue.GetSize(); + + std::this_thread::sleep_for(std::chrono::microseconds(10)); + } + + this->EndTiming(_tp,"one thread inserting"); + }; + + this->LaunchMultipleThread(_push); + + this->m_pri_queue.UnInitialize(); +} + + + + +#endif diff --git a/src/gtest/tools/test_lock_free_queue.h b/src/gtest/tools/test_lock_free_queue.h new file mode 100644 index 0000000..046c00e --- /dev/null +++ b/src/gtest/tools/test_lock_free_queue.h @@ -0,0 +1,220 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_QUEUE_H__ +#define __GTEST_LOCK_FREE_QUEUE_H__ + +#include +#include +#include +#include + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/lock_free_queue.h" +#include "common/error_code.h" + +using ::RaftCore::DataStructure::QueueNode; +using ::RaftCore::DataStructure::LockFreeQueue; + +class TestLockFreeQueue : public DataStructureBase { + + public: + + TestLockFreeQueue(): DataStructureBase() {} + + virtual void SetUp() override { + this->m_fn_cb = [](std::shared_ptr ptr_element) ->bool{ + //std::cout << *ptr_element << " "; + return true; + }; + this->m_ds.Initilize(this->m_fn_cb,2 * 1024); + } + + virtual void TearDown() override { + } + + protected: + + virtual void Dump() override { + } + + std::function ptr_element)> m_fn_cb; + +}; + +TEST_F(TestLockFreeQueue, GeneralOperation) { + + int _num_processed = 0; + int i = 0; + bool _process_result = true; + int _rst_val = 0; + while (_process_result) { + std::shared_ptr _shp(new int(i++)); + _rst_val = this->m_ds.Push(&_shp); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) + std::cout << "Push fail ,result:" << _rst_val << std::endl; + _num_processed++; + } + + int _original_size = this->m_ds.GetCapacity(); + ASSERT_EQ(_num_processed, _original_size); + + _num_processed = 0; + _process_result = true; + while (_process_result) { + _rst_val = this->m_ds.PopConsume(); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) + std::cout << "Pop fail ,result:" << _rst_val << std::endl; + _num_processed++; + } + + ASSERT_EQ(_num_processed, _original_size); + 
ASSERT_EQ(this->m_ds.GetSize(), 0); + ASSERT_TRUE(this->m_ds.Empty()); +} + +TEST_F(TestLockFreeQueue, ConcurrentPush) { + + auto _insert = [&](int idx) { + auto _tp = this->StartTimeing(); + + int i = 0; + bool _process_result = true; + while (_process_result) { + std::shared_ptr _shp(new int(i++)); + int _rst_val = this->m_ds.Push(&_shp); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) + std::cout << "Push fail ,result:" << _rst_val << std::endl; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_insert); + + std::cout << this->m_ds.GetSize() << "|" << this->m_ds.GetCapacity() - 1; + + ASSERT_EQ(this->m_ds.GetSize(), this->m_ds.GetCapacity()-1); +} + +TEST_F(TestLockFreeQueue, ConcurrentPopConsume) { + + int i = 0; + bool _process_result = true; + while (_process_result) { + std::shared_ptr _shp(new int(i++)); + int _rst_val = this->m_ds.Push(&_shp); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) + std::cout << "Push fail ,result:" << _rst_val << std::endl; + } + + ASSERT_EQ(this->m_ds.GetSize(), this->m_ds.GetCapacity() -1); + + + auto _pop = [&](int idx) { + auto _tp = this->StartTimeing(); + + bool _pop_rst = true; + while (_pop_rst) { + int _rst_val = this->m_ds.PopConsume(); + _pop_rst = _rst_val==QUEUE_SUCC; + if (!_pop_rst) + std::cout << "PopConsume fail ,result:" << _rst_val << std::endl; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_pop); + ASSERT_EQ(this->m_ds.GetSize(), 0); +} + +TEST_F(TestLockFreeQueue, ConcurrentPushPopConsume) { + + auto _push_pop = [&](int idx) { + auto _tp = this->StartTimeing(); + + int i = 0; + bool _process_result = true; + int _counter = 0; + while (_process_result) { + bool _only_push = false; + + _counter++; + if (_counter > 10) { + _counter = 0; + _only_push = true; + } + + std::shared_ptr _shp(new int(i++)); + + int _rst_val = this->m_ds.Push(&_shp); + _process_result = _rst_val==QUEUE_SUCC; + if 
(!_process_result) { + std::cout << "Push fail ,result:" << _rst_val << std::endl; + continue; + } + + if (_only_push) + continue; + + _rst_val = this->m_ds.PopConsume(); + _process_result = _rst_val==QUEUE_SUCC; + if (!_process_result) { + std::cout << "PopConsume fail ,result:" << _rst_val << std::endl; + continue; + } + + //std::cout << "------current size:" << this->m_ds.GetSize() << std::endl; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_push_pop); + + ASSERT_EQ(this->m_ds.GetSize(), this->m_ds.GetCapacity()-1); + +} + +TEST_F(TestLockFreeQueue, Cmp1) { + std::shared_ptr _shp(new int(7)); + + for (int i = 0; i < 1000000; ++i) { + this->m_ds.Push(&_shp); + int _rst_val = this->m_ds.PopConsume(); + CHECK(_rst_val == QUEUE_SUCC); + } +} + +TEST_F(TestLockFreeQueue, Cmp2) { + std::shared_ptr _shp(new int(7)); + + for (int i = 0; i < 1000000; ++i) { + int* p = new int(7); + delete p; + } +} + +#endif diff --git a/src/gtest/tools/test_lock_free_single_list.h b/src/gtest/tools/test_lock_free_single_list.h new file mode 100644 index 0000000..c26a915 --- /dev/null +++ b/src/gtest/tools/test_lock_free_single_list.h @@ -0,0 +1,105 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_SINGLE_LIST_H__ +#define __GTEST_LOCK_FREE_SINGLE_LIST_H__ + + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/lock_free_single_list.h" + +using ::RaftCore::DataStructure::SingleListNode; +using ::RaftCore::DataStructure::LockFreeSingleList; + +class TestLockFreeSingList : public DataStructureBase { + + protected: + + virtual void SetUp() override { + + //Install GC. + std::thread _t([&]() { + //auto _f = std::bind(&LockFreeSingleList::PurgeSingleList, &(this->m_ds)); + while (true) + this->m_ds.PurgeSingleList(2); + }); + _t.detach(); + } + + virtual void TearDown() override {} + + virtual void Dump() override { + auto _printer = [](int *p) { + std::cout << *p << " "; + }; + this->m_ds.Iterate(_printer); + } +}; + +TEST_F(TestLockFreeSingList, GeneralOperation) { + + auto _deleter = [](int* p_data) { + std::cout << "customized deleter called" << std::endl; + delete p_data; + }; + + this->m_ds.SetDeleter(_deleter); + + uint32_t _push_num = 5; + for (std::size_t i = 0; i < _push_num; ++i) { + int *_p_i = new int(i); + this->m_ds.PushFront(_p_i); + } + + ASSERT_TRUE(this->m_ds.Size() <= _push_num); + + int _retain_num = 2; + this->m_ds.PurgeSingleList(_retain_num); + ASSERT_EQ(this->m_ds.Size(), _retain_num); + + std::cout << "after testing...:" << std::endl; + + this->Dump(); +} + + +TEST_F(TestLockFreeSingList, ConcurrentOperation) { + + int _count = 10000; + + auto _push_it = [&](int idx){ + for (int i = 0; i < _count; ++i) { + int *_p_i = new int(i); + this->m_ds.PushFront(_p_i); + } + }; + + this->LaunchMultipleThread(_push_it); + + //Waiting for purging done. 
+ std::this_thread::sleep_for(std::chrono::seconds(1)); + + ASSERT_EQ(this->m_ds.Size() , 2) << "actual size:" << this->m_ds.Size(); + + std::cout << "done."; +} + + +#endif diff --git a/src/gtest/tools/test_lock_free_unordered_single_list.h b/src/gtest/tools/test_lock_free_unordered_single_list.h new file mode 100644 index 0000000..f81a8bc --- /dev/null +++ b/src/gtest/tools/test_lock_free_unordered_single_list.h @@ -0,0 +1,118 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_LOCK_FREE_UNORDERED_jSINGLE_LIST_H__ +#define __GTEST_LOCK_FREE_UNORDERED_jSINGLE_LIST_H__ + + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/lock_free_unordered_single_list.h" + +using ::RaftCore::DataStructure::UnorderedSingleListNode; +using ::RaftCore::DataStructure::LockFreeUnorderedSingleList; + +class TestLockFreeUnorderedSingList : public DataStructureBase { + + protected: + + virtual void SetUp() override { + + this->m_remain = ::RaftCore::Config::FLAGS_retain_num_unordered_single_list; + + //Install GC. 
+ std::thread _t([&]() { + //auto _f = std::bind(&LockFreeUnorderedSingleList::PurgeSingleList, &(this->m_ds)); + while (true) { + if (!this->m_running) + break; + + this->m_ds.PurgeSingleList(this->m_remain); + } + }); + _t.detach(); + } + + virtual void TearDown() override { + this->m_running = false; + std::this_thread::sleep_for(std::chrono::milliseconds(10)); + } + + virtual void Dump() override { + auto _printer = [](int *p) { + std::cout << *p << " "; + }; + this->m_ds.Iterate(_printer); + } + + int m_remain = 10000; + + bool m_running = true; +}; + +TEST_F(TestLockFreeUnorderedSingList, GeneralOperation) { + + auto _deleter = [](int* p_data) { + std::cout << "customized deleter called" << std::endl; + delete p_data; + }; + + this->m_ds.SetDeleter(_deleter); + + uint32_t _push_num = 5; + for (std::size_t i = 0; i < _push_num; ++i) { + int *_p_i = new int(i); + this->m_ds.PushFront(_p_i); + } + + ASSERT_TRUE(this->m_ds.Size() <= _push_num); + + int _retain_num = 2; + this->m_ds.PurgeSingleList(_retain_num); + ASSERT_EQ(this->m_ds.Size(), _retain_num); + + std::cout << "after testing...:" << std::endl; + + this->Dump(); +} + + +TEST_F(TestLockFreeUnorderedSingList, ConcurrentOperation) { + + int _count = 10000; + + auto _push_it = [&](int idx){ + for (int i = 0; i < _count; ++i) { + int *_p_i = new int(i); + this->m_ds.PushFront(_p_i); + } + }; + + this->LaunchMultipleThread(_push_it); + + //Waiting for purging done. 
+ std::this_thread::sleep_for(std::chrono::seconds(1)); + + //ASSERT_EQ(this->m_ds.Size() , this->m_remain) << "actual size:" << this->m_ds.Size(); + + std::cout << "done."; +} + + +#endif diff --git a/src/gtest/tools/test_timer.h b/src/gtest/tools/test_timer.h new file mode 100644 index 0000000..7fe033d --- /dev/null +++ b/src/gtest/tools/test_timer.h @@ -0,0 +1,78 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_TIMER_H__ +#define __GTEST_TIMER_H__ + +#include +#include +#include +#include + +#include "gtest/test_base.h" +#include "tools/timer.h" + +using ::RaftCore::Timer::GlobalTimer; + +class TestTimer : public TestBase { + + public: + + TestTimer() {} + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + + protected: + + +}; + +TEST_F(TestTimer, GeneralOperation) { + + GlobalTimer::Initialize(); + + int _counter = 0; + auto _print = [&]() ->bool{ + if (_counter++ >= 10) { + return false; + } + + std::cout << "thread id: " << std::this_thread::get_id() << " job called." << std::endl; + return true; + }; + + GlobalTimer::AddTask(1000,_print); + + std::this_thread::sleep_for(std::chrono::seconds(8)); + + GlobalTimer::UnInitialize(); + + //Manually checking if the thread exist + std::this_thread::sleep_for(std::chrono::seconds(10)); + + std::cout << "done." 
<< std::endl; +} + + +#endif diff --git a/src/gtest/tools/test_trivial_double_list.h b/src/gtest/tools/test_trivial_double_list.h new file mode 100644 index 0000000..a52207b --- /dev/null +++ b/src/gtest/tools/test_trivial_double_list.h @@ -0,0 +1,459 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_TRIVIAL_DOUBLE_LIST_H__ +#define __GTEST_TRIVIAL_DOUBLE_LIST_H__ + +#include +#include +#include +#include + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/trivial_lock_double_list.h" + +using ::RaftCore::DataStructure::TrivialLockDoubleList; +using ::RaftCore::DataStructure::OrderedTypeBase; +using ::RaftCore::DataStructure::DoubleListNode; + +class TestDoubletList final : public OrderedTypeBase{ + + public: + + TestDoubletList(uint32_t i) : m_i(i) {} + + virtual bool operator<(const TestDoubletList& _other)const noexcept override { + return this->m_i < _other.m_i; + } + + virtual bool operator>(const TestDoubletList& _other)const noexcept override { + return this->m_i > _other.m_i; + } + + virtual bool operator==(const TestDoubletList& _other)const noexcept override { + return this->m_i == _other.m_i; + } + + uint32_t m_i; + +}; + +class TestTrivialLockDoubleList : public DataStructureBase { + + public: + + TestTrivialLockDoubleList(): DataStructureBase( + std::shared_ptr(new TestDoubletList(0x0)), + std::shared_ptr(new 
TestDoubletList(0xFFFFFFFF)) ) {} + + protected: + + virtual void Dump() override { + + auto _print = [](const TestDoubletList &_other) { + std::cout << _other.m_i << " "; + return true; + }; + + this->m_ds.Iterate(_print); + } + + int CheckCutHeadByValue(int _max,bool check_delete=true) { + DoubleListNode* output_head = this->m_ds.CutHeadByValue(TestDoubletList(_max)); + if (output_head == nullptr) { + //VLOG(89) << "--------cut head empty---------"; + return 0; + } + + int _head_size = 0; + auto _cur = output_head; + + //std::cout << "checking cut by value:" << _max << std::endl; + + std::string _line = ""; + + uint32_t _last = 0; + while (_cur) { + if (check_delete) + CHECK(!_cur->IsDeleted()) << "deleted node found in the output list."; + CHECK(_last < _cur->m_val->m_i) << "check order fail:" << _last + << "|" << _cur->m_val->m_i << ",until now values:" << _line; + _last = _cur->m_val->m_i; + + //std::cout << _cur->m_val->m_i << " "; + _line += ("|" + std::to_string(_cur->m_val->m_i)); + + _head_size++; + _cur = _cur->m_atomic_next.load(); + } + //this->m_ds.ReleaseCutHead(output_head); + + //VLOG(89) << "cuthead values: " << _line; + + return _head_size; + } + + int CheckCutHeadAdjacent(bool check_delete=true) { + auto _adjacent_judger = [](const TestDoubletList &a,const TestDoubletList &b)->bool { return b.m_i == a.m_i + 1; }; + + DoubleListNode* output_head = this->m_ds.CutHead(_adjacent_judger); + if (output_head == nullptr) { + std::cout << "--------cut head empty---------" << std::endl; + return 0; + } + + int _head_size = 0; + auto _cur = output_head; + //std::cout << std::this_thread::get_id() << " checking cut by adjacent "; + uint32_t _last = 0; + std::string _line = ""; + while (_cur) { + if (check_delete) + CHECK(!_cur->IsDeleted()) << "deleted node found in the output list."; + if (_last > 0) + CHECK(_last+1 == _cur->m_val->m_i) << "check adjacent fail:" << _last + << "|" << _cur->m_val->m_i << ",total:" << _line; + _last = _cur->m_val->m_i; + 
//std::cout << _cur->m_val->m_i << " "; + + _line += ("|" + std::to_string(_cur->m_val->m_i)); + + _head_size++; + _cur = _cur->m_atomic_next.load(); + } + //std::cout << std::endl; + this->m_ds.ReleaseCutHead(output_head); + + return _head_size; + } +}; + + +TEST_F(TestTrivialLockDoubleList, GeneralOperation) { + + std::shared_ptr _shp_1(new TestDoubletList(3)); + DoubleListNode* _node_1 = new DoubleListNode(_shp_1); + + std::shared_ptr _shp_2(new TestDoubletList(5)); + DoubleListNode* _node_2 = new DoubleListNode(_shp_2); + + _node_1->m_atomic_next.store(_node_2); + _node_2->m_atomic_pre.store(_node_1); + + DoubleListNode::Apply(_node_1,[](DoubleListNode* p_input) { + std::cout << "element: " << p_input->m_val->m_i << std::endl; + }); + + + //Insert 100 elements. + int _total = 100; + int _offset = 10; + for (int i = 1; i <= _total;++i) { + + bool _over_half_way = false; + if (i >= _total / 2) { + _over_half_way = true; + } + + //int _avg = (_total + _total / 2) / 2; + //int val = _over_half_way ? _avg*2-i + _offset : i ; + int val = _over_half_way ? _total + _offset - (i - _total / 2) : i; + + std::shared_ptr _shp(new TestDoubletList(val)); + DoubleListNode* new_node = new DoubleListNode(_shp); + + if (_over_half_way) { + this->m_ds.Insert(new_node); + continue; + } + + this->m_ds.Insert(_shp); + } + this->Dump(); + ASSERT_EQ(this->m_ds.GetSize(), _total); + + //Delete 10 elements. + int _delete_num = 10; + for (int i = 1; i <= _delete_num;++i) { + std::shared_ptr _shp(new TestDoubletList(i)); + this->m_ds.Delete(_shp); + } + ASSERT_EQ(this->m_ds.GetSize(), _total - _delete_num); + + //CutHead + auto _adjacent = [](const TestDoubletList &left, const TestDoubletList &right)->bool { + return left.m_i + 1 == right.m_i; + }; + int _adjacent_size = 0; + DoubleListNode* output_head = this->m_ds.CutHead(_adjacent); + auto _cur = output_head; + std::cout << "checking adjacent..." 
<< std::endl; + while (_cur) { + CHECK(!_cur->IsDeleted()) << "deleted node found in the output list."; + + std::cout << _cur->m_val->m_i << " "; + _adjacent_size++; + _cur = _cur->m_atomic_next.load(); + } + this->m_ds.ReleaseCutHead(output_head); + ASSERT_EQ(_adjacent_size, _total / 2 - _delete_num - 1); + + //Cut by value. + int _less_than = 80; + output_head = this->m_ds.CutHeadByValue(TestDoubletList(_less_than)); + int _head_size = 0; + _cur = output_head; + std::cout << "checking cut by value:" << _less_than << std::endl; + while (_cur) { + CHECK(!_cur->IsDeleted()) << "deleted node found in the output list."; + + std::cout << _cur->m_val->m_i << " "; + _head_size++; + _cur = _cur->m_atomic_next.load(); + } + this->m_ds.ReleaseCutHead(output_head); + ASSERT_EQ(_head_size,_less_than - _total/2 - _offset + 1); + + this->m_ds.DeleteAll(); + ASSERT_EQ(this->m_ds.GetSize(),0); + + this->m_ds.Clear(); + ASSERT_EQ(this->m_ds.GetSize(),0); +} + +TEST_F(TestTrivialLockDoubleList, ConcurrentInsert) { + + int _max = 1000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + auto _insert = [&](int idx) { + auto _tp = this->StartTimeing(); + + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << std::endl; + std::shared_ptr _shp(new TestDoubletList(insert_val)); + this->m_ds.Insert(_shp); + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + + this->LaunchMultipleThread(_insert); + + ASSERT_LE(this->CheckCutHeadByValue(_max),_max); +} + +TEST_F(TestTrivialLockDoubleList, ConcurrentInsertDelete) { + + int _max = 1000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + auto _insert_delete = [&](int idx) { + auto _tp = this->StartTimeing(); + + bool 
_delete_flg = false; + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = (dis(gen) % (_max/2)); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + std::shared_ptr _shp(new TestDoubletList(insert_val)); + this->m_ds.Insert(_shp); + + if (_delete_flg) { + //std::cout << "thread:" << std::this_thread::get_id() << " deleting " << insert_val << ",i:" << i << std::endl; + this->m_ds.Delete(_shp); + } + + //Reverse the flag. + _delete_flg = !_delete_flg; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + + this->LaunchMultipleThread(_insert_delete); + + ASSERT_LE(this->CheckCutHeadByValue(_max),_max/2); +} + +TEST_F(TestTrivialLockDoubleList, ConcurrentInsertCutHead) { + + int _max = 10; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + std::atomic _total_cut_size(0); + + auto _insert_cut = [&](int idx) { + auto _tp = this->StartTimeing(); + + int _counter = 0; + + int _start = _max*idx + 1; + int _end = _start + _max - 1; + + //std::cout << "start:" << _start << ",end:" << _end << std::endl; + + for (int i = _end; i >= _start; --i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + + //VLOG(89) << "inserting " << i; + + std::shared_ptr _shp(new TestDoubletList(i)); + this->m_ds.Insert(_shp); + + //VLOG(89) << "inserted " << i; + + _counter++; + if (_counter >= 10) { + int _size = this->CheckCutHeadByValue(_end,false); + + //VLOG(89) << "interval cut head,size:" << _size; + + _counter = 0; + _total_cut_size.fetch_add(_size); + } + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + int _thread_num = this->m_cpu_cores; + //int _thread_num = 6; + this->LaunchMultipleThread(_insert_cut, _thread_num); + + ASSERT_EQ(_total_cut_size.load(), _max * _thread_num); + + std::cout << 
"finial cutHead size:" <CheckCutHeadByValue(_max,false) << std::endl; +} + +TEST_F(TestTrivialLockDoubleList, ConcurrentInsertDeleteCutHead) { + + int _max = 2000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + auto _insert_delete_cut = [&](int idx) { + auto _tp = this->StartTimeing(); + + bool _delete_flg = false; + int _counter = 0; + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + std::shared_ptr _shp(new TestDoubletList(insert_val)); + this->m_ds.Insert(_shp); + + if (_delete_flg) { + //std::cout << "thread:" << std::this_thread::get_id() << " deleting " << insert_val << ",i:" << i << std::endl; + this->m_ds.Delete(_shp); + } + //Reverse the flag. + _delete_flg = !_delete_flg; + + _counter++; + if (_counter >= 20) { + int _size = this->CheckCutHeadByValue(_max,false); + //std::cout << "interval cut head,size:" << _size << std::endl; + _counter = 0; + } + + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + + this->LaunchMultipleThread(_insert_delete_cut); + + std::cout << "finial cutHead size:" <CheckCutHeadByValue(_max,false) << std::endl; +} + +TEST_F(TestTrivialLockDoubleList, Adjacent) { + + std::shared_ptr _shp(new TestDoubletList(1)); + this->m_ds.Insert(_shp); + this->m_ds.Delete(_shp); + this->m_ds.Insert(_shp); + + ASSERT_EQ(this->CheckCutHeadAdjacent(), 1); + ASSERT_EQ(this->CheckCutHeadAdjacent(), 0); + + //Do it again. 
+ this->m_ds.Insert(_shp); + this->m_ds.Delete(_shp); + this->m_ds.Insert(_shp); + + std::shared_ptr _shp_2(new TestDoubletList(3)); + + this->m_ds.Insert(_shp_2); + this->m_ds.Delete(_shp_2); + + this->m_ds.Insert(_shp_2); + this->m_ds.Delete(_shp_2); + + this->m_ds.Insert(_shp_2); + + ASSERT_EQ(this->CheckCutHeadAdjacent(), 1); + ASSERT_EQ(this->CheckCutHeadAdjacent(), 1); +} + +TEST_F(TestTrivialLockDoubleList, ConcurrentInsertCutHeadAdjacent) { + + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, 10); + + auto _insert_cut_adjacent = [&](int idx) { + auto _tp = this->StartTimeing(); + + int _run_times = 8000; + for (int i = 1; i <= _run_times; ++i) { + std::shared_ptr _shp(new TestDoubletList(i)); + this->m_ds.Insert(_shp); + + //std::cout << std::this_thread::get_id() << " inserting " << i << std::endl; + + uint32_t _ran_val = dis(gen); + if (_ran_val & 1) { + int _size = this->CheckCutHeadAdjacent(); + //std::cout << "interval cut head,size:" << _size << std::endl; + } + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + + this->LaunchMultipleThread(_insert_cut_adjacent); + + std::cout << "finial cutHead size:" <CheckCutHeadAdjacent() << std::endl; +} + +#endif diff --git a/src/gtest/tools/test_trivial_single_list.h b/src/gtest/tools/test_trivial_single_list.h new file mode 100644 index 0000000..11797ad --- /dev/null +++ b/src/gtest/tools/test_trivial_single_list.h @@ -0,0 +1,359 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_TRIVIAL_SINGLE_LIST_H__ +#define __GTEST_TRIVIAL_SINGLE_LIST_H__ + +#include +#include +#include +#include + +#include "gtest/tools/test_data_structure_base.h" +#include "tools/trivial_lock_single_list.h" + +using ::RaftCore::DataStructure::TrivialLockSingleList; +using ::RaftCore::DataStructure::OrderedTypeBase; +using ::RaftCore::DataStructure::SingleListNode; + +class TestSingletList final : public OrderedTypeBase{ + + public: + + TestSingletList(uint32_t i) : m_i(i) {} + + virtual bool operator<(const TestSingletList& _other)const noexcept override { + return m_i < _other.m_i; + } + + virtual bool operator>(const TestSingletList& _other)const noexcept override { + return m_i > _other.m_i; + } + + virtual bool operator==(const TestSingletList& _other)const noexcept override { + return m_i == _other.m_i; + } + + uint32_t m_i; + +}; + +class TestTrivialSingleLockList : public DataStructureBase { + + public: + + TestTrivialSingleLockList(): DataStructureBase( + std::shared_ptr(new TestSingletList(0x0)), + std::shared_ptr(new TestSingletList(0xFFFFFFFF)) ) {} + + protected: + + virtual void Dump() override { + + auto _print = [](std::shared_ptr &_other) { + std::cout << _other->m_i << " "; + return true; + }; + + this->m_ds.Iterate(_print); + } + + int CheckCutHeadByValue(int _max, bool check_delete = true) { + SingleListNode* output_head = this->m_ds.CutHeadByValue(TestSingletList(_max)); + if (output_head == nullptr) { + //VLOG(89) << "--------cut head empty---------"; + return 0; + } + + int _head_size = 0; + auto _cur = output_head; + + //std::cout << "checking cut by value:" << _max << std::endl; + + std::string _line = ""; + + uint32_t _last = 0; + bool _first = true; + + while (_cur) { + if (check_delete) + CHECK(!_cur->IsDeleted()) << "deleted node found 
in the output list."; + + //bool _update_last = true; + + if (!_first) { + //CHECK(_last < _cur->m_val->m_i) << "check order fail:" << _last << "|" << _cur->m_val->m_i << ",until now values:" << _line; + if (_last >= _cur->m_val->m_i) { + VLOG(89) << "check order fail:" << _last << "|" << _cur->m_val->m_i + << ",until now values:" << _line; + std::cout << "error occur!" << std::endl; + } + //_update_last = false; + } + + //if (_update_last) + _last = _cur->m_val->m_i; + + _first = false; + + //std::cout << _cur->m_val->m_i << " "; + _line += ("|" + std::to_string(_cur->m_val->m_i)); + + _head_size++; + _cur = _cur->m_atomic_next.load(); + } + + this->m_ds.ReleaseCutHead(output_head); + + VLOG(89) << "cuthead values: " << _line; + + return _head_size; + } +}; + + +TEST_F(TestTrivialSingleLockList, GeneralOperation) { + + std::shared_ptr _shp_1(new TestSingletList(3)); + SingleListNode* _node_1 = new SingleListNode(_shp_1); + + std::shared_ptr _shp_2(new TestSingletList(5)); + SingleListNode* _node_2 = new SingleListNode(_shp_2); + + _node_1->m_atomic_next.store(_node_2); + + SingleListNode::Apply(_node_1,[](SingleListNode* p_input) { + std::cout << "element: " << p_input->m_val->m_i << std::endl; + }); + + //Insert 100 elements. + int _total = 100; + int _offset = 10; + for (int i = 1; i <= _total;++i) { + + bool _over_half_way = false; + if (i >= _total / 2) { + _over_half_way = true; + } + + int val = _over_half_way ? _total + _offset - (i - _total / 2) : i; + + std::shared_ptr _shp(new TestSingletList(val)); + SingleListNode* new_node = new SingleListNode(_shp); + + if (_over_half_way) { + this->m_ds.Insert(new_node); + continue; + } + + this->m_ds.Insert(_shp); + } + this->Dump(); + ASSERT_EQ(this->m_ds.GetSize(), _total); + + //Delete 10 elements. 
+ int _delete_num = 10; + for (int i = 1; i <= _delete_num;++i) { + std::shared_ptr _shp(new TestSingletList(i)); + this->m_ds.Delete(_shp); + } + ASSERT_EQ(this->m_ds.GetSize(), _total - _delete_num); + + //Cut by value. + int _less_than = 80; + auto *output_head = this->m_ds.CutHeadByValue(TestSingletList(_less_than)); + int _head_size = 0; + auto *_cur = output_head; + std::cout << "checking cut by value:" << _less_than << std::endl; + while (_cur) { + CHECK(!_cur->IsDeleted()) << "deleted node found in the output list."; + + std::cout << _cur->m_val->m_i << " "; + _head_size++; + _cur = _cur->m_atomic_next.load(); + } + this->m_ds.ReleaseCutHead(output_head); + ASSERT_EQ(_head_size, _less_than - _delete_num - _offset); + + this->m_ds.Clear(); + ASSERT_EQ(this->m_ds.GetSize(),0); +} + +TEST_F(TestTrivialSingleLockList, ConcurrentInsert) { + + int _max = 1000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + auto _insert = [&](int idx) { + auto _tp = this->StartTimeing(); + + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << std::endl; + std::shared_ptr _shp(new TestSingletList(insert_val)); + this->m_ds.Insert(_shp); + } + }; + + this->LaunchMultipleThread(_insert); + + ASSERT_LE(this->CheckCutHeadByValue(_max),_max); +} + +TEST_F(TestTrivialSingleLockList, ConcurrentInsertDelete) { + + int _max = 1000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + auto _insert_delete = [&](int idx) { + + VLOG(89) << "unit test concurrent thread started"; + + auto _tp = this->StartTimeing(); + + bool _delete_flg = false; + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = (dis(gen) % (_max/2)); + //std::cout << "thread:" << 
std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + std::shared_ptr _shp(new TestSingletList(insert_val)); + this->m_ds.Insert(_shp); + + if (_delete_flg) { + //std::cout << "thread:" << std::this_thread::get_id() << " deleting " << insert_val << ",i:" << i << std::endl; + this->m_ds.Delete(_shp); + } + + //Reverse the flag. + _delete_flg = !_delete_flg; + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_insert_delete); + + ASSERT_LE(this->CheckCutHeadByValue(_max),_max/2); +} + +TEST_F(TestTrivialSingleLockList, ConcurrentInsertCutHead) { + + int _max = 2000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + std::atomic _total_cut_size(0); + + auto _insert_cut = [&](int idx) { + auto _tp = this->StartTimeing(); + + VLOG(89) << "unite test thread spawned"; + + int _counter = 0; + + int _start = _max*idx + 1; + int _end = _start + _max - 1; + + VLOG(89) << "start:" << _start << ",end:" << _end << std::endl; + + for (int i = _end; i >= _start; --i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + + //VLOG(89) << "inserting " << i; + + std::shared_ptr _shp(new TestSingletList(i)); + this->m_ds.Insert(_shp); + + //VLOG(89) << "inserted " << i; + + _counter++; + if (_counter >= 10) { + int _size = this->CheckCutHeadByValue(_end,false); + + //VLOG(89) << "interval cut head,size:" << _size; + + _counter = 0; + _total_cut_size.fetch_add(_size); + } + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + int _thread_num = this->m_cpu_cores; + //int _thread_num = 6; + this->LaunchMultipleThread(_insert_cut, _thread_num); + + ASSERT_EQ(_total_cut_size.load(), _max * _thread_num); + + std::cout << "finial cutHead size:" <CheckCutHeadByValue(_max,false) << std::endl; +} + 
+TEST_F(TestTrivialSingleLockList, ConcurrentInsertDeleteCutHead) { + + int _max = 2000; + std::random_device rd; + std::mt19937 gen(rd()); //Standard mersenne_twister_engine seeded with rd() + std::uniform_int_distribution<> dis(1, _max); + + std::atomic _total_cut_size(0); + + auto _insert_delete_cut = [&](int idx) { + auto _tp = this->StartTimeing(); + + bool _delete_flg = false; + int _counter = 0; + for (int i = 1; i <= _max/2; ++i) { + uint32_t insert_val = dis(gen); + //std::cout << "thread:" << std::this_thread::get_id() << " inserting " << insert_val << ",i:" << i << std::endl; + std::shared_ptr _shp(new TestSingletList(insert_val)); + this->m_ds.Insert(_shp); + + if (_delete_flg) { + //std::cout << "thread:" << std::this_thread::get_id() << " deleting " << insert_val << ",i:" << i << std::endl; + this->m_ds.Delete(_shp); + } + //Reverse the flag. + _delete_flg = !_delete_flg; + + _counter++; + if (_counter >= 20) { + int _size = this->CheckCutHeadByValue(_max, false); + //std::cout << "interval cut head,size:" << _size << std::endl; + _counter = 0; + _total_cut_size.fetch_add(_size); + } + } + + this->EndTiming(_tp, "one thread inserting"); + }; + + this->LaunchMultipleThread(_insert_delete_cut); + + std::cout << "finial cutHead size:" << this->CheckCutHeadByValue(_max, false) + << ",total CutHead size:" << _total_cut_size.load() << std::endl; +} + +#endif diff --git a/src/gtest/tools/test_utilities.h b/src/gtest/tools/test_utilities.h new file mode 100644 index 0000000..6d79991 --- /dev/null +++ b/src/gtest/tools/test_utilities.h @@ -0,0 +1,111 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_UTILITIES_H__ +#define __GTEST_UTILITIES_H__ + +#include +#include +#include +#include + +#include "gtest/test_base.h" +#include "tools/utilities.h" + +using ::RaftCore::Tools::TypeSysTimePoint; + +class TestUtilities : public TestBase { + + public: + + TestUtilities() {} + + virtual void SetUp() override { + } + + virtual void TearDown() override { + } + + protected: + + +}; + +TEST_F(TestUtilities, GeneralOperation) { + + uint32_t uTest = 0x12345678; + uint32_t uTest2 = 0x78563412; + unsigned char* pTest = (unsigned char*)&uTest; + bool _big_endian = (*pTest) == 0x12; + + ASSERT_EQ(::RaftCore::Tools::LocalBigEndian(), _big_endian); + + uint32_t uTmp = 0x0; + ::RaftCore::Tools::ConvertToBigEndian(uTest,&uTmp); + if (_big_endian) + ASSERT_EQ(uTmp,uTest); + else + ASSERT_EQ(uTmp,uTest2); + + uint32_t uRst1 = 0; + ::RaftCore::Tools::ConvertBigEndianToLocal(uTmp,&uRst1); + ASSERT_EQ(uRst1,uTest); + + std::list myself_addr; + ::RaftCore::Tools::GetLocalIPs(myself_addr); + for (const auto &_item : myself_addr) + std::cout << "ip : " << _item << std::endl; + + uint32_t x = 5; + ASSERT_EQ(::RaftCore::Tools::RoundUp(x),8); + + ASSERT_EQ(::RaftCore::Tools::GetMask(x),3); + + std::string _test_buf = "test_buf"; + uint32_t _crc32_result = ::RaftCore::Tools::CalculateCRC32(_test_buf.data(), _test_buf.length()); + ASSERT_EQ(_crc32_result, 0x3BF65345); + + //Checking the log with VLOG level >= 90. 
+ auto _tp = ::RaftCore::Tools::StartTimeing(); + ::RaftCore::Tools::EndTiming(_tp,"unit test operations"); + + std::list _output; + ::RaftCore::Tools::StringSplit("||def||abc||xyz|||",'|',_output); + + ASSERT_EQ(_output.size(),3); + + auto _iter = _output.cbegin(); + ASSERT_EQ(*_iter++,"def"); + ASSERT_EQ(*_iter++,"abc"); + ASSERT_EQ(*_iter++,"xyz"); + + ASSERT_TRUE(_iter == _output.cend()); + + + TypeSysTimePoint _deadline = std::chrono::system_clock::now() + std::chrono::milliseconds(100); + + _deadline += std::chrono::microseconds(2000); + + std::cout << "now:" << ::RaftCore::Tools::TimePointToString(_deadline) << std::endl; + +} + + +#endif diff --git a/src/gtest/topology/test_all.h b/src/gtest/topology/test_all.h new file mode 100644 index 0000000..2d9e252 --- /dev/null +++ b/src/gtest/topology/test_all.h @@ -0,0 +1,26 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __GTEST_ALL_TOPOLOGY_H__ +#define __GTEST_ALL_TOPOLOGY_H__ + +#include "gtest/topology/test_topology.h" + +#endif diff --git a/src/gtest/topology/test_topology.h b/src/gtest/topology/test_topology.h new file mode 100644 index 0000000..7ef59e5 --- /dev/null +++ b/src/gtest/topology/test_topology.h @@ -0,0 +1,93 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __GTEST_TOPOLOGY_H__ +#define __GTEST_TOPOLOGY_H__ + +#include +#include +#include + +#include "gtest/test_base.h" +#include "topology/topology_mgr.h" + +using ::RaftCore::Topology; +using ::RaftCore::CTopologyMgr; + +class TestTopology : public TestBase { + + public: + + TestTopology() {} + + virtual void SetUp() override { + CTopologyMgr::Initialize(); + } + + virtual void TearDown() override { + CTopologyMgr::UnInitialize(); + } + + void GeneralOP() noexcept{ + Topology _topo; + CTopologyMgr::Read(&_topo); + std::cout << _topo; + + _topo.m_leader = "some content"; + _topo.Reset(); + ASSERT_EQ(_topo.m_leader,""); + + _topo.m_leader = ""; + _topo.m_followers.emplace("127.0.0.1:3000"); + _topo.m_followers.emplace("127.0.0.1:3001"); + + _topo.m_candidates.emplace("127.0.0.1:3002"); + _topo.m_candidates.emplace("127.0.0.1:3003"); + + ASSERT_EQ(_topo.GetClusterSize(),5); + + ASSERT_TRUE(_topo.InCurrentCluster("127.0.0.1:3000")); + 
ASSERT_FALSE(_topo.InCurrentCluster("127.0.0.1:3005")); + + _topo.m_leader = "new value"; + CTopologyMgr::Update(_topo); + std::cout << _topo; + } +}; + +TEST_F(TestTopology, GeneralOperation) { + + this->GeneralOP(); +} + +TEST_F(TestTopology, ConcurrentOperation) { + + auto _op = [&](int idx) { + int _run_times = 100; + for (int i = 0; i < _run_times; ++i) { + this->GeneralOP(); + } + }; + + this->LaunchMultipleThread(_op); +} + + +#endif diff --git a/src/guid/guid_generator.cc b/src/guid/guid_generator.cc new file mode 100644 index 0000000..ffd850c --- /dev/null +++ b/src/guid/guid_generator.cc @@ -0,0 +1,54 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
//-------- guid/guid_generator.h : declaration --------

#define _GUID_ERR_ (0xFFFFFFFFFFFFFFFF)

namespace RaftCore::Guid {

/* Hands out consecutive 64 bit ids from one static atomic counter.
   The class only has static members and can never be instantiated. */
class GuidGenerator final {

public:

    /* Result of one allocation. */
    struct GUIDPair {

        //2^64 is 18446744073709551616,which can satisfy 1M/s requests for 584942 years.
        uint64_t    m_pre_guid;  //Counter value the allocation started from.
        uint64_t    m_cur_guid;  //The id that was actually allocated (m_pre_guid + 1).

        //Pairs are ordered by the allocated id only.
        bool operator<(const GUIDPair &other) const noexcept {
            return m_cur_guid < other.m_cur_guid;
        }
    };

public:

    //Start counting from the given value.
    static void Initialize(uint64_t last_released=0) noexcept;

    //Nothing to release, kept for symmetry with Initialize().
    static void UnInitialize() noexcept;

    //Atomically advance the counter by one and report both the old and the new value.
    static GUIDPair GenerateGuid() noexcept;

    //Set m_last_released_guid to the given id , for fail recovery purpose.
    static void SetNextBasePoint(uint64_t base_point) noexcept;

    //Current counter value, i.e. the most recently allocated id.
    static uint64_t GetLastReleasedGuid() noexcept;

private:

    static std::atomic<uint64_t>    m_last_released_guid;

private:

    GuidGenerator() = delete;

    virtual ~GuidGenerator() = delete;

    GuidGenerator(const GuidGenerator&) = delete;

    GuidGenerator& operator=(const GuidGenerator&) = delete;
};

}

//-------- guid/guid_generator.cc : definitions --------

#define _AURORA_FILE_BUF_SIZE_ (1024)

namespace RaftCore::Guid {

std::atomic<uint64_t>  GuidGenerator::m_last_released_guid;

void GuidGenerator::Initialize(uint64_t last_released) noexcept{
    m_last_released_guid.store(last_released);
}

void GuidGenerator::UnInitialize() noexcept{}

GuidGenerator::GUIDPair GuidGenerator::GenerateGuid() noexcept{

    //fetch_add returns the value before the increment, so the allocated id is that value plus one.
    GUIDPair _allocated;
    _allocated.m_pre_guid = m_last_released_guid.fetch_add(1);
    _allocated.m_cur_guid = _allocated.m_pre_guid + 1;

    return _allocated;
}

void GuidGenerator::SetNextBasePoint(uint64_t base_point) noexcept {
    m_last_released_guid.store(base_point);
}

uint64_t GuidGenerator::GetLastReleasedGuid() noexcept {
    return m_last_released_guid.load();
}

}
+*/ + +#include + +#include "grpc++/create_channel.h" + +#include "config/config.h" +#include "client/client_impl.h" +#include "leader/channel_pool.h" + +namespace RaftCore::Leader { + +using ::RaftCore::Client::HeartbeatSyncClient; + +ChannelPool::ChannelPool(const std::string &peer_addr, uint32_t pool_size) noexcept { + + this->m_channel_pool.reset(new TypeVecChannel()); + + for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_conn_per_link; ++i) { + for (std::size_t j = 0; j < pool_size; ++j) { + auto _channel_args = ::grpc::ChannelArguments(); + + std::string _key = "custom_key_" + std::to_string(i); + std::string _val = "custom_val_" + std::to_string(i); + _channel_args.SetString(_key, _val); + + auto _shp_channel = ::grpc::CreateCustomChannel(peer_addr, ::grpc::InsecureChannelCredentials(), _channel_args); + this->m_channel_pool->emplace_back(_shp_channel); + } + } + + this->m_peer_addr = peer_addr; +} + +ChannelPool::~ChannelPool() noexcept{} + +std::shared_ptr<::grpc::Channel> ChannelPool::GetOneChannel() noexcept { + uint32_t _random_idx = this->m_idx.fetch_add(1,std::memory_order_relaxed) % this->m_channel_pool->size(); + return this->m_channel_pool->operator[](_random_idx); +} + +void ChannelPool::HeartBeat(uint32_t term,const std::string &my_addr) noexcept{ + + auto _shp_channel = this->GetOneChannel(); + HeartbeatSyncClient _heartbeat_client(_shp_channel); + + auto _setter = [&](std::shared_ptr<::raft::HeartBeatRequest>& req) { + req->mutable_base()->set_addr(my_addr); + req->mutable_base()->set_term(term); + }; + + auto _rpc = std::bind(&::raft::RaftService::Stub::HeartBeat, _heartbeat_client.GetStub().get(), + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + + ::grpc::Status _status; + auto &_rsp = _heartbeat_client.DoRPC(_setter, _rpc, + ::RaftCore::Config::FLAGS_leader_heartbeat_rpc_timeo_ms, _status); + + if (!_status.ok()) { + LOG(ERROR) << "heart to follower:" << this->m_peer_addr << " rpc fail" + << ",err code:" << 
_status.error_code() << ",err msg:" << _status.error_message(); + return; + } + + if (_rsp.result()!=::raft::ErrorCode::SUCCESS) { + LOG(ERROR) << "heart to follower:" << this->m_peer_addr << " svr return fail," << ",msg:" << _rsp.err_msg(); + return; + } + + VLOG(99) << "follower " << this->m_peer_addr << " checking heartbeat success!"; +} + +} + diff --git a/src/leader/channel_pool.h b/src/leader/channel_pool.h new file mode 100644 index 0000000..c42cedb --- /dev/null +++ b/src/leader/channel_pool.h @@ -0,0 +1,68 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_CHANNEL_POOL_EX_H__ +#define __AURORA_CHANNEL_POOL_EX_H__ + +#include +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" + +#include "protocol/raft.pb.h" +#include "protocol/raft.grpc.pb.h" + +namespace RaftCore::Leader { + +class ChannelPool final{ + +public: + + ChannelPool(const std::string &peer_addr,uint32_t pool_size) noexcept; + + virtual ~ChannelPool() noexcept; + + void HeartBeat(uint32_t term,const std::string &my_addr) noexcept; + + std::shared_ptr<::grpc::Channel> GetOneChannel() noexcept; + +private: + + typedef std::vector> TypeVecChannel; + + //Read only after initialization. + std::shared_ptr m_channel_pool; + + //Relatively random accessing. 
+ std::atomic m_idx; + + std::string m_peer_addr; + +private: + + ChannelPool(const ChannelPool&) = delete; + + ChannelPool& operator=(const ChannelPool&) = delete; +}; + +} //end namespace + +#endif diff --git a/src/leader/client_pool.cc b/src/leader/client_pool.cc new file mode 100644 index 0000000..ae20e2e --- /dev/null +++ b/src/leader/client_pool.cc @@ -0,0 +1,58 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include + +#include "grpc++/create_channel.h" + +#include "config/config.h" +#include "common/comm_defs.h" +#include "global/global_env.h" +#include "client/client_impl.h" +#include "leader/client_pool.h" + +namespace RaftCore::Leader { + +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::Client::HeartbeatSyncClient; + +template +ClientPool::ClientPool(FollowerEntity* p_follower) noexcept { + this->m_p_parent_follower = p_follower; +} + +template +ClientPool::~ClientPool() noexcept{} + +template +FollowerEntity* ClientPool::GetParentFollower() noexcept { + return this->m_p_parent_follower; +} + +template +std::shared_ptr ClientPool::Fetch() noexcept { + return m_pool.Pop(); +} + +template +void ClientPool::Back(std::shared_ptr &client) noexcept { + return m_pool.Push(client); +} + +} + diff --git a/src/leader/client_pool.h b/src/leader/client_pool.h new file mode 100644 index 0000000..e416598 --- /dev/null +++ b/src/leader/client_pool.h @@ -0,0 +1,77 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_CONNECTION_POOL_EX_H__ +#define __AURORA_CONNECTION_POOL_EX_H__ + +#include +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" + +#include "protocol/raft.pb.h" +#include "protocol/raft.grpc.pb.h" + +#include "tools/lock_free_deque.h" +#include "leader/channel_pool.h" + +namespace RaftCore::Leader { + +using ::RaftCore::DataStructure::LockFreeDeque; +using ::RaftCore::Leader::ChannelPool; + +class FollowerEntity; + +template +class ClientPool final{ + +public: + + ClientPool(FollowerEntity* p_follower = nullptr) noexcept; + + virtual ~ClientPool() noexcept; + + std::shared_ptr Fetch() noexcept; + + void Back(std::shared_ptr &client) noexcept; + + FollowerEntity* GetParentFollower() noexcept; + +private: + + LockFreeDeque m_pool; + + /*Cannot contain a shared_ptr since it will cause two shared_ptr points to the same + the FollowerEntity object resulting in a recursively destructing problem. */ + FollowerEntity* m_p_parent_follower; + +private: + + ClientPool(const ClientPool&) = delete; + + ClientPool& operator=(const ClientPool&) = delete; +}; + +} //end namespace + +#include "leader/client_pool.cc" + +#endif diff --git a/src/leader/follower_entity.cc b/src/leader/follower_entity.cc new file mode 100644 index 0000000..253dcc5 --- /dev/null +++ b/src/leader/follower_entity.cc @@ -0,0 +1,90 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see . +*/ + +#include "common/comm_view.h" +#include "global/global_env.h" +#include "storage/storage.h" +#include "leader/follower_entity.h" + +namespace RaftCore::Leader { + +using ::RaftCore::Common::CommonView; +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::Storage::StorageMgr; +using ::RaftCore::Service::OwnershipDelegator; + +const char* FollowerEntity::m_status_macro_names[] = { "NORMAL","RESYNC_LOG","RESYNC_DATA"}; + +FollowerEntity::FollowerEntity(const std::string &follower_addr,FollowerStatus status, + uint32_t joint_consensus_flag) noexcept{ + + this->m_shp_channel_pool.reset(new ChannelPool(follower_addr,::RaftCore::Config::FLAGS_channel_pool_size)); + + auto _channel = this->m_shp_channel_pool->GetOneChannel(); + + uint32_t _pool_size = ::RaftCore::Config::FLAGS_client_pool_size; + + this->m_append_client_pool.reset(new ClientPool(this)); + for (std::size_t i = 0; i < _pool_size; ++i) { + auto* _p_client = new AppendEntriesAsyncClient(_channel, GlobalEnv::GetClientCQInstance()); + auto _shp_client = _p_client->OwnershipDelegator::GetOwnership(); + this->m_append_client_pool->Back(_shp_client); + } + + uint32_t _group_commit = ::RaftCore::Config::FLAGS_group_commit_count; + + uint32_t _commit_client_size = _pool_size / _group_commit; + CHECK(_commit_client_size > 0) << "pool_size:" << _pool_size << ",group_commit:" << _commit_client_size; + + VLOG(89) << "debug commit client size:" << _commit_client_size << ",addr:" << this->my_addr; + + this->m_commit_client_pool.reset(new ClientPool(this)); + for (std::size_t i = 0; i < _commit_client_size; ++i) { + auto* _p_client = new CommitEntriesAsyncClient(_channel, GlobalEnv::GetClientCQInstance()); + auto _shp_client = _p_client->OwnershipDelegator::GetOwnership(); + this->m_commit_client_pool->Back(_shp_client); + } + + this->m_joint_consensus_flag = joint_consensus_flag; + this->my_addr = follower_addr; + this->m_status = status; + + 
this->m_last_sent_committed.store(CommonView::m_zero_log_id); +} + +FollowerEntity::~FollowerEntity() noexcept{} + +bool FollowerEntity::UpdateLastSentCommitted(const LogIdentifier &to) noexcept { + + while (true) { + auto _cur_last_commit = this->m_last_sent_committed.load(); + if (to <= _cur_last_commit) + return false; + + if (!this->m_last_sent_committed.compare_exchange_weak(_cur_last_commit, to)) + continue; + + VLOG(89) << "m_last_sent_committed update to:" << to << ", addr:" << this->my_addr; + break; + } + + return true; +} + +} + diff --git a/src/leader/follower_entity.h b/src/leader/follower_entity.h new file mode 100644 index 0000000..a2ffc2d --- /dev/null +++ b/src/leader/follower_entity.h @@ -0,0 +1,101 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_FOLLOWER_STATE_H__ +#define __AURORA_FOLLOWER_STATE_H__ + +#include +#include + +#include "common/comm_view.h" +#include "common/comm_defs.h" +#include "client/client_impl.h" +#include "leader/client_pool.h" + +namespace RaftCore::Leader { + +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Member::JointConsensusMask; +using ::RaftCore::Client::AppendEntriesAsyncClient; +using ::RaftCore::Client::CommitEntriesAsyncClient; +using ::RaftCore::Leader::ChannelPool; +using ::RaftCore::Leader::ClientPool; + +enum class FollowerStatus { + NORMAL = 0, + RESYNC_LOG, + RESYNC_DATA, +}; + +/* This is the class for representing follower's state in leader's view */ +class FollowerEntity final{ + +public: + + inline static const char* MacroToString(FollowerStatus enum_val) { + return m_status_macro_names[int(enum_val)]; + } + +public: + + FollowerEntity(const std::string &follower_addr,FollowerStatus status=FollowerStatus::NORMAL, + uint32_t joint_consensus_flag=uint32_t(JointConsensusMask::IN_OLD_CLUSTER)) noexcept; + + virtual ~FollowerEntity() noexcept; + + //If ever successfully updated the last sent committed for this follower. + bool UpdateLastSentCommitted(const LogIdentifier &to) noexcept; + + std::string my_addr; + + //A simple type , can be read & blind write simultaneously by multiple thread. + FollowerStatus m_status; + + uint32_t m_joint_consensus_flag; + + //Record #(timeout entries) since the latest successfully replicated log. + int32_t m_timeout_counter; + + std::shared_ptr m_shp_channel_pool; + + //Note: only high frequency used client need to be pooled. 
+ std::unique_ptr> m_append_client_pool; + + std::unique_ptr> m_commit_client_pool; + + std::atomic m_last_sent_committed; + +private: + + static const char* m_status_macro_names[]; + +private: + + FollowerEntity(const FollowerEntity&) = delete; + + FollowerEntity& operator=(const FollowerEntity&) = delete; + +}; + +typedef std::shared_ptr<::RaftCore::Leader::FollowerEntity> TypePtrFollowerEntity; + +} //end namespace + +#endif diff --git a/src/leader/leader_bg_task.cc b/src/leader/leader_bg_task.cc new file mode 100644 index 0000000..2183008 --- /dev/null +++ b/src/leader/leader_bg_task.cc @@ -0,0 +1,249 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "service/service.h" +#include "member/member_manager.h" +#include "leader/leader_bg_task.h" + +namespace RaftCore::Leader::BackGroundTask { + +using ::RaftCore::Member::MemberMgr; + +TwoPhaseCommitContext::PhaseState::RpcStatistic::RpcStatistic(){ this->Reset(); } + +TwoPhaseCommitContext::PhaseState::RpcStatistic::~RpcStatistic(){} + +std::string TwoPhaseCommitContext::PhaseState::RpcStatistic::Dump() const noexcept { + char sz_buf[256] = { 0 }; + std::snprintf(sz_buf,sizeof(sz_buf),"cq_entrust_num:%d,events_got:%d,succeed_num:%d,implicitly_fail_num:%d,explicitly_fail_num:%d", + this->m_cq_entrust_num.load(),this->EventsGot(),this->m_succeed_num.load(), + this->m_implicitly_fail_num.load(),this->m_explicitly_fail_num.load()); + return sz_buf; +} + +int TwoPhaseCommitContext::PhaseState::RpcStatistic::EventsGot()const noexcept { + return this->m_succeed_num.load() + this->m_explicitly_fail_num.load() + this->m_implicitly_fail_num.load(); +} + +void TwoPhaseCommitContext::PhaseState::RpcStatistic::Reset() noexcept { + this->m_cq_entrust_num.store(0); + this->m_succeed_num.store(0); + this->m_implicitly_fail_num.store(0); + this->m_explicitly_fail_num.store(0); +} + +TwoPhaseCommitContext::PhaseState::PhaseState() { this->Reset(); } + +TwoPhaseCommitContext::PhaseState::~PhaseState() {} + +std::string TwoPhaseCommitContext::PhaseState::Dump() const noexcept{ + + auto _cur_cluster_dump = this->m_cur_cluster.Dump(); + auto _new_cluster_dump = this->m_new_cluster.Dump(); + + char sz_buf[512] = { 0 }; + std::snprintf(sz_buf,sizeof(sz_buf),"cur_cluster:[%s],new_cluster:[%s]",_cur_cluster_dump.c_str(),_new_cluster_dump.c_str()); + return sz_buf; +} + +void TwoPhaseCommitContext::PhaseState::Reset() noexcept { + this->m_cur_cluster.Reset(); + this->m_new_cluster.Reset(); + this->m_conn_todo_set.clear(); +} + +void TwoPhaseCommitContext::PhaseState::Increase(uint32_t flag, std::atomic &cur_cluster_data, + std::atomic &new_cluster_data) noexcept { + if (flag & 
int(JointConsensusMask::IN_OLD_CLUSTER)) + cur_cluster_data.fetch_add(1); + + if (flag & int(JointConsensusMask::IN_NEW_CLUSTER)) + new_cluster_data.fetch_add(1); +} + +void TwoPhaseCommitContext::PhaseState::IncreaseEntrust(uint32_t flag) noexcept { + this->Increase(flag,this->m_cur_cluster.m_cq_entrust_num, this->m_new_cluster.m_cq_entrust_num); +} + +void TwoPhaseCommitContext::PhaseState::IncreaseSuccess(uint32_t flag) noexcept { + this->Increase(flag,this->m_cur_cluster.m_succeed_num, this->m_new_cluster.m_succeed_num); +} + +void TwoPhaseCommitContext::PhaseState::IncreaseImplicitFail(uint32_t flag) noexcept { + this->Increase(flag,this->m_cur_cluster.m_implicitly_fail_num, this->m_new_cluster.m_implicitly_fail_num); +} + +void TwoPhaseCommitContext::PhaseState::IncreaseExplicitFail(uint32_t flag) noexcept { + this->Increase(flag,this->m_cur_cluster.m_explicitly_fail_num, this->m_new_cluster.m_explicitly_fail_num); +} + +bool TwoPhaseCommitContext::PhaseState::JudgeClusterPotentiallySucceed(RpcStatistic &cluster_stat, std::size_t majority) noexcept { + return (cluster_stat.m_succeed_num.load() + cluster_stat.m_implicitly_fail_num.load()) >= (int)majority; +} + +FinishStatus TwoPhaseCommitContext::PhaseState::JudgeClusterDetermined(RpcStatistic &cluster_stat,std::size_t majority) noexcept { + if (cluster_stat.m_succeed_num.load() >= (int)majority) + return FinishStatus::POSITIVE_FINISHED; + + int _unknown = cluster_stat.m_cq_entrust_num - cluster_stat.EventsGot(); + if (cluster_stat.m_succeed_num.load() + _unknown < (int)majority) + return FinishStatus::NEGATIVE_FINISHED; + + return FinishStatus::UNFINISHED; +} + +bool TwoPhaseCommitContext::PhaseState::JudgeFinished() noexcept { + return (this->m_cur_cluster.m_cq_entrust_num == this->m_cur_cluster.EventsGot() && + this->m_new_cluster.m_cq_entrust_num == this->m_new_cluster.EventsGot()); +} + +bool TwoPhaseCommitContext::JudgePhaseIPotentiallySucceed() noexcept { + auto &_phaseI = this->m_phaseI_state; + + if 
(!_phaseI.JudgeClusterPotentiallySucceed(_phaseI.m_cur_cluster, this->m_cluster_majority)) + return false; + + return _phaseI.JudgeClusterPotentiallySucceed(_phaseI.m_new_cluster, this->m_new_cluster_majority); +} + +FinishStatus TwoPhaseCommitContext::JudgePhaseIDetermined() noexcept { + auto &_phaseI = this->m_phaseI_state; + + FinishStatus _cur = _phaseI.JudgeClusterDetermined(_phaseI.m_cur_cluster, this->m_cluster_majority); + FinishStatus _new = _phaseI.JudgeClusterDetermined(_phaseI.m_new_cluster, this->m_new_cluster_majority); + + if ((_cur == FinishStatus::NEGATIVE_FINISHED) || (_new == FinishStatus::NEGATIVE_FINISHED)) + return FinishStatus::NEGATIVE_FINISHED; + + if ((_cur == FinishStatus::POSITIVE_FINISHED) && (_new == FinishStatus::POSITIVE_FINISHED)) + return FinishStatus::POSITIVE_FINISHED; + + return FinishStatus::UNFINISHED; +} + +TwoPhaseCommitContext::TwoPhaseCommitContext() { this->Reset(); } + +TwoPhaseCommitContext::~TwoPhaseCommitContext() {} + +bool TwoPhaseCommitContext::JudgeAllFinished() noexcept { + + if (!this->m_phaseI_state.JudgeFinished()) + return false; + + int _phaseII_obligation_x = this->m_phaseI_state.m_cur_cluster.m_cq_entrust_num.load(); + if (this->m_phaseII_state.m_cur_cluster.m_succeed_num.load() < _phaseII_obligation_x) + return false; + + int _phaseII_obligation_y = this->m_phaseI_state.m_new_cluster.m_cq_entrust_num.load(); + if (this->m_phaseII_state.m_new_cluster.m_succeed_num.load() < _phaseII_obligation_y) + return false; + + return true; +} + +std::string TwoPhaseCommitContext::Dump() const noexcept{ + + auto _phaseI_dump = this->m_phaseI_state.Dump(); + auto _phaseII_dump = this->m_phaseII_state.Dump(); + + char sz_buf[1024] = { 0 }; + std::snprintf(sz_buf,sizeof(sz_buf),"phaseI_state:[%s],phaseII_state:[%s],m_cluster_size:%u," + "m_cluster_majority:%u,m_new_cluster_size:%u,m_new_cluster_majority:%u", + _phaseI_dump.c_str(),_phaseII_dump.c_str(),(uint32_t)this->m_cluster_size,(uint32_t)this->m_cluster_majority, + 
(uint32_t)this->m_new_cluster_size,(uint32_t)this->m_new_cluster_majority); + return sz_buf; +} + +void TwoPhaseCommitContext::Reset() noexcept { + this->m_phaseI_state.Reset(); + this->m_phaseII_state.Reset(); +} + +SyncDataContenxt::SyncDataContenxt(TypePtrFollowerEntity &shp_follower) noexcept{ + //Sync Data will hold one connection until this job finished,since it is stateful. + this->m_follower = shp_follower; + + auto _shp_channel = this->m_follower->m_shp_channel_pool->GetOneChannel(); + this->m_shp_client.reset(new BackGroundTask::SyncDataSyncClient(_shp_channel)); + + this->m_last_sync.Set(0, 0); +} + +SyncDataContenxt::~SyncDataContenxt() noexcept {} + +bool SyncDataContenxt::IsBeginning() const noexcept { + return (this->m_last_sync.m_term == 0 && this->m_last_sync.m_index == 0); +} + +LogReplicationContext::LogReplicationContext()noexcept { + this->m_p_joint_snapshot = new MemberMgr::JointSummary(); +} + +LogReplicationContext::~LogReplicationContext()noexcept{ + delete (MemberMgr::JointSummary*)this->m_p_joint_snapshot; +} + +CutEmptyContext::CutEmptyContext(int value_flag)noexcept{ + this->m_value_flag = value_flag; + this->m_generation_tp = std::chrono::system_clock::now(); + this->m_processed_flag.store(false); +} + +CutEmptyContext::~CutEmptyContext()noexcept{} + +bool CutEmptyContext::operator<(const CutEmptyContext& other)const noexcept { + + if (this->m_value_flag < 0 || other.m_value_flag > 0) + return true; + + if (this->m_value_flag > 0 || other.m_value_flag < 0) + return false; + + const auto &_shp_req = this->m_write_request->GetReqCtx(); + const auto &_shp_req_other = other.m_write_request->GetReqCtx(); + + return _shp_req->m_cur_log_id < _shp_req_other->m_cur_log_id; +} + +bool CutEmptyContext::operator>(const CutEmptyContext& other)const noexcept { + + if (this->m_value_flag < 0 || other.m_value_flag > 0) + return false; + + if (this->m_value_flag > 0 || other.m_value_flag < 0) + return true; + + const auto &_shp_req = 
this->m_write_request->GetReqCtx(); + const auto &_shp_req_other = other.m_write_request->GetReqCtx(); + + return _shp_req->m_cur_log_id > _shp_req_other->m_cur_log_id; +} + +bool CutEmptyContext::operator==(const CutEmptyContext& other)const noexcept { + + if (other.m_value_flag != 0 || this->m_value_flag != 0) + return false; + + const auto &_shp_req = this->m_write_request->GetReqCtx(); + const auto &_shp_req_other = other.m_write_request->GetReqCtx(); + + return _shp_req->m_cur_log_id == _shp_req_other->m_cur_log_id; +} + +} + diff --git a/src/leader/leader_bg_task.h b/src/leader/leader_bg_task.h new file mode 100644 index 0000000..00eeba7 --- /dev/null +++ b/src/leader/leader_bg_task.h @@ -0,0 +1,225 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_LEADER_BG_TASK_H__ +#define __AURORA_LEADER_BG_TASK_H__ + +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" + +#include "protocol/raft.pb.h" +#include "protocol/raft.grpc.pb.h" + +#include "common/log_identifier.h" +#include "tools/trivial_lock_double_list.h" +#include "client/client_impl.h" +#include "leader/follower_entity.h" + +namespace RaftCore { + namespace Service { + class Write; + } +} + +namespace RaftCore::Leader::BackGroundTask { + +using grpc::CompletionQueue; +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Common::ReactInfo; +using ::RaftCore::Common::FinishStatus; +using ::RaftCore::Tools::TypeSysTimePoint; +using ::RaftCore::DataStructure::OrderedTypeBase; +using ::RaftCore::Client::SyncDataSyncClient; +using ::RaftCore::Leader::TypePtrFollowerEntity; +using ::RaftCore::Service::Write; + +class TwoPhaseCommitContext { + + public: + + struct PhaseState { + + struct RpcStatistic { + + RpcStatistic(); + + virtual ~RpcStatistic(); + + std::atomic m_cq_entrust_num ; + std::atomic m_succeed_num; + std::atomic m_implicitly_fail_num; + std::atomic m_explicitly_fail_num; + + int EventsGot()const noexcept; + + std::string Dump() const noexcept; + + void Reset() noexcept; + }; + + std::string Dump() const noexcept; + + PhaseState(); + + virtual ~PhaseState(); + + void Reset() noexcept; + + void Increase(uint32_t flag, std::atomic &cur_cluster_data, + std::atomic &new_cluster_data) noexcept; + + void IncreaseEntrust(uint32_t flag) noexcept; + + void IncreaseSuccess(uint32_t flag) noexcept; + + void IncreaseImplicitFail(uint32_t flag) noexcept; + + void IncreaseExplicitFail(uint32_t flag) noexcept; + + FinishStatus JudgeClusterDetermined(RpcStatistic &cluster_stat, std::size_t majority) noexcept; + + bool JudgeClusterPotentiallySucceed(RpcStatistic &cluster_stat, std::size_t majority) noexcept; + + bool JudgeFinished() noexcept; + + RpcStatistic m_cur_cluster; + RpcStatistic m_new_cluster; + + 
std::set m_conn_todo_set; + }; + + public: + + TwoPhaseCommitContext(); + + virtual ~TwoPhaseCommitContext(); + + FinishStatus JudgePhaseIDetermined() noexcept; + + bool JudgePhaseIPotentiallySucceed() noexcept; + + bool JudgeAllFinished() noexcept; + + std::string Dump() const noexcept; + + void Reset() noexcept; + + PhaseState m_phaseI_state; + PhaseState m_phaseII_state; + + std::size_t m_cluster_size = 0; + std::size_t m_cluster_majority = 0; + + std::size_t m_new_cluster_size = 0; + std::size_t m_new_cluster_majority = 0; +}; + +/*Contains all information needed for a single client RPC context */ +class LogReplicationContext final : public TwoPhaseCommitContext { + + public: + + LogReplicationContext()noexcept; + + virtual ~LogReplicationContext()noexcept; + + LogIdentifier m_cur_log_id; + + /*Have to use a pointer getting around of header files recursively including . + Can't use the struct forward declaration here, shit. */ + void* m_p_joint_snapshot; //A snapshot for consistent reading. 
+}; + +class ReSyncLogContext final { + + public: + + LogIdentifier m_last_sync_point; + + TypePtrFollowerEntity m_follower; + + std::function m_on_success_cb; + + bool m_hold_pre_lcl = false; +}; + +class SyncDataContenxt final { + + public: + + SyncDataContenxt(TypePtrFollowerEntity &shp_follower) noexcept; + + virtual ~SyncDataContenxt() noexcept; + + bool IsBeginning() const noexcept; + + public: + + LogIdentifier m_last_sync; + + TypePtrFollowerEntity m_follower; + + std::shared_ptr m_shp_client; + + std::function m_on_success_cb; + + ::grpc::Status m_final_status; +}; + +class ClientReactContext final { + + public: + + ReactInfo m_react_info; +}; + +class CutEmptyContext final : public OrderedTypeBase { + + public: + + CutEmptyContext(int value_flag = 0)noexcept; + + virtual ~CutEmptyContext()noexcept; + + virtual bool operator<(const CutEmptyContext& other)const noexcept override; + + virtual bool operator>(const CutEmptyContext& other)const noexcept override; + + virtual bool operator==(const CutEmptyContext& other)const noexcept override; + + std::shared_ptr m_write_request; + + TypeSysTimePoint m_generation_tp; + + /*<0: minimal value; + >0:max value; + ==0:comparable value. */ + int m_value_flag = 0; + + std::atomic m_processed_flag; + + bool m_log_flag = false; +}; + +} //end namespace + +#endif diff --git a/src/leader/leader_request.cc b/src/leader/leader_request.cc new file mode 100644 index 0000000..7878a6d --- /dev/null +++ b/src/leader/leader_request.cc @@ -0,0 +1,31 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "leader/leader_request.h" + +namespace RaftCore::Leader { + +template +LeaderRequest::LeaderRequest() noexcept {} + +template +LeaderRequest::~LeaderRequest() noexcept {} + + +} + diff --git a/src/leader/leader_request.h b/src/leader/leader_request.h new file mode 100644 index 0000000..4c26a53 --- /dev/null +++ b/src/leader/leader_request.h @@ -0,0 +1,59 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_LEADER_REQUEST_H__ +#define __AURORA_LEADER_REQUEST_H__ + +#include + +#include "protocol/raft.grpc.pb.h" +#include "protocol/raft.pb.h" + +#include "common/request_base.h" + +using ::raft::RaftService; +using ::grpc::ServerCompletionQueue; +using ::RaftCore::Common::UnaryRequest; + +namespace RaftCore::Leader { + +//Just a thin wrapper for differentiate rpcs. 
+template +class LeaderRequest : public UnaryRequest{ + +public: + + LeaderRequest()noexcept; + + virtual ~LeaderRequest()noexcept; + +private: + + LeaderRequest(const LeaderRequest&) = delete; + + LeaderRequest& operator=(const LeaderRequest&) = delete; + +}; + +} //end namespace + +#include "leader_request.cc" + +#endif diff --git a/src/leader/leader_view.cc b/src/leader/leader_view.cc new file mode 100644 index 0000000..acb6ee4 --- /dev/null +++ b/src/leader/leader_view.cc @@ -0,0 +1,753 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "common/comm_defs.h" +#include "common/error_code.h" +#include "tools/timer.h" +#include "tools/lock_free_priority_queue.h" +#include "binlog/binlog_singleton.h" +#include "storage/storage_singleton.h" +#include "state/state_mgr.h" +#include "election/election.h" +#include "member/member_manager.h" +#include "leader/follower_entity.h" +#include "client/client_framework.h" +#include "service/service.h" +#include "leader/leader_view.h" + +namespace RaftCore::Leader { + +using ::raft::AppendEntriesRequest; +using ::raft::AppendEntriesResponse; +using ::raft::CommitEntryRequest; +using ::raft::CommitEntryResponse; +using ::RaftCore::DataStructure::LockFreePriotityQueue; +using ::RaftCore::Member::MemberMgr; +using ::RaftCore::Member::EJointStatus; +using ::RaftCore::Client::UnarySyncClient; +using ::RaftCore::Service::Write; + +std::unordered_map LeaderView::m_hash_followers; + +TrivialLockDoubleList LeaderView::m_entity_pending_list( std::shared_ptr(new MemoryLogItemLeader(0x0, 0x0)), + std::shared_ptr(new MemoryLogItemLeader(_MAX_UINT32_, _MAX_UINT64_))); + +std::condition_variable LeaderView::m_cv; + +std::mutex LeaderView::m_cv_mutex; + +const char* LeaderView::m_invoker_macro_names[] = { "CLIENT_RPC","BACKGROUP_THREAD" }; + +std::shared_timed_mutex LeaderView::m_hash_followers_mutex; + +LockFreeUnorderedSingleList> LeaderView::m_garbage; + +std::atomic LeaderView::m_last_cut_log; + +LeaderView::ServerStatus LeaderView::m_status = LeaderView::ServerStatus::NORMAL; + +TrivialLockSingleList LeaderView::m_cut_empty_list(std::shared_ptr(new CutEmptyContext(-1)), + std::shared_ptr(new CutEmptyContext(1))); + +LockFreeUnorderedSingleList> LeaderView::m_cut_empty_garbage; + +std::atomic LeaderView::m_last_log_waiting_num; + +//To avoid issues caused by including header files mutually. 
+using ::raft::ErrorCode; +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::BinLog::FileMetaData; +using ::RaftCore::DataStructure::LockFreeQueue; +using ::RaftCore::DataStructure::LockFreeQueueBase; +using ::RaftCore::Storage::StorageMgr; +using ::RaftCore::Storage::StorageGlobal; +using ::RaftCore::Timer::GlobalTimer; +using ::RaftCore::State::StateMgr; +using ::RaftCore::State::RaftRole; +using ::RaftCore::Election::ElectionMgr; +using ::RaftCore::Leader::FollowerStatus; +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Tools::TypeSysTimePoint; + +void LeaderView::Initialize(const ::RaftCore::Topology& _topo) noexcept { + + CommonView::Initialize(); + + for (const auto & _follower_addr : _topo.m_followers) + m_hash_followers[_follower_addr] = std::shared_ptr(new FollowerEntity(_follower_addr)); + + int consumer_threads_num = ::RaftCore::Config::FLAGS_lockfree_queue_consumer_threads_num; + if (consumer_threads_num == 0) + consumer_threads_num = ::RaftCore::Common::CommonView::m_cpu_cores * 2; + + auto _heartbeat = [&]()->bool { + + if (!::RaftCore::Config::FLAGS_do_heartbeat) + return true; + + BroadcastHeatBeat(); + + //Unit test need a switch. + return !::RaftCore::Config::FLAGS_heartbeat_oneshot; + }; + GlobalTimer::AddTask(::RaftCore::Config::FLAGS_leader_heartbeat_interval_ms,_heartbeat); + + //Register connection pool GC. + auto _conn_pool_deque_gc = []() ->bool { + LockFreeDeque::GC(); + LockFreeDeque::GC(); + return true; + }; + GlobalTimer::AddTask(::RaftCore::Config::FLAGS_gc_interval_ms ,_conn_pool_deque_gc); + + //Register GC task to the global timer. + CommonView::InstallGC(&m_garbage); + CommonView::InstallGC(&m_cut_empty_garbage); + + //Add task & callbacks by tasks' priority,highest priority add first. 
+ auto *_p_client_reacting_queue = new LockFreeQueue(); + _p_client_reacting_queue->Initilize(ClientReactCB, ::RaftCore::Config::FLAGS_lockfree_queue_client_react_elements); + m_priority_queue.AddTask(LockFreePriotityQueue::TaskType::CLIENT_REACTING,(LockFreeQueueBase*)_p_client_reacting_queue); + + auto *_p_fresher_sync_data_queue = new LockFreeQueue(); + _p_fresher_sync_data_queue->Initilize(SyncDataCB,::RaftCore::Config::FLAGS_lockfree_queue_resync_data_elements); + m_priority_queue.AddTask(LockFreePriotityQueue::TaskType::RESYNC_DATA,(LockFreeQueueBase*)_p_fresher_sync_data_queue); + + auto *_p_resync_log_queue = new LockFreeQueue(); + _p_resync_log_queue->Initilize(ReSyncLogCB,::RaftCore::Config::FLAGS_lockfree_queue_resync_log_elements); + m_priority_queue.AddTask(LockFreePriotityQueue::TaskType::RESYNC_LOG,(LockFreeQueueBase*)_p_resync_log_queue); + + //Launching the background threads for processing that queue. + m_priority_queue.Launch(); + + m_last_log_waiting_num.store(0); + + //Must be initialized before 'CutEmptyRoutine' started. + CommonView::m_running_flag = true; + + //Start Leader routine thread. + for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_iterating_threads; ++i) + CommonView::m_vec_routine.emplace_back(new std::thread(Write::CutEmptyRoutine)); +} + +void LeaderView::UnInitialize() noexcept { + + //Waiting for routine thread exit. + CommonView::m_running_flag = false; + + for (auto* p_thread : CommonView::m_vec_routine) { + p_thread->join(); + delete p_thread; + } + + m_hash_followers.clear(); + m_entity_pending_list.Clear(); + m_cut_empty_list.Clear(); + + CommonView::UnInitialize(); +} + +void LeaderView::BroadcastHeatBeat() noexcept { + + auto *_p_ref = &m_hash_followers; + auto *_p_ref_mutex = &m_hash_followers_mutex; + + auto *_p_ref_joint = &MemberMgr::m_joint_summary; + auto *_p_ref_joing_mutex = &MemberMgr::m_mutex; + + //Sending heartbeat to the nodes in the current cluster. 
+ { + ReadLock _r_lock(*_p_ref_mutex); + for (auto &_pair_kv : *_p_ref) { + VLOG(89) << "heartbeat sending, follower:" << _pair_kv.second->my_addr; + _pair_kv.second->m_shp_channel_pool->HeartBeat(ElectionMgr::m_cur_term.load(),StateMgr::GetMyAddr()); + } + } + + //Sending heartbeat to the nodes in the new cluster if there are any. + do{ + ReadLock _r_lock(*_p_ref_joing_mutex); + if (_p_ref_joint->m_joint_status != EJointStatus::JOINT_CONSENSUS) + break; + for (auto &_pair_kv : _p_ref_joint->m_joint_topology.m_added_nodes) + _pair_kv.second->m_shp_channel_pool->HeartBeat(ElectionMgr::m_cur_term.load(),StateMgr::GetMyAddr()); + } while (false); +} + +auto LeaderView::PrepareAppendEntriesRequest(std::shared_ptr &shp_context) { + auto _null_shp = std::shared_ptr(); + + //Need to get a new handler of binlog file. + std::string _binlog_file_name = BinLogGlobal::m_instance.GetBinlogFileName(); + std::FILE* _f_handler = std::fopen(_binlog_file_name.c_str(),_AURORA_BINLOG_READ_MODE_); + if (_f_handler == nullptr) { + LOG(ERROR) << "ReSyncLogCB open binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + return _null_shp; + } + + std::list> _file_meta; + BinLogGlobal::m_instance.GetOrderedMeta(_file_meta); + + //Find the earlier X entries + auto _reverse_iter = _file_meta.crbegin(); + if (_reverse_iter == _file_meta.crend()) { + LOG(WARNING) << "binlog is empty"; + return _null_shp; + } + + //_reverse_bound is the first element of _file_meta. + auto _reverse_bound = _file_meta.crend(); + _reverse_bound--; + + auto _log_id_lcl = StorageGlobal::m_instance.GetLastCommitted(); + uint32_t _precede_lcl_counter = 0; //Count for #log entries that preceding the LCL. 
+ + /*Since _reverse_iter is a reserve iterator , and we are getting the non-reserve iterator + based on it , so there is one more place(the '<=' in the for loop statement below) to advance.*/ + for (std::size_t n = 0; n <= ::RaftCore::Config::FLAGS_resync_log_reverse_step_len; ) { + if ((*_reverse_iter)->operator<(_log_id_lcl)) + _precede_lcl_counter++; + + if ((*_reverse_iter)->operator<(shp_context->m_last_sync_point)) + n++; + + _reverse_iter++; + + //Should stopping at the first element of _file_meta. + if (_reverse_iter == _reverse_bound) + break; + } + + /*_cur_iter will points to _reverse_iter-1, aka, the second element of _file_meta after the + following line, because every log entry need its previous log info when doing resyncing, so we + cannot start at the first one.*/ + auto _cur_iter = _reverse_iter.base(); + + /*The start point log's ID must be greater than (ID-LCL - FLAGS_binlog_reserve_log_num), otherwise the further ahead log entries may be absent.*/ + if (_precede_lcl_counter > ::RaftCore::Config::FLAGS_binlog_reserve_log_num) { + shp_context->m_hold_pre_lcl = true; + BinLogGlobal::m_instance.AddPreLRLUseCount(); + } + + /*Note: _reverse_iter is now points to the previous entry of 'STEP_LEN' or the boundary which at least >= (ID-LCL - FLAGS_binlog_reserve_log_num). + In both cases we just need to begin iterating at _reverse_iter-1 .*/ + + //Appending new entries + std::shared_ptr _shp_req(new AppendEntriesRequest()); + _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + _shp_req->mutable_base()->set_term(ElectionMgr::m_cur_term.load()); + + //Update last sync point to the first entry that will be sent in the next steps. + shp_context->m_last_sync_point = *(*_cur_iter); + + /*There will not much log entries between [ (ID-LCL - FLAGS_binlog_reserve_log_num) , ID-LRL ], so resync all the logs in one RPC is acceptable. */ + + //_cur_iter unchanged ,_pre_iter points to the previous position of _cur_iter. 
+ auto _pre_iter = (--_cur_iter)++; + unsigned char* _p_buf = nullptr; + for (; _cur_iter!=_file_meta.cend(); ++_pre_iter,++_cur_iter) { + + auto _p_entry = _shp_req->add_replicate_entity(); + auto _p_entity_id = _p_entry->mutable_entity_id(); + _p_entity_id->set_term((*_cur_iter)->m_term); + _p_entity_id->set_idx((*_cur_iter)->m_index); + + auto _p_pre_entity_id = _p_entry->mutable_pre_log_id(); + _p_pre_entity_id->set_term((*_pre_iter)->m_term); + _p_pre_entity_id->set_idx((*_pre_iter)->m_index); + + //Seek to position + if (std::fseek(_f_handler, (*_cur_iter)->m_offset, SEEK_SET) != 0) { + LOG(ERROR) << "ReSyncLogCB seek binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + + std::fclose(_f_handler); + return _null_shp; + } + + //Read protobuf buf length + uint32_t _buf_len = 0; + if (std::fread(&_buf_len,1,_FOUR_BYTES_,_f_handler) != _FOUR_BYTES_) { + LOG(ERROR) << "ReSyncLogCB read binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + std::fclose(_f_handler); + return _null_shp; + } + ::RaftCore::Tools::ConvertBigEndianToLocal(_buf_len, &_buf_len); + + //Read protobuf buf + _p_buf = (_p_buf) ? 
(unsigned char*)std::realloc(_p_buf,_buf_len): (unsigned char*)malloc(_buf_len); + if ( std::fread(_p_buf,1,_buf_len, _f_handler) != _buf_len) { + LOG(ERROR) << "ReSyncLogCB read binlog file " << _binlog_file_name << " fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + std::free(_p_buf); + std::fclose(_f_handler); + return _null_shp; + } + + ::raft::BinlogItem _binlog_item; + if (!_binlog_item.ParseFromArray(_p_buf,_buf_len)) { + LOG(ERROR) << "ReSyncLogCB parse protobuf buffer fail " << _binlog_file_name << ",follower:" + << shp_context->m_follower->my_addr; + std::free(_p_buf); + std::fclose(_f_handler); + return _null_shp; + } + + auto _p_wop = _p_entry->mutable_write_op(); + _p_wop->set_key(_binlog_item.entity().write_op().key()); + _p_wop->set_value(_binlog_item.entity().write_op().value()); + } + + if (_p_buf) + std::free(_p_buf); + + //VLOG(89) << "debug pos2" << ",leader sent resync log:" << _shp_req->DebugString(); + + std::fclose(_f_handler); + return _shp_req; +} + +void LeaderView::AddRescynDataTask(std::shared_ptr &shp_context) noexcept { + //Prevent from duplicated task being executed. + if (shp_context->m_follower->m_status == FollowerStatus::RESYNC_DATA) { + LOG(INFO) << "a RESYNC_DATA task already in progress for follower:" << shp_context->m_follower->my_addr + << ", no need to generate a new one, just return"; + return; + } + + shp_context->m_follower->m_status = FollowerStatus::RESYNC_DATA; + + /*Here is synonymous to that , the leader is talking to the follower , and says : "Currently I don't have enough log entries to heal your + log falling behind issue , you have to resync all the whole data , namely , starting the resync data procedure all over again. " */ + + std::shared_ptr _shp_sync_data_ctx(new BackGroundTask::SyncDataContenxt(shp_context->m_follower)); + + //Pass the callback function down through since SyncData will eventually need to all that ,too. 
+ _shp_sync_data_ctx->m_on_success_cb = shp_context->m_on_success_cb; + + int _ret_code = m_priority_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_DATA, &_shp_sync_data_ctx); + LOG(INFO) << "Add SYNC-DATA task bool ret:" << _ret_code << ",logID:" << shp_context->m_last_sync_point + << ",follower:" << shp_context->m_follower->my_addr; +} + +bool LeaderView::ReSyncLogCB(std::shared_ptr &shp_context) noexcept{ + + LOG(INFO) << "resync log background task received, peer:" << shp_context->m_follower->my_addr + << ",last synced point:" << shp_context->m_last_sync_point; + + //Follower must in RESYNC_LOG state + if (shp_context->m_follower->m_status != FollowerStatus::RESYNC_LOG) { + LOG(WARNING) << "ReSyncLogCB follower " << shp_context->m_follower->my_addr << " is under " + << FollowerEntity::MacroToString(shp_context->m_follower->m_status) << " status,won't resync log to it"; + return false; + } + + auto _shp_req = PrepareAppendEntriesRequest(shp_context); + if (!_shp_req) { + LOG(ERROR) << "PrepareAppendEntriesRequest got an empty result,probably due to a resync-data event happened,check it."; + return false; + } + + auto _shp_channel = shp_context->m_follower->m_shp_channel_pool->GetOneChannel(); + UnarySyncClient _sync_log_client(_shp_channel); + + auto _rpc = std::bind(&::raft::RaftService::Stub::AppendEntries, _sync_log_client.GetStub().get(), + std::placeholders::_1, std::placeholders::_2, std::placeholders::_3); + + ::grpc::Status _status; + auto &_rsp = _sync_log_client.DoRPC([&](std::shared_ptr& req) { + req = _shp_req; }, _rpc ,::RaftCore::Config::FLAGS_leader_resync_log_rpc_timeo_ms, _status); + + if (!_status.ok()) { + LOG(ERROR) << "ReSyncLogCB AppendEntries fail,error code:" << _status.error_code() + << ",err msg: " << _status.error_message() ; + return false; + } + + //const auto &_last_entity = _shp_req->replicate_entity(_shp_req->replicate_entity_size() - 1); + + ErrorCode _error_code = _rsp.comm_rsp().result(); + if 
(_error_code!=ErrorCode::SUCCESS && _error_code!=ErrorCode::SUCCESS_MERGED) { + if (_error_code != ErrorCode::APPEND_ENTRY_CONFLICT && _error_code != ErrorCode::WAITING_TIMEOUT + && _error_code != ErrorCode::OVERSTEP_LCL) { + LOG(ERROR) << "ReSyncLogCB AppendEntries fail,detail:" << _rsp.DebugString(); + return false; + } + + if (_error_code == ErrorCode::OVERSTEP_LCL) { + AddRescynDataTask(shp_context); + return true; + } + + //If still conflict, the task should be re-queued , no task could hold a thread for a long time. + int _ret_code = m_priority_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG, &shp_context); + LOG(INFO) << "Add RESYNC-LOG task ret:" << _ret_code << ",last synced point:" + << shp_context->m_last_sync_point << ", remote peer:" << shp_context->m_follower->my_addr; + + return true; + } + + //Reduce the use count for pre-lrl if currently holding one. + if (shp_context->m_hold_pre_lcl) { + shp_context->m_hold_pre_lcl = false; + BinLogGlobal::m_instance.SubPreLRLUseCount(); + } + + //Reset follower status to NORMAL,allowing user threads to do normal AppendEntries RPC again. + shp_context->m_follower->m_status = FollowerStatus::NORMAL; + + if (shp_context->m_on_success_cb) + shp_context->m_on_success_cb(shp_context->m_follower); + + return true; +} + +bool LeaderView::SyncLogAfterLCL(std::shared_ptr &shp_context) { + /*Note : If start syncing logs , never turn back , do it until finished. Two reasons : + 1. #logs which larger then ID-LCL is quite small. + 2. If turn back , could incur follower committing a log which already been synced in the data zone, + this potential break the version sequence of the committed data. */ + + //Prepare sync log. + std::list> _file_meta; + BinLogGlobal::m_instance.GetOrderedMeta(_file_meta); + + //Find the start syncing point. + int _precede_lcl_counter = 0; //count for num of exceeding the LCL. 
+ + auto _iter_begin = _file_meta.cend(); + for (auto _iter = _file_meta.crbegin(); _iter != _file_meta.crend();++_iter) { + + if ((*_iter)->operator<(shp_context->m_last_sync)) + _precede_lcl_counter++; + + if ((*_iter)->operator>(shp_context->m_last_sync)) + continue; + + _iter_begin = _iter.base(); + break; + } + + if (_iter_begin == _file_meta.cend()) { + LOG(ERROR) << "SyncDataCB cannot find the sync log starting point"; + return false; + } + + int _reserve_before_lcl = ::RaftCore::Config::FLAGS_binlog_reserve_log_num; + CHECK(_precede_lcl_counter < _reserve_before_lcl) << "SyncDBAfter LCL fail,_precede_lcl_counter :" + << _precede_lcl_counter << ",exceeds limit:" << _reserve_before_lcl; + + TypePtrFollowerEntity _shp_follower = shp_context->m_follower; + auto _follower_addr = _shp_follower->my_addr; + + //Start syncing log. + std::string _binlog_file_name = BinLogGlobal::m_instance.GetBinlogFileName(); + std::FILE* _f_handler = std::fopen(_binlog_file_name.c_str(),_AURORA_BINLOG_READ_MODE_); + if (_f_handler == nullptr) { + LOG(ERROR) << "ReSyncLogCB open binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << _follower_addr; + return false; + } + + if (std::fseek(_f_handler, (*_iter_begin)->m_offset, SEEK_SET) != 0) { + LOG(ERROR) << "ReSyncLogCB seek binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << _follower_addr; + std::fclose(_f_handler); + return false; + } + + int _rpc_counter = 0; + unsigned char* _p_buf = nullptr; + + auto &_shp_client = shp_context->m_shp_client; + auto _shp_stream = _shp_client->GetReaderWriter(); + auto* _rsp = _shp_client->GetResponse(); + + auto _shp_req = _shp_client->GetInstantiatedReq(); + _shp_req->mutable_base()->set_term(ElectionMgr::m_cur_term.load()); + _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + + _shp_req->clear_entity(); + _shp_req->set_msg_type(::raft::SyncDataMsgType::SYNC_LOG); + + bool _sync_log_result = true; + while (true) { + + 
_shp_req->clear_entity(); + bool _read_end = false; + for (std::size_t i = 0; i < ::RaftCore::Config::FLAGS_resync_data_log_num_each_rpc; ++i) { + + uint32_t _buf_len = 0; + if (std::fread(&_buf_len, _FOUR_BYTES_, 1, _f_handler) != 1) { + LOG(ERROR) << "ReSyncLogCB read binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + _sync_log_result = false; + break; + } + ::RaftCore::Tools::ConvertBigEndianToLocal(_buf_len, &_buf_len); + + //Read protobuf buf + _p_buf = (_p_buf) ? (unsigned char*)std::realloc(_p_buf,_buf_len): (unsigned char*)malloc(_buf_len); + if (std::fread(_p_buf, _buf_len, 1, _f_handler) != 1) { + LOG(ERROR) << "ReSyncLogCB read binlog file " << _binlog_file_name << "fail..,errno:" + << errno << ",follower:" << shp_context->m_follower->my_addr; + _sync_log_result = false; + break; + } + + ::raft::BinlogItem _binlog_item; + if (!_binlog_item.ParseFromArray(_p_buf,_buf_len)) { + LOG(ERROR) << "ReSyncLogCB parse protobuf buffer fail " << _binlog_file_name << ",follower:" + << shp_context->m_follower->my_addr; + _sync_log_result = false; + break; + } + + auto *_p_entity = _shp_req->add_entity(); + + //TODO:figure out why this could resulting in a coredump in ~BinlogItem(). 
+ //_p_entity->Swap(_binlog_item.mutable_entity()); + //_binlog_item.clear_entity(); + + _p_entity->CopyFrom(_binlog_item.entity()); + + if (!EntityIDSmaller(_p_entity->entity_id(), BinLogGlobal::m_instance.GetLastReplicated())) { + _read_end = true; + break; + } + } + + if (!_sync_log_result) + break; + + if (!_shp_stream->Write(*_shp_req)) { + LOG(ERROR) << "SyncDataCB send log fail,follower:" << shp_context->m_follower->my_addr << ",logID:" << shp_context->m_last_sync; + _sync_log_result = false; + break; + } + + if (!_shp_stream->Read(_rsp)) { + LOG(ERROR) << "SyncDataCB get prepare result fail,follower:" << shp_context->m_follower->my_addr + << ",logID:" << shp_context->m_last_sync; + break; + } + + if (_rsp->comm_rsp().result() != ErrorCode::SYNC_LOG_CONFRIMED) { + LOG(ERROR) << "SyncDataCB prepare fail,follower:" << shp_context->m_follower->my_addr << ",logID:" << shp_context->m_last_sync; + break; + } + + if (_read_end) + break; + } + + std::fclose(_f_handler); + if (_p_buf) + std::free(_p_buf); + + if (!_shp_stream->WritesDone()) { + LOG(ERROR) << "SyncDataCB send log WritesDone fail,follower:" << shp_context->m_follower->my_addr + << ",logID:" << shp_context->m_last_sync; + return false; + } + + shp_context->m_final_status = _shp_stream->Finish(); + + if (!shp_context->m_final_status.ok()) { + LOG(ERROR) << "SyncDataCB send log final status fail,follower:" << shp_context->m_follower->my_addr + << ",logID:" << shp_context->m_last_sync << ",error_code:" + << shp_context->m_final_status.error_code() << ",error_status:" + << shp_context->m_final_status.error_message(); + return false; + } + + if (!_sync_log_result) + return false; + + return true; +} + +bool LeaderView::ClientReactCB(std::shared_ptr &shp_context) noexcept { + void* _tag = shp_context->m_react_info.m_tag; + ::RaftCore::Common::ReactBase* _p_ins = (::RaftCore::Common::ReactBase*)_tag; + _p_ins->React(shp_context->m_react_info.m_cq_result); + + return true; +} + +bool 
LeaderView::SyncDataCB(std::shared_ptr &shp_context) noexcept{ + + TypePtrFollowerEntity _shp_follower = shp_context->m_follower; + auto _follower_addr = _shp_follower->my_addr; + + LOG(INFO) << "sync data background task received,peer:" << _follower_addr; + + //Iterating over the storage , sync data to the follower in a batch manner. + auto GetCurrentMS = []()->uint64_t{ + return std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + }; + + uint64_t _start_ts = GetCurrentMS(); + + auto ScheduleNext = [&]()->bool{ + uint64_t _now = GetCurrentMS(); + if (_now - _start_ts <= ::RaftCore::Config::FLAGS_resync_data_task_max_time_ms) + return false; + + int _ret_code = m_priority_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_DATA, &shp_context); + LOG(INFO) << "Add RESYNC_DATA task result:" << _ret_code << ",follower:" << _follower_addr + << ",logID:" << shp_context->m_last_sync; + if (_ret_code != QUEUE_SUCC) + return false; + + return true; + }; + + //After stream established , follower gets into its RPC interface and start waiting to read. 
+ auto &_shp_client = shp_context->m_shp_client; + auto _shp_req = _shp_client->GetInstantiatedReq(); + _shp_req->mutable_base()->set_term(ElectionMgr::m_cur_term.load()); + _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + + auto _shp_stream = _shp_client->GetReaderWriter(); + auto* _rsp = _shp_client->GetResponse(); + + if (shp_context->IsBeginning()) { + + _shp_req->set_msg_type(::raft::SyncDataMsgType::PREPARE); + if (!_shp_stream->Write(*_shp_req)) { + LOG(ERROR) << "SyncDataCB send prepare msg fail,follower:" << _follower_addr << ",logID:" + << shp_context->m_last_sync; + return false; + } + + VLOG(89) << " sync_data_debug PREPARE sent."; + + if (!_shp_stream->Read(_rsp)) { + LOG(ERROR) << "SyncDataCB get prepare result fail,follower:" << _follower_addr + << ",logID:" << shp_context->m_last_sync; + return false; + } + + VLOG(89) << " sync_data_debug prepare received."; + + if (_rsp->comm_rsp().result() != ErrorCode::PREPARE_CONFRIMED) { + LOG(ERROR) << "SyncDataCB prepare fail,follower:" << _follower_addr << ",logID:" + << shp_context->m_last_sync << ",result:" << _rsp->DebugString(); + return false; + } + } + + while (true) { + _shp_req->clear_entity(); + _shp_req->set_msg_type(::raft::SyncDataMsgType::SYNC_DATA); + + std::list _list; + StorageGlobal::m_instance.GetSlice(shp_context->m_last_sync,::RaftCore::Config::FLAGS_resync_data_item_num_each_rpc,_list); + + if (_list.empty()) { + VLOG(89) << "list empty after GetSlice"; + break; + } + + for (const auto &_item : _list) { + + auto *_p_entity = _shp_req->add_entity(); + auto *_p_wop = _p_entity->mutable_write_op(); + + //Ownership of the following two can be taken over. 
+ _p_wop->set_allocated_key(_item.m_key.get()); + _p_wop->set_allocated_value(_item.m_value.get()); + + auto _p_entity_id = _p_entity->mutable_entity_id(); + _p_entity_id->set_term(_item.m_log_id.m_term); + _p_entity_id->set_idx(_item.m_log_id.m_index); + } + + bool _rst = _shp_stream->Write(*_shp_req); + + //Release the allocated write_op first. + for (int i = 0; i < _shp_req->entity_size(); ++i) { + auto *_p_wop = _shp_req->mutable_entity(i)->mutable_write_op(); + _p_wop->release_key(); + _p_wop->release_value(); + } + + if (!_rst) { + LOG(ERROR) << "SyncDataCB send data fail,follower:" << _follower_addr << ",logID:" + << shp_context->m_last_sync; + return false; + } + + if (!_shp_stream->Read(_rsp)) { + LOG(ERROR) << "SyncDataCB get prepare result fail,follower:" << _follower_addr + << ",logID:" << shp_context->m_last_sync; + return false; + } + + if (_rsp->comm_rsp().result() != ErrorCode::SYNC_DATA_CONFRIMED) { + LOG(ERROR) << "SyncDataCB prepare fail,follower:" << _follower_addr << ",logID:" + << shp_context->m_last_sync; + return false; + } + + //Update last synced storage data item. + /*TODO: Prevent from losing data(a rare case) when using the 'm_last_sync' as the task restart + point since the order is not strictly guaranteed among sstables.*/ + shp_context->m_last_sync.Set(_list.back().m_log_id); + + //Return if successfully push the task again to the queue. + if (ScheduleNext()) + return true; + + //Means there are no more data items to be synced due to 'GetSlice'. + if (int(_list.size()) < ::RaftCore::Config::FLAGS_resync_data_item_num_each_rpc) { + VLOG(89) << "list num less than required after GetSlice."; + break; + } + } + + //Re-check if current thread timed out. 
+ if (ScheduleNext()) + return true; + + LOG(INFO) << "SYNC_DATA end, start sync log after lrl."; + + bool _resync_log_rst = SyncLogAfterLCL(shp_context); + if (_resync_log_rst && shp_context->m_on_success_cb) + shp_context->m_on_success_cb(_shp_follower); + + //Reset follower status to NORMAL,allow user threads to do normal AppendEntries RPC. + shp_context->m_follower->m_status = FollowerStatus::NORMAL; + + return true; +} + +void LeaderView::ClientThreadReacting(const ReactInfo &info) noexcept { + + std::shared_ptr _shp_task(new ReactInfo(info)); + + int _ret_code = LeaderView::m_priority_queue.Push(LockFreePriotityQueue::TaskType::CLIENT_REACTING, &_shp_task); + if (_ret_code != QUEUE_SUCC) + LOG(ERROR) << "Add CLIENT_REACTING task fail,ret:" << _ret_code; +} + + +} diff --git a/src/leader/leader_view.h b/src/leader/leader_view.h new file mode 100644 index 0000000..786c711 --- /dev/null +++ b/src/leader/leader_view.h @@ -0,0 +1,144 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#pragma once
+
+#ifndef __AURORA_LEADER_VIEW_H__
+#define __AURORA_LEADER_VIEW_H__
+
+#include
+#include
+#include
+
+#include "grpc/grpc.h"
+#include "grpc++/grpc++.h"
+
+#include "protocol/raft.pb.h"
+
+#include "common/comm_defs.h"
+#include "common/comm_view.h"
+#include "topology/topology_mgr.h"
+#include "tools/lock_free_queue.h"
+#include "tools/trivial_lock_double_list.h"
+#include "tools/trivial_lock_single_list.h"
+#include "leader/follower_entity.h"
+#include "leader/leader_bg_task.h"
+#include "leader/memory_log_leader.h"
+
+namespace RaftCore::Leader {
+
+using grpc::CompletionQueue;
+using ::RaftCore::Common::CommonView;
+using ::RaftCore::Leader::MemoryLogItemLeader;
+using ::RaftCore::Leader::BackGroundTask::CutEmptyContext;
+using ::RaftCore::Common::LogIdentifier;
+using ::RaftCore::Common::ReactInfo;
+using ::RaftCore::DataStructure::DoubleListNode;
+using ::RaftCore::DataStructure::TrivialLockDoubleList;
+using ::RaftCore::DataStructure::SingleListNode;
+using ::RaftCore::DataStructure::TrivialLockSingleList;
+using ::RaftCore::DataStructure::LockFreeUnorderedSingleList;
+//Leader-side shared state and callbacks. Every member is static and all constructors are deleted, so the class is never instantiated.
+class LeaderView :public CommonView{
+
+public:
+
+    enum class ServerStatus {
+        NORMAL=0,
+        HALTED,
+        SHUTTING_DOWN,
+    };
+
+public:
+
+    static void Initialize(const ::RaftCore::Topology& _topo) noexcept;
+
+    static void UnInitialize() noexcept;
+
+//The following member functions are public only when _LEADER_VIEW_TEST_ is defined (to facilitate gtest); otherwise they are private.
+#ifdef _LEADER_VIEW_TEST_
+public:
+#else
+private:
+#endif
+
+    static bool ReSyncLogCB(std::shared_ptr &shp_context)noexcept;
+
+    static bool SyncDataCB(std::shared_ptr &shp_context)noexcept;
+
+    static bool ClientReactCB(std::shared_ptr &shp_context) noexcept;
+
+    static void ClientThreadReacting(const ReactInfo &info) noexcept;
+
+    static void BroadcastHeatBeat() noexcept;
+
+public:
+
+    static std::string my_addr;
+    //Follower table keyed by address; MemberMgr accesses it under m_hash_followers_mutex.
+    static std::unordered_map m_hash_followers;
+
+    static std::shared_timed_mutex m_hash_followers_mutex;
+
+    static TrivialLockDoubleList m_entity_pending_list;
+
+    static LockFreeUnorderedSingleList> m_garbage;
+
+    //Used for write requests which cannot get finished after it CutHead.
+    static TrivialLockSingleList m_cut_empty_list;
+
+    static LockFreeUnorderedSingleList> m_cut_empty_garbage;
+
+    //CV used for multiple threads cooperating on append binlog operations.
+    static std::condition_variable m_cv;
+
+    static std::mutex m_cv_mutex;
+
+    static std::atomic m_last_cut_log;
+
+    static ServerStatus m_status;
+
+    static std::atomic m_last_log_waiting_num;
+
+private:
+
+    static void AddRescynDataTask(std::shared_ptr &shp_context) noexcept;
+
+    static auto PrepareAppendEntriesRequest(std::shared_ptr &shp_context);
+
+    static bool SyncLogAfterLCL(std::shared_ptr &shp_context);
+
+private:
+
+    static const char* m_invoker_macro_names[];
+
+private:
+    //Construction, destruction and copying are all deleted: the class is used through its static members only.
+    LeaderView() = delete;
+
+    virtual ~LeaderView() = delete;
+
+    LeaderView(const LeaderView &) = delete;
+
+    LeaderView& operator=(const LeaderView &) = delete;
+};
+
+}
+
+
+#endif
diff --git a/src/leader/memory_log_leader.cc b/src/leader/memory_log_leader.cc
new file mode 100644
index 0000000..0bde8c4
--- /dev/null
+++ b/src/leader/memory_log_leader.cc
@@ -0,0 +1,53 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the
License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#include "leader/memory_log_leader.h"
+
+namespace RaftCore::Leader {
+
+MemoryLogItemLeader::~MemoryLogItemLeader() noexcept{
+    /*The write_op of m_entity of LeaderLogItem is from set_allocated_write_op in the leader service, so its ownership was meant
+      to be released before releasing m_entity. NOTE(review): the release call below is commented out - confirm who owns write_op. */
+    //this->m_entity->release_write_op();
+}
+//Builds an item from a (term, index) pair by forwarding to MemoryLogItemBase.
+MemoryLogItemLeader::MemoryLogItemLeader(uint32_t _term, uint64_t _index) noexcept:MemoryLogItemBase(_term, _index) {}
+//Builds an item from a ::raft::Entity by forwarding to MemoryLogItemBase.
+MemoryLogItemLeader::MemoryLogItemLeader(const ::raft::Entity &_entity) noexcept:MemoryLogItemBase(_entity) {}
+//The comparison operators below all forward to the MemoryLogItemBase implementation.
+bool MemoryLogItemLeader::operator<(const MemoryLogItemLeader& _other)const noexcept {
+    return this->MemoryLogItemBase::operator<(_other);
+}
+
+bool MemoryLogItemLeader::operator>(const MemoryLogItemLeader& _other)const noexcept {
+    return this->MemoryLogItemBase::operator>(_other);
+}
+
+bool MemoryLogItemLeader::operator==(const MemoryLogItemLeader& _other)const noexcept {
+    return this->MemoryLogItemBase::operator==(_other);
+}
+//Defined as the negation of the base class operator==.
+bool MemoryLogItemLeader::operator!=(const MemoryLogItemLeader& _other)const noexcept {
+    return !this->MemoryLogItemBase::operator==(_other);
+}
+//Reference-taking adapter: passes the addresses of both items to CmpMemoryLog.
+bool CmpMemoryLogLeader(const MemoryLogItemLeader& left, const MemoryLogItemLeader& right) noexcept {
+    return CmpMemoryLog(&left,&right);
+}
+
+}
diff --git a/src/leader/memory_log_leader.h b/src/leader/memory_log_leader.h
new file mode 100644
index 0000000..7fdbf3d
--- /dev/null
+++ b/src/leader/memory_log_leader.h
@@ -0,0 +1,63 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software:
you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef _AURORA_MEMORY_LOG_LEADER_H_
+#define _AURORA_MEMORY_LOG_LEADER_H_
+
+#include
+
+#include "protocol/raft.pb.h"
+
+#include "tools/trivial_lock_double_list.h"
+#include "common/memory_log_base.h"
+
+using ::RaftCore::Common::MemoryLogItemBase;
+
+namespace RaftCore::Leader {
+//Leader-side memory log item: a final class deriving from both OrderedTypeBase and MemoryLogItemBase.
+class MemoryLogItemLeader final : public ::RaftCore::DataStructure::OrderedTypeBase , public MemoryLogItemBase {
+
+public:
+
+    virtual ~MemoryLogItemLeader() noexcept;
+    //Construct from an explicit (term, index) pair.
+    MemoryLogItemLeader(uint32_t _term, uint64_t _index) noexcept;
+    //Construct from a protobuf ::raft::Entity.
+    MemoryLogItemLeader(const ::raft::Entity &_entity) noexcept;
+    //Comparison operators between two leader log items.
+    virtual bool operator<(const MemoryLogItemLeader& _other)const noexcept;
+
+    virtual bool operator>(const MemoryLogItemLeader& _other)const noexcept;
+
+    virtual bool operator==(const MemoryLogItemLeader& _other)const noexcept;
+
+    virtual bool operator!=(const MemoryLogItemLeader& _other)const noexcept;
+
+protected:
+    //Empty implementation; presumably satisfies a pure virtual declared in a base class - TODO confirm.
+    virtual void NotImplemented() noexcept{}
+
+};
+//Free-function comparator over two leader log items.
+bool CmpMemoryLogLeader(const MemoryLogItemLeader& left, const MemoryLogItemLeader& right) noexcept;
+
+}
+
+#endif
diff --git a/src/main.cc b/src/main.cc
new file mode 100644
index 0000000..c669dab
--- /dev/null
+++ b/src/main.cc
@@ -0,0 +1,38 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the
Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "gflags/gflags.h" +#include "glog/logging.h" + +#include "global/global_env.h" + +int main(int argc,char** argv) { + + google::ParseCommandLineFlags(&argc, &argv, true); + google::InitGoogleLogging(argv[0]); + + FLAGS_log_dir = "."; + FLAGS_logbuflevel = -1; + + //Start the whole thing. + ::RaftCore::Global::GlobalEnv::InitialEnv(); + ::RaftCore::Global::GlobalEnv::RunServer(); + + return 0; +} + diff --git a/src/member/member_manager.cc b/src/member/member_manager.cc new file mode 100644 index 0000000..4ad8d29 --- /dev/null +++ b/src/member/member_manager.cc @@ -0,0 +1,687 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include + +#include "grpc/grpc.h" +#include "grpc++/grpc++.h" + +#include "protocol/raft.grpc.pb.h" + +#include "common/comm_defs.h" +#include "config/config.h" +#include "topology/topology_mgr.h" +#include "storage/storage_singleton.h" +#include "leader/follower_entity.h" +#include "leader/leader_view.h" +#include "leader/client_pool.h" +#include "tools/lock_free_priority_queue.h" +#include "tools/utilities.h" +#include "state/state_mgr.h" +#include "election/election.h" +#include "global/global_env.h" +#include "client/client_impl.h" +#include "member/member_manager.h" + +#define _AURORA_MEMBER_CLUSTER_STATUS_PREFIX_ "cluster status:" +#define _AURORA_MEMBER_NEW_CLUSTER_PREFIX_ "new cluster:" +#define _AURORA_MEMBER_VERSION_PREFIX_ "version:" + +namespace RaftCore::Member { + +std::condition_variable MemberMgr::m_resync_data_cv; + +std::mutex MemberMgr::m_resync_data_cv_mutex; + +MemberMgr::JointSummary MemberMgr::m_joint_summary; + +std::shared_timed_mutex MemberMgr::m_mutex; + +std::atomic MemberMgr::m_in_processing; + +MemberMgr::JointTopology MemberMgr::m_joint_topo_snapshot; + +const char* MemberMgr::m_macro_names[] = {"STABLE","JOINT_CONSENSUS"}; + +MemberMgr::MemberChangeContext MemberMgr::m_memchg_ctx; + +TwoPhaseCommitBatchTask MemberMgr::m_phaseI_task; + +TwoPhaseCommitBatchTask MemberMgr::m_phaseII_task; + +#ifdef _MEMBER_MANAGEMENT_TEST_ +bool MemberMgr::m_execution_flag = false; +#endif + +using ::raft::ErrorCode; +using ::grpc::CompletionQueue; +using ::RaftCore::Common::WriteLock; +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Common::FinishStatus; +using ::RaftCore::Leader::FollowerEntity; +using ::RaftCore::Leader::FollowerStatus; +using ::RaftCore::Storage::StorageGlobal; +using ::RaftCore::Topology; +using ::RaftCore::CTopologyMgr; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Leader::BackGroundTask::ReSyncLogContext; +using ::RaftCore::DataStructure::LockFreePriotityQueue; +using ::RaftCore::State::StateMgr; +using 
::RaftCore::State::RaftRole; +using ::RaftCore::Election::ElectionMgr; +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::Tools::TypeSysTimePoint; +using ::RaftCore::Client::MemberChangePrepareAsyncClient; +using ::RaftCore::Client::MemberChangeCommitAsyncClient; + +const MemberMgr::JointTopology& MemberMgr::JointTopology::operator=(const MemberMgr::JointTopology &one) { + this->m_new_cluster = one.m_new_cluster; + this->m_added_nodes = one.m_added_nodes; + this->m_removed_nodes = one.m_removed_nodes; + this->m_leader_gone_away = one.m_leader_gone_away; + this->m_old_leader = one.m_old_leader; + + return *this; +} + +const MemberMgr::JointTopology& MemberMgr::JointTopology::operator=(MemberMgr::JointTopology &&one) { + this->m_new_cluster = std::move(one.m_new_cluster); + this->m_added_nodes = std::move(one.m_added_nodes); + this->m_removed_nodes = std::move(one.m_removed_nodes); + this->m_leader_gone_away = one.m_leader_gone_away; + this->m_old_leader = one.m_old_leader; + + return *this; +} + +void MemberMgr::JointTopology::Reset() noexcept{ + this->m_new_cluster.clear(); + this->m_added_nodes.clear(); + this->m_removed_nodes.clear(); + this->m_leader_gone_away = false; + this->m_old_leader = ""; +} + +void MemberMgr::JointTopology::Update(const std::set * p_new_cluster)noexcept { + + if (p_new_cluster) + this->m_new_cluster = *p_new_cluster; + + //Topology should be ready for reading. + Topology _cur_topo; + CTopologyMgr::Read(&_cur_topo); + + bool _is_leader = StateMgr::GetRole() == RaftRole::LEADER; + + //Find added nodes. + this->m_added_nodes.clear(); + + for (const auto& _item : this->m_new_cluster) { + + if (_cur_topo.InCurrentCluster(_item)) + continue; + + FollowerEntity* _p_follower = nullptr; + if (_is_leader) + _p_follower = new FollowerEntity(_item, FollowerStatus::RESYNC_LOG, + uint32_t(JointConsensusMask::IN_NEW_CLUSTER)); + + this->m_added_nodes.emplace(_item, _p_follower); + } + + //Find removed nodes. 
+ this->m_removed_nodes.clear(); + { + ReadLock _r_lock(LeaderView::m_hash_followers_mutex); + for (const auto& _pair : LeaderView::m_hash_followers) + if (this->m_new_cluster.find(_pair.first) == this->m_new_cluster.cend()) + this->m_removed_nodes.emplace(_pair.first); + } + + this->m_leader_gone_away = (this->m_new_cluster.find(_cur_topo.m_leader)==this->m_new_cluster.cend()); + this->m_old_leader = _cur_topo.m_leader; +} + +void MemberMgr::JointSummary::Reset()noexcept { + this->m_joint_status = EJointStatus::STABLE; + this->m_joint_topology.Reset(); + //m_version is monotonic,shouldn't been reset in any time. +} + +void MemberMgr::Initialize() noexcept { + m_in_processing.store(false); + m_joint_summary.Reset(); + ResetMemchgEnv(); + LoadFile(); +} + +void MemberMgr::UnInitialize() noexcept { + m_joint_summary.Reset(); +} + +void MemberMgr::ResetMemchgEnv() noexcept { + //Reset all the followings before using them. + m_joint_topo_snapshot.Reset(); + m_memchg_ctx.Reset(); +} + +void MemberMgr::LoadFile() noexcept { + //Read the config file. 
+ std::ifstream f_input(_AURORA_MEMBER_CONFIG_FILE_); + + for (std::string _ori_line; std::getline(f_input, _ori_line); ) { + + std::string _line = ""; + std::copy_if(_ori_line.begin(), _ori_line.end(), std::back_inserter(_line), [](char c) { return c != '\r' && c != '\n'; }); + + if (_line.find(_AURORA_MEMBER_CLUSTER_STATUS_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in member config file, _line:" << _line; + m_joint_summary.m_joint_status = StringToMacro(_line.substr(pos + 1).c_str()); + continue; + } + + if (_line.find(_AURORA_MEMBER_NEW_CLUSTER_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in member config file, _line:" << _line; + ::RaftCore::Tools::StringSplit(_line.substr(pos + 1),',',m_joint_summary.m_joint_topology.m_new_cluster); + continue; + } + + if (_line.find(_AURORA_MEMBER_VERSION_PREFIX_) != std::string::npos) { + std::size_t pos = _line.find(":"); + CHECK (pos != std::string::npos) << "cannot find delimiter[:] in member config file, _line:" << _line; + m_joint_summary.m_version = std::atol(_line.substr(pos + 1).c_str()); + continue; + } + } + + m_joint_summary.m_joint_topology.Update(); +} + +void MemberMgr::SaveFile() noexcept{ + + ReadLock _r_lock(m_mutex); + std::FILE* f_handler = std::fopen(_AURORA_MEMBER_CONFIG_FILE_, "w+"); + CHECK(f_handler != nullptr) << "open BaseState file " << _AURORA_MEMBER_CONFIG_FILE_ << "fail..,errno:" << errno; + + auto &_cluster_topo = m_joint_summary.m_joint_topology; + + std::string _new_cluster = ""; + for (auto iter = _cluster_topo.m_new_cluster.crbegin(); iter != _cluster_topo.m_new_cluster.crend(); ++iter) + _new_cluster += ((*iter) + ","); + + std::string buf = _AURORA_MEMBER_CLUSTER_STATUS_PREFIX_ + std::string(MacroToString(m_joint_summary.m_joint_status)) + "\n" + + _AURORA_MEMBER_NEW_CLUSTER_PREFIX_ + _new_cluster + "\n" + + 
_AURORA_MEMBER_VERSION_PREFIX_ + std::to_string(m_joint_summary.m_version) + "\n"; + + std::size_t written = fwrite(buf.data(), 1, buf.size(), f_handler); + CHECK(written == buf.size()) << "fwrite BaseState file fail...,errno:" << errno << ",written:" << written << ",expected:" << buf.size(); + CHECK(!std::fclose(f_handler)) << "close BaseState file fail...,errno:" << errno; +} + +void MemberMgr::NotifyOnSynced(TypePtrFollowerEntity &shp_follower) noexcept { + + LOG(INFO) << "[Membership Change] peer " << shp_follower->my_addr << " notify called,switched to NORMAL status"; + + shp_follower->m_status = FollowerStatus::NORMAL; + + std::unique_lock _lock(m_resync_data_cv_mutex); + m_resync_data_cv.notify_all(); +} + +void MemberMgr::SwitchToJointConsensus(JointTopology &updated_topo,uint32_t version)noexcept { + + WriteLock _w_lock(m_mutex); + + m_joint_summary.m_joint_status = EJointStatus::JOINT_CONSENSUS; + m_joint_summary.m_joint_topology = std::move(updated_topo); + + //Update old cluster followers' status. + if (StateMgr::GetRole() == RaftRole::LEADER) { + for (const auto& _item : m_joint_summary.m_joint_topology.m_new_cluster) { + ReadLock _r_lock(LeaderView::m_hash_followers_mutex); + auto _iter = LeaderView::m_hash_followers.find(_item); + if (_iter != LeaderView::m_hash_followers.cend()) + _iter->second->m_joint_consensus_flag |= uint32_t(JointConsensusMask::IN_NEW_CLUSTER); + } + } + + m_joint_summary.m_version++; + if (version != _MAX_UINT32_) + m_joint_summary.m_version = version; + _w_lock.unlock(); + + //Persist changes. + SaveFile(); +} + +uint32_t MemberMgr::GetVersion()noexcept { + ReadLock _r_lock(m_mutex); + return m_joint_summary.m_version; +} + +bool MemberMgr::SwitchToStable()noexcept { + + /*Since topology config and membership-change config are in separated files, they cannot be updated + atomically, but if we update topology first ,the leader or follower server can still serve normally + after recovered from a crash. 
*/ + ::RaftCore::Topology _old_topo; + ::RaftCore::CTopologyMgr::Read(&_old_topo); + + //Update to the latest topology. + ::RaftCore::Topology _new_topo; + + { + ReadLock _w_lock(m_mutex); + auto &_cluster_topo = m_joint_summary.m_joint_topology; + for (const auto &_node : _cluster_topo.m_new_cluster) { + if (_node == _old_topo.m_leader) { + _new_topo.m_leader = _node; + continue; + } + _new_topo.m_followers.emplace(_node); + } + _new_topo.m_my_addr = _old_topo.m_my_addr; + ::RaftCore::CTopologyMgr::Update(_new_topo); + + //Update to latest follower list in leader's view. + if (StateMgr::GetRole() == RaftRole::LEADER) { + WriteLock _r_lock(LeaderView::m_hash_followers_mutex); + LeaderView::m_hash_followers.clear(); + for (const auto &_node : _cluster_topo.m_new_cluster) + if (_node != _new_topo.m_leader) + LeaderView::m_hash_followers[_node] = std::shared_ptr(new FollowerEntity(_node)); + } + } + + { + WriteLock _w_lock(m_mutex); + m_joint_summary.Reset(); + m_joint_summary.m_version++; + } + + //Persist changes. + SaveFile(); + + return _new_topo.InCurrentCluster(StateMgr::GetMyAddr()); +} + +std::string MemberMgr::FindPossibleAddress(const std::list &nic_addrs)noexcept{ + ReadLock _r_lock(m_mutex); + + auto &_new_nodes = m_joint_summary.m_joint_topology.m_added_nodes; + for (const auto &_item : nic_addrs) { + auto _iter = _new_nodes.find(_item); + if (_iter != _new_nodes.cend()) + return _iter->first; + } + + return ""; +} + +const char* MemberMgr::MacroToString(EJointStatus enum_val) noexcept { + return m_macro_names[int(enum_val)]; +} + +EJointStatus MemberMgr::StringToMacro(const char* src) noexcept { + int _size = sizeof(m_macro_names) / sizeof(const char*); + for (int i = 0; i < _size; ++i) + if (std::strncmp(src, m_macro_names[i], std::strlen(m_macro_names[i])) == 0) + return (EJointStatus)i; + + CHECK(false) << "convert string to enum fail,unknown cluster status :" << src; + + //Just for erase compile warnings. 
+ return EJointStatus::STABLE; +} + +const char* MemberMgr::PullTrigger(const std::set &new_cluster)noexcept { + + bool _in_processing = false; + if (!m_in_processing.compare_exchange_strong(_in_processing, true)) { + static const char * _p_err_msg = "I'm changing the membership now,cannot process another changing request."; + LOG(ERROR) << "[MembershipChange] " << _p_err_msg; + return _p_err_msg; + } + + /*Issuing a resync log task to the background threads.This will cover all cases: + 1. the new node's log doesn't lag too far behind ,a few resync log operations is enough. + 2. the new node's log do lag too far behind , will trigger the resync-data operation eventually. + 3. the new node's log is empty , trigger resync-data operation eventually. */ + + /*Using ID-LCL instead of ID-LRL to reduce the amount of log entries each resync log RPC may carry, + even though both the two options will eventually triggered the SYNC-DATA process. */ + auto _id_lcl = StorageGlobal::m_instance.GetLastCommitted(); + + m_joint_topo_snapshot.Update(&new_cluster); + + { + ReadLock _r_lock(m_mutex); + for (auto _iter = m_joint_topo_snapshot.m_added_nodes.cbegin(); _iter != m_joint_topo_snapshot.m_added_nodes.cend(); ++_iter) { + std::shared_ptr _shp_task(new ReSyncLogContext()); + _shp_task->m_last_sync_point = _id_lcl; + _shp_task->m_follower = _iter->second; + _shp_task->m_on_success_cb = &MemberMgr::NotifyOnSynced; + + auto _ret_code = LeaderView::m_priority_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG, &_shp_task); + if (_ret_code != QUEUE_SUCC) + LOG(INFO) << "[Membership Change] Add RESYNC-LOG task ret code:" << _ret_code << ",logID:" << _shp_task->m_last_sync_point << ",remote peer:" << _iter->first; + } + } + + /*Note:Waiting for the log replication task to finish is a time consuming operation, we'd better entrust that + to a dedicated thread, returning to client immediately. 
It's the client's(usually the administrator) + duty to check when the membership change job will be done. */ + std::thread _th_member_changing(MemberMgr::Routine); + _th_member_changing.detach(); + + return nullptr; +} + +void MemberMgr::WaitForSyncDataDone() noexcept { + std::size_t _required_synced_size = m_joint_topo_snapshot.m_added_nodes.size(); + + std::unique_lock _lock(m_resync_data_cv_mutex); + + auto _wait_cond = [&]()->bool{ + std::size_t _counter = 0; + //Calculate #followers that are fully synced. + { + ReadLock _r_lock(m_mutex); + for (auto iter = m_joint_topo_snapshot.m_added_nodes.cbegin(); iter != m_joint_topo_snapshot.m_added_nodes.cend(); ++iter) + if (iter->second->m_status == FollowerStatus::NORMAL) { + _counter++; + LOG(INFO) << "[Membership Change] new node " << iter->second->my_addr << ",finished sync data"; + } + } + + /*Need to wait until all new nodes get synchronized,reason for this is that there will be no change for + nodes who lag behind to catch up in the future , in the current implementation. 
*/ + return _counter >= _required_synced_size ; + }; + + auto _wait_sec = std::chrono::seconds(::RaftCore::Config::FLAGS_memchg_sync_data_wait_seconds); + while (!m_resync_data_cv.wait_for(_lock, _wait_sec, _wait_cond)) + LOG(WARNING) << "[Membership Change] syncing data is not finished yet,continue waiting..."; + + LOG(INFO) << "[Membership Change] finish to sync data & logs to all the new added nodes."; + +} + +void MemberMgr::Routine() noexcept { + + LOG(INFO) << "[Membership Change] Routine started, waiting for the majority of new cluster get synced."; + + WaitForSyncDataDone(); + + uint32_t _next_verion = 1; + { + ReadLock _r_lock(m_mutex); + _next_verion += m_joint_summary.m_version; + } + + auto& _ctx_phaseI = m_memchg_ctx.m_phaseI_state; + auto &_joint_new_cluster = m_joint_topo_snapshot.m_new_cluster; + + { + ReadLock _r_lock(LeaderView::m_hash_followers_mutex); + for (auto &_pair_kv : LeaderView::m_hash_followers) { + m_phaseI_task.m_todo.emplace_back(_pair_kv.second); + bool _in_new = _joint_new_cluster.find(_pair_kv.first) != _joint_new_cluster.cend(); + + uint32_t _flag = uint32_t(JointConsensusMask::IN_OLD_CLUSTER); + if (_joint_new_cluster.find(_pair_kv.first) != _joint_new_cluster.cend()) + _flag |= uint32_t(JointConsensusMask::IN_NEW_CLUSTER); + + m_phaseI_task.m_flags.emplace_back(_flag); + } + + _ctx_phaseI.m_cur_cluster.m_cq_entrust_num = (int)LeaderView::m_hash_followers.size(); + m_memchg_ctx.m_cluster_size = LeaderView::m_hash_followers.size() + 1; + m_memchg_ctx.m_cluster_majority = m_memchg_ctx.m_cluster_size / 2 + 1; + } + + auto &_joint_added_nodes = m_joint_topo_snapshot.m_added_nodes; + for (auto _iter = _joint_added_nodes.begin(); _iter != _joint_added_nodes.end(); ++_iter) { + m_phaseI_task.m_todo.emplace_back(_iter->second); + m_phaseI_task.m_flags.emplace_back(uint32_t(JointConsensusMask::IN_NEW_CLUSTER)); + } + + _ctx_phaseI.m_new_cluster.m_cq_entrust_num = (int)_joint_new_cluster.size(); + if 
(!m_joint_topo_snapshot.m_leader_gone_away) + _ctx_phaseI.m_new_cluster.m_cq_entrust_num--; + + m_memchg_ctx.m_new_cluster_size = _joint_new_cluster.size(); + m_memchg_ctx.m_new_cluster_majority = m_memchg_ctx.m_new_cluster_size / 2 + 1; + + //Requests in the two phase rpc are the same. + auto cur_term = ElectionMgr::m_cur_term.load(); + TypePtrMemberChangReq _shp_req(new MemberChangeInnerRequest()); + _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + _shp_req->mutable_base()->set_term(cur_term); + _shp_req->set_version(_next_verion); + + for (const auto& _node : _joint_new_cluster) + _shp_req->add_node_list(_node); + + CHECK(PropagateMemberChange(_shp_req, PhaseID::PhaseI)) << "[Membership Change] prepare phase fail " + << "of membership changing,cannot revert,check this."; + + //m_joint_topology is moved after this call. + SwitchToJointConsensus(m_joint_topo_snapshot); + + LOG(INFO) << "switching to joint consensus done. going to do phaseII."; + +#ifdef _MEMBER_MANAGEMENT_TEST_ + PendingExecution(); +#endif + + CHECK(PropagateMemberChange(_shp_req, PhaseID::PhaseII)) << "[Membership Change] commit phase fail " + << "of membership changing,cannot revert,check this."; + + bool _still_in_new_cluster = SwitchToStable(); + + LOG(INFO) << "phaseII and switching to stable done."; + + /*After successfully changed the membership from the C-old to C-new , there is still one more thing to do : + If the current leader , aka this node, is not belonging to the C-new cluster,it need to be stepped down to follower according to the RAFT paper, + but in this implementation ,we just shut it down which is also correct, but quite simple and directly. 
*/ + if (!_still_in_new_cluster) { + LOG(INFO) << "I'm no longer in the new cluster , shutdown myself ,goodbye and have a good time."; + GlobalEnv::ShutDown(); + } + + bool _in_processing = true; + CHECK(m_in_processing.compare_exchange_strong(_in_processing, false)) << "[MembershipChange] cannot switch in processing status back to true,check this." ; +} + +bool MemberMgr::PropagateMemberChange(TypePtrMemberChangReq &shp_req, PhaseID phase_id) noexcept { + + TypePtrMemberChangReq* _p_newbie_req = &shp_req; + + auto *_p_phase_task = &m_phaseI_task; + if (phase_id == PhaseID::PhaseII) { + _p_phase_task = &m_phaseII_task; + MemberChangeInnerRequest *_newbie_req = new MemberChangeInnerRequest(*shp_req); + _newbie_req->set_flag(::raft::MembershipFlag::NEWBIE); + _p_newbie_req = new TypePtrMemberChangReq(_newbie_req); + } + + std::vector &_todo_set = _p_phase_task->m_todo; + std::vector &_flags = _p_phase_task->m_flags; + + auto _req_setter = [&](std::shared_ptr<::raft::MemberChangeInnerRequest>& _target, + bool newbie = false)->void { _target = newbie ? 
*_p_newbie_req : shp_req; }; + + std::shared_ptr<::grpc::CompletionQueue> _shp_cq(new ::grpc::CompletionQueue()); + + auto _entrust_prepare_client = [&](auto &_shp_channel,auto &shp_follower, std::size_t idx){ + auto _shp_client = new MemberChangePrepareAsyncClient(_shp_channel, _shp_cq); + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncMemberChangePrepare, + _shp_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + auto _bind_setter = std::bind(_req_setter, std::placeholders::_1, false); + + _shp_client->EntrustRequest(_bind_setter, _f_prepare, ::RaftCore::Config::FLAGS_memchg_rpc_timeo_ms); + _shp_client->PushCallBackArgs(shp_follower.get()); + _shp_client->PushCallBackArgs(reinterpret_cast(idx)); + }; + + auto _entrust_commit_client = [&](auto &_shp_channel, auto &shp_follower, std::size_t idx) { + auto _shp_client = new MemberChangeCommitAsyncClient(_shp_channel, _shp_cq); + + auto &_added_nodes = m_joint_topo_snapshot.m_added_nodes; + bool _im_new_node = m_joint_topo_snapshot.m_leader_gone_away; + _im_new_node &= _added_nodes.find(shp_follower->my_addr) != _added_nodes.cend(); + + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncMemberChangeCommit, + _shp_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + + auto _bind_setter = std::bind(_req_setter, std::placeholders::_1, _im_new_node); + + _shp_client->EntrustRequest(_bind_setter, _f_prepare, ::RaftCore::Config::FLAGS_memchg_rpc_timeo_ms); + _shp_client->PushCallBackArgs(shp_follower.get()); + _shp_client->PushCallBackArgs(reinterpret_cast(idx)); + }; + + int _entrust_total_num = 0; + for (std::size_t i = 0; i < _todo_set.size(); ++i) { + auto &_shp_follower = _todo_set[i]; + + if (_shp_follower->m_status != FollowerStatus::NORMAL) { + LOG(WARNING) << "[Membership Change] follower " << _shp_follower->my_addr << " is under " + << 
FollowerEntity::MacroToString(_shp_follower->m_status) + << ",won't propagate member change prepare request to it"; + continue; + } + + auto _shp_channel = _shp_follower->m_shp_channel_pool->GetOneChannel(); + if (phase_id == PhaseID::PhaseI) + _entrust_prepare_client(_shp_channel, _shp_follower, i); + else { + _entrust_commit_client(_shp_channel, _shp_follower, i); + } + + _entrust_total_num++; + } + + //Polling for phaseI. + PollingCQ(_shp_cq,_entrust_total_num); + + if (phase_id == PhaseID::PhaseII) { + delete _p_newbie_req; + return m_memchg_ctx.JudgeAllFinished(); + } + + //phase_id == PhaseID::PhaseI + return m_memchg_ctx.JudgePhaseIDetermined() == FinishStatus::POSITIVE_FINISHED; +} + +void MemberMgr::Statistic(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t joint_flag, + PhaseID phase_id) noexcept { + + auto* _ptr_follower = (FollowerEntity*)ptr_follower; + const auto& _addr = _ptr_follower->my_addr; + + auto *_phase_state = &m_memchg_ctx.m_phaseI_state; + if (phase_id == PhaseID::PhaseII) + _phase_state = &m_memchg_ctx.m_phaseII_state; + + std::string _phase_str = (phase_id == PhaseID::PhaseI) ? 
"prepare" : "commit"; + + if (!status.ok()) { + LOG(ERROR) << "[Membership Change]" << _phase_str << "fail,error code:" << status.error_code() + << ",error msg:" << status.error_message() << ",follower joint consensus flag:" + << joint_flag << ",remote peer:" << _addr; + _phase_state->IncreaseExplicitFail(joint_flag); + return; + } + + const auto &comm_rsp = rsp.comm_rsp(); + auto _error_code = comm_rsp.result(); + if (_error_code!=ErrorCode::SUCCESS) { + LOG(INFO) << "[Membership Change] peer " << _addr << " " << _phase_str + << " fail,follower joint consensus flag:" << joint_flag << ",remote peer:" << _addr; + _phase_state->IncreaseExplicitFail(joint_flag); + return; + } + + _phase_state->IncreaseSuccess(joint_flag); + + LOG(INFO) << "[Membership Change] peer " << _addr << " " << _phase_str + << " successfully,follower joint consensus flag:" << joint_flag << ",remote peer:" << _addr; +} + +void MemberMgr::MemberChangePrepareCallBack(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t idx) noexcept { + + Statistic(status, rsp, ptr_follower, m_phaseI_task.m_flags[idx], PhaseID::PhaseI); + + for (std::size_t i = 0; i < m_phaseI_task.m_todo.size(); ++i) { + auto &_shp_follower = m_phaseI_task.m_todo[i]; + if (_shp_follower->my_addr == ((FollowerEntity*)ptr_follower)->my_addr) { + m_phaseII_task.m_todo.emplace_back(_shp_follower); + + uint32_t _node_flag = m_phaseI_task.m_flags[i]; + m_phaseII_task.m_flags.emplace_back(_node_flag); + m_memchg_ctx.m_phaseII_state.IncreaseEntrust(_node_flag); + } + } +} + +void MemberMgr::MemberChangeCommitCallBack(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t idx)noexcept { + + Statistic(status, rsp, ptr_follower, m_phaseII_task.m_flags[idx], PhaseID::PhaseII); + + if (m_memchg_ctx.JudgeAllFinished()) + LOG(INFO) << "[Membership Change] done"; +} + +void MemberMgr::PollingCQ(std::shared_ptr<::grpc::CompletionQueue> 
shp_cq,int entrust_num)noexcept { + void* tag; + bool ok; + + int _counter = 0; + while (_counter < entrust_num) { + if (!shp_cq->Next(&tag, &ok)) + break; + + ::RaftCore::Common::ReactBase* _p_ins = (::RaftCore::Common::ReactBase*)tag; + _p_ins->React(ok); + _counter++; + } +} + +#ifdef _MEMBER_MANAGEMENT_TEST_ +void MemberMgr::PendingExecution()noexcept { + while(!m_execution_flag) + std::this_thread::sleep_for(std::chrono::seconds(1)); + m_execution_flag = false; +} + +void MemberMgr::ContinueExecution()noexcept { + m_execution_flag = true; +} +#endif + +} + diff --git a/src/member/member_manager.h b/src/member/member_manager.h new file mode 100644 index 0000000..c6ee3f0 --- /dev/null +++ b/src/member/member_manager.h @@ -0,0 +1,188 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_MEMBER_MANAGER_H__ +#define __AURORA_MEMBER_MANAGER_H__ + +#include +#include +#include + +#include "protocol/raft.pb.h" + +#include "common/comm_defs.h" +#include "leader/follower_entity.h" +#include "leader/leader_bg_task.h" + +#define _AURORA_MEMBER_CONFIG_FILE_ "membership-change.config" + +namespace RaftCore::Member { + +using ::raft::MemberChangeInnerRequest; +using ::RaftCore::Leader::TypePtrFollowerEntity; +using ::RaftCore::Leader::BackGroundTask::TwoPhaseCommitContext; +using ::RaftCore::Common::PhaseID; +using ::RaftCore::Common::TwoPhaseCommitBatchTask; + +class MemberMgr final { + +public: + + struct JointTopology { + + const JointTopology& operator=(const JointTopology &one); + + const JointTopology& operator=(JointTopology &&one); + + void Reset()noexcept; + + void Update(const std::set * p_new_cluster=nullptr)noexcept; + + std::set m_new_cluster; + + //Treat all new nodes all followers first. + std::unordered_map m_added_nodes; + + std::set m_removed_nodes; + + bool m_leader_gone_away; + + std::string m_old_leader; + }; + + struct JointSummary { + + void Reset()noexcept; + + EJointStatus m_joint_status; + + JointTopology m_joint_topology; + + //Increased monotonously,never go back. 
+ uint32_t m_version; + }; + + class MemberChangeContext final : public TwoPhaseCommitContext{}; + +public: + + static void Initialize() noexcept; + + static void UnInitialize() noexcept; + + static void ResetMemchgEnv() noexcept; + + static const char* PullTrigger(const std::set &new_cluster)noexcept; + + static void SwitchToJointConsensus(JointTopology &updated_topo,uint32_t version=_MAX_UINT32_)noexcept; + + static bool SwitchToStable()noexcept; + + static std::string FindPossibleAddress(const std::list &nic_addrs)noexcept; + + static uint32_t GetVersion()noexcept; + + static void MemberChangePrepareCallBack(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t idx)noexcept; + + static void MemberChangeCommitCallBack(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t idx)noexcept; + + static void Statistic(const ::grpc::Status &status, + const ::raft::MemberChangeInnerResponse& rsp, void* ptr_follower, uint32_t joint_flag, + PhaseID phase_id) noexcept; + +#ifdef _MEMBER_MANAGEMENT_TEST_ + static void PendingExecution()noexcept; + + static void ContinueExecution()noexcept; +#endif + + typedef std::shared_ptr TypePtrMemberChangReq; + +public: + + /*Not persisted.If leader crashes, the new elected leader takes the responsibilities to continue. */ + static JointSummary m_joint_summary; + + static std::shared_timed_mutex m_mutex; + + //No multi threads accessing, no lock for this. + static MemberChangeContext m_memchg_ctx; + +private: + +#ifdef _MEMBER_MANAGEMENT_TEST_ + static bool m_execution_flag; +#endif + + static void PollingCQ(std::shared_ptr<::grpc::CompletionQueue> shp_cq,int entrust_num)noexcept; + + static void LoadFile() noexcept; + + static void SaveFile() noexcept; + + //The callback function used to notify a node are fully synced event. 
+ static void NotifyOnSynced(TypePtrFollowerEntity &shp_follower) noexcept; + + //The main logic of membership-change after all nodes synced. + static void Routine() noexcept; + + inline static const char* MacroToString(EJointStatus enum_val) noexcept; + + inline static EJointStatus StringToMacro(const char* src) noexcept; + + static bool PropagateMemberChange(TypePtrMemberChangReq& shp_req, PhaseID phase_id) noexcept; + + static void WaitForSyncDataDone() noexcept; + +private: + + //A temporary variable used for further switching. + static JointTopology m_joint_topo_snapshot; + + //CV used for notifying resync-data completion in membership change phase. + static std::condition_variable m_resync_data_cv; + + static std::mutex m_resync_data_cv_mutex; + + static std::atomic m_in_processing; + + static TwoPhaseCommitBatchTask m_phaseI_task; + + static TwoPhaseCommitBatchTask m_phaseII_task; + + static const char* m_macro_names[]; + +private: + + MemberMgr() = delete; + + virtual ~MemberMgr() noexcept = delete; + + MemberMgr(const MemberMgr&) = delete; + + MemberMgr& operator=(const MemberMgr&) = delete; + +}; + +} //end namespace + +#endif diff --git a/src/protocol/gen_pb.bat b/src/protocol/gen_pb.bat new file mode 100644 index 0000000..4c4b60d --- /dev/null +++ b/src/protocol/gen_pb.bat @@ -0,0 +1,33 @@ + + +@echo off +SETLOCAL +CALL :myDosFunc raft.proto + +@pause +EXIT /B %ERRORLEVEL% + +:myDosFunc + +@set grpc_cpp_plugin_bin="C:\Users\95\.babun\cygwin\home\arthur\git\grpc\build-dir\Debug\grpc_cpp_plugin.exe" +@"C:\Users\95\.babun\cygwin\home\arthur\git\protobuf\build-dir\Debug\protoc.exe" --grpc_out=. --plugin=protoc-gen-grpc=%grpc_cpp_plugin_bin% %~1 +@rem "if errorlevel N" is true when the exit code is >= N, so "not errorlevel 0" could never fire; test for >= 1 and stop with a failing exit code. +@if errorlevel 1 ( + @echo generate crm_trans fail + @pause + @EXIT /B 1 +) + + +@cd C:\Users\95\Documents\Visual Studio 2015\Projects\apollo\raft\src\protocol +@"C:\Users\95\.babun\cygwin\home\arthur\git\protobuf\build-dir\Debug\protoc.exe" --cpp_out=.
%~1 +@rem "if errorlevel N" is true when the exit code is >= N, so "not errorlevel 0" could never fire; test for >= 1 and stop before the success message. +@if errorlevel 1 ( + @echo generate srpc fail + @pause + @EXIT /B 1 +) + +@echo generate %~1 succ ! + +EXIT /B 0 + + + + diff --git a/src/protocol/raft.proto b/src/protocol/raft.proto new file mode 100644 index 0000000..d65ed93 --- /dev/null +++ b/src/protocol/raft.proto @@ -0,0 +1,226 @@ + +syntax = "proto2"; + +package raft; + +enum ErrorCode{ + //----------------for client----------------// + //common error code + SUCCESS = 0; + FAIL = 1; + IMPLICIT_FAIL = 2; + HALTED = 3; + + //----------------for inner communicating----------------// + + //AppendEntries RPC: + APPEND_ENTRY_CONFLICT = 1000; + WAITING_TIMEOUT = 1001; + SUCCESS_MERGED = 1002; //Log merged into existing binlog entries. + OVERSTEP_LCL = 1003; + + //CommitEntries RPC: + ALREADY_COMMITTED = 2000; + + //SyncData RPC: + PREPARE_CONFRIMED = 3000; + SYNC_DATA_CONFRIMED = 3001; + SYNC_LOG_CONFRIMED = 3002; + + //Prevote RPC: + PREVOTE_YES = 4000; + PREVOTE_NO = 4001; + + //Vote RPC: + VOTE_YES = 5000; + VOTE_NO = 5001; + + //MemberChange RPC: + ALREADY_JOINT_CONSENSUS = 6001; +} + +enum SyncDataMsgType{ + PREPARE = 0; + SYNC_DATA = 1; + SYNC_LOG = 2; +} + +enum MembershipFlag{ + EMPTY = 0; + NEWBIE = 1; +} + +message RequestBase { + required string addr = 1; //Requesting node addr,format: 192.168.0.100:10010. + required uint32 term = 2; //Requesting node term.
+} + +message ClientWriteRequest { + required WriteRequest req = 1; +} + +message WriteRequest { + required string key = 1; + required string value = 2; +} + +message CommonResponse { + required ErrorCode result = 1 [default = SUCCESS]; + optional string err_msg = 2 [default = ""]; +} + +message ClientCommonResponse { + required ErrorCode result = 1 [default = SUCCESS]; + optional string err_msg = 2 [default = ""]; + optional string redirect_to = 3 ; +} + +message ClientWriteResponse { + required ClientCommonResponse client_comm_rsp = 1 ; +} + +message MemberChangeRequest{ + //A list of new node addresses, address format : `ip:port` + repeated string node_list = 1; +} + +message MemberChangeResponse { + required ClientCommonResponse client_comm_rsp = 1 ; +} + + +message ClientReadRequest { + required string key = 1; +} + +message ClientReadResponse { + required ClientCommonResponse client_comm_rsp = 1 ; + required string value = 2 [default = ""]; +} + +/*Important: any modification of this message must be backward compatible.*/ +message EntityID { + required uint32 term = 1; + required uint64 idx = 2; +} + +/*Important: any modification of this message must be backward compatible.*/ +message Entity { + required WriteRequest write_op = 1; + required EntityID entity_id = 2; + optional EntityID pre_log_id = 3; +} + +message AppendEntriesRequest { + required RequestBase base = 1; + repeated Entity replicate_entity = 2; + optional string debug_info = 3; +} + +message AppendEntriesResponse { + required CommonResponse comm_rsp = 1; + //Follower's last replicated log ID + optional EntityID my_lrl = 2; +} + +message CommitEntryRequest{ + required RequestBase base = 1; + required EntityID entity_id = 4; +} + +message CommitEntryResponse{ + required CommonResponse comm_rsp = 1 ; +} + +message HeartBeatRequest { + required RequestBase base = 1; +} + +message SyncDataRequest { + required RequestBase base = 1; + required SyncDataMsgType msg_type = 2; + repeated Entity entity = 3; +} +
+message SyncDataResponse { + required CommonResponse comm_rsp = 1 ; +} + +message VoteRequest{ + required RequestBase base = 1; + required EntityID last_log_entity = 2; + required uint32 member_version = 3; +} + +message VoteResponse { + required CommonResponse comm_rsp = 1 ; +} + +message MemberChangeInnerRequest{ + required RequestBase base = 1; + repeated string node_list = 2; + required uint32 version = 3; + optional MembershipFlag flag = 4; +} + +message MemberChangeInnerResponse { + required CommonResponse comm_rsp = 1 ; +} + +service RaftService { + + //-----------------for client invoking-----------------// + rpc Write(ClientWriteRequest) returns (ClientWriteResponse) {} + + rpc Read(ClientReadRequest) returns (ClientReadResponse) {} + + //Membership change RPCs. + rpc MembershipChange(MemberChangeRequest) returns (MemberChangeResponse) {} + + //-----------------for raft nodes inner communicating-----------------// + //Log replication PhaseI. + rpc AppendEntries(AppendEntriesRequest) returns (AppendEntriesResponse) {} + + //Log replication PhaseII. + rpc CommitEntries(CommitEntryRequest) returns (CommitEntryResponse) {} + + rpc SyncData(stream SyncDataRequest) returns (stream SyncDataResponse) {} + + //Membership change PhaseI. + rpc MemberChangePrepare(MemberChangeInnerRequest) returns (MemberChangeInnerResponse) {} + + //Membership change PhaseII. + rpc MemberChangeCommit(MemberChangeInnerRequest) returns (MemberChangeInnerResponse) {} + + //-----------------for election-----------------// + rpc PreVote(VoteRequest) returns (VoteResponse) {} + + rpc Vote(VoteRequest) returns (VoteResponse) {} + + rpc HeartBeat(HeartBeatRequest) returns (CommonResponse) {} +} + +/* + The following messages can also be used in raft inner communication, e.g. binlog items and follower status permanent info. + Any modification to the following message definitions should be compatible with their previous versions.
+*/ + +message BinlogItem { + //required BinlogItemEntity entity = 1; + required Entity entity = 1; + required uint64 timestamp_ms = 2; + + //required uint32 crc32 = 2; //TODO: is this useful for data consistency checking? +} + +message LogOffsetItem { + required uint32 log_term = 1; + required uint64 log_idx = 2; + required uint32 offset = 3; + required uint32 key_crc32 = 4; + required uint32 value_crc32 = 5; +} + +message LogOffset { + repeated LogOffsetItem mappings = 1; +} diff --git a/src/service/ownership_delegator.cc b/src/service/ownership_delegator.cc new file mode 100644 index 0000000..f71577e --- /dev/null +++ b/src/service/ownership_delegator.cc @@ -0,0 +1,53 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#ifndef __AURORA_OWNERSHIP_DELEGATOR_CC__ +#define __AURORA_OWNERSHIP_DELEGATOR_CC__ + +/*Template member definitions of OwnershipDelegator. This file is textually included at the bottom of + ownership_delegator.h; the guard above keeps the definitions from being emitted twice when this file + is also compiled as a translation unit of its own.*/ + +#include "service/ownership_delegator.h" + +namespace RaftCore::Service { + +//Allocates the (initially empty) shared_ptr slot that holds the delegated ownership. +template <typename T> +OwnershipDelegator<T>::OwnershipDelegator() { + this->m_p_shp_delegator = new std::shared_ptr<T>(); +} + +//Frees the slot; the managed object is destroyed only if this slot held the last reference. +template <typename T> +OwnershipDelegator<T>::~OwnershipDelegator() { + delete this->m_p_shp_delegator; +} + +//Makes the slot take ownership of the raw pointer 'src', dropping whatever it held before. +template <typename T> +void OwnershipDelegator<T>::ResetOwnership(T *src) noexcept{ + this->m_p_shp_delegator->reset(src); +} + +//Drops the slot's reference without taking a new one. +template <typename T> +void OwnershipDelegator<T>::ReleaseOwnership() noexcept{ + this->m_p_shp_delegator->reset(); +} + +//Returns a copy of the held shared_ptr, i.e. one more reference to the managed object. +template <typename T> +std::shared_ptr<T> OwnershipDelegator<T>::GetOwnership()noexcept { + return *this->m_p_shp_delegator; +} + +//Makes the slot share ownership with 'from', dropping whatever it held before. +template <typename T> +void OwnershipDelegator<T>::CopyOwnership(std::shared_ptr<T> from)noexcept { + *this->m_p_shp_delegator = from; +} + +} + +#endif diff --git a/src/service/ownership_delegator.h b/src/service/ownership_delegator.h new file mode 100644 index 0000000..b0dbea8 --- /dev/null +++ b/src/service/ownership_delegator.h @@ -0,0 +1,60 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see .
+*/ + +#pragma once + +#ifndef __AURORA_OWNERSHIP_DELEGATOR_H__ +#define __AURORA_OWNERSHIP_DELEGATOR_H__ + +#include <memory> + +namespace RaftCore::Service { + +/*Holds one heap-allocated std::shared_ptr<T> slot through which ownership of a T can be taken, + shared, handed out and released. Non-copyable.*/ +template <typename T> +class OwnershipDelegator { + +public: + + OwnershipDelegator(); + + virtual ~OwnershipDelegator(); + + //Take ownership of the raw pointer 'src', dropping the previously held reference. + void ResetOwnership(T *src) noexcept; + + //Drop the held reference. + void ReleaseOwnership() noexcept; + + //Return a copy of the held shared_ptr. + std::shared_ptr<T> GetOwnership()noexcept; + + //Share ownership with 'from', dropping the previously held reference. + void CopyOwnership(std::shared_ptr<T> from)noexcept; + +private: + + std::shared_ptr<T> *m_p_shp_delegator = nullptr; + +private: + + OwnershipDelegator(const OwnershipDelegator&) = delete; + + OwnershipDelegator& operator=(const OwnershipDelegator&) = delete; +}; + +} + +#include "service/ownership_delegator.cc" + +#endif diff --git a/src/service/service.cc b/src/service/service.cc new file mode 100644 index 0000000..741df0e --- /dev/null +++ b/src/service/service.cc @@ -0,0 +1,2012 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see .
+*/ + +#include + +#include "common/log_identifier.h" +#include "common/error_code.h" +#include "state/state_mgr.h" +#include "global/global_env.h" +#include "follower/follower_view.h" +#include "follower/follower_request.h" +#include "follower/memory_log_follower.h" +#include "binlog/binlog_singleton.h" +#include "storage/storage_singleton.h" +#include "leader/follower_entity.h" +#include "leader/memory_log_leader.h" +#include "tools/lock_free_priority_queue.h" +#include "tools/utilities.h" +#include "election/election.h" +#include "member/member_manager.h" +#include "service/service.h" +#include "client/client_impl.h" + +namespace RaftCore::Service { + +using grpc::CompletionQueue; +using ::raft::Entity; +using ::raft::ErrorCode; +using ::RaftCore::State::RaftRole; +using ::RaftCore::State::StateMgr; +using ::RaftCore::Common::CommonView; +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Common::WriteLock; +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::Common::TypeEntityList; +using ::RaftCore::Follower::MemoryLogItemFollower; +using ::RaftCore::Follower::CmpMemoryLogFollower; +using ::RaftCore::Follower::FollowerView; +using ::RaftCore::Follower::TypeMemlogFollowerList; +using ::RaftCore::BinLog::BinLogGlobal; +using ::RaftCore::BinLog::BinLogOperator; +using ::RaftCore::Leader::CmpMemoryLogLeader; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Leader::FollowerEntity; +using ::RaftCore::Leader::TypePtrFollowerEntity; +using ::RaftCore::Leader::FollowerStatus; +using ::RaftCore::Leader::BackGroundTask::ReSyncLogContext; +using ::RaftCore::DataStructure::UnorderedSingleListNode; +using ::RaftCore::DataStructure::DoubleListNode; +using ::RaftCore::DataStructure::LockFreePriotityQueue; +using ::RaftCore::DataStructure::DoubleListNode; +using ::RaftCore::Election::ElectionMgr; +using ::RaftCore::Member::MemberMgr; +using ::RaftCore::Member::EJointStatus; +using ::RaftCore::Member::JointConsensusMask; +using 
::RaftCore::Tools::TypeSysTimePoint; +using ::RaftCore::Global::GlobalEnv; +using ::RaftCore::Client::AppendEntriesAsyncClient; +using ::RaftCore::Storage::StorageGlobal; + +const char* RPCBase::m_status_macro_names[] = {"NORMAL","HALTED","SHUTTING_DOWN"}; + +RPCBase::RPCBase() {} + +RPCBase::~RPCBase() {} + +bool RPCBase::LeaderCheckVailidity( ::raft::ClientCommonResponse* response) noexcept { + + response->set_result(ErrorCode::SUCCESS); + + auto _current_role = StateMgr::GetRole(); + if ( _current_role != RaftRole::LEADER) { + + response->set_result(ErrorCode::FAIL); + if (_current_role == RaftRole::CANDIDATE) { + response->set_err_msg("I'm not a leader ,tell you the right leader."); + return false; + } + + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + + //I'm a follower + response->set_err_msg("I'm not a leader ,tell you the right leader."); + response->set_redirect_to(_topo.m_leader); + return false; + } + + auto _status = LeaderView::m_status; + if (_status != LeaderView::ServerStatus::NORMAL) { + response->set_result(ErrorCode::FAIL); + response->set_err_msg(std::string("I'm in a status of:") + this->MacroToString(_status)); + return false; + } + + return true; +} + +std::string RPCBase::FollowerCheckValidity(const ::raft::RequestBase &req_base, TypeTimePoint* p_tp, LogIdentifier *p_cur_id) noexcept { + + //Check current node status + auto _current_role = StateMgr::GetRole(); + if ( _current_role != RaftRole::FOLLOWER) + return "I'm not a follower, I'm a:" + std::string(StateMgr::GetRoleStr()); + + //if (p_tp != nullptr) + // ::RaftCore::Tools::EndTiming(*p_tp, "start processing debugpos1.2", p_cur_id); + + //Check leader address validity + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + + //if (p_tp != nullptr) + // ::RaftCore::Tools::EndTiming(*p_tp, "start processing debugpos1.3", p_cur_id); + + if (_topo.m_leader != req_base.addr()) + return "Sorry,my leader is[" + _topo.m_leader + "],not you" + "[" + 
req_base.addr() +"]"; + + //Check leader term validity + if (req_base.term() < ElectionMgr::m_cur_term.load()) + return "your term " + std::to_string(req_base.term()) + " is smaller than mine:" + std::to_string(ElectionMgr::m_cur_term.load()); + + if (req_base.term() > ElectionMgr::m_cur_term.load()) + return "your term " + std::to_string(req_base.term()) + " is greater than mine:" + std::to_string(ElectionMgr::m_cur_term.load()) + + ",waiting for you heartbeat msg only by which I could upgrade my term."; + + //if (p_tp != nullptr) + // ::RaftCore::Tools::EndTiming(*p_tp, "start processing debugpos1.4", p_cur_id); + + return ""; +} + +bool RPCBase::ValidClusterNode(const std::string &peer_addr) noexcept { + + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + if (_topo.InCurrentCluster(peer_addr)) + return true; + + ReadLock _r_lock(MemberMgr::m_mutex); + if (MemberMgr::m_joint_summary.m_joint_status != EJointStatus::JOINT_CONSENSUS) + return false; + + const auto &_new_cluster = MemberMgr::m_joint_summary.m_joint_topology.m_new_cluster; + + return _new_cluster.find(peer_addr)!=_new_cluster.cend(); +} + +Write::Write(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + + /*Set parent delegator's managed ownership, need to ahead of the following 'Initialize' since otherwise + this object will serving request promptly while not ready for that. 
*/ + this->ResetOwnership(this); + + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + + this->m_async_service->RequestWrite(&this->m_server_context, &this->m_request, &this->m_responder, + this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); + this->m_phaseI_determined_point.store(false); + this->m_phaseII_ready_list.store(nullptr); + +#ifdef _SVC_WRITE_TEST_ + this->m_start_tp = std::chrono::system_clock::from_time_t(std::mktime(&m_start_tm)); +#endif +} + +Write::~Write() {} + +void Write::FinishRequest(WriteProcessStage state) noexcept { + this->m_write_stage = state; + this->m_responder.Finish(this->m_response, ::grpc::Status::OK, this); +} + +void Write::React(bool cq_result) noexcept { + + if (!cq_result) { + /*Only when m_shp_req_ctx containing something, it's worthy to log, otherwise it's from the + default pool's request.*/ + if (this->m_shp_req_ctx) + LOG(ERROR) << "Server WriteRequest got false result from CQ, log:" + << this->m_shp_req_ctx->m_cur_log_id; + this->ReleaseOwnership(); + return; + } + + bool _result = true; + switch (this->m_write_stage) { + case WriteProcessStage::CREATE: + + new Write(this->m_async_service, this->m_server_notify_cq,this->m_server_call_cq); + + _result = this->BeforeReplicate(); + if (!_result) + this->FinishRequest(WriteProcessStage::ABOURTED); + + break; + + case WriteProcessStage::FRONT_FINISH: + this->ReleaseOwnership(); + break; + + case WriteProcessStage::ABOURTED: + this->ReleaseOwnership(); + break; + + default: + CHECK(false) << "Unexpected tag " << int(this->m_write_stage); + break; + } +} + +::grpc::Status Write::Process() noexcept { + return ::grpc::Status::OK; +} + +bool Write::PrepareReplicationStatistic(std::list> &entrust_list) noexcept { + + int _entrusted_client_num = 0; + auto &_phaseI_state = this->m_shp_req_ctx->m_phaseI_state; + auto _prepare_statistic = [&](TypePtrFollowerEntity& shp_follower) { + if (shp_follower->m_status != FollowerStatus::NORMAL) { + LOG(WARNING) << 
"follower " << shp_follower->my_addr << " is under " + << FollowerEntity::MacroToString(shp_follower->m_status) + << ",won't appending entries to it"; + return; + } + + auto _shp_client = shp_follower->m_append_client_pool->Fetch(); + + VLOG(90) << "AppendEntriesAsyncClient fetched:" << shp_follower->my_addr; + + CHECK(_shp_client) << "no available AppendEntries clients, may need a bigger pool."; + + /*The self-delegated ownership will be existing at the mean time, we can just copy it from the + delegator. */ + _shp_client->OwnershipDelegator::CopyOwnership(this->GetOwnership()); + _shp_client->PushCallBackArgs(shp_follower->m_append_client_pool.get()); + entrust_list.emplace_back(_shp_client); + + _entrusted_client_num++; + _phaseI_state.IncreaseEntrust(shp_follower->m_joint_consensus_flag); + }; + + //Prepare the commit request ahead of time. + this->m_shp_commit_req.reset(new ::raft::CommitEntryRequest()); + this->m_shp_commit_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + this->m_shp_commit_req->mutable_base()->set_term(this->m_shp_req_ctx->m_cur_log_id.m_term); + + auto _p_entity_id = this->m_shp_commit_req->mutable_entity_id(); + _p_entity_id->set_term(this->m_shp_req_ctx->m_cur_log_id.m_term); + _p_entity_id->set_idx(this->m_shp_req_ctx->m_cur_log_id.m_index); + + std::size_t follower_num = 0; + { + ReadLock _r_lock(LeaderView::m_hash_followers_mutex); + for (auto &_pair_kv : LeaderView::m_hash_followers) + _prepare_statistic(_pair_kv.second); + follower_num = LeaderView::m_hash_followers.size(); + } + this->m_shp_req_ctx->m_cluster_size = follower_num + 1; + this->m_shp_req_ctx->m_cluster_majority = (follower_num + 1) / 2 + 1; // +1 means including the leader. 
+ if ((std::size_t)_entrusted_client_num < this->m_shp_req_ctx->m_cluster_majority) { + LOG(ERROR) << "can't get majority client entrusted for the stable cluster,log:" << this->m_shp_req_ctx->m_cur_log_id; + return false; + } + + _entrusted_client_num = 0; + + uint32_t _leader_joint_consensus_flag = (uint32_t)JointConsensusMask::IN_OLD_CLUSTER; + + std::size_t _new_cluster_node_num = 0; + do{ + ReadLock _r_lock(MemberMgr::m_mutex); + if (MemberMgr::m_joint_summary.m_joint_status != EJointStatus::JOINT_CONSENSUS) + break; + + for (auto &_pair_kv : MemberMgr::m_joint_summary.m_joint_topology.m_added_nodes) + _prepare_statistic(_pair_kv.second); + + if (!MemberMgr::m_joint_summary.m_joint_topology.m_leader_gone_away) + _leader_joint_consensus_flag |= (uint32_t)JointConsensusMask::IN_NEW_CLUSTER; + + _new_cluster_node_num = MemberMgr::m_joint_summary.m_joint_topology.m_new_cluster.size(); + *((MemberMgr::JointSummary*)this->m_shp_req_ctx->m_p_joint_snapshot) = MemberMgr::m_joint_summary; + } while (false); + this->m_shp_req_ctx->m_new_cluster_size = _new_cluster_node_num; + this->m_shp_req_ctx->m_new_cluster_majority = (_new_cluster_node_num > 0) ? (_new_cluster_node_num / 2 + 1) : 0; + + if ((std::size_t)_entrusted_client_num < this->m_shp_req_ctx->m_new_cluster_majority) { + LOG(ERROR) << "can't get majority client entrusted for the joint cluster,log:" << this->m_shp_req_ctx->m_cur_log_id; + return false; + } + + //Count the leader to the majority. 
+ _phaseI_state.IncreaseSuccess(_leader_joint_consensus_flag); + + return true; +} + +bool Write::PrepareReplicationContext(uint32_t cur_term, uint32_t pre_term) noexcept { + + std::shared_ptr<::raft::AppendEntriesRequest> _shp_req(new ::raft::AppendEntriesRequest()); + _shp_req->mutable_base()->set_addr(StateMgr::GetMyAddr()); + _shp_req->mutable_base()->set_term(cur_term); + + auto _p_entry = _shp_req->add_replicate_entity(); + auto _p_entity_id = _p_entry->mutable_entity_id(); + _p_entity_id->set_term(cur_term); + _p_entity_id->set_idx(this->m_guid_pair.m_cur_guid); + + auto _p_pre_entity_id = _p_entry->mutable_pre_log_id(); + _p_pre_entity_id->set_term(pre_term); + _p_pre_entity_id->set_idx(this->m_guid_pair.m_pre_guid); + + auto _p_wop = _p_entry->mutable_write_op(); + +#ifdef _SVC_WRITE_TEST_ + auto us = std::chrono::duration_cast(std::chrono::system_clock::now() - this->m_start_tp); + _shp_req->set_debug_info(std::to_string(us.count())); +#endif + + //Memory copy overhead happened here, no way to avoid,background threads and phaseII need this too. + //TODO: memory copy overhead can be optimized out since there is no 'background threads' in the async mode. + _p_wop->set_key(this->m_client_request->req().key()); + _p_wop->set_value(this->m_client_request->req().value()); + + this->m_shp_req_ctx.reset(new LogReplicationContext()); + this->m_shp_req_ctx->m_cur_log_id.m_term = cur_term; + this->m_shp_req_ctx->m_cur_log_id.m_index = this->m_guid_pair.m_cur_guid; + + auto &_phaseI_state = this->m_shp_req_ctx->m_phaseI_state; + + auto _req_setter = [&_shp_req](std::shared_ptr<::raft::AppendEntriesRequest>& _target)->void { + _target = _shp_req; + }; + + //Require get current replication context prepared before entrusting any of the request. 
+ std::list> _entrust_list; + if (!this->PrepareReplicationStatistic(_entrust_list)) { + LOG(ERROR) << "can't get majority client entrusted,log:" << this->m_shp_req_ctx->m_cur_log_id; + return false; + } + + for (auto &_shp_client : _entrust_list) { + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncAppendEntries, + _shp_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + _shp_client->EntrustRequest(_req_setter, _f_prepare, ::RaftCore::Config::FLAGS_leader_append_entries_rpc_timeo_ms); + } + + return true; +} + +bool Write::BeforeReplicate() noexcept { + + this->m_tp_start = ::RaftCore::Tools::StartTimeing(); + this->m_rsp = this->m_response.mutable_client_comm_rsp(); + + if (!this->LeaderCheckVailidity(this->m_rsp)) + return false; + + /* Area X */ + + //----------------Step 0: get the unique log entry term and index by guid----------------// + this->m_guid_pair = GuidGenerator::GenerateGuid(); + auto _cur_term = ElectionMgr::m_cur_term.load(); + + VLOG(89) << "Generating GUID done,idx:" << this->m_guid_pair.m_cur_guid; + +#ifdef _SVC_WRITE_TEST_ + auto _p_test_wop = this->m_request.mutable_req(); + std::string _idx = std::to_string(this->m_guid_pair.m_cur_guid); + + const std::string &_val = _p_test_wop->value(); + std::size_t _pos = _val.find(_WRITE_VAL_TS_); + CHECK(_pos != std::string::npos); + + std::string _start_us = _val.substr(_pos + std::strlen(_WRITE_VAL_TS_)); + this->m_rsp->set_err_msg(_start_us); + _p_test_wop->set_key("test_client_key_" + _idx); +#endif + + this->m_client_request = &this->m_request; + + //----------------Step 1: Add the current request to the pending list----------------// + this->m_shp_entity.reset(new MemoryLogItemLeader(_cur_term,this->m_guid_pair.m_cur_guid)); + this->m_shp_entity->GetEntity()->set_allocated_write_op(const_cast<::raft::WriteRequest*>(&this->m_client_request->req())); + + auto _p_entity_id = this->m_shp_entity->GetEntity()->mutable_entity_id(); + 
_p_entity_id->set_term(_cur_term); + _p_entity_id->set_idx(this->m_guid_pair.m_cur_guid); + + /*If this leader has just been elected out, it's term will be different from the latest log entry + in the binlog file, needing to make sure pre_term is correct.*/ + uint32_t _pre_term = _cur_term; + + //VLOG(89) << "my pre guid:" << this->m_guid_pair.m_pre_guid << ",debut:" << ElectionMgr::m_leader_debut << ",debut LRL:" << ElectionMgr::m_pre_term_lrl; + + this->m_first_of_cur_term = ElectionMgr::m_leader_debut && (ElectionMgr::m_pre_term_lrl.m_index == this->m_guid_pair.m_pre_guid); + + //Current guid is the first released guid under the leader's new term. + if (this->m_first_of_cur_term) + _pre_term = ElectionMgr::m_pre_term_lrl.m_term; + + this->m_p_pre_entity_id = this->m_shp_entity->GetEntity()->mutable_pre_log_id(); + this->m_p_pre_entity_id->set_term(_pre_term); + this->m_p_pre_entity_id->set_idx(this->m_guid_pair.m_pre_guid); + + LeaderView::m_entity_pending_list.Insert(this->m_shp_entity); + + //Test.. + //this->m_shp_entity->GetEntity()->release_write_op(); + //this->FinishRequest(WriteProcessStage::FRONT_FINISH); + //return true; + + //Note: all get good result(~5w/s tp, ~2ms lt.) before here. + + //----------------Step 2: replicated to the majority of cluster----------------// + if (!this->PrepareReplicationContext(_cur_term, _pre_term)) { + LeaderView::m_entity_pending_list.Delete(this->m_shp_entity); + this->m_rsp->set_result(ErrorCode::FAIL); + this->m_rsp->set_err_msg("PrepareReplicationContext fail."); + return false; + } + + //Note: get a bad result(~2w/s tp, ~15ms lt.) if reach here. 
+ + ::RaftCore::Tools::EndTiming(this->m_tp_start, "finished entrust phaseI clients:", &this->m_shp_req_ctx->m_cur_log_id); + + //Test + //this->FinishRequest(WriteProcessStage::FRONT_FINISH); + +#ifdef _SVC_WRITE_TEST_ + auto _now_us = (std::chrono::duration_cast(std::chrono::system_clock::now() - this->m_start_tp)).count(); + uint64_t _lantency_us = (uint64_t)(_now_us - std::atoll(_start_us.c_str())); + VLOG(2) << "server side single req latency(us):" << _lantency_us << ",idx:" + << this->m_shp_req_ctx->m_cur_log_id; +#endif + + return true; +} + +void Write::CommitDoneCallBack(const ::grpc::Status &status, const ::raft::CommitEntryResponse& rsp, + FollowerEntity* ptr_follower) noexcept { /*Completion handler of a phase-II (commit) RPC: classifies the outcome for this follower and bumps the matching phase-II counter. DEADLINE_EXCEEDED counts as an implicit failure; any other RPC error, or a result other than SUCCESS/ALREADY_COMMITTED, counts as an explicit failure.*/ + + VLOG(89) << "CommitDoneCallBack called,log:" << this->m_shp_req_ctx->m_cur_log_id << ",addr:" << ptr_follower->my_addr; + + auto _joint_consensus_state = ptr_follower->m_joint_consensus_flag; + auto &_phaseII_state = this->m_shp_req_ctx->m_phaseII_state; + + if (!status.ok()) { + LOG(ERROR) << "CommitEntries:RPC fail,error code:" << status.error_code() + << ",error msg:" << status.error_message() << ",logID:" + << this->m_shp_req_ctx->m_cur_log_id + << ",remote peer:" << ptr_follower->my_addr; + + if (status.error_code() == ::grpc::StatusCode::DEADLINE_EXCEEDED) + _phaseII_state.IncreaseImplicitFail(_joint_consensus_state); + else + _phaseII_state.IncreaseExplicitFail(_joint_consensus_state); + + return; + } + + const ::raft::CommonResponse& comm_rsp = rsp.comm_rsp(); + auto _error_code = comm_rsp.result(); + if (_error_code!=ErrorCode::SUCCESS && _error_code!=ErrorCode::ALREADY_COMMITTED) { + LOG(ERROR) << "CommitEntries:RPC return fail,error code:" << comm_rsp.result() + << ",error msg:" << comm_rsp.err_msg() << ",logID" << this->m_shp_req_ctx->m_cur_log_id; + _phaseII_state.IncreaseExplicitFail(_joint_consensus_state); + return; + } + + _phaseII_state.IncreaseSuccess(_joint_consensus_state); +} + +const std::shared_ptr& Write::GetReqCtx() noexcept { /*Accessor for the request's two-phase-commit context.*/ + return
this->m_shp_req_ctx; +} + +void Write::ProcessReplicateFailure(const ::raft::CommonResponse& comm_rsp, + TwoPhaseCommitContext::PhaseState &phaseI_state, FollowerEntity* ptr_follower, + uint32_t joint_consensus_state) noexcept { /*Handles a non-success AppendEntries reply: updates the phase-I failure counters according to the returned error code and, for APPEND_ENTRY_CONFLICT / OVERSTEP_LCL / WAITING_TIMEOUT, schedules a log resync for the follower.*/ + + LOG(ERROR) << "AppendEntries:RPC return fail,detail:" << comm_rsp.DebugString() << ",logID" + << this->m_shp_req_ctx->m_cur_log_id << ",remote peer:" << ptr_follower->my_addr; + + auto _error_code = comm_rsp.result(); + if (_error_code == ErrorCode::FAIL) { + phaseI_state.IncreaseExplicitFail(joint_consensus_state); + return; + } + + if (_error_code == ErrorCode::IMPLICIT_FAIL) { + phaseI_state.IncreaseImplicitFail(joint_consensus_state); + return; + } + + /*Anything outside the three resync-able codes is unexpected and counted as an explicit failure.*/ if (_error_code != ErrorCode::APPEND_ENTRY_CONFLICT && _error_code != ErrorCode::WAITING_TIMEOUT + && _error_code != ErrorCode::OVERSTEP_LCL ) { + LOG(ERROR) << "unexpected returned value: " << _error_code << ",logID:" + << this->m_shp_req_ctx->m_cur_log_id; + phaseI_state.IncreaseExplicitFail(joint_consensus_state); + return; + } + + if (_error_code == ErrorCode::APPEND_ENTRY_CONFLICT || _error_code == ErrorCode::OVERSTEP_LCL) + phaseI_state.IncreaseExplicitFail(joint_consensus_state); + else + phaseI_state.IncreaseImplicitFail(joint_consensus_state); + + /*The resync starts from this request's own log id on a conflict, otherwise from the leader's last replicated log.*/ LogIdentifier _sync_point = (_error_code == ErrorCode::APPEND_ENTRY_CONFLICT) ?
\ + this->m_shp_req_ctx->m_cur_log_id : BinLogGlobal::m_instance.GetLastReplicated(); + + this->AddResyncLogTask(ptr_follower, _sync_point); + + return; +} + +void Write::AddResyncLogTask(FollowerEntity* ptr_follower, const LogIdentifier &sync_point) noexcept { + + /*Follower status has already been set 2o resync, some other threads must have started + resyncing-log no need to do more.*/ + if (ptr_follower->m_status == FollowerStatus::RESYNC_LOG) { + LOG(INFO) << "a RESYNC_LOG task already in progress for follower:" << ptr_follower->my_addr + << ", no need to generate a new one, just return"; + return; + } + + // Set follower status + ptr_follower->m_status = FollowerStatus::RESYNC_LOG; + + // Generate a task + std::shared_ptr _shp_task(new ReSyncLogContext()); + _shp_task->m_last_sync_point = sync_point; + + //Find the follower's shared_ptr and copy the ownership. + { + ReadLock _r_lock(LeaderView::m_hash_followers_mutex); + + auto _cmp = [&](const std::pair &_pair) { + return _pair.first == ptr_follower->my_addr; + }; + auto _iter = std::find_if(LeaderView::m_hash_followers.cbegin(), + LeaderView::m_hash_followers.cend(),_cmp); + if (_iter != LeaderView::m_hash_followers.cend()) + _shp_task->m_follower = _iter->second; + } + + if (!_shp_task->m_follower) { + LOG(ERROR) << "Can't find the corresponding follower in leader's view " << ptr_follower->my_addr + << ",remote peer:" << ptr_follower->my_addr; + return; + } + + auto _ret_code = LeaderView::m_priority_queue.Push(LockFreePriotityQueue::TaskType::RESYNC_LOG, &_shp_task); + if (_ret_code != QUEUE_SUCC) { + LOG(ERROR) << "Add RESYNC-LOG task fail,ret:" << _ret_code << ",logID:" << _shp_task->m_last_sync_point << ",remote peer:" << ptr_follower->my_addr; + return; + } + + LOG(ERROR) << "Add RESYNC-LOG succeed,sync point" << _shp_task->m_last_sync_point << ",remote peer:" << ptr_follower->my_addr; +} + +void Write::EntrustCommitRequest(FollowerEntity* ptr_follower, AppendEntriesAsyncClient* ptr_client) noexcept { 
+ + //Must update statistic data before really do entrust. + auto &_phaseII_state = this->m_shp_req_ctx->m_phaseII_state; + _phaseII_state.IncreaseEntrust(ptr_follower->m_joint_consensus_flag); + + auto _shp_client = ptr_follower->m_commit_client_pool->Fetch(); + + VLOG(90) << "CommitEntriesAsyncClient fetched:" << ptr_follower->my_addr << ",log:" << this->m_shp_req_ctx->m_cur_log_id; + + CHECK(_shp_client) << "no available Commit clients, may need a bigger pool."; + + /*The ownership delegated to the append-entries client still exists at this point, so we can just copy it + from the delegator. */ + auto _shp_write = ptr_client->OwnershipDelegator::GetOwnership(); + _shp_client->OwnershipDelegator::CopyOwnership(_shp_write); + _shp_client->PushCallBackArgs(ptr_follower->m_commit_client_pool.get()); + + /*Every commit client is handed the same shared commit request object.*/ auto _req_setter = [&](std::shared_ptr<::raft::CommitEntryRequest>& _target)->void { + _target = this->m_shp_commit_req; + }; + auto _f_prepare = std::bind(&::raft::RaftService::Stub::PrepareAsyncCommitEntries, + _shp_client->GetStub().get(), std::placeholders::_1, + std::placeholders::_2, std::placeholders::_3); + _shp_client->EntrustRequest(_req_setter, _f_prepare, + ::RaftCore::Config::FLAGS_leader_commit_entries_rpc_timeo_ms); +}; + +bool Write::UpdatePhaseIStatistic(const ::grpc::Status &status, + const ::raft::AppendEntriesResponse& rsp, + FollowerEntity* ptr_follower) noexcept { /*Folds one follower's AppendEntries outcome into the phase-I counters. Returns true only when the follower reported plain SUCCESS, i.e. when a commit request may be entrusted for it.*/ + + auto _joint_consensus_state = ptr_follower->m_joint_consensus_flag; + auto &_phaseI_state = this->m_shp_req_ctx->m_phaseI_state; + if (!status.ok()) { + LOG(ERROR) << "AppendEntries:RPC fail,error code:" << status.error_code() + << ",error msg:" << status.error_message() << ",logID:" + << this->m_shp_req_ctx->m_cur_log_id + << ",remote peer:" << ptr_follower->my_addr; + + if (status.error_code() == ::grpc::StatusCode::DEADLINE_EXCEEDED) { + _phaseI_state.IncreaseImplicitFail(_joint_consensus_state); + + LogIdentifier _sync_point = BinLogGlobal::m_instance.GetLastReplicated(); +
this->AddResyncLogTask(ptr_follower, _sync_point); + return false; + } + + _phaseI_state.IncreaseExplicitFail(_joint_consensus_state); + return false; + } + + const ::raft::CommonResponse& comm_rsp = rsp.comm_rsp(); + auto _error_code = comm_rsp.result(); + if (_error_code!=ErrorCode::SUCCESS && _error_code!=ErrorCode::SUCCESS_MERGED) { + this->ProcessReplicateFailure(comm_rsp, _phaseI_state, ptr_follower, _joint_consensus_state); + return false; + } + + //Here succeed. + _phaseI_state.IncreaseSuccess(_joint_consensus_state); + + /*SUCCESS_MERGED has been counted as a success above; it only means that no commit client is needed for this follower.*/ + if (_error_code == ErrorCode::SUCCESS_MERGED) { + LOG(INFO) << "This log has been successfully merged, no need to entrust a commit client, " + << "logID:" << this->m_shp_req_ctx->m_cur_log_id << ",remote peer:" + << ptr_follower->my_addr; + return false; + } + + return true; +} + +void Write::ReplicateDoneCallBack(const ::grpc::Status &status, const ::raft::AppendEntriesResponse& rsp, + FollowerEntity* ptr_follower, AppendEntriesAsyncClient* ptr_client) noexcept { /*Completion handler of a phase-I (AppendEntries) RPC: updates the phase-I counters and, once the replication result is determined, drives the commit entrusting and AfterDetermined().*/ + + //Test.
+ //return; + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "replication callback comes:", &this->m_shp_req_ctx->m_cur_log_id); + + bool _phaseI_result = this->UpdatePhaseIStatistic(status,rsp,ptr_follower); + if (!_phaseI_result) + LOG(INFO) << "replication callback won't entrust a commit client due to above described" + << " reasons, logID:" << this->m_shp_req_ctx->m_cur_log_id << ",remote peer:" + << ptr_follower->my_addr; + + const auto &_cur_log_id = this->m_shp_req_ctx->m_cur_log_id; + uint32_t _diff = _cur_log_id.GreaterThan(ptr_follower->m_last_sent_committed.load()); + bool _group_commit_reached = _diff >= ::RaftCore::Config::FLAGS_group_commit_count; + + /*A commit is only entrusted when phase I succeeded for this follower and the group-commit threshold is reached.*/ bool _need_entrust = _phaseI_result && _group_commit_reached; + + auto _push_list_if_necessary = [&]() ->void { + + if (!_need_entrust) + return; + + if (!ptr_follower->UpdateLastSentCommitted(_cur_log_id)) + return; + + /*Push this follower onto the head of m_phaseII_ready_list; the CAS loop re-links m_next whenever another thread changed the head first.*/ auto *_p_cur_client_head = this->m_phaseII_ready_list.load(); + auto * _p_new_node = new UnorderedSingleListNode(ptr_follower); + _p_new_node->m_next = _p_cur_client_head; + while (!this->m_phaseII_ready_list.compare_exchange_strong(_p_cur_client_head, _p_new_node)) + _p_new_node->m_next = _p_cur_client_head; + }; + + //Judge if replication result has been determined. + FinishStatus _determined_value = this->m_shp_req_ctx->JudgePhaseIDetermined(); + if (_determined_value == FinishStatus::UNFINISHED) { + //If not determined, just push the current client(if any) to the entrust list.
+ _push_list_if_necessary(); + return; + } + + _need_entrust &= (_determined_value == FinishStatus::POSITIVE_FINISHED); + + /*The first callback that flips m_phaseI_determined_point runs AfterDetermined(); every later callback entrusts its own commit request directly.*/ bool _determined = false; + if (!this->m_phaseI_determined_point.compare_exchange_strong(_determined, true)) { + /*Only determined with a success result, can we do further processing(aka,pushing the client + to the entrust list) */ + if (_need_entrust) + if (ptr_follower->UpdateLastSentCommitted(_cur_log_id)) + this->EntrustCommitRequest(ptr_follower, ptr_client); + return; + } + + //Only one thread could reach here for a certain log entry. + _push_list_if_necessary(); //Push current request to list. + + this->AfterDetermined(ptr_client); +} + +FinishStatus Write::JudgeReplicationResult() noexcept{ /*Turns the phase-I verdict into the client-visible result. On a positive verdict there is only extra work while the server is HALTED; otherwise the response is marked FAIL, the request is queued on the cut-empty list and the server is halted.*/ + + const auto &_entity_id = this->m_shp_entity->GetEntity()->entity_id(); + + //If majority succeed. + FinishStatus _ret_val = this->m_shp_req_ctx->JudgePhaseIDetermined(); + if (_ret_val == FinishStatus::POSITIVE_FINISHED) { + /*No matter what's the reason, just return FAIL to the client ,and don't distinguish the + IMPLICIT_FAIL case from all the other failure cases. */ + if (LeaderView::m_status == LeaderView::ServerStatus::HALTED) { + //Waiting in a conservative manner. + + LeaderView::m_last_log_waiting_num.fetch_add(1); + + auto _last_released_guid = this->WaitForLastGuidReleasing(); + if (_entity_id.idx() == _last_released_guid) + this->LastlogResolve(true, _last_released_guid); + } + + return _ret_val; + } + + this->m_rsp->set_result(ErrorCode::FAIL); + this->m_rsp->set_err_msg("cannot replicate to the majority"); + + //Push the [implicit] failed request to the bg queue.
+ auto _shp_ctx = std::shared_ptr(new CutEmptyContext()); + _shp_ctx->m_write_request = this->GetOwnership(); + LeaderView::m_cut_empty_list.Insert(_shp_ctx); + + VLOG(89) << "Write Request failed, pushed it to bg list:" << this->m_shp_req_ctx->m_cur_log_id.m_index; + + /*Note: The failure cases, whether explicit or implicit, indicate that errors have occurred, + making it reasonable for the server to stop and take a look at what happened and choose the best + way to deal with the causes; only after that can the server continue serving the clients. + Besides, the 'UpdateServerStatus' function returning false is just okay, because other threads + with a larger guid_pair may have already set server status to LeaderView::ServerStatus::HALTED. */ + this->UpdateServerStatus(this->m_guid_pair.m_cur_guid, LeaderView::ServerStatus::HALTED); + + /*There is a time window during which a thread can still be generating guids even though the server status + has already been set to HALT(the corresponding code is marked as 'Area X' in the above code). The + following code aims at waiting for it to elapse.*/ + auto _last_released_guid = this->WaitForLastGuidReleasing(); + + LOG(ERROR) << "AppendEntries:cannot replicate to the majority of cluster,write fail ,idx:" + << this->m_shp_req_ctx->m_cur_log_id.m_index + << ",context details:" << this->m_shp_req_ctx->Dump(); + + CHECK(this->m_guid_pair.m_cur_guid <= _last_released_guid) << "guid issue :" << this->m_guid_pair.m_cur_guid << "|" << _last_released_guid; + + //Increasing the waiting num. + LeaderView::m_last_log_waiting_num.fetch_add(1); + + //Current log id is the LRG server halting on. + if (this->m_guid_pair.m_cur_guid == _last_released_guid) { + //latest log update overall info. And there are no potential failures for the latest issued log id.
+ this->LastlogResolve(false, _last_released_guid); + } + + return _ret_val; +} + +void Write::ReleasePhaseIIReadyList()noexcept { /*Frees every node of m_phaseII_ready_list without touching the followers they point to, then resets the list head.*/ + auto *_p_cur_client = this->m_phaseII_ready_list.load(); + while (_p_cur_client != nullptr) { + auto *_p_tmp = _p_cur_client; + _p_cur_client = _p_cur_client->m_next; + + //Shouldn't delete the _p_tmp->m_data, it's the ptr_follower, just detach it. + _p_tmp->m_data = nullptr; + + /*Note: just delete the outer side wrapper(aka the 'UnorderedSingleListNode'), rather than the inner + data, which will be released by itself in the future.*/ + delete _p_tmp; + } + + this->m_phaseII_ready_list.store(nullptr); +} + +bool Write::AppendBinlog(AppendEntriesAsyncClient* ptr_client) noexcept{ /*Asks the pending list for the head entities whose id is not larger than this request's id and appends them to the binlog in one call. Returns false when nothing could be cut; the request is then handed over to the cut-empty list.*/ + + //Only one thread could reach here for a certain log entry. + const auto &_log_id = this->m_shp_req_ctx->m_cur_log_id; + + auto _cmp = [&](const MemoryLogItemLeader &one) ->bool{ + return !::RaftCore::Common::EntityIDLarger(one.GetEntity()->entity_id(), _log_id); + }; + + //std::unique_lock _mutex_lock(LeaderView::m_cv_mutex); + DoubleListNode *_p_head = LeaderView::m_entity_pending_list.CutHead(_cmp); + //_mutex_lock.unlock(); + + if (_p_head == nullptr) { + VLOG(89) << "CutHead empty occur, transfer to bg list:" << _log_id; + + //Push unfinished requests to background singlist_ordered_queue.
+ auto _shp_ctx = std::shared_ptr(new CutEmptyContext()); + _shp_ctx->m_write_request = ptr_client->OwnershipDelegator::GetOwnership(); + + LeaderView::m_cut_empty_list.Insert(_shp_ctx); + + return false; + } + + /*Collect the entities of every cut node so that they are appended in a single AppendEntry() call.*/ std::list> _input_list; + auto _push = [&](decltype(_p_head) p_cur)->void { + _input_list.emplace_back(p_cur->m_val->GetEntity()); + }; + DoubleListNode::Apply(_p_head, _push); + + //Note: Multiple thread appending could happen + CHECK(BinLogGlobal::m_instance.AppendEntry(_input_list)) << "AppendEntry to binlog fail,never should this happen,something terribly wrong."; + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "Append binlog done.", &this->m_shp_req_ctx->m_cur_log_id); + + //No shared resource between the waiting threads and the notifying thread(s), no mutex is needed here. + LeaderView::m_cv.notify_all(); + + LeaderView::m_garbage.PushFront(_p_head); + + return true; +} + +void Write::AfterDetermined(AppendEntriesAsyncClient* ptr_client) noexcept { /*Runs once per log entry after phase I is determined: on a negative result the phase-II ready list is released; otherwise the pending commit requests are entrusted, the binlog is appended and the request is finished through AfterAppendBinlog().*/ + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "log entry is determined now.", &this->m_shp_req_ctx->m_cur_log_id); + + //Only one thread could reach here for a certain log entry. + FinishStatus _ret_val = this->JudgeReplicationResult(); + CHECK(_ret_val != FinishStatus::UNFINISHED) << "got an undetermined result in AfterDetermined."; + + if (_ret_val == FinishStatus::NEGATIVE_FINISHED) { + this->ReleasePhaseIIReadyList(); + return; + } + + //Now, phaseI succeed, entrust all the pending phaseII request.
+ auto *_p_cur_client = this->m_phaseII_ready_list.load(); + while (_p_cur_client != nullptr) { + auto *_p_tmp = _p_cur_client; + _p_cur_client = _p_cur_client->m_next; + this->EntrustCommitRequest(_p_tmp->m_data, ptr_client); + + /*Note: just delete the outer side wrapper(aka the 'UnorderedSingleListNode'), rather than the inner + data, which will be released by itself in the future.*/ + _p_tmp->m_data = nullptr; + delete _p_tmp; + } + + /*Every node has just been deleted: reset the head, as ReleasePhaseIIReadyList() does, so that it no longer points at freed memory.*/ + this->m_phaseII_ready_list.store(nullptr); + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "entrust all phaseII [necessary] clients done.", &this->m_shp_req_ctx->m_cur_log_id); + + //----------------Step 3: append to local binlog----------------// + if (!this->AppendBinlog(ptr_client)) + return; + + this->AfterAppendBinlog(); +} + +void Write::AfterAppendBinlog() noexcept { /*Final stage of a write: detaches the borrowed write_op from the log entity, applies the write to the local storage when the response is still SUCCESS, and finishes the request.*/ + + this->m_shp_entity->GetEntity()->release_write_op(); + + if (this->m_rsp->result() != ErrorCode::SUCCESS) { + this->FinishRequest(WriteProcessStage::ABOURTED); + return; + } + + if (this->m_first_of_cur_term) + ElectionMgr::m_leader_debut = false; + + //----------------Step 4: update local storage----------------// + //Note: Out of order setting could happen , but is acceptable for blind writing operations.
+ + const auto &_log_id = this->m_shp_req_ctx->m_cur_log_id; + + WriteProcessStage _final_status = WriteProcessStage::FRONT_FINISH; + + if (!StorageGlobal::m_instance.Set(_log_id, this->m_client_request->req().key(), this->m_client_request->req().value())) { + LOG(ERROR) << "Write to storage fail,logID:" << _log_id; + this->m_rsp->set_result(ErrorCode::FAIL); + this->m_rsp->set_err_msg("Log replicated succeed , but cannot write to storage."); + WriteProcessStage _final_status = WriteProcessStage::ABOURTED; + } + else + ::RaftCore::Tools::EndTiming(this->m_tp_start, "Update storage done.", &this->m_shp_req_ctx->m_cur_log_id); + + this->FinishRequest(_final_status); +} + +void Write::CutEmptyRoutine() noexcept { + + LOG(INFO) << "leader CutEmpty msg processor thread started."; + + while (true) { + + if (!CommonView::m_running_flag) + return; + + auto _wait_cond = [&]()->bool { return !LeaderView::m_cut_empty_list.Empty(); }; + + auto _wait_timeo_us = std::chrono::microseconds(::RaftCore::Config::FLAGS_iterating_wait_timeo_us); + std::unique_lock _unique_wrapper(LeaderView::m_cv_mutex); + bool _waiting_result = LeaderView::m_cv.wait_for(_unique_wrapper, _wait_timeo_us, _wait_cond); + + //There is no shared state among different threads, so it's better to release this lock ASAP. + _unique_wrapper.unlock(); + + if (!_waiting_result) + continue; + + auto _now = std::chrono::system_clock::now(); + std::shared_ptr _shp_last_return; + + bool _recheck = false; + auto _lambda = [&](std::shared_ptr &one) { + auto &_p_req = one->m_write_request; + + auto _upper = BinLogGlobal::m_instance.GetLastReplicated(); + + if (_p_req->ProcessCutEmptyRequest(_now, _upper, one, _recheck)) { + _shp_last_return = one; + return true; + } + + return false;//No need to go further, stop iterating over the list. 
+ }; + + LeaderView::m_cut_empty_list.Iterate(_lambda); + if (!_shp_last_return) + continue; + + auto* _p_head = LeaderView::m_cut_empty_list.CutHeadByValue(*_shp_last_return); + if (_p_head == nullptr) + continue; + + /* Double check here for 2 reasons: + 1. For the way of TrivialLockSingleList' work, to get rid of missing elements inserted at + head at the moment of cuthead. + 2. failed write requests need a recheck, and reset its result to SUCCESS if necessary. */ + LeaderView::m_cut_empty_list.IterateCutHead(_lambda, _p_head); + + LeaderView::m_cut_empty_garbage.PushFront(_p_head); + } +} + +bool Write::ProcessCutEmptyRequest(const TypeSysTimePoint &tp, const LogIdentifier ¤t_lrl, + std::shared_ptr &one, bool recheck) noexcept { + + if (one->m_processed_flag.load()) + return true; + + const auto &_cur_log_id = this->m_shp_req_ctx->m_cur_log_id; + if (recheck) + CHECK(current_lrl >= _cur_log_id); + + //::RaftCore::Tools::EndTiming(this->m_tp_start, "entry process disorder.", &_cur_log_id); + + auto _diff = std::chrono::duration_cast(tp - one->m_generation_tp); + if (!one->m_log_flag && _diff.count() >= ::RaftCore::Config::FLAGS_cut_empty_timeos_ms) { + + LOG(ERROR) << "waiting for CutHead append to binlog timeout,cur_log_id:" + << _cur_log_id << ",lrl:" << current_lrl << ", wait ms:" << ::RaftCore::Config::FLAGS_cut_empty_timeos_ms; + + one->m_log_flag = true; + } + + //Current request's log hasn't been appended to the binlog file. + if (current_lrl < _cur_log_id) + return false; + + //Once the current disorder message has already been processed by other iterating threads. + bool _processed = false; + if (!one->m_processed_flag.compare_exchange_strong(_processed, true)) { + VLOG(89) << "cutEmpty req processing permission has been taken:" << _cur_log_id; + return true; + } + + //All requests go through here are successfully processed at end. 
+ this->m_rsp->set_result(ErrorCode::SUCCESS); + + this->AfterAppendBinlog(); + + //::RaftCore::Tools::EndTiming(this->m_tp_start, "process CutEmpty done ,has responded to client.", &_cur_log_id); + + return true; +} + +uint32_t Write::GetConservativeTimeoutValue(uint64_t idx,bool last_guid) const noexcept { /*Derives a timeout from the distance between idx and the last replicated log index, scaled by the AppendEntries RPC timeout. NOTE(review): idx - m_index is unsigned and wraps if idx is behind the snapshot - confirm callers guarantee idx >= m_index.*/ + auto _snapshot = BinLogGlobal::m_instance.GetLastReplicated(); + + int _minimum_factor = 2; + if (!last_guid) + _minimum_factor += 1; + int _rpc_timeout = ::RaftCore::Config::FLAGS_leader_append_entries_rpc_timeo_ms; + return uint32_t(((idx - _snapshot.m_index) / 2 + _minimum_factor) * _rpc_timeout); +} + +bool Write::UpdateServerStatus(uint64_t guid,LeaderView::ServerStatus status) noexcept { /*Sets the leader's server status on behalf of the given guid. Returns false when a larger guid has already triggered an update, true otherwise (also when the status was already the requested one).*/ + + //Only bigger guids are allowed to modify the server status.Since what we need is the most updated status. + { + ReadLock _r_lock(this->m_mutex); + if (guid < this->m_last_trigger_guid) + return false; + } + + WriteLock _w_lock(this->m_mutex); + + /*Check again under the write lock: another thread with a larger guid may have updated the status between the release of the read lock and the acquisition of the write lock.*/ + if (guid < this->m_last_trigger_guid) + return false; + + this->m_last_trigger_guid = guid; + + auto _old_status = LeaderView::m_status; + + if (_old_status == status) + return true; + + LeaderView::m_status = status; + + if (status == LeaderView::ServerStatus::HALTED) + this->m_wait_time_point = std::chrono::steady_clock::now() + std::chrono::microseconds(::RaftCore::Config::FLAGS_cgg_wait_for_last_released_guid_finish_us); + + LOG(INFO) << "update server status from " << this->MacroToString(_old_status) << " to " + << this->MacroToString(status) << " with the guid of " << guid; + + return true; +} + +void Write::LastlogResolve(bool result, uint64_t last_released_guid) noexcept { + + /*This is a relative accurate method for judging if all the logs before the LRG has been + determined, false negative might occur but it's acceptable.*/ + + LogIdentifier _cur_lrl = BinLogGlobal::m_instance.GetLastReplicated(); + uint64_t _gap = last_released_guid - _cur_lrl.m_index; + + do { + VLOG(89) << "Waiting all logs before LRG resolved, last_released_guid:" << last_released_guid + << ",
cur_lrl:" << _cur_lrl.m_index << ",gap:" << _gap << ", waiting_num:" + << LeaderView::m_last_log_waiting_num.load() << ", should be quickly resolved."; + + /*Note: _gap could be < m_last_log_waiting_num in the end whereas they should be equal, since + _cur_lrl is only a relatively accurate value. NOTE(review): _gap is computed once before the loop and the loop spins without sleeping - confirm the busy wait is intended.*/ + } while (LeaderView::m_last_log_waiting_num.load() < _gap); + + //To get around the issues(CHECK failed) caused by above deviation, wait an additional time. + uint32_t _wait_ms = ::RaftCore::Config::FLAGS_leader_last_log_resolve_additional_wait_ms; + std::this_thread::sleep_for(std::chrono::milliseconds(_wait_ms)); + + if (!result) { + //Get the latest LRL as the new base guid. + _cur_lrl = BinLogGlobal::m_instance.GetLastReplicated(); + GuidGenerator::SetNextBasePoint(_cur_lrl.m_index); + LOG(INFO) << "base point of guid has been set to:" << _cur_lrl.m_index; + } + + /*Once we reach here, the false negative requests should have been reset to SUCCESS, and all elements + in the 'm_cut_empty_list' now are the ones that are true negative failures; what we need to do is + just send them home, aka return to the client.*/ + auto *_p_remains = LeaderView::m_cut_empty_list.SetEmpty(); + + auto _lambda = [&](std::shared_ptr &one) { + auto &_p_req = one->m_write_request; + + const auto &_entity_id = _p_req->GetInnerLog()->GetEntity()->entity_id(); + + VLOG(89) << "start processing the bg list after last log resolved:" << _entity_id.idx(); + + _p_req->AfterAppendBinlog(); + return true; + }; + + if (_p_remains != nullptr) { + LeaderView::m_cut_empty_list.IterateCutHead(_lambda, _p_remains); + LeaderView::m_cut_empty_garbage.PushFront(_p_remains); + } + + //It's the last log thread's duty to clear all the remaining item in the pending list.
+ LeaderView::m_entity_pending_list.Clear(); + + LOG(INFO) << "last log resolved with the last release guid:" << last_released_guid; + + LeaderView::m_last_log_waiting_num.store(0); + + CHECK(this->UpdateServerStatus(last_released_guid, LeaderView::ServerStatus::NORMAL)) << "latest log:" + << last_released_guid <<" update server status to NORMAL fail."; +} + +uint64_t Write::WaitForLastGuidReleasing() const noexcept { /*Sleeps until m_wait_time_point (set when the server status becomes HALTED) and then returns the last released guid.*/ + //Business logic guaranteeing that there are no race conditions for 'm_wait_time_point'. + std::this_thread::sleep_until(this->m_wait_time_point); + return GuidGenerator::GetLastReleasedGuid(); +} + +Read::Read(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { /*Initializes the request object and registers it with the async service for the next Read RPC.*/ + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestRead(&this->m_server_context, &this->m_request, &this->m_responder, + this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status Read::Process() noexcept { /*Serves a read from the local storage. The gRPC status is always OK; a missing key is only logged.*/ + auto _p_rsp = this->m_response.mutable_client_comm_rsp(); + + if (!this->LeaderCheckVailidity(_p_rsp)) + return ::grpc::Status::OK; + + auto *_p_val = this->m_response.mutable_value(); + if (!StorageGlobal::m_instance.Get(this->m_request.key(), *_p_val)) + LOG(INFO) << "val doesn't exist for key :" << this->m_request.key(); + + return ::grpc::Status::OK; +} + +MembershipChange::MembershipChange(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { /*Initializes the request object and registers it with the async service for the next MembershipChange RPC.*/ + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestMembershipChange(&this->m_server_context, &this->m_request, + &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status MembershipChange::Process() noexcept { + //TODO: Some authentication should be added here.
+ + auto *_p_rsp = this->m_response.mutable_client_comm_rsp(); + if (!this->LeaderCheckVailidity(_p_rsp)) + return ::grpc::Status::OK; + + /*Collect the requested node list into a set before handing it to MemberMgr::PullTrigger().*/ std::set _new_cluster; + for (int i = 0; i < this->m_request.node_list_size(); ++i) + _new_cluster.emplace(this->m_request.node_list(i)); + + /*A non-null return value of PullTrigger() is an error message.*/ const char* _p_err_msg = MemberMgr::PullTrigger(_new_cluster); + if (_p_err_msg) { + LOG(ERROR) << "[Membership Change] pull the trigger fail:" << _p_err_msg; + _p_rsp->set_result(ErrorCode::FAIL); + _p_rsp->set_err_msg("pull trigger fail,check the log for details."); + return ::grpc::Status::OK; + } + + _p_rsp->set_result(ErrorCode::SUCCESS); + return ::grpc::Status::OK; +} + +AppendEntries::AppendEntries(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { /*Resets the ownership to this object, initializes it and registers it with the async service for the next AppendEntries RPC.*/ + + this->ResetOwnership(this); + + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + + this->m_async_service->RequestAppendEntries(&this->m_server_context, &this->m_request, + &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +AppendEntries::~AppendEntries()noexcept {} + +std::string AppendEntries::ComposeInputLogs() noexcept { /*Copies every entry of the request into m_log_list and verifies through AfterOf() that each entry follows the previous one. Returns an empty string on success, otherwise the error text.*/ + //Check validity of the input logs + + MemoryLogItemFollower *_p_previous = nullptr; + const auto &_replicate_entity = this->m_request.replicate_entity(); + for (auto iter = _replicate_entity.cbegin(); iter != _replicate_entity.cend(); ++iter) { + + //Note: This is where memory copy overhead occurs!
+ MemoryLogItemFollower *_p_log_item = new MemoryLogItemFollower(*iter); + + this->m_log_list.emplace_back(_p_log_item); + + //Ensure the log entries are continuous + if (!_p_previous) { + _p_previous = _p_log_item; + continue; + } + + if (!_p_log_item->AfterOf(*_p_previous)) { + char sz_err[1024] = { 0 }; + std::snprintf(sz_err,sizeof(sz_err),"inputing logs are not continuous,pre:%d|%llu,cur:%d|%llu", + _p_previous->GetEntity()->pre_log_id().term(), _p_previous->GetEntity()->pre_log_id().idx(), + _p_previous->GetEntity()->entity_id().term(), _p_previous->GetEntity()->entity_id().idx()); + LOG(ERROR) << sz_err; + return sz_err; + } + + _p_previous = _p_log_item; + } + + return ""; +} + +void AppendEntries::ProcessOverlappedLog() noexcept { + + static const char* _p_err_msg = "revert log fail."; + static const char* _p_step_over = "overstep lcl"; + + const char* _p_ret_msg = ""; + + /*Note : This can be invoked simultaneously , for correctness and simplicity , only one thread could + successfully reverted the binlog, others will fail, in which case we will return an explicit fail to the + client . 
*/ + ErrorCode _error_code = ErrorCode::SUCCESS; + const auto &_lcl = StorageGlobal::m_instance.GetLastCommitted(); + + auto _revert_code = BinLogGlobal::m_instance.RevertLog(this->m_log_list, _lcl); + /*Every revert code above SUCCEED_MAX is handled as a failure.*/ if (_revert_code > BinLogOperator::BinlogErrorCode::SUCCEED_MAX) { + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + LOG(ERROR) << "log conflict detected,but reverting log fail,current ID-LRL:" << _lrl + << ",_pre_entity_id:" << this->m_pre_entity_id->DebugString() + << ",retCode:" << int(_revert_code); + + _error_code = ErrorCode::FAIL; + _p_ret_msg = _p_err_msg; + + /*Two revert codes are mapped to dedicated error codes; the others keep the generic FAIL.*/ if (_revert_code == BinLogOperator::BinlogErrorCode::NO_CONSISTENT_ERROR) + _error_code = ErrorCode::APPEND_ENTRY_CONFLICT; + else if (_revert_code == BinLogOperator::BinlogErrorCode::OVER_BOUNDARY) { + _error_code = ErrorCode::OVERSTEP_LCL; + _p_ret_msg = _p_step_over; + } + + } else if (_revert_code == BinLogOperator::BinlogErrorCode::SUCCEED_TRUNCATED) { + + /*Note: this elif section may be executed simultaneously and lead to misunderstanding for + 'm_phaseI_pending_list', but will easily get resolved by a new RESYNC_LOG + triggered on the leader side. */ + + static std::mutex _m; + + std::unique_lock _mutex_lock(_m); + + //In case of successfully reverting log, all the pending lists are also become invalid,need to be cleared. + FollowerView::m_phaseI_pending_list.DeleteAll(); //Cannot use 'Clear' avoiding conflict with 'Insert' operations.
+ FollowerView::m_phaseII_pending_list.Clear(); + + /*There may be remaining items in this->m_log_list that have already been appended to the binlog after + reverting; for phaseII to commit correctly, they need to be inserted into phaseII_pending_list.*/ + std::for_each(this->m_log_list.cbegin(), this->m_log_list.cend(), [&](const auto &_one) { FollowerView::m_phaseII_pending_list.Insert(_one); }); + + } else if (_revert_code == BinLogOperator::BinlogErrorCode::SUCCEED_MERGED) + _error_code = ErrorCode::SUCCESS_MERGED; + + this->m_rsp->set_result(_error_code); + this->m_rsp->set_err_msg(_p_ret_msg); +} + +bool AppendEntries::BeforeJudgeOrder() noexcept { /*Validates and stages an incoming AppendEntries request. Returns true when the response is ready (validation failure, overlap handled or adjacent log processed) and false when the request was parked in the disorder list.*/ + + this->m_tp_start = ::RaftCore::Tools::StartTimeing(); + + this->m_rsp = this->m_response.mutable_comm_rsp(); + this->m_rsp->set_result(ErrorCode::SUCCESS); + + //Testing... + /* + const auto &_entity = this->m_request.replicate_entity(this->m_request.replicate_entity_size() - 1); + uint32_t _idx = _entity.entity_id().idx(); + + VLOG(89) << " msg received & idx:" << _idx; + + const auto& _start_ts = this->m_request.debug_info(); + +#ifdef _SVC_APPEND_ENTRIES_TEST_ + auto _start_us = std::atoll(_start_ts.c_str()); + auto _now_us = (std::chrono::duration_cast(std::chrono::system_clock::now() - this->m_start_tp)).count(); + uint64_t _lantency_us = (uint64_t)(_now_us - _start_us); + VLOG(2) << "server side single req latency(us):" << _lantency_us << ",now:" << _now_us + << ",start:" << _start_us << ", tp:" << std::chrono::duration_cast(this->m_start_tp.time_since_epoch()).count(); +#endif + + this->m_rsp->set_err_msg(_start_ts); + + return true; + */ + + auto _err_msg = this->FollowerCheckValidity(this->m_request.base(), &this->m_tp_start, &this->m_last_log); + if (!_err_msg.empty()) { + LOG(ERROR) << "check request validity fail :" << _err_msg; + this->m_rsp->set_result(ErrorCode::FAIL); + this->m_rsp->set_err_msg(_err_msg); + return true; + } + + _err_msg = this->ComposeInputLogs(); + if (!_err_msg.empty()) { +
LOG(ERROR) << "input log invalid,detail"; + this->m_rsp->set_result(ErrorCode::FAIL); + this->m_rsp->set_err_msg(_err_msg); + return true; + } + + //this->m_log_list need to be sorted. + auto _cmp = [](const std::shared_ptr& left, const std::shared_ptr& right) ->bool { + return ::RaftCore::Common::EntityIDSmaller(left->GetEntity()->entity_id(),right->GetEntity()->entity_id()); + }; + this->m_log_list.sort(_cmp); + + this->m_pre_entity_id = &(this->m_log_list.front()->GetEntity()->pre_log_id()); + this->m_last_entity_id = &(this->m_log_list.back()->GetEntity()->entity_id()); + + this->m_last_log = ::RaftCore::Common::ConvertID(*this->m_last_entity_id); + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "start processing to :", &this->m_last_log); + + //Check if the first log conflict with the written logs + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + if (::RaftCore::Common::EntityIDSmaller(*this->m_pre_entity_id,_lrl)) { + ::RaftCore::Tools::EndTiming(this->m_tp_start, "start process overlap log.", &this->m_last_log); + this->ProcessOverlappedLog(); + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + ::RaftCore::Tools::EndTiming(this->m_tp_start, "overlap log process done, lrl:", &_lrl); + return true; + } + + /*Inserting the log entries to the follower's pending list in a reverse order to get rid of the + 'partially inserted' problem. */ + std::for_each(this->m_log_list.crbegin(), this->m_log_list.crend(), [&](const auto &_one) { FollowerView::m_phaseI_pending_list.Insert(_one); }); + + /* If the minimum ID of the log entries is greater than the ID-LRL, means the current thread need to wait.. 
*/ + _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + + VLOG(89) << "debug pos1,pre_id:" << ::RaftCore::Common::ConvertID(*this->m_pre_entity_id) + << ",snapshot:" << _lrl; + + if (!::RaftCore::Common::EntityIDEqual(*this->m_pre_entity_id, _lrl)) { + + this->m_append_entries_stage = AppendEntriesProcessStage::WAITING; + + //Here need to wait on a CV, push it to background threads. + auto _shp_ctx = std::shared_ptr(new DisorderMessageContext()); + _shp_ctx->m_append_request = this->GetOwnership(); + + FollowerView::m_disorder_list.Insert(_shp_ctx); + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "insert a disorder msg.", &this->m_last_log); + + return false; + } + + this->ProcessAdjacentLog(); + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "adjacent log process done: ", &this->m_last_log); + + return true; +} + +const LogIdentifier& AppendEntries::GetLastLogID() const noexcept { + return this->m_last_log; +} + +void AppendEntries::DisorderLogRoutine() noexcept { + + LOG(INFO) << "follower disorder msg processor thread started."; + + while (true) { + + if (!CommonView::m_running_flag) + return; + + auto _wait_cond = [&]()->bool { return !FollowerView::m_disorder_list.Empty(); }; + + auto _wait_timeo_us = std::chrono::microseconds(::RaftCore::Config::FLAGS_iterating_wait_timeo_us); + std::unique_lock _unique_wrapper(FollowerView::m_cv_mutex); + bool _waiting_result = FollowerView::m_cv.wait_for(_unique_wrapper, _wait_timeo_us, _wait_cond); + + //There is no shared state among different threads, so it's better to release this lock ASAP. 
+ _unique_wrapper.unlock(); + + if (!_waiting_result) + continue; + + auto _now = std::chrono::system_clock::now(); + std::shared_ptr _shp_last_return; + + auto _lambda = [&](std::shared_ptr &one) { + auto &_shp_req = one->m_append_request; + + auto _upper = BinLogGlobal::m_instance.GetLastReplicated(); + + if (_shp_req->ProcessDisorderLog(_now, _upper, one)) { + _shp_last_return = one; + return true; + } + + return false;//No need to go further, stop iterating over the list. + }; + + FollowerView::m_disorder_list.Iterate(_lambda); + if (!_shp_last_return) + continue; + + auto* _p_head = FollowerView::m_disorder_list.CutHeadByValue(*_shp_last_return); + if (_p_head == nullptr) + continue; + + //_shp_last_return may become invalid here. + + /*For the way of TrivialLockDoubleList' work, here we need a double check to get rid of + missing elements inserted at head at the moment of CutHead. */ + FollowerView::m_disorder_list.IterateCutHead(_lambda, _p_head); + + FollowerView::m_disorder_garbage.PushFront(_p_head); + } +} + +bool AppendEntries::ProcessDisorderLog(const TypeSysTimePoint &tp, const LogIdentifier &upper_log, + std::shared_ptr &one) noexcept { + + /*Judge whether the current disorder message has already been processed by the current or + other iterating threads.*/ + if (one->m_processed_flag.load()) + return true; + + /*There is no need to consider the overlapped logs, because : + 1. elements in the pending list can be over written. + 2. overlapped ones can all get a positive result from the follower, you can do nothing + to prevent this. */ + + //Current request's log hasn't been appended to the binlog file. 
+ bool _not_reach_me = upper_log < this->m_last_log; + bool _adjacent = ::RaftCore::Common::EntityIDEqual(*this->m_pre_entity_id, upper_log); + + if (_not_reach_me && !_adjacent) + return false; + + bool _processed = false; + if (!one->m_processed_flag.compare_exchange_strong(_processed, true)) { + VLOG(89) << "disorder req processing permission has been taken:" << this->m_last_log; + return true; + } + + if (_adjacent) { + VLOG(89) << "process adjacent in routine:" << this->m_last_log; + this->ProcessAdjacentLog(); + } + + auto _diff = std::chrono::duration_cast(tp - one->m_generation_tp); + if (_diff.count() >= ::RaftCore::Config::FLAGS_disorder_msg_timeo_ms) { + + /*Here we don't need to delete elements from FollowerView::m_phaseI_pending_list where + encounter failures with it, just leave it here, and they'll get replaced with the ones.*/ + + LOG(ERROR) << "Waiting for cv timeout,upper log:" << upper_log << ", last log:" + << this->m_last_log << ",pre_log_id:" << ::RaftCore::Common::ConvertID(*this->m_pre_entity_id) + << ", diff:" << _diff.count() << ", wait ms:" << ::RaftCore::Config::FLAGS_disorder_msg_timeo_ms; + + //Return a WAITING_TIMEOUT error indicating the leader that this follower need to be set to `RESYNC_LOG` status. 
+ this->m_rsp->set_result(ErrorCode::WAITING_TIMEOUT); + this->m_rsp->set_err_msg("Waiting for cv timeout."); + } + + this->m_append_entries_stage = AppendEntriesProcessStage::FINISH; + this->m_responder.Finish(this->m_response, ::grpc::Status::OK, this); + + ::RaftCore::Tools::EndTiming(this->m_tp_start, "process disorder done ,has responded to client.", &this->m_last_log); + + return true; +} + +::grpc::Status AppendEntries::Process() noexcept { + return ::grpc::Status::OK; +} + +void AppendEntries::React(bool cq_result) noexcept { + if (!cq_result) { + LOG(ERROR) << "AppendEntries got false result from CQ,last log:" << this->m_last_log; + this->ReleaseOwnership(); + return; + } + + switch (this->m_append_entries_stage) { + case AppendEntriesProcessStage::CREATE: + /* Spawn a new subclass instance to serve new clients while we process + the one for this . The instance will deallocate itself as + part of its FINISH state.*/ + new AppendEntries(this->m_async_service,this->m_server_notify_cq,this->m_server_call_cq); + + if (this->BeforeJudgeOrder()) { + this->m_append_entries_stage = AppendEntriesProcessStage::FINISH; + this->m_responder.Finish(this->m_response, ::grpc::Status::OK, this); + } + break; + + case AppendEntriesProcessStage::WAITING: + //do nothing. 
+ break; + + case AppendEntriesProcessStage::FINISH: + this->ReleaseOwnership(); + break; + + default: + CHECK(false) << "Unexpected tag " << int(this->m_append_entries_stage); + break; + } +} + +/*Cuts the head off FollowerView::m_phaseI_pending_list, hands every cut node over to + FollowerView::m_phaseII_pending_list, appends the collected entities to the binlog and finally + wakes up the waiters of FollowerView::m_cv. Aborts (CHECK) when nothing could be cut or when + appending to the binlog fails.*/ +void AppendEntries::ProcessAdjacentLog() noexcept { + + DoubleListNode *_p_head = FollowerView::m_phaseI_pending_list.CutHead(CmpMemoryLogFollower); + + CHECK(_p_head) << "cut head empty"; + //A single cut element must directly follow the last replicated log (ID-LRL). + if (_p_head->m_atomic_next.load() == nullptr) { + const auto &_pre_log_id = _p_head->m_val->GetEntity()->pre_log_id(); + auto _lrl = BinLogGlobal::m_instance.GetLastReplicated(); + CHECK(::RaftCore::Common::EntityIDEqual(_pre_log_id,_lrl)) + << "cut head got one element but its pre_log_id != ID-LRL :" + << _pre_log_id.ShortDebugString() << "!=" << _lrl; + } + + std::list> _input_list; + + auto _push = [&](decltype(_p_head) p_cur)->void{ + _input_list.emplace_back(p_cur->m_val->GetEntity()); + + /*Insert will take the ownership of p_cur, so no need to release them later.*/ + FollowerView::m_phaseII_pending_list.Insert(p_cur); + }; + + DoubleListNode::Apply(_p_head, _push); + + CHECK(BinLogGlobal::m_instance.AppendEntry(_input_list)) << "AppendEntry to binlog fail,never should this happen,something terribly wrong."; + + //notify the background thread that the LRL has updated now. + FollowerView::m_cv.notify_all(); +} + +CommitEntries::CommitEntries(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestCommitEntries(&this->m_server_context, &this->m_request, + &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status CommitEntries::Process() noexcept { + + //...Test.... 
+ //this->m_response.mutable_comm_rsp()->set_result(ErrorCode::SUCCESS); + //return ::grpc::Status::OK; + + auto _req_term = this->m_request.entity_id().term(); + auto _req_idx = this->m_request.entity_id().idx(); + + VLOG(89) << "Enter CommitEntries,term:" << _req_term << ",idx:" << _req_idx; + + auto _p_rsp = this->m_response.mutable_comm_rsp(); + _p_rsp->set_result(ErrorCode::SUCCESS); + + auto _err_msg = this->FollowerCheckValidity(this->m_request.base()); + if (!_err_msg.empty()) { + _p_rsp->set_result(ErrorCode::FAIL); + _p_rsp->set_err_msg(_err_msg); + VLOG(89) << "done CommitEntries,pos0:" << _req_term << ",idx:" << _req_idx; + return ::grpc::Status::OK; + } + + LogIdentifier req_log; + req_log.Set(_req_term, _req_idx); + if (req_log < StorageGlobal::m_instance.GetLastCommitted()) { + VLOG(89) << "done CommitEntries,pos1:" << _req_term << ",idx:" << _req_idx; + _p_rsp->set_result(ErrorCode::ALREADY_COMMITTED); + return ::grpc::Status::OK; + } + + auto follower_log_item = MemoryLogItemFollower(_req_term, _req_idx); + DoubleListNode *_p_head = FollowerView::m_phaseII_pending_list.CutHeadByValue(follower_log_item); + if (_p_head == nullptr) { + //In case of (req_log >= ID-LCL && cannot get valueset_result(ErrorCode::ALREADY_COMMITTED); + _p_rsp->set_err_msg("CutHeadByValue got a nullptr"); + + VLOG(89) << "done CommitEntries,pos2:" << _req_term << ",idx:" << _req_idx; + return ::grpc::Status::OK; + } + + int _cuthead_size = 0; + + //Updating storage. 
+ auto _store = [&](decltype(_p_head) p_cur)->void{ + auto _entity = p_cur->m_val->GetEntity(); + + _cuthead_size++; + + LogIdentifier _log; + _log.Set(_entity->entity_id().term(),_entity->entity_id().idx()); + + /*Since write_op is inside the pb structure rather than in a 'shared_ptr', we have to do a + memory copy here.*/ + StorageGlobal::m_instance.Set(_log,_entity->write_op().key(), _entity->write_op().value()); + }; + + DoubleListNode::Apply(_p_head, _store); + + FollowerView::m_garbage.PushFront(_p_head); + + VLOG(89) << "done CommitEntries,term:" << _req_term << ",idx:" << _req_idx << ",cuthead size:" << _cuthead_size; + + return ::grpc::Status::OK; +} + +SyncData::SyncData(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestSyncData(&this->m_server_context, &this->m_reader_writer, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status SyncData::Process() noexcept { + auto _p_comm_rsp = this->m_response.mutable_comm_rsp(); + + auto _err_msg = this->FollowerCheckValidity(this->m_request.base()); + if (!_err_msg.empty()) { + LOG(ERROR) << "[Sync Data Stream] read an invalid input ,error msg:" << _err_msg; + _p_comm_rsp->set_result(ErrorCode::FAIL); + _p_comm_rsp->set_err_msg(_err_msg); + return ::grpc::Status::OK; + } + + const ::raft::SyncDataMsgType &_msg_type = this->m_request.msg_type(); + switch (_msg_type) { + + case ::raft::SyncDataMsgType::PREPARE: { + LOG(INFO) << "[Sync Data Stream]receive PREPARE."; + FollowerView::Clear(); + + StorageGlobal::m_instance.Reset(); + + if (!BinLogGlobal::m_instance.Clear()) + LOG(ERROR) << "SyncData clear storage data fail:"; + + _p_comm_rsp->set_result(ErrorCode::PREPARE_CONFRIMED); + break; + } + + case ::raft::SyncDataMsgType::SYNC_DATA: + LOG(INFO) << "[Sync Data Stream]receive SYNC_DATA, size:" << this->m_request.entity_size(); + for (int i = 
0; i < this->m_request.entity_size(); ++i) { + const ::raft::Entity &_entity = this->m_request.entity(i); + LogIdentifier _log_id = ::RaftCore::Common::ConvertID(_entity.entity_id()); + if (!StorageGlobal::m_instance.Set(_log_id, _entity.write_op().key(), _entity.write_op().key())) { + LOG(ERROR) << "SyncData set storage fail,log id:" << _log_id; + break; + } + + /*Along with storing ,the latest log entry should also be appended to the binlog file for + further uses.*/ + if (i != this->m_request.entity_size() - 1) + continue; + + std::shared_ptr<::raft::Entity> _shp_entity(new ::raft::Entity()); + auto _p_entity_id = _shp_entity->mutable_entity_id(); + _p_entity_id->set_term(_entity.entity_id().term()); + _p_entity_id->set_idx(_entity.entity_id().idx()); + + _shp_entity->set_allocated_write_op(const_cast<::raft::WriteRequest*>(&_entity.write_op())); + auto _set_head_error_code = BinLogGlobal::m_instance.SetHead(_shp_entity); + _shp_entity->release_write_op(); + if (_set_head_error_code != BinLogOperator::BinlogErrorCode::SUCCEED_TRUNCATED) { + LOG(ERROR) << "SyncData SetHead fail,log id:" << _log_id; + break; // break for loop. + } + } + _p_comm_rsp->set_result(ErrorCode::SYNC_DATA_CONFRIMED); + break; + + case ::raft::SyncDataMsgType::SYNC_LOG: { + LOG(INFO) << "[Sync Data Stream]receive SYNC_LOG, size:" << this->m_request.entity_size(); + TypeEntityList _input_list; + for (int i = 0; i < this->m_request.entity_size(); ++i) { + const ::raft::Entity &_entity = this->m_request.entity(i); + + /*Note: 1.must specify the deleter for std::shared_ptr,otherwise double-free could happen. + 2. convert Entity* to const Entity* is safe demonstrated by other tests. */ + _input_list.emplace_back(const_cast(&_entity), [](auto p) {}); + + //Also need to add log entries to pending list II. 
+ std::shared_ptr _shp_follower_log(new MemoryLogItemFollower(_entity)); + DoubleListNode *_p_node = new DoubleListNode(_shp_follower_log); + FollowerView::m_phaseII_pending_list.Insert(_p_node); + } + + CHECK(BinLogGlobal::m_instance.AppendEntry(_input_list)) << "AppendEntry to binlog fail,never should this happen,something terribly wrong."; + _p_comm_rsp->set_result(ErrorCode::SYNC_LOG_CONFRIMED); + break; + } + + default: + LOG(ERROR) << "SyncData unknown msgType:" << _msg_type; + break; + } + + return ::grpc::Status::OK; +} + +MemberChangePrepare::MemberChangePrepare(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestMemberChangePrepare(&this->m_server_context, &this->m_request, &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status MemberChangePrepare::Process() noexcept { + LOG(INFO) << "[Membership Change] MemberChangePrepare starts."; + + auto *_p_rsp = this->m_response.mutable_comm_rsp(); + + auto _err_msg = this->FollowerCheckValidity(this->m_request.base()); + if (!_err_msg.empty()) { + LOG(ERROR) << "[Membership Change] cannot do membership change prepare,error message:" << _err_msg; + _p_rsp->set_result(ErrorCode::FAIL); + _p_rsp->set_err_msg(_err_msg); + return ::grpc::Status::OK; + } + + std::set _new_cluster; + for (int i = 0; i < this->m_request.node_list_size(); ++i) + _new_cluster.emplace(this->m_request.node_list(i)); + + MemberMgr::JointTopology _joint_topo; + _joint_topo.Update(&_new_cluster); + + MemberMgr::SwitchToJointConsensus(_joint_topo,this->m_request.version()); + + std::string _removed_nodes="",_added_nodes=""; + { + ReadLock _r_lock(MemberMgr::m_mutex); + for (const auto& _node : MemberMgr::m_joint_summary.m_joint_topology.m_added_nodes) + _added_nodes += (_node.first + "|"); + + for (const auto& _node : 
MemberMgr::m_joint_summary.m_joint_topology.m_removed_nodes) + _removed_nodes += (_node + "|"); + } + + LOG(INFO) << "[Membership Change] switched to JointConsensus status with new nodes:" << _added_nodes + << " and removed nodes:" << _removed_nodes; + + _p_rsp->set_result(ErrorCode::SUCCESS); + + LOG(INFO) << "[Membership Change] MemberChangePrepare ends."; + + return ::grpc::Status::OK; +} + +MemberChangeCommit::MemberChangeCommit(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestMemberChangeCommit(&this->m_server_context, &this->m_request, &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status MemberChangeCommit::Process() noexcept { + LOG(INFO) << "[Membership Change] MemberChangeCommit starts."; + + auto *_p_rsp = this->m_response.mutable_comm_rsp(); + + auto _err_msg = this->FollowerCheckValidity(this->m_request.base()); + if (!_err_msg.empty()) { + LOG(ERROR) << "[Membership Change] cannot do membership change commit,error message:" << _err_msg; + _p_rsp->set_result(ErrorCode::FAIL); + _p_rsp->set_err_msg(_err_msg); + return ::grpc::Status::OK; + } + + bool _still_in_new_cluster = MemberMgr::SwitchToStable(); + + LOG(INFO) << "[Membership Change] switched to Stable status "; + + if(!_still_in_new_cluster){ + LOG(INFO) << "[Membership Change]I'm no longer in the new cluster , shutdown myself in 3 seconds,goodbye and have a good time."; + //Must start a new thread to shutdown myself. + auto _shutdown = [&]()->void { + std::this_thread::sleep_for(std::chrono::seconds(3)); + this->SetServerShuttingDown(); + GlobalEnv::ShutDown(); + }; + std::thread _t(_shutdown); + _t.detach(); + } + + /*If the old leader is not in the new cluster, the nodes in the new cluster will soon after start + new rounds of elections, to achieve this, we need to reset the heartbeat clock. 
*/ + if (this->m_request.has_flag()) { + if (this->m_request.flag() == ::raft::MembershipFlag::NEWBIE) { + WriteLock _w_lock(FollowerView::m_last_heartbeat_lock); + FollowerView::m_last_heartbeat = std::chrono::steady_clock::now(); + } + } + + _p_rsp->set_result(ErrorCode::SUCCESS); + + LOG(INFO) << "[Membership Change] MemberChangeCommit ends."; + + return ::grpc::Status::OK; +} + +void MemberChangeCommit::SetServerShuttingDown() noexcept { + WriteLock _w_lock(this->m_mutex); + LeaderView::m_status = LeaderView::ServerStatus::SHUTTING_DOWN; +} + +PreVote::PreVote(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestPreVote(&this->m_server_context, &this->m_request, &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status PreVote::Process() noexcept { + auto _p_rsp = this->m_response.mutable_comm_rsp(); + _p_rsp->set_result(ErrorCode::PREVOTE_YES); + + auto _req_term = this->m_request.base().term(); + auto _req_addr = this->m_request.base().addr(); + + if (!this->ValidClusterNode(_req_addr)) { + _p_rsp->set_result(ErrorCode::PREVOTE_NO); + _p_rsp->set_err_msg("You are not in my cluster config list:" + _req_addr); + return ::grpc::Status::OK; + } + + ElectionMgr::AddVotingTerm(_req_term,_req_addr); + + //Only candidate can vote. 
+ auto _current_role = StateMgr::GetRole(); + if (_current_role != RaftRole::CANDIDATE) { + _p_rsp->set_result(ErrorCode::PREVOTE_NO); + _p_rsp->set_err_msg("I'm a " + std::string(StateMgr::GetRoleStr(_current_role)) + " rather than a candidate."); + } + + return ::grpc::Status::OK; +} + +Vote::Vote(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestVote(&this->m_server_context, &this->m_request, &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +/*Answers a vote request. The response starts as VOTE_YES and is turned into VOTE_NO (with an + explanatory err_msg) by the first failing check; the gRPC status itself is always OK.*/ +::grpc::Status Vote::Process() noexcept { + auto _p_rsp = this->m_response.mutable_comm_rsp(); + _p_rsp->set_result(ErrorCode::VOTE_YES); + + auto _req_term = this->m_request.base().term(); + auto _req_addr = this->m_request.base().addr(); + + //Reject with the vote specific code, like every other rejection in this handler. + if (!this->ValidClusterNode(_req_addr)) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("You are not in my cluster config list:" + _req_addr); + return ::grpc::Status::OK; + } + + ElectionMgr::AddVotingTerm(_req_term,_req_addr); + + auto _current_role = StateMgr::GetRole(); + if (_current_role != RaftRole::CANDIDATE) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("I'm not a candidate."); + return ::grpc::Status::OK; + } + + auto _my_term = ElectionMgr::m_cur_term.load(); + if ( _req_term < _my_term) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("I have a greater term:" + std::to_string(_my_term) + " than yours:" + std::to_string(_req_term)); + return ::grpc::Status::OK; + } + + if (_req_term == _my_term) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("I already issued an election in your term:" + std::to_string(_req_term)); + return ::grpc::Status::OK; + } + + //Judge LOG ID. 
+ const auto & _id_lrl = BinLogGlobal::m_instance.GetLastReplicated(); + if (::RaftCore::Common::EntityIDSmaller(this->m_request.last_log_entity(), _id_lrl)) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("Your log ID " + this->m_request.last_log_entity().DebugString() + + " is smaller than mine:" + _id_lrl.ToString()); + return ::grpc::Status::OK; + } + + if (::RaftCore::Common::EntityIDEqual(this->m_request.last_log_entity(), _id_lrl)) { + auto _req_version = this->m_request.member_version(); + auto _my_version = MemberMgr::GetVersion(); + if (_req_version < _my_version) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("Your membership version: " + std::to_string(_req_version) + + " is smaller than mine:" + std::to_string(_my_version)); + return ::grpc::Status::OK; + } + } + + auto _voted_addr = ElectionMgr::TryVote(_req_term, _req_addr); + if (!_voted_addr.empty()) { + _p_rsp->set_result(ErrorCode::VOTE_NO); + _p_rsp->set_err_msg("Try vote fail,I've already voted addr:" + _voted_addr); + return ::grpc::Status::OK; + } + + return ::grpc::Status::OK; +} + +HeartBeat::HeartBeat(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq) noexcept { + this->Initialize(shp_svc, shp_notify_cq, shp_call_cq); + this->m_async_service->RequestHeartBeat(&this->m_server_context, &this->m_request, &this->m_responder, this->m_server_call_cq.get(), this->m_server_notify_cq.get(), this); +} + +::grpc::Status HeartBeat::Process() noexcept { + + VLOG(89) << "receive heartbeat."; + + const auto &_leader_term = this->m_request.base().term(); + const auto &_leader_addr = this->m_request.base().addr(); + + if (!this->ValidClusterNode(_leader_addr)) { + this->m_response.set_result(ErrorCode::PREVOTE_NO); + this->m_response.set_err_msg("You are not in my cluster config list:" + _leader_addr); + return ::grpc::Status::OK; + } + + /*(Read & judge & take action) wrap the three to be an atomic operation. 
Tiny overhead for + processing the periodically sent heartbeat messages. */ + WriteLock _w_lock(ElectionMgr::m_election_mutex); + + uint32_t _cur_term = ElectionMgr::m_cur_term.load(); + if (_leader_term < _cur_term) { + LOG(ERROR) << "a lower term heartbeat received,detail:" << this->m_request.DebugString(); + this->m_response.set_result(ErrorCode::FAIL); + this->m_response.set_err_msg("your term " + std::to_string(_leader_term) + " is smaller than mine:" + std::to_string(_cur_term)); + return ::grpc::Status::OK; + } + + { + WriteLock _w_lock(FollowerView::m_last_heartbeat_lock); + FollowerView::m_last_heartbeat = std::chrono::steady_clock::now(); + } + + this->m_response.set_result(::raft::SUCCESS); + + auto _cur_role = StateMgr::GetRole(); + + if (_leader_term == _cur_term) { + CHECK(_cur_role != RaftRole::LEADER) << "I'm a leader,receive heartbeat from the same term,detail:" << this->m_request.DebugString(); + + if (_cur_role == RaftRole::FOLLOWER) { + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + CHECK(_topo.m_leader == _leader_addr) << "A different leader under term:" << _cur_term << " found" + << ",my leader addr:" << _topo.m_leader << ",peer leader addr : " << _leader_addr << ",ignore it."; + } + else if (_cur_role == RaftRole::CANDIDATE) + ElectionMgr::NotifyNewLeaderEvent(_leader_term,_leader_addr); + + return ::grpc::Status::OK; + } + + //Now :_leader_term > _cur_term ,switch role is needed. + LOG(INFO) << "higher term found: " << _leader_term << ",current term:" << _cur_term + << ",prepare to switch to follower with respect to the new leader :" << _leader_addr; + + if (_cur_role == RaftRole::CANDIDATE) { + //I'm in a electing state. + ElectionMgr::NotifyNewLeaderEvent(_leader_term,_leader_addr); + } + else if (_cur_role == RaftRole::FOLLOWER) { + ::RaftCore::Topology _topo; + ::RaftCore::CTopologyMgr::Read(&_topo); + + //New leader with a higher term found ,move the old leader to follower list. 
+ _topo.m_followers.emplace(_topo.m_leader); + _topo.m_followers.erase(_leader_addr); + _topo.m_candidates.erase(_leader_addr); + _topo.m_leader = _leader_addr; + + ::RaftCore::CTopologyMgr::Update(_topo); + + ElectionMgr::m_cur_term.store(_leader_term); + } + else if (_cur_role == RaftRole::LEADER) { + //Leader step down. + ElectionMgr::m_cur_term.store(_leader_term); + ElectionMgr::SwitchRole(RaftRole::FOLLOWER, _leader_addr); + } + + this->m_response.set_result(::raft::SUCCESS); + return ::grpc::Status::OK; +} + +} diff --git a/src/service/service.h b/src/service/service.h new file mode 100644 index 0000000..7a934c2 --- /dev/null +++ b/src/service/service.h @@ -0,0 +1,457 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_SERVICE_EX_H__ +#define __AURORA_SERVICE_EX_H__ + +#include +#include + +#include "protocol/raft.pb.h" +#include "protocol/raft.grpc.pb.h" + +#include "common/log_identifier.h" +#include "leader/leader_request.h" +#include "leader/follower_entity.h" +#include "leader/leader_view.h" +#include "leader/leader_bg_task.h" +#include "follower/follower_request.h" +#include "follower/memory_log_follower.h" +#include "follower/follower_bg_task.h" +#include "candidate/candidate_request.h" +#include "guid/guid_generator.h" +#include "client/client_impl.h" +#include "tools/lock_free_unordered_single_list.h" +#include "service/ownership_delegator.h" + +namespace RaftCore::Service { + +using ::RaftCore::Guid::GuidGenerator; +using ::RaftCore::Common::LogIdentifier; +using ::RaftCore::DataStructure::AtomicPtrSingleListNode; +using ::RaftCore::Common::FinishStatus; +using ::RaftCore::Service::OwnershipDelegator; +using ::RaftCore::Leader::BackGroundTask::LogReplicationContext; +using ::RaftCore::Leader::BackGroundTask::TwoPhaseCommitContext; +using ::RaftCore::Leader::MemoryLogItemLeader; +using ::RaftCore::Leader::LeaderRequest; +using ::RaftCore::Leader::FollowerEntity; +using ::RaftCore::Leader::LeaderView; +using ::RaftCore::Follower::FollowerUnaryRequest; +using ::RaftCore::Follower::FollowerBidirectionalRequest; +using ::RaftCore::Follower::TypeMemlogFollowerList; +using ::RaftCore::Follower::BackGroundTask::DisorderMessageContext; +using ::RaftCore::Leader::BackGroundTask::CutEmptyContext; +using ::RaftCore::Candidate::CandidateUnaryRequest; +using ::RaftCore::Client::AppendEntriesAsyncClient; +using ::RaftCore::Tools::TypeTimePoint; +using ::RaftCore::Tools::TypeSysTimePoint; + +//For the prospective common properties . 
+class RPCBase { + +public: + + RPCBase(); + + virtual ~RPCBase(); + +protected: + + bool LeaderCheckVailidity(::raft::ClientCommonResponse* response) noexcept; + + std::string FollowerCheckValidity(const ::raft::RequestBase &req_base, TypeTimePoint* p_tp = nullptr, LogIdentifier *p_cur_id = nullptr) noexcept; + + bool ValidClusterNode(const std::string &peer_addr) noexcept; + + inline static const char* MacroToString(LeaderView::ServerStatus enum_val) { + return m_status_macro_names[int(enum_val)]; + } + +protected: + + std::shared_timed_mutex m_mutex; + + static const char* m_status_macro_names[]; + +private: + + RPCBase(const RPCBase&) = delete; + + RPCBase& operator=(const RPCBase&) = delete; +}; + +class Write final : public LeaderRequest<::raft::ClientWriteRequest, ::raft::ClientWriteResponse, Write>, + public OwnershipDelegator, public RPCBase { + +public: + + Write(std::shared_ptr shp_svc, + std::shared_ptr &shp_notify_cq, + std::shared_ptr &shp_call_cq)noexcept; + + virtual ~Write(); + + /*In Write RPC, the whole 'Process' procedure is divided into several smaller parts, + just give an empty implementation here.*/ + ::grpc::Status Process() noexcept override; + + void ReplicateDoneCallBack(const ::grpc::Status &status, const ::raft::AppendEntriesResponse& rsp, + FollowerEntity* ptr_follower, AppendEntriesAsyncClient* ptr_client) noexcept; + + //Return: the to-be entrusted client if any, otherwise a nullptr is returned. 
+ bool UpdatePhaseIStatistic(const ::grpc::Status &status, + const ::raft::AppendEntriesResponse& rsp, FollowerEntity* ptr_follower) noexcept; + + void CommitDoneCallBack(const ::grpc::Status &status, const ::raft::CommitEntryResponse& rsp, + FollowerEntity* ptr_follower) noexcept; + +#ifdef _SVC_WRITE_TEST_ + auto GetInnerLog() { return this->m_shp_entity; } +#endif + + const std::shared_ptr& GetReqCtx() noexcept; + + void AfterAppendBinlog() noexcept; + + static void CutEmptyRoutine() noexcept; + +private: + + enum class WriteProcessStage { CREATE, FRONT_FINISH, ABOURTED }; + + virtual void React(bool cq_result = true) noexcept override; + + bool BeforeReplicate() noexcept; + + void AfterDetermined(AppendEntriesAsyncClient* ptr_client) noexcept; + + bool PrepareReplicationStatistic(std::list> &entrust_list) noexcept; + + //Return : if get majority entrusted. + bool PrepareReplicationContext(uint32_t cur_term, uint32_t pre_term) noexcept; + + FinishStatus JudgeReplicationResult() noexcept; + + void ProcessReplicateFailure(const ::raft::CommonResponse& comm_rsp, + TwoPhaseCommitContext::PhaseState &phaseI_state, FollowerEntity* ptr_follower, + uint32_t joint_consensus_state) noexcept; + + void AddResyncLogTask(FollowerEntity* ptr_follower, const LogIdentifier &sync_point) noexcept; + + void EntrustCommitRequest(FollowerEntity* ptr_follower, AppendEntriesAsyncClient* ptr_client)noexcept; + + void ReleasePhaseIIReadyList()noexcept; + + void FinishRequest(WriteProcessStage state) noexcept; + + //Return: If successfully CutHead someone off from the pending list. + bool AppendBinlog(AppendEntriesAsyncClient* ptr_client) noexcept; + + /*The microseconds that the thread should waiting for. After detecting a failure, + waiting for its previous logs to have a deterministic result(success or implicitly/explicitly fail). + Both the latest and non-latest logs have to be waited for. 
*/ + uint32_t GetConservativeTimeoutValue(uint64_t idx,bool last_guid=false) const noexcept; + + //Return : if server status successfully changed . + bool UpdateServerStatus(uint64_t guid, LeaderView::ServerStatus status) noexcept; + + void LastlogResolve(bool result, uint64_t last_released_guid) noexcept; + + //Return the last released GUID. + uint64_t WaitForLastGuidReleasing() const noexcept; + + bool ProcessCutEmptyRequest(const TypeSysTimePoint &tp, const LogIdentifier ¤t_lrl, + std::shared_ptr &one, bool recheck) noexcept; + +private: + + bool m_first_of_cur_term = false; + + std::shared_ptr m_shp_entity; + + // The latest guid used setting server status. + uint64_t m_last_trigger_guid = 0; + + ::raft::EntityID* m_p_pre_entity_id = nullptr; + + std::shared_ptr m_shp_req_ctx; + + TypeTimePoint m_tp_start; + + std::chrono::time_point m_wait_time_point; + + ::raft::ClientCommonResponse* m_rsp = nullptr; + + GuidGenerator::GUIDPair m_guid_pair; + + ::raft::ClientWriteRequest* m_client_request = nullptr; + + std::shared_ptr<::raft::CommitEntryRequest> m_shp_commit_req; + + //Indicating if the entry point of majority succeed is already taken by other threads. 
/* Handler for the client read RPC.
   Built on LeaderRequest, parameterized with ::raft::ClientReadRequest and
   ::raft::ClientReadResponse, and on RPCBase for the shared helpers.
   NOTE(review): the constructor's std::shared_ptr parameters have lost their
   template arguments in this copy of the file; tokens are kept as found. */
class Read final : public LeaderRequest<::raft::ClientReadRequest,::raft::ClientReadResponse,Read>, public RPCBase {

public:

    // Takes the service object plus the notification and call completion queues.
    Read(std::shared_ptr shp_svc,
        std::shared_ptr &shp_notify_cq,
        std::shared_ptr &shp_call_cq)noexcept;

    // Executes the request and returns the gRPC status to report.
    virtual ::grpc::Status Process() noexcept override;

private:

    // Non-copyable.
    Read(const Read&) = delete;

    Read& operator=(const Read&) = delete;
};
/* Handler for the commit-entry RPC.
   Built on FollowerUnaryRequest, parameterized with ::raft::CommitEntryRequest
   and ::raft::CommitEntryResponse, and on RPCBase for the shared helpers.
   NOTE(review): the constructor's std::shared_ptr parameters have lost their
   template arguments in this copy of the file; tokens are kept as found. */
class CommitEntries final : public FollowerUnaryRequest<::raft::CommitEntryRequest,::raft::CommitEntryResponse,CommitEntries>, public RPCBase {

public:

    // Takes the service object plus the notification and call completion queues.
    CommitEntries(std::shared_ptr shp_svc,
        std::shared_ptr &shp_notify_cq,
        std::shared_ptr &shp_call_cq)noexcept;

    // Executes the request and returns the gRPC status to report.
    virtual ::grpc::Status Process() noexcept override;

private:

    // Non-copyable.
    CommitEntries(const CommitEntries&) = delete;

    CommitEntries& operator=(const CommitEntries&) = delete;
};
/* Handler for the commit step of a membership change.
   Built on FollowerUnaryRequest, parameterized with
   ::raft::MemberChangeInnerRequest and ::raft::MemberChangeInnerResponse
   (the same message pair MemberChangePrepare uses), and on RPCBase.
   NOTE(review): the constructor's std::shared_ptr parameters have lost their
   template arguments in this copy of the file; tokens are kept as found. */
class MemberChangeCommit final : public FollowerUnaryRequest<::raft::MemberChangeInnerRequest,::raft::MemberChangeInnerResponse,MemberChangeCommit>, public RPCBase {

public:

    // Takes the service object plus the notification and call completion queues.
    MemberChangeCommit(std::shared_ptr shp_svc,
        std::shared_ptr &shp_notify_cq,
        std::shared_ptr &shp_call_cq)noexcept;

    // Executes the request and returns the gRPC status to report.
    virtual ::grpc::Status Process() noexcept override;

private:

    // Internal helper; presumably marks this server as shutting down once it
    // has been removed from the cluster -- TODO confirm in the .cc.
    void SetServerShuttingDown() noexcept;

private:

    // Non-copyable.
    MemberChangeCommit(const MemberChangeCommit&) = delete;

    MemberChangeCommit& operator=(const MemberChangeCommit&) = delete;
};
/* Handler for the heartbeat RPC.
   This RPC is meaningful to more than one role, which is why it derives from
   the role-neutral UnaryRequest base instead of a Leader/Follower/Candidate
   specific one. Request type is ::raft::HeartBeatRequest and the reply is
   ::raft::CommonResponse.
   NOTE(review): the constructor's std::shared_ptr parameters have lost their
   template arguments in this copy of the file; tokens are kept as found. */
class HeartBeat final : public UnaryRequest<::raft::HeartBeatRequest,::raft::CommonResponse,HeartBeat>, public RPCBase {

public:

    // Takes the service object plus the notification and call completion queues.
    HeartBeat(std::shared_ptr shp_svc,
        std::shared_ptr &shp_notify_cq,
        std::shared_ptr &shp_call_cq)noexcept;

    // Executes the request and returns the gRPC status to report.
    virtual ::grpc::Status Process() noexcept override;

private:

    // Non-copyable.
    HeartBeat(const HeartBeat&) = delete;

    HeartBeat& operator=(const HeartBeat&) = delete;
};
+*/ + +#include + +#include "common/comm_defs.h" +#include "config/config.h" +#include "tools/utilities.h" +#include "state/state_mgr.h" + +namespace RaftCore::State { + +RaftRole StateMgr::m_cur_state = RaftRole::UNKNOWN; + +bool StateMgr::m_initialized = false; + +std::string StateMgr::m_my_addr = ""; + +std::list StateMgr::m_nic_addrs; + +using ::RaftCore::CTopologyMgr; +using ::RaftCore::Topology; + +void StateMgr::Initialize(const ::RaftCore::Topology &global_topo) noexcept { + + ::RaftCore::Tools::GetLocalIPs(m_nic_addrs); + std::for_each(m_nic_addrs.begin(), m_nic_addrs.end(), [](std::string &_ip) { + _ip += std::string(":" + std::to_string(::RaftCore::Config::FLAGS_port) ); + }); + + std::list _find_in; + if (global_topo.m_my_addr.empty()) + _find_in = m_nic_addrs; + else + _find_in.emplace_back(global_topo.m_my_addr); + + auto _init_topology = [&]() -> ::RaftCore::State::RaftRole { + + if (std::find(_find_in.cbegin(), _find_in.cend(), global_topo.m_leader) != std::end(_find_in)) { + m_my_addr = global_topo.m_leader; + return ::RaftCore::State::RaftRole::LEADER; + } + + for (const auto & _item : global_topo.m_followers) { + if (std::find(_find_in.cbegin(), _find_in.cend(), _item) == std::end(_find_in)) + continue; + + m_my_addr = _item; + return ::RaftCore::State::RaftRole::FOLLOWER; + } + + for (const auto & _item : global_topo.m_candidates) { + if (std::find(_find_in.cbegin(), _find_in.cend(), _item) == std::end(_find_in)) + continue; + + m_my_addr = _item; + return ::RaftCore::State::RaftRole::CANDIDATE; + } + + //If I'm not in the topology list , assume I'm an empty node ready to be joined. 
+ return ::RaftCore::State::RaftRole::UNKNOWN; + }; + + m_cur_state = _init_topology(); + CHECK(m_cur_state != ::RaftCore::State::RaftRole::UNKNOWN) << "m_cur_state invalid."; + + m_initialized = true; +} + +bool StateMgr::Ready() noexcept { + return m_initialized; +} + +void StateMgr::UnInitialize() noexcept { + m_cur_state = ::RaftCore::State::RaftRole::UNKNOWN; + m_my_addr = ""; +} + +State::RaftRole StateMgr::GetRole() noexcept{ + return m_cur_state; +} + +const std::list& StateMgr::GetNICAddrs() noexcept { + return m_nic_addrs; +} + +bool StateMgr::AddressUndetermined() noexcept { + return m_my_addr.empty(); +} + +const char* StateMgr::GetRoleStr(RaftRole state) noexcept +{ + RaftRole _role = m_cur_state; + if (state != RaftRole::UNKNOWN) + _role = state; + + if (_role == RaftRole::LEADER) + return _ROLE_STR_LEADER_; + else if (_role == RaftRole::FOLLOWER) + return _ROLE_STR_FOLLOWER_; + else if (_role == RaftRole::CANDIDATE) + return _ROLE_STR_CANDIDATE_; + else if (_role == RaftRole::UNKNOWN) + return _ROLE_STR_UNKNOWN_; + else + CHECK(false); + + return nullptr; +} + +void StateMgr::SwitchTo(RaftRole state,const std::string &new_leader) noexcept { + + /*There are 4 valid transitions: + 1. Leader -> Follower. (step down.) + 2. Follower -> Candidate. (start electing.) + 3. Candidate -> Follower. (new leader elected but not me.) + 4. Candidate -> Follower. (new leader elected.It's me.) */ + + Topology _topo; + CTopologyMgr::Read(&_topo); + + //Update topology before switching role. + if (m_cur_state == RaftRole::LEADER) { + CHECK(state == RaftRole::FOLLOWER) << "invalid state transition found : Leader -> " << state; + + //Check new leader address format validity. 
+ std::regex _pattern("\\d{1,3}\.\\d{1,3}\.\\d{1,3}\.\\d{1,3}:\\d+"); + std::smatch _sm; + CHECK(std::regex_match(new_leader, _sm, _pattern)) << "new leader format valid:" << new_leader; + + _topo.m_leader = new_leader; + _topo.m_followers.erase(new_leader); + _topo.m_candidates.erase(new_leader); + _topo.m_followers.emplace(m_my_addr); + + } else if (m_cur_state == RaftRole::FOLLOWER) { + CHECK(state == RaftRole::CANDIDATE) << "invalid state transition found : Follower -> " << state; + + _topo.m_followers.erase(m_my_addr); + _topo.m_candidates.emplace(m_my_addr); + + } else if (m_cur_state == RaftRole::CANDIDATE) { + CHECK(state == RaftRole::LEADER || state == RaftRole::FOLLOWER) << "invalid state transition found : Candidate -> " << state; + + _topo.m_candidates.erase(m_my_addr); + + if (state == RaftRole::LEADER) { + _topo.m_followers.emplace(_topo.m_leader); + _topo.m_candidates.erase(m_my_addr); + _topo.m_leader = m_my_addr; + } + else if (state == RaftRole::FOLLOWER) { + /* This transition can be caused of : + 1. pre-vote fail. + 2. a new leader has been detected. */ + + //This is for case 2. + if (!new_leader.empty()) { + _topo.m_followers.emplace(_topo.m_leader); + _topo.m_followers.erase(new_leader); + _topo.m_candidates.erase(new_leader); + _topo.m_leader = new_leader; + } + + _topo.m_followers.emplace(m_my_addr); + } + } else + CHECK(false) << "unknown role found :" << m_cur_state; + + m_cur_state = state; + + //Writing new topology to config file. 
+ CTopologyMgr::Update(_topo); +} + +const std::string& StateMgr::GetMyAddr() noexcept { + return m_my_addr; +} + +void StateMgr::SetMyAddr(const std::string& addr) noexcept { + m_my_addr = addr; +} + +std::ostream& operator<<(std::ostream& os, const RaftRole& obj) { + os << StateMgr::GetRoleStr(obj); + return os; +} + +} diff --git a/src/state/state_mgr.h b/src/state/state_mgr.h new file mode 100644 index 0000000..0bef9c7 --- /dev/null +++ b/src/state/state_mgr.h @@ -0,0 +1,90 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
/* Role a node can play in the Raft cluster.
   UNKNOWN is the initial value; StateMgr::GetRoleStr also takes it as its
   default argument, where it stands for "use the current role". */
enum class RaftRole{ UNKNOWN , //Initial state, no role determined yet
    LEADER, //Raft Leader
    CANDIDATE , //Raft Candidate
    FOLLOWER , //Raft Follower
    };
*/ + static RaftRole m_cur_state; + + static bool m_initialized; + + static std::string m_my_addr; + + static std::list m_nic_addrs; +}; + +std::ostream& operator<<(std::ostream& os, const RaftRole& obj); + + +} + + +#endif diff --git a/src/storage/hashable_string.cc b/src/storage/hashable_string.cc new file mode 100644 index 0000000..312a233 --- /dev/null +++ b/src/storage/hashable_string.cc @@ -0,0 +1,72 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "tools/utilities.h" +#include "storage/hashable_string.h" + +namespace RaftCore::Storage { + +HashableString::HashableString(const std::string &other,bool on_fly) noexcept { + //Ignore delete operation. 
+ if (on_fly) + this->m_shp_str.reset(const_cast(&other), [](auto *p) {}); + else + this->m_shp_str = std::make_shared(other); +} + +HashableString::~HashableString() noexcept {} + +bool HashableString::operator<(const HashableString &other)const noexcept { + return std::strcmp(this->m_shp_str->c_str(), other.m_shp_str->c_str()) < 0; +} + +bool HashableString::operator==(const HashableString &other)const noexcept { + return std::strcmp(this->m_shp_str->c_str(), other.m_shp_str->c_str()) == 0; +} + +bool HashableString::operator==(const std::string &other)const noexcept { + return std::strcmp(this->m_shp_str->c_str(), other.c_str()) == 0; +} + +const HashableString& HashableString::operator=(const HashableString &other)noexcept { + //Deleter will also be transferred. + this->m_shp_str = other.m_shp_str; + return *this; +} + +std::size_t HashableString::Hash() const noexcept { + return std::hash{}(*this->m_shp_str); +} + +const std::string& HashableString::GetStr() const noexcept { + return *this->m_shp_str; +} + +const std::shared_ptr HashableString::GetStrPtr() const noexcept { + return this->m_shp_str; +} + +std::size_t PtrHSHasher::operator()(const TypePtrHashableString &shp_hashable_string)const { + return std::hash{}(shp_hashable_string->GetStr()); +} + +bool PtrHSEqualer::operator()(const TypePtrHashableString &left,const TypePtrHashableString &right)const { + return left->GetStr() == right->GetStr(); +} + +} diff --git a/src/storage/hashable_string.h b/src/storage/hashable_string.h new file mode 100644 index 0000000..a80fe31 --- /dev/null +++ b/src/storage/hashable_string.h @@ -0,0 +1,74 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
namespace RaftCore::Storage {

using ::RaftCore::DataStructure::HashTypeBase;

/* String wrapper usable as a key of the lock-free hash: it provides ordering,
   equality, assignment and Hash() as overrides of HashTypeBase.
   NOTE(review): template argument lists (on HashTypeBase and on the
   std::shared_ptr uses below) appear to have been stripped in this copy of the
   file; tokens are kept as found. */
class HashableString final: public HashTypeBase {

public:

    /* Wraps 'other'. With on_fly=true the object is meant for temporary usage
       such as querying the hash: the implementation then aliases 'other'
       without copying it, so 'other' must stay alive as long as this object. */
    HashableString(const std::string &other,bool on_fly=false) noexcept;

    virtual ~HashableString() noexcept;

    //Ordering by string content.
    virtual bool operator<(const HashableString &other)const noexcept override;

    //Equality by string content.
    virtual bool operator==(const HashableString &other)const noexcept override;

    //Equality against a plain std::string.
    virtual bool operator==(const std::string &other)const noexcept ;

    //Shares the underlying string of 'other'.
    virtual const HashableString& operator=(const HashableString &other)noexcept override;

    //Hash value of the string content.
    virtual std::size_t Hash() const noexcept override;

    //Read-only access to the wrapped string.
    virtual const std::string& GetStr() const noexcept ;

    //The shared pointer holding the wrapped string.
    virtual const std::shared_ptr GetStrPtr() const noexcept ;

private:

    //Wrapped string; owning or a non-owning alias depending on 'on_fly'.
    std::shared_ptr m_shp_str;

};

typedef std::shared_ptr TypePtrHashableString;

//Hash functor for containers keyed by TypePtrHashableString.
struct PtrHSHasher {
    std::size_t operator()(const TypePtrHashableString &shp_hashable_string)const;
};

//Equality functor for containers keyed by TypePtrHashableString.
struct PtrHSEqualer {
    bool operator()(const TypePtrHashableString &left, const TypePtrHashableString &right)const;
};

} //end namespace
/* Implementation of HashValue and MemoryTable.
   NOTE(review): several template argument lists (std::make_shared, std::function,
   std::list, std::map, LockFreeHash) appear to have been stripped in this copy of
   the file; tokens are kept as found. */
namespace RaftCore::Storage {

using ::RaftCore::Storage::TypePtrHashableString;

//Builds a value record : a = term, b = index, c = value content (copied).
HashValue::HashValue(uint32_t a, uint64_t b, const std::string &c) {
    this->m_term = a;
    this->m_index = b;
    this->m_val = std::make_shared(c);
}

//Orders by term first, then by index. The value content does not take part.
bool HashValue::operator<(const HashValue &other)const noexcept {
    if (this->m_term < other.m_term)
        return true;

    if (this->m_term > other.m_term)
        return false;

    return this->m_index < other.m_index;
}

//Creates the underlying hash, sized by FLAGS_memory_table_hash_slot_num.
MemoryTable::MemoryTable() noexcept {
    this->m_shp_records = std::make_shared(::RaftCore::Config::FLAGS_memory_table_hash_slot_num);
}

MemoryTable::~MemoryTable() noexcept {}

//Stores 'val' under 'key' together with the (term, index) pair it was written with.
void MemoryTable::Insert(const std::string &key,const std::string &val,uint32_t term,uint64_t index) noexcept{

    //Both key and value are copied here; this overhead can't be avoided.
    TypePtrHashableString _shp_key = std::make_shared(key);
    TypePtrHashValue _shp_val = std::make_shared(term, index, val);

    this->m_shp_records->Insert(_shp_key,_shp_val);
}

/* Calls 'op' for every record, in the order produced by GetOrderedByKey.
   Iteration stops as soon as 'op' returns false. */
void MemoryTable::IterateByKey(std::function op) const noexcept {

    std::list _ordered_meta;
    this->m_shp_records->GetOrderedByKey(_ordered_meta);

    for (const auto &_meta : _ordered_meta) {
        TypePtrHashValue _shp_val;
        //NOTE(review): the result of Read is ignored, so 'op' may be handed an
        //empty '_shp_val' if the lookup fails -- confirm whether that can happen.
        this->m_shp_records->Read(*_meta, _shp_val);
        if (!op(_meta, _shp_val))
            break;
    }
}

/* Calls 'op' for every record, ordered by value (HashValue::operator<, i.e. by
   term then index). Returns false as soon as 'op' returns false, true otherwise. */
bool MemoryTable::IterateByVal(std::function op) const noexcept {

    LockFreeHash::ValueComparator _cmp = [](const TypePtrHashValue &left, const TypePtrHashValue &right)->bool { return *left < *right; };

    std::map, std::shared_ptr,decltype(_cmp)> _ordered_by_value_map(_cmp);

    this->m_shp_records->GetOrderedByValue(_ordered_by_value_map);

    for (const auto &_item : _ordered_by_value_map)
        if (!op(*_item.first, *_item.second))
            return false;

    return true;
}

//Copies the value stored under 'key' into 'val'. Returns false when the key isn't found.
bool MemoryTable::GetData(const std::string &key,std::string &val) const noexcept {

    TypePtrHashValue _shp_val;
    //on_fly=true : a temporary key aliasing 'key', no copy made for the lookup.
    if (!this->m_shp_records->Read(HashableString(key,true), _shp_val))
        return false;

    val = *_shp_val->m_val;
    return true;
}

//Number of records, as reported by the underlying hash.
std::size_t MemoryTable::Size() const noexcept {
    return this->m_shp_records->Size();
}

}
/* Declarations of the in-memory key/value table.
   NOTE(review): template argument lists on std::shared_ptr, LockFreeHash and
   std::function appear to have been stripped in this copy of the file; tokens
   are kept as found. */
namespace RaftCore::Storage {

using ::RaftCore::DataStructure::LockFreeHash;
using ::RaftCore::Storage::HashableString;

//Value record of the table : the content plus the (term, index) it was written with.
struct HashValue {

    //a = term, b = index, c = value content.
    HashValue(uint32_t a, uint64_t b, const std::string &c);

    //Orders by term, then by index.
    bool operator<(const HashValue &other)const noexcept;

    uint32_t m_term;
    uint64_t m_index;
    std::shared_ptr m_val;
};

typedef std::shared_ptr TypePtrHashValue;
typedef LockFreeHash TypeRecords;
typedef std::shared_ptr PtrRecords;

//Key/value table backed by a lock-free hash. Non-copyable.
class MemoryTable final{

public:

    MemoryTable() noexcept;

    virtual ~MemoryTable() noexcept;

    //Stores 'val' under 'key' together with its term and index.
    void Insert(const std::string &key, const std::string &val, uint32_t term, uint64_t index) noexcept;

    //Visits records in key order; stops when 'op' returns false.
    void IterateByKey(std::function op) const noexcept;

    //Visits records in value order; returns false if 'op' stopped the iteration.
    bool IterateByVal(std::function op) const noexcept;

    //Looks 'key' up; on success copies the content into 'val' and returns true.
    bool GetData(const std::string &key, std::string &val) const noexcept;

    //Number of stored records.
    std::size_t Size() const noexcept;

private:

    //The underlying hash holding all records.
    PtrRecords m_shp_records;

private:

    MemoryTable(const MemoryTable&) = delete;

    MemoryTable& operator=(const MemoryTable&) = delete;

};

typedef std::shared_ptr TypePtrMemoryTable;

} //end namespace
/* Merging constructor : builds a new sstable holding the union of the records
   of 'from' and 'to'. When a key exists in both, the record of 'to' is kept.
   The result is written to a new file named after 'to' plus
   _AURORA_SSTABLE_MERGE_SUFFIX_. Aborts through CHECK if either input has no
   keys or if a listed key cannot be read back.
   NOTE(review): template argument lists on std::make_shared, std::list and
   std::unordered_set appear to have been stripped in this copy of the file;
   tokens are kept as found. */
SSTAble::SSTAble(const SSTAble &from, const SSTAble &to) noexcept {

    this->m_shp_meta = std::make_shared(::RaftCore::Config::FLAGS_sstable_table_hash_slot_num);

    std::list _from_keys;
    from.m_shp_meta->GetOrderedByKey(_from_keys);
    CHECK(_from_keys.size() > 0) << "sstable meta size invalid ,file:" << from.GetFilename();

    std::list _to_keys;
    to.m_shp_meta->GetOrderedByKey(_to_keys);
    CHECK(_to_keys.size() > 0) << "sstable meta size invalid ,file:" << to.GetFilename();

    //Merge sort.
    MemoryTable _mem_table;

    //Keys present in both tables.
    std::unordered_set _intersection;
    auto _cmp = [](const TypePtrHashableString &left, const TypePtrHashableString &right) ->bool {
        return left->operator<(*right);
    };

    //std::set_intersection needs both ranges sorted with respect to '_cmp';
    //presumably GetOrderedByKey guarantees that -- TODO confirm.
    std::set_intersection(_from_keys.cbegin(), _from_keys.cend(), _to_keys.cbegin(), _to_keys.cend(), std::inserter(_intersection,_intersection.end()),_cmp);

    //Copies every key of '_key_list' (value + term + index) from 'sstable' into the
    //memory table; with filter=true the keys found in '_intersection' are skipped.
    auto _update_mem_table = [&](const std::list &_key_list,const SSTAble &sstable,bool filter=false) {

        for (const auto &_key : _key_list) {

            if (filter && _intersection.find(_key) != _intersection.cend())
                continue;

            std::string _val = "";
            const std::string &_key_str = _key->GetStr();

            CHECK(sstable.Read(_key_str, _val)) << "key:" << _key_str << " doesn't exist in file:" << sstable.GetFilename();

            TypePtrMeta _shp_meta;
            CHECK(sstable.m_shp_meta->Read(*_key, _shp_meta)) << "key:" << _key_str << " doesn't exist in meta:" << sstable.GetFilename();

            _mem_table.Insert(_key_str,_val,_shp_meta->m_term,_shp_meta->m_index);
        }
    };

    //All records of 'to' first, then only those of 'from' that 'to' doesn't have.
    _update_mem_table(_to_keys, to);
    _update_mem_table(_from_keys, from, true);

    std::string _new_file_name = to.m_associated_file + _AURORA_SSTABLE_MERGE_SUFFIX_;
    this->CreateFile(_new_file_name.c_str());

    this->DumpFrom(_mem_table);
}
+/*Bind this object to its backing file and open it with _AURORA_SSTABLE_OP_MODE_.
+  file_name : path to use as-is; when null a name of the form
+              <_AURORA_DATA_DIR_>/<_AURORA_SSTABLE_PREFIX_>.<timestamp>.<random> is generated
+              and the data directory is created if it does not exist yet.
+  Aborts (CHECK) if the file cannot be opened.*/
+void SSTAble::CreateFile(const char* file_name) noexcept {
+
+    if (file_name) {
+        //Take the caller's path verbatim: staging it in a fixed 128-byte buffer silently truncated long paths.
+        this->m_associated_file = file_name;
+    } else {
+        char sz_file[128] = { 0 };
+
+        auto _rand = ::RaftCore::Tools::GenerateRandom(0, 1000);
+
+        /*NOTE(review): the duration type was lost in this copy of the source. Microseconds is assumed
+          because StorageMgr::Set documents sstable names colliding inside "the same microsecond
+          time window" -- confirm against the repository.*/
+        auto _now = std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now().time_since_epoch());
+
+        //Explicit casts keep the arguments in line with the %llu / %03u conversions on every platform.
+        std::snprintf(sz_file,sizeof(sz_file),_AURORA_SSTABLE_PREFIX_".%llu.%03u",
+            (unsigned long long)_now.count(),(unsigned int)_rand);
+
+        fs::path _path(_AURORA_DATA_DIR_);
+        if (!fs::exists(_path))
+            fs::create_directory(_path);
+        _path /= sz_file;
+        this->m_associated_file = _path.string();
+    }
+
+    this->m_file_handler = std::fopen(this->m_associated_file.c_str(), _AURORA_SSTABLE_OP_MODE_);
+    CHECK(this->m_file_handler != nullptr) << "create sstable file fail.";
+}
+
+/*Open 'm_associated_file' for reading and rebuild the in-memory index from it.
+  Layout as produced by DumpFrom():
+      [KV records][KV checksum:4][meta records][meta checksum:4][meta offset:4][footer]
+  The last three fields are the "tail"; the meta area spans from the stored meta offset up to
+  the start of the tail. Any inconsistency aborts through CHECK.*/
+void SSTAble::ParseFile() noexcept {
+
+    this->m_file_handler = std::fopen(this->m_associated_file.c_str(),_AURORA_SSTABLE_READ_MODE_);
+    CHECK(this->m_file_handler != nullptr) << "open sstable file fail.";
+
+    CHECK(std::fseek(this->m_file_handler, 0, SEEK_END) == 0) << "seek sstable file fail" << this->m_associated_file;
+
+    std::size_t _footer_len = std::strlen(_AURORA_SSTABLE_FOOTER_);
+    std::size_t _mininal_file_size = _FOUR_BYTES_ * 3 + _footer_len;
+
+    //Keep ftell()'s result signed: stored in an unsigned variable its -1 error value passes the size check.
+    long _file_size = std::ftell(this->m_file_handler);
+    CHECK(_file_size >= 0) << "ftell sstable file " << this->m_associated_file << " fail..,errno:" << errno;
+    CHECK((std::size_t)_file_size >= _mininal_file_size) << "sstable file " << this->m_associated_file
+        << " too small,size:" << _file_size << ",minimal:" << _mininal_file_size;
+
+    std::size_t _tail_len = _footer_len + _FOUR_BYTES_ * 2;
+    long _tail_offset = _file_size - (long)_tail_len;
+    CHECK(std::fseek(this->m_file_handler, _tail_offset, SEEK_SET) == 0);
+
+    //The buffer must at least hold the tail; it is grown below if the meta area is larger.
+    std::size_t _buf_size = (_tail_len > 1024) ? _tail_len : 1024;
+    unsigned char *_p_buf = (unsigned char *)malloc(_buf_size);
+    CHECK(_p_buf != nullptr) << "malloc sstable parsing buffer fail,size:" << _buf_size;
+
+    std::size_t _read = std::fread(_p_buf, 1, _tail_len, this->m_file_handler);
+    CHECK(_read==_tail_len) << "fread fail,need:" << _tail_len << ",actual:" << _read;
+
+    //Check footer.
+    CHECK(std::strncmp((char*)&_p_buf[8], _AURORA_SSTABLE_FOOTER_, _footer_len) == 0) << "sstable footer wrong,check it:" << this->m_associated_file;
+
+    //Read meta offset.
+    uint32_t _meta_offset = 0;
+    std::memcpy(&_meta_offset, &_p_buf[4], _FOUR_BYTES_);
+    ConvertBigEndianToLocal(_meta_offset,&_meta_offset);
+
+    //Read meta checksum. Copy from the buffer contents, not from the address of the pointer variable.
+    uint32_t _meta_crc = 0;
+    std::memcpy(&_meta_crc, _p_buf, _FOUR_BYTES_);
+    ConvertBigEndianToLocal(_meta_crc,&_meta_crc);
+    //NOTE(review): the stored checksum is decoded but not compared with anything in this function.
+    (void)_meta_crc;
+
+    //A meta offset beyond the tail would make the length below wrap around.
+    CHECK((long long)_meta_offset <= (long long)_tail_offset) << "sstable meta offset wrong,check it:" << this->m_associated_file;
+
+    CHECK(std::fseek(this->m_file_handler, _meta_offset, SEEK_SET) == 0);
+
+    std::size_t _meta_area_len = _tail_offset - _meta_offset;
+    if (_meta_area_len > _buf_size) {
+        _p_buf = (unsigned char*)std::realloc(_p_buf, _meta_area_len);
+        CHECK(_p_buf != nullptr) << "realloc sstable parsing buffer fail,size:" << _meta_area_len;
+    }
+
+    this->ParseMeta(_p_buf,_meta_area_len);
+
+    free(_p_buf);
+}
+
+/*Read 'meta_len' bytes of meta records from the current file position into 'allocated_buf' and
+  register every record in 'm_shp_meta'.
+  Each record is m_single_meta_len bytes, all fields big-endian:
+      key offset(4) | key len(2) | val offset(4) | val len(2) | term(4) | index(8)
+  The key text is fetched from the KV area through the recorded key offset.
+  allocated_buf : caller-owned buffer of at least 'meta_len' bytes.*/
+void SSTAble::ParseMeta(unsigned char* &allocated_buf,std::size_t meta_len) noexcept {
+
+    //Local copy so the constant can be streamed without odr-using the static member.
+    const std::size_t _record_len = m_single_meta_len;
+
+    //A partial trailing record would make the loop below read past the end of the buffer.
+    CHECK(meta_len % _record_len == 0) << "sstable meta area length wrong:" << meta_len << ",record length:" << _record_len;
+
+    std::size_t _read = std::fread(allocated_buf, 1, meta_len, this->m_file_handler);
+    CHECK(_read==meta_len) << "fread fail,need:" << meta_len << ",actual:" << _read;
+
+    auto *_p_cur = allocated_buf;
+
+    while (_p_cur < allocated_buf + meta_len) {
+        //Parse key offset.
+        uint32_t _key_offset = 0;
+        std::memcpy(&_key_offset, _p_cur, _FOUR_BYTES_);
+        ConvertBigEndianToLocal(_key_offset,&_key_offset);
+        _p_cur += _FOUR_BYTES_;
+
+        //Parse key len.
+        uint16_t _key_len = 0;
+        std::memcpy(&_key_len, _p_cur, _TWO_BYTES_);
+        ConvertBigEndianToLocal(_key_len,&_key_len);
+        _p_cur += _TWO_BYTES_;
+
+        //Parse val offset.
+        uint32_t _val_offset = 0;
+        std::memcpy(&_val_offset, _p_cur, _FOUR_BYTES_);
+        ConvertBigEndianToLocal(_val_offset,&_val_offset);
+        _p_cur += _FOUR_BYTES_;
+
+        //Parse val len.
+        uint16_t _val_len = 0;
+        std::memcpy(&_val_len, _p_cur, _TWO_BYTES_);
+        ConvertBigEndianToLocal(_val_len,&_val_len);
+        _p_cur += _TWO_BYTES_;
+
+        //Parse term.
+        uint32_t _term = 0;
+        std::memcpy(&_term, _p_cur, _FOUR_BYTES_);
+        ConvertBigEndianToLocal(_term,&_term);
+        _p_cur += _FOUR_BYTES_;
+
+        //Parse index.
+        uint64_t _index = 0;
+        std::memcpy(&_index, _p_cur, _EIGHT_BYTES_);
+        ConvertBigEndianToLocal(_index,&_index);
+        _p_cur += _EIGHT_BYTES_;
+
+        //Read key. A short read would otherwise index a key padded with zero bytes.
+        std::string _key(_key_len,0);
+        CHECK(std::fseek(this->m_file_handler, _key_offset, SEEK_SET) == 0);
+        std::size_t _key_read = std::fread((char*)_key.data(), 1, _key_len, this->m_file_handler);
+        CHECK(_key_read==_key_len) << "fread fail,need:" << _key_len << ",actual:" << _key_read;
+
+        TypePtrHashableString _shp_key(new HashableString(_key));
+        TypePtrMeta _shp_meta(new Meta(_key_offset,_key_len,_val_offset,_val_len,_term,_index));
+        this->m_shp_meta->Insert(_shp_key,_shp_meta);
+
+        //Update max and min log ID.
+        LogIdentifier _cur_id;
+        _cur_id.Set(_term, _index);
+        if (_cur_id > this->m_max_log_id)
+            this->m_max_log_id.Set(_cur_id);
+        if (_cur_id < this->m_min_log_id)
+            this->m_min_log_id.Set(_cur_id);
+    }
+}
+
+/*Look 'key' up in the in-memory index and, when present, load its value from the file.
+  Returns false when the key is not indexed ('val' untouched), true after 'val' has been
+  filled. I/O failures abort through CHECK.*/
+bool SSTAble::Read(const std::string &key, std::string &val) const noexcept {
+
+    TypePtrMeta _shp_meta;
+
+    if (!this->m_shp_meta->Read(HashableString(key,true), _shp_meta))
+        return false;
+
+    //Need to open a new fd to support concurrently reading.
+    std::FILE* _handler = std::fopen(this->m_associated_file.c_str(),_AURORA_SSTABLE_READ_MODE_);
+    CHECK(_handler != nullptr) << "fopen sstable file fail.";
+
+    CHECK(std::fseek(_handler, _shp_meta->m_val_offset, SEEK_SET) == 0);
+
+    val.resize(_shp_meta->m_val_len);
+    std::size_t _read = std::fread((char*)val.data(), 1, _shp_meta->m_val_len, _handler);
+    CHECK(_read==_shp_meta->m_val_len) << "fread fail,need:" << _shp_meta->m_val_len << ",actual:" << _read;
+
+    CHECK(std::fclose(_handler)==0) << "Read sstable file fclose failed";
+
+    return true;
+}
+
+/*Append one key/value record (key bytes immediately followed by value bytes) to the KV area.
+  Records are staged in 'buf' and written out once the next record no longer fits.
+  buf/buff_len : caller-owned staging buffer and its capacity.
+  buf_offset   : in/out, number of staged bytes not yet written.
+  file_offset  : in/out, offset in the file at which this record starts; advanced by its length.
+  Also accumulates the record CRC into 'm_record_crc', registers the record's Meta in
+  'm_shp_meta' and widens the [min,max] log-id range.*/
+void SSTAble::AppendKvPair(const TypePtrHashableString &key, const TypePtrHashValue &val, void* buf,
+    uint32_t buff_len, uint32_t &buf_offset, uint32_t &file_offset) noexcept {
+
+    const auto &_key_str = key->GetStr();
+    const auto &_val_str = *val->m_val;
+
+    //Lengths are persisted in two bytes each; a silent truncation here would corrupt the index.
+    CHECK(_key_str.length() <= 0xFFFF && _val_str.length() <= 0xFFFF) << "sstable record too large,key len:"
+        << _key_str.length() << ",val len:" << _val_str.length();
+
+    uint16_t _key_len = (uint16_t)_key_str.length();
+    uint16_t _val_len = (uint16_t)_val_str.length();
+
+    int _cur_len = _key_len + _val_len;
+
+    if (buf_offset + _cur_len > buff_len) {
+        CHECK(std::fwrite(buf, 1, buf_offset, this->m_file_handler) == buf_offset) << "fwrite KV records fail,error no:" << errno;
+        CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush KV data to end of binlog file fail...";
+
+        //Reset the offset after a successful flush.
+        buf_offset = 0;
+    }
+
+    uint32_t _record_crc = 0;
+
+    if ((uint32_t)_cur_len > buff_len) {
+        /*The record is larger than the whole staging buffer, so copying it there would overrun
+          the buffer. The buffer has just been flushed, hence writing the record straight to the
+          file keeps the on-disk order intact.*/
+        std::string _record;
+        _record.reserve(_cur_len);
+        _record.append(_key_str.data(), _key_len);
+        _record.append(_val_str.data(), _val_len);
+
+        CHECK(std::fwrite(_record.data(), 1, _record.size(), this->m_file_handler) == _record.size()) << "fwrite KV records fail,error no:" << errno;
+        CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush KV data to end of binlog file fail...";
+
+        _record_crc = ::RaftCore::Tools::CalculateCRC32((unsigned char*)&_record[0], _cur_len);
+    } else {
+        unsigned char* _p_start_point = (unsigned char*)buf + buf_offset;
+        unsigned char* _p_cur = _p_start_point;
+
+        //Advance the global position identifiers.
+        buf_offset += _cur_len;
+
+        //Field-1 : key content.
+        std::memcpy(_p_cur, _key_str.data(), _key_len);
+        _p_cur += _key_len;
+
+        //Field-2 : val content.
+        std::memcpy(_p_cur, _val_str.data(), _val_len);
+        _p_cur += _val_len;
+
+        _record_crc = ::RaftCore::Tools::CalculateCRC32(_p_start_point, _cur_len);
+    }
+
+    this->m_record_crc += _record_crc;
+
+    uint32_t _val_offset = file_offset + _key_len;
+    this->m_shp_meta->Insert(key, std::make_shared<Meta>(file_offset, _key_len, _val_offset,
+                            _val_len, val->m_term, val->m_index));
+
+    file_offset += _cur_len;
+
+    //Update max and min log ID.
+    LogIdentifier _cur_id;
+    _cur_id.Set(val->m_term, val->m_index);
+    if (_cur_id > this->m_max_log_id)
+        this->m_max_log_id.Set(_cur_id);
+    if (_cur_id < this->m_min_log_id)
+        this->m_min_log_id.Set(_cur_id);
+}
+
+//Write 'checksum' as four big-endian bytes at the current file position and flush.
+void SSTAble::AppendChecksum(uint32_t checksum) noexcept {
+    uint32_t _copy = checksum;
+    ConvertToBigEndian(_copy, &_copy);
+    CHECK(std::fwrite(&_copy, 1, _FOUR_BYTES_, this->m_file_handler) == _FOUR_BYTES_) << "fwrite CRC fail,error no:" << errno;
+    CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush checksum to end of binlog file fail...";
+}
+
+//Remember the current file position as the start of the meta area.
+void SSTAble::CalculateMetaOffset() noexcept {
+    this->m_meta_offset = std::ftell(this->m_file_handler);
+    CHECK(this->m_meta_offset >= 0) << "ftell sstable file " << this->m_associated_file << "fail..,errno:" << errno;
+}
+
+//Write the recorded meta offset as four big-endian bytes at the current file position and flush.
+void SSTAble::AppendMetaOffset() noexcept {
+    //The field is four bytes wide; refuse offsets that would be truncated by the narrowing below.
+    CHECK(this->m_meta_offset >= 0 && (unsigned long long)this->m_meta_offset <= 0xFFFFFFFFull)
+        << "sstable meta offset out of range:" << this->m_meta_offset;
+
+    uint32_t _copy = (uint32_t)this->m_meta_offset;
+    ConvertToBigEndian(_copy, &_copy);
+    CHECK(std::fwrite(&_copy, 1, _FOUR_BYTES_, this->m_file_handler) == _FOUR_BYTES_) << "fwrite meta offset fail,error no:" << errno;
+    CHECK(std::fflush(this->m_file_handler) == 0) << "fflush meta offset to end of binlog file fail...";
+}
+
+/*Serialize one meta record (m_single_meta_len bytes, big-endian fields) into the staging buffer,
+  writing the buffer out first when the record would not fit.
+  buf/buff_len : caller-owned staging buffer and its capacity (must hold at least one record).
+  buf_offset   : in/out, number of staged bytes not yet written.
+  file_offset  : unused here; kept so the signature mirrors AppendKvPair().
+  Accumulates the record CRC into 'm_meta_crc'. Bytes still staged when the caller is done must
+  be written by the caller.*/
+void SSTAble::AppendMeta(const TypePtrHashableString &key, const TypePtrMeta &shp_meta, void* buf,
+    uint32_t buff_len, uint32_t &buf_offset, uint32_t &file_offset) noexcept {
+
+    (void)file_offset;
+
+    int _cur_buf_len = this->m_single_meta_len;
+
+    //A buffer smaller than one record would be overrun by the copies below.
+    CHECK(buff_len >= (uint32_t)_cur_buf_len) << "meta buffer too small:" << buff_len << ",need:" << _cur_buf_len;
+
+    if (buf_offset + _cur_buf_len > buff_len) {
+        CHECK(std::fwrite(buf, 1, buf_offset, this->m_file_handler) == buf_offset) << "fwrite meta fail,error no:" << errno;
+        CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush meta to end of binlog file fail...";
+
+        buf_offset = 0;
+    }
+
+    uint32_t _key_offset = shp_meta->m_key_offset;
+    uint16_t _key_len = (uint16_t)key->GetStr().length();
+    uint32_t _val_offset = shp_meta->m_val_offset;
+    uint16_t _val_len = shp_meta->m_val_len;
+    uint32_t _term = shp_meta->m_term;
+    uint64_t _index = shp_meta->m_index;
+
+    unsigned char* _p_start_point = (unsigned char*)buf + buf_offset;
+    auto *_p_cur = _p_start_point;
+
+    buf_offset += _cur_buf_len;
+
+    //Field-1 : key offset.
+    ConvertToBigEndian(_key_offset, &_key_offset);
+    std::memcpy(_p_cur, (unsigned char*)&_key_offset, _FOUR_BYTES_);
+    _p_cur += _FOUR_BYTES_;
+
+    //Field-2 : key len.
+    ConvertToBigEndian(_key_len, &_key_len);
+    std::memcpy(_p_cur, (unsigned char*)&_key_len, _TWO_BYTES_);
+    _p_cur += _TWO_BYTES_;
+
+    //Field-3 : val offset.
+    ConvertToBigEndian(_val_offset, &_val_offset);
+    std::memcpy(_p_cur, (unsigned char*)&_val_offset, _FOUR_BYTES_);
+    _p_cur += _FOUR_BYTES_;
+
+    //Field-4 : val len.
+    ConvertToBigEndian(_val_len, &_val_len);
+    std::memcpy(_p_cur, (unsigned char*)&_val_len, _TWO_BYTES_);
+    _p_cur += _TWO_BYTES_;
+
+    //Field-5 : term.
+    ConvertToBigEndian(_term, &_term);
+    std::memcpy(_p_cur, (unsigned char*)&_term, _FOUR_BYTES_);
+    _p_cur += _FOUR_BYTES_;
+
+    //Field-6 : index.
+    ConvertToBigEndian(_index, &_index);
+    std::memcpy(_p_cur, (unsigned char*)&_index, _EIGHT_BYTES_);
+    _p_cur += _EIGHT_BYTES_;
+
+    uint32_t _meta_crc = ::RaftCore::Tools::CalculateCRC32(_p_start_point, _cur_buf_len);
+    this->m_meta_crc += _meta_crc;
+}
+
+//Write the _AURORA_SSTABLE_FOOTER_ marker at the current file position and flush.
+void SSTAble::AppendFooter() noexcept {
+    static std::size_t _len = std::strlen(_AURORA_SSTABLE_FOOTER_);
+    CHECK(std::fwrite(_AURORA_SSTABLE_FOOTER_, 1, _len, this->m_file_handler) == _len) << "fwrite footer fail,error no:" << errno;
+    CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush footer to end of binlog file fail...";
+}
+
+/*Persist the whole content of 'src' into the already opened file, in this order:
+  KV records, KV checksum, meta records, meta checksum, meta offset, footer.
+  This is the layout ParseFile() expects.*/
+void SSTAble::DumpFrom(const MemoryTable &src) noexcept {
+
+    const uint32_t _estimated_avg_record_bytes = 20;
+    uint32_t _buf_size = _estimated_avg_record_bytes * ::RaftCore::Config::FLAGS_memory_table_max_item;
+
+    //The buffer is reused for the meta area, so it must hold at least one meta record.
+    if (_buf_size < (uint32_t)this->m_single_meta_len)
+        _buf_size = this->m_single_meta_len;
+
+    uint32_t _buf_offset = 0, _file_offset = 0;
+
+    void* _p_buf = malloc(_buf_size);
+    CHECK(_p_buf != nullptr) << "malloc sstable dumping buffer fail,size:" << _buf_size;
+
+    //Append KV records.
+    auto _append_kv = [&](const TypePtrHashableString &shp_key, const TypePtrHashValue &shp_val)->bool {
+        this->AppendKvPair(shp_key, shp_val, _p_buf, _buf_size, _buf_offset, _file_offset);
+        return true;
+    };
+    src.IterateByKey(_append_kv);
+
+    //Check if there are remaining bytes
+    if (_buf_offset > 0) {
+        CHECK(std::fwrite(_p_buf, 1, _buf_offset, this->m_file_handler) == _buf_offset) << "fwrite KV records fail,error no:" << errno;
+        CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush KV data to end of binlog file fail...";
+    }
+
+    //Append checksum of KV records.
+    this->AppendChecksum(this->m_record_crc);
+
+    //Get the offset of meta. Must be called immediately after appending KV checksum.
+    this->CalculateMetaOffset();
+
+    //To reuse the buff.
+    _buf_offset = 0;
+
+    //Append Meta data.
+    auto _append_meta = [&](const TypePtrHashableString &shp_key, const TypePtrMeta &shp_meta)->bool {
+        this->AppendMeta(shp_key, shp_meta, _p_buf, _buf_size, _buf_offset, _file_offset);
+        return true;
+    };
+    this->m_shp_meta->Iterate(_append_meta);
+
+    /*Write out the meta records still staged in the buffer. Without this the records appended
+      after the last in-loop flush never reach the file.*/
+    if (_buf_offset > 0) {
+        CHECK(std::fwrite(_p_buf, 1, _buf_offset, this->m_file_handler) == _buf_offset) << "fwrite meta fail,error no:" << errno;
+        CHECK (std::fflush(this->m_file_handler) == 0 ) << "fflush meta to end of binlog file fail...";
+    }
+
+    free(_p_buf);
+    _p_buf = nullptr;
+
+    //Append checksum of meta.
+    this->AppendChecksum(this->m_meta_crc);
+
+    this->AppendMetaOffset();
+
+    //Append footprint.
+    this->AppendFooter();
+}
+
+//Largest log id recorded while dumping or parsing this sstable.
+LogIdentifier SSTAble::GetMaxLogID() const noexcept {
+    return this->m_max_log_id;
+}
+
+//Smallest log id recorded while dumping or parsing this sstable.
+LogIdentifier SSTAble::GetMinLogID() const noexcept {
+    return this->m_min_log_id;
+}
+
+/*Visit every indexed record in the order defined by Meta::operator<.
+  op : invoked as op(meta, key); returning false stops the walk.
+  Returns false when 'op' stopped the walk, true when every record was visited.*/
+bool SSTAble::IterateByVal(std::function<bool(const Meta &meta, const HashableString &key)> op) const noexcept {
+
+    TypeOffset::ValueComparator _cmp = [](const TypePtrMeta &left, const TypePtrMeta &right)->bool { return *left < *right; };
+
+    std::map<std::shared_ptr<Meta>, std::shared_ptr<HashableString>,decltype(_cmp)> _ordered_by_value_map(_cmp);
+
+    this->m_shp_meta->GetOrderedByValue(_ordered_by_value_map);
+
+    for (const auto &_item : _ordered_by_value_map)
+        if (!op(*_item.first, *_item.second))
+            return false;
+
+    return true;
+}
+
+}
+
diff --git a/src/storage/sstable.h b/src/storage/sstable.h
new file mode 100644
index 0000000..b2ecbc2
--- /dev/null
+++ b/src/storage/sstable.h
@@ -0,0 +1,138 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+#pragma once
+
+#ifndef __AURORA_SSTABLE_H__
+#define __AURORA_SSTABLE_H__
+
+//Standard types named directly in this header; included here so the header stands on its own.
+#include <cstdint>
+#include <cstdio>
+#include <functional>
+#include <memory>
+#include <string>
+
+#include "common/comm_defs.h"
+#include "common/log_identifier.h"
+#include "tools/lock_free_hash.h"
+#include "storage/hashable_string.h"
+#include "storage/memory_table.h"
+
+#define _AURORA_SSTABLE_PREFIX_ "sstable.data"
+#define _AURORA_SSTABLE_MERGE_SUFFIX_ ".merged"
+#define _AURORA_DATA_DIR_ "data"
+
+namespace RaftCore::Storage {
+
+using ::RaftCore::DataStructure::LockFreeHash;
+using ::RaftCore::Common::LogIdentifier;
+using ::RaftCore::Storage::HashableString;
+
+/*On-disk table of key/value records plus an in-memory index (key -> Meta) over it.
+  An instance is built either from an existing file, from a MemoryTable that gets dumped to a
+  new file, or from two other sstables.*/
+class SSTAble final{
+
+public:
+
+    //Location and log identity of one record inside the sstable file.
+    struct Meta {
+
+        //NOTE(review): parameters presumably follow the member order below (call sites pass
+        //key offset, key len, val offset, val len, term, index) -- confirm in sstable.cc.
+        Meta(uint32_t a, uint16_t b, uint32_t c, uint16_t d,uint32_t e,uint64_t f);
+
+        uint32_t m_key_offset;
+        uint16_t m_key_len;
+        uint32_t m_val_offset;
+        uint16_t m_val_len;
+
+        uint32_t m_term;
+        uint64_t m_index;
+
+        //Ordering used by IterateByVal(). Left non-const here because the definition is not
+        //part of this header; both must change together.
+        bool operator<(const Meta &other);
+    };
+
+    typedef std::shared_ptr<Meta>  TypePtrMeta;
+
+public:
+
+    //NOTE(review): presumably loads the existing sstable file 'file' -- the constructor bodies
+    //are not visible from this header.
+    SSTAble(const char* file) noexcept;
+
+    //NOTE(review): presumably builds a merged sstable out of 'from' and 'to' -- confirm in sstable.cc.
+    SSTAble(const SSTAble &from,const SSTAble &to) noexcept;
+
+    //NOTE(review): presumably dumps 'src' into a newly created sstable file -- confirm in sstable.cc.
+    SSTAble(const MemoryTable &src) noexcept;
+
+    virtual ~SSTAble() noexcept;
+
+    //Fetch the value stored for 'key'; returns false when the key is not in this sstable.
+    bool Read(const std::string &key, std::string &val) const noexcept;
+
+    LogIdentifier GetMaxLogID() const noexcept;
+
+    LogIdentifier GetMinLogID() const noexcept;
+
+    const std::string& GetFilename() const noexcept;
+
+    //Visit records ordered by Meta::operator<; 'op' returning false stops the walk and makes
+    //this function return false.
+    bool IterateByVal(std::function<bool(const Meta &meta, const HashableString &key)> op) const noexcept;
+
+private:
+
+    //Open (and name, when 'file_name' is null) the backing file.
+    void CreateFile(const char* file_name = nullptr) noexcept;
+
+    //Rebuild the in-memory index from the backing file.
+    void ParseFile() noexcept;
+
+    //Decode 'meta_len' bytes of meta records read into 'allocated_buf'.
+    void ParseMeta(unsigned char* &allocated_buf,std::size_t meta_len) noexcept;
+
+    //Write the whole content of 'src' to the backing file.
+    void DumpFrom(const MemoryTable &src) noexcept;
+
+    //Stage/write one key-value record; 'buf_offset' and 'file_offset' are advanced.
+    void AppendKvPair(const TypePtrHashableString &key, const TypePtrHashValue &val, void* buf,
+        uint32_t buff_len, uint32_t &buf_offset, uint32_t &file_offset) noexcept;
+
+    //Stage/write one meta record; 'buf_offset' is advanced.
+    void AppendMeta(const TypePtrHashableString &key, const TypePtrMeta &shp_meta, void* buf,
+        uint32_t buff_len, uint32_t &buf_offset, uint32_t &file_offset) noexcept;
+
+    //Record the current file position as the start of the meta area.
+    void CalculateMetaOffset() noexcept;
+
+    //Write a four-byte big-endian checksum.
+    void AppendChecksum(uint32_t checksum) noexcept;
+
+    //Write the recorded meta offset as four big-endian bytes.
+    void AppendMetaOffset() noexcept;
+
+    //Write the footer marker.
+    void AppendFooter() noexcept;
+
+private:
+
+    //Sum of the CRC32 of every KV record written.
+    uint32_t    m_record_crc = 0;
+
+    //Sum of the CRC32 of every meta record written.
+    uint32_t    m_meta_crc = 0;
+
+    //File offset at which the meta area starts.
+    long        m_meta_offset = 0;
+
+    /*NOTE(review): the template arguments were lost in this copy of the source. They are
+      restored from usage: the table is filled with (TypePtrHashableString, TypePtrMeta) pairs
+      and queried with a HashableString key -- confirm against lock_free_hash.h.*/
+    typedef LockFreeHash<HashableString, Meta>  TypeOffset;
+    typedef std::shared_ptr<TypeOffset>         TypePtrOffset;
+
+    //In-memory index: key -> location of the record in the file.
+    TypePtrOffset   m_shp_meta;
+
+    LogIdentifier   m_max_log_id;
+
+    LogIdentifier   m_min_log_id;
+
+    std::string     m_associated_file = "";
+
+    std::FILE   *m_file_handler = nullptr;
+
+    //Size in bytes of one serialized meta record: 3 four-byte, 2 two-byte and 1 eight-byte field.
+    static const int    m_single_meta_len = _FOUR_BYTES_ * 3 + _TWO_BYTES_ * 2 + _EIGHT_BYTES_;
+
+private:
+
+    SSTAble(const SSTAble&) = delete;
+
+    SSTAble& operator=(const SSTAble&) = delete;
+
+};
+
+} //end namespace
+
+#endif
diff --git a/src/storage/storage.cc b/src/storage/storage.cc
new file mode 100644
index 0000000..49aa5c5
--- /dev/null
+++ b/src/storage/storage.cc
@@ -0,0 +1,682 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see <https://www.gnu.org/licenses/>.
+*/
+
+
+#include <cstdio>
+#include <cstdlib>
+#include <list>
+#include <memory>
+#include <regex>
+#include <string>
+#include <vector>
+
+#include "boost/filesystem.hpp"
+
+#include "binlog/binlog_meta_data.h"
+#include "binlog/binlog_singleton.h"
+#include "storage/storage.h"
+#include "state/state_mgr.h"
+#include "common/comm_view.h"
+
+namespace RaftCore::Storage {
+
+namespace  fs = ::boost::filesystem;
+
+using ::RaftCore::Common::ReadLock;
+using ::RaftCore::Common::WriteLock;
+using ::RaftCore::Common::CommonView;
+using ::RaftCore::BinLog::FileMetaData;
+using ::RaftCore::BinLog::BinLogOperator;
+using ::RaftCore::Storage::HashableString;
+using ::RaftCore::State::StateMgr;
+
+StorageMgr::StorageMgr() noexcept : m_path(_AURORA_DATA_DIR_) {}
+
+StorageMgr::~StorageMgr() noexcept {
+    if (this->m_initialized)
+        this->UnInitialize();
+}
+
+/*Bring the storage up for the given role.
+  Loads every sstable found in the data directory (newest first, deleting the ones that follow
+  a '.merged' file), derives the last persisted log id from the newest sstable and, unless
+  'reset' is set, replays the role's binlog files into a fresh memory table.
+  role  : non-null role name used to locate the binlog files.
+  reset : true skips the binlog replay.
+  Always returns true; failures abort through CHECK.*/
+bool StorageMgr::Initialize(const char* role, bool reset) noexcept {
+
+    CHECK(role != nullptr);
+
+    this->m_role_str = role;
+
+    this->m_garbage_sstable.store(nullptr);
+    this->m_garbage_memory_table.store(nullptr);
+
+    this->m_last_committed.store(CommonView::m_zero_log_id);
+    this->m_last_persist.store(CommonView::m_zero_log_id);
+
+    if (!fs::exists(this->m_path))
+        fs::create_directory(this->m_path);
+
+    CHECK(fs::is_directory(this->m_path)) << "scan data directory fail,cannot save current file";
+
+    this->m_memory_table_head.store(new UnorderedSingleListNode<MemoryTable>());
+
+    std::list<std::string> _all_sstable_files;
+
+    for (auto&& x : fs::directory_iterator(this->m_path)) {
+        std::string _file_name = x.path().filename().string();
+        std::string::size_type pos = _file_name.find(_AURORA_SSTABLE_PREFIX_);
+        if (pos == std::string::npos)
+            continue;
+        _all_sstable_files.emplace_back(_file_name);
+    }
+
+    /*Sort by descending order.File number won't be large,it's acceptable to sorting a std::list
+      compared to using a 'std::vector' and 'std::sort' .*/
+    _all_sstable_files.sort([](const std::string &left, const std::string &right)->bool {return right < left; });
+
+    this->m_sstable_table_head.store(nullptr);
+    UnorderedSingleListNode<SSTAble>* _p_cur_node = this->m_sstable_table_head.load();
+
+    bool _merged_flag = false;
+    for (const auto &_item : _all_sstable_files) {
+        const std::string &_file_name = _item;
+        auto _cur_path = this->m_path / _file_name;
+
+        if (_merged_flag) {
+            LOG(INFO) << "Deleting merged files during initializing: " << _file_name;
+            fs::remove(fs::path(_cur_path));
+            continue;
+        }
+
+        LOG(INFO) << "Parsing and loading sstable:" << _cur_path.string();
+        auto* _p_new_node = new UnorderedSingleListNode<SSTAble>(_cur_path.string().c_str());
+
+        //Append to the tail so the list keeps the descending file-name order.
+        if (_p_cur_node)
+            _p_cur_node->m_next.store(_p_new_node);
+        else
+            this->m_sstable_table_head.store(_p_new_node);
+        _p_cur_node = _p_new_node;
+
+        //File with a merged suffix must be the last one need to be loaded.
+        if (_file_name.find(_AURORA_SSTABLE_MERGE_SUFFIX_) != std::string::npos)
+            _merged_flag = true; //Delete all following sstable files, since they are already merged.
+    }
+
+    //Find latest entry ID that has been stored in SSTAbles.
+    LogIdentifier _max_log_id;
+    _max_log_id.Set(0, 0);
+
+    _p_cur_node = this->m_sstable_table_head.load();
+    if (_p_cur_node != nullptr)
+        _max_log_id = _p_cur_node->m_data->GetMaxLogID();
+    else
+        LOG(WARNING) << "no sstable found,data will remain empty after initialization.";
+
+    this->m_last_committed.store(_max_log_id);
+    this->m_last_persist.store(_max_log_id);
+
+    //Construct memory table from binlog by _max_log_id.
+    if (!reset)
+        this->ConstructMemoryTable(_max_log_id);
+
+    this->m_initialized = true;
+
+    LOG(INFO) << "[Storage] m_last_committed initialized as:" << this->m_last_committed.load();
+
+    return true;
+}
+
+/*Replay into the head memory table every entry of 'binlog_file_name' that is newer than 'from'.
+  Returns true when this file connects to 'from' (it holds an entry <= 'from', its first
+  replayed entry names 'from' as predecessor, or it has no entries at all), i.e. no older
+  binlog file needs to be replayed; false otherwise.*/
+bool StorageMgr::ConstructFromBinlog(const LogIdentifier &from, const std::string &binlog_file_name) noexcept {
+
+    LOG(INFO) << "[Storage] parsing binlog file:" << binlog_file_name;
+
+    BinLogOperator _cur_binlog;
+    _cur_binlog.Initialize(binlog_file_name.c_str(), true);
+
+    /*NOTE(review): the element type was lost in this copy of the source. 'FileMetaData::IdxPair'
+      is assumed (FileMetaData is imported above and used nowhere else in this file) -- confirm
+      against BinLogOperator::GetOrderedMeta.*/
+    std::list<std::shared_ptr<FileMetaData::IdxPair>> _file_meta;
+    _cur_binlog.GetOrderedMeta(_file_meta);
+
+    if (_file_meta.empty())
+        return true;
+
+    std::FILE* _f_handler = std::fopen(binlog_file_name.c_str(),_AURORA_BINLOG_READ_MODE_);
+    CHECK(_f_handler != nullptr) << "ConstructMemoryTable open binlog file "
+            << binlog_file_name << " fail..,errno:" << errno;
+
+    //Scan backwards for the newest entry that is not newer than 'from'.
+    auto _riter = _file_meta.crbegin();
+    for (; _riter != _file_meta.crend(); ++_riter) {
+        if ((*_riter)->operator<=(from))
+            break;
+    }
+
+    bool _finished = (_riter != _file_meta.crend());
+
+    //First entry to replay: the one right after '*_riter'. Equals cend() when nothing is newer than 'from'.
+    const auto _first_to_load = _riter.base();
+
+    //Owned by a vector so the buffer is released on every path.
+    std::vector<unsigned char> _buf;
+    for (auto _iter = _first_to_load; _iter != _file_meta.cend(); _iter++) {
+
+        //Seek to position
+        CHECK(std::fseek(_f_handler, (*_iter)->m_offset, SEEK_SET) == 0) << "ConstructMemoryTable seek binlog file "
+            << binlog_file_name << "fail..,errno:" << errno;
+
+        //Read protobuf buf length
+        uint32_t _buf_len = 0;
+        CHECK(std::fread(&_buf_len, 1, _FOUR_BYTES_, _f_handler) == _FOUR_BYTES_) << "ConstructMemoryTable read binlog file "
+            << binlog_file_name << "fail..,errno:" << errno;
+        ::RaftCore::Tools::ConvertBigEndianToLocal(_buf_len, &_buf_len);
+
+        //Read protobuf buf
+        _buf.resize(_buf_len);
+        CHECK(std::fread(_buf.data(), 1, _buf_len, _f_handler) == _buf_len) << "ConstructMemoryTable read binlog file "
+            << binlog_file_name << " fail..,errno:" << errno;
+
+        ::raft::BinlogItem _binlog_item;
+        CHECK(_binlog_item.ParseFromArray(_buf.data(), _buf_len)) << "ConstructMemoryTable parse protobuf buffer fail " << binlog_file_name;
+
+        //If the first log entry's pre_id matches 'from', also means the parsing process is finished.
+        if (_iter == _first_to_load) {
+            auto _pre_of_first = ::RaftCore::Common::ConvertID(_binlog_item.entity().pre_log_id());
+            _finished |= (_pre_of_first == from);
+        }
+
+        auto *_p_wop = _binlog_item.mutable_entity()->mutable_write_op();
+        const auto &_entity_id = _binlog_item.entity().entity_id();
+        this->m_memory_table_head.load()->m_data->Insert(*_p_wop->mutable_key(), *_p_wop->mutable_value(), _entity_id.term(), _entity_id.idx());
+
+        LogIdentifier _cur_id;
+        _cur_id.Set(_entity_id.term(), _entity_id.idx());
+
+        /*Note: m_last_committed may greater than the real LCL of the current server, it's okay b/c:
+          1> if current server is the leader, any log entries in the binlog should have been committed, as the way aurora works.
+          2> if current server is a follower, and the 'm_last_committed' > the last consistent
+              log entry, a SYNC_DATA would eventually triggered.
+          3> if current server is a candidate, no influence on that.
+        */
+        if (_cur_id > this->m_last_committed.load())
+            this->m_last_committed.store(_cur_id);
+    }
+
+    CHECK(std::fclose(_f_handler) == 0) << "ConstructMemoryTable: close binlog file fail.";
+
+    if (_finished) {
+        //'_first_to_load' is cend() when every entry is <= 'from'; it must not be dereferenced then.
+        if (_first_to_load != _file_meta.cend())
+            LOG(INFO) << "binlog:" << binlog_file_name << " reach end,from:" << from << ", _riter idx:" << (*_first_to_load)->m_index;
+        else
+            LOG(INFO) << "binlog:" << binlog_file_name << " reach end,from:" << from << ", no entry newer than it.";
+    }
+
+    return _finished;
+}
+
+/*Rebuild the head memory table from the role's binlog files, newest file first, stopping at the
+  first file that connects to 'from'. Aborts when no file connects, since entries would be missing.*/
+void StorageMgr::ConstructMemoryTable(const LogIdentifier &from) noexcept {
+
+    FindRoleBinlogFiles(this->m_role_str, this->m_loaded_binlog_files);
+
+    if (this->m_loaded_binlog_files.empty()) {
+        LOG(INFO) << "found no binlog available.";
+        return;
+    }
+
+    /*Sort by descending order.File number won't be large,it's acceptable to sorting a std::list
+      compared to using a 'std::vector' and 'std::sort' .*/
+    this->m_loaded_binlog_files.sort([](const std::string &left, const std::string &right)->bool {
+
+        //Numeric suffix after the first '-' of the file name; 0 when there is none.
+        auto _get_suffix = [](const std::string &file_name) {
+            int _suffix = 0;
+            std::string::size_type pos = file_name.find("-");
+            if (pos != std::string::npos)
+                _suffix = std::atoi(file_name.substr(pos + 1).c_str());
+            return _suffix;
+        };
+
+        return _get_suffix(left) > _get_suffix(right);
+    });
+
+    //list: 5/4/3/2/1/0, but 0 is the latest one, move it to the first place of the list.
+    std::string _lastest_file = this->m_loaded_binlog_files.back();
+    this->m_loaded_binlog_files.pop_back();
+    this->m_loaded_binlog_files.push_front(_lastest_file);
+
+    bool _find_latest = false;
+    for (const auto& _file_name : this->m_loaded_binlog_files) {
+        if (!this->ConstructFromBinlog(from, _file_name))
+            continue;
+
+        LOG(INFO) << "[Storage] binlog file before(not include): " << _file_name << " can be manually deleted";
+
+        _find_latest = true;
+        break;
+    }
+
+    CHECK(_find_latest) << "binlog content incomplete, last persistent id:" << this->m_last_persist.load();
+}
+
+//Release all in-memory tables and mark the manager as not initialized.
+void StorageMgr::UnInitialize() noexcept {
+    this->ClearInMemoryData();
+    this->m_initialized = false;
+}
+
+/*Look 'key' up, first in the memory tables and then in the sstables, each walked from the list
+  head on. Returns true with 'val' filled on the first hit, false when no table holds the key.*/
+bool StorageMgr::Get(const std::string &key,std::string &val) const noexcept{
+
+    //Find in memory tables.
+    auto _cur_mem_node = this->m_memory_table_head.load();
+    while (_cur_mem_node != nullptr) {
+        if (_cur_mem_node->m_data->GetData(key, val))
+            return true;
+
+        _cur_mem_node = _cur_mem_node->m_next.load();
+    }
+
+    //Find in SSTables.
+    auto _p_cur_sstable_node = this->m_sstable_table_head.load();
+    while (_p_cur_sstable_node != nullptr) {
+        if (_p_cur_sstable_node->m_data->Read(key, val))
+            return true;
+
+        _p_cur_sstable_node = _p_cur_sstable_node->m_next.load();
+    }
+
+    return false;
+}
+
+//Dump 'src' into a new sstable and push that sstable at the head of the sstable list.
+void StorageMgr::DumpMemoryTable(const MemoryTable *src) noexcept {
+
+    auto *_cur_head = this->m_sstable_table_head.load();
+
+    auto *_new_sstable_head = new UnorderedSingleListNode<SSTAble>(*src);
+    _new_sstable_head->m_next.store(_cur_head);
+
+    //On failure the CAS refreshes '_cur_head'; re-link to it and retry.
+    while (!this->m_sstable_table_head.compare_exchange_strong(_cur_head, _new_sstable_head)) {
+        _new_sstable_head->m_next.store(_cur_head);
+        LOG(WARNING) << "concurrently dumping memory table CAS conflict ,continue...";
+    }
+
+    VLOG(89) << "storage successfully dumped a memory table to sstable:"
+            << _new_sstable_head->m_data->GetFilename();
+}
+
+//Reclaim detached memory tables, then merge/recycle sstables, under the write lock.
+void StorageMgr::PurgeGarbage() noexcept {
+    //Purging process should be mutual exclusion from releasing process.
+    WriteLock _w_lock(this->m_mutex);
+
+    VLOG(89) << "storage purging started.";
+
+    this->PurgeMemoryTable();
+
+#ifdef _STORAGE_TEST_
+    while (this->PurgeSSTable());
+#else
+    this->PurgeSSTable();
+#endif
+}
+
+//Detach the whole garbage memory-table list and delete every node in it.
+void StorageMgr::PurgeMemoryTable() noexcept {
+
+    auto _p_cur = this->m_garbage_memory_table.load();
+    if (_p_cur == nullptr)
+        return ;
+
+    while (!this->m_garbage_memory_table.compare_exchange_weak(_p_cur, nullptr))
+        continue;
+
+    //Here '_p_cur' is the cut off list and all elements in it should be reclaimed.
+    while (_p_cur != nullptr) {
+        auto *_p_pre = _p_cur;
+        _p_cur = _p_cur->m_next.load();
+        delete _p_pre;
+
+        LOG(INFO) << "purged one memory table.";
+    }
+}
+
+/*Detach the last two nodes of the garbage sstable list, delete them and remove their files.
+  Does nothing when the list holds fewer than two nodes.*/
+void StorageMgr::RecycleLast2SStables() noexcept {
+
+    auto *_p_cur_garbage = this->m_garbage_sstable.load();
+    if (_p_cur_garbage == nullptr)
+        return;
+
+    auto *_p_next_garbage = _p_cur_garbage->m_next.load();
+    if (_p_next_garbage == nullptr)
+        return;
+
+    int _garbage_size = 2;
+
+    /*Walk to the last two nodes. '_p_pre_garbage' must trail '_p_cur_garbage' by one node so the
+      detaching CAS below targets the right 'next' pointer. (It used to stay on the head because
+      of a self-assignment, which made the CAS fail for lists of four or more nodes.)*/
+    auto *_p_pre_garbage = _p_cur_garbage;
+    while (_p_next_garbage->m_next.load() != nullptr) {
+        if (_p_pre_garbage != _p_cur_garbage)
+            _p_pre_garbage = _p_cur_garbage;
+        _p_cur_garbage  = _p_next_garbage;
+        _p_next_garbage = _p_next_garbage->m_next.load();
+        _garbage_size++;
+    }
+
+    //Detach the last two garbage nodes.
+    if (_garbage_size == 2) {
+        if (!this->m_garbage_sstable.compare_exchange_strong(_p_cur_garbage, nullptr)) {
+            LOG(INFO) << "recursive RecycleLast2SStables occurred.";
+            return this->RecycleLast2SStables();
+        }
+    } else {
+        CHECK(_p_pre_garbage->m_next.compare_exchange_strong(_p_cur_garbage, nullptr))
+            << "recycle sstable garbage last2 CAS fail.";
+    }
+
+    auto* _release_cur = _p_cur_garbage;
+    while (_release_cur != nullptr) {
+        std::string _filename = _release_cur->m_data->GetFilename();
+
+        auto *_p_tmp = _release_cur->m_next.load();
+        //Clear the in-memory data.
+        delete _release_cur;
+
+        //Remove the sstable file.
+        LOG(INFO) << "Deleting garbage sstable files : " << _filename;
+        fs::remove(fs::path(_filename));
+
+        _release_cur = _p_tmp;
+    }
+}
+
+/*Merge the last two sstables of the list into a new one that replaces them, and move the two
+  merged nodes to the garbage list. Returns false when fewer than two sstables exist, true
+  after a merge.*/
+bool StorageMgr::PurgeSSTable() noexcept {
+
+    this->RecycleLast2SStables();
+
+    auto _p_cur = this->m_sstable_table_head.load();
+    if (_p_cur == nullptr)
+        return false;
+
+    auto _p_next = _p_cur->m_next.load();
+    if (_p_next == nullptr)
+        return false;
+
+    int _garbage_part_size = 2;
+
+    //Walk to the last two nodes; '_p_pre' trails '_p_cur' by one node.
+    auto _p_pre = _p_cur;
+    while (_p_next->m_next.load() != nullptr) {
+        if (_p_pre != _p_cur)
+            _p_pre = _p_cur;
+        _p_cur  = _p_next;
+        _p_next = _p_next->m_next.load();
+        ++_garbage_part_size;
+    }
+
+    UnorderedSingleListNode<SSTAble> *_new_sstable_node = new UnorderedSingleListNode<SSTAble>(*_p_next->m_data,*_p_cur->m_data);
+
+    if (_garbage_part_size == 2)
+        this->m_sstable_table_head.store(_new_sstable_node);
+    else {
+        /*_p_pre always pointing to the element immediately preceding the one to be merged into, and its
+          origin value is related to the #sstables.And the following CAS operation should always
+          succeed since there is no multiple thread updating scenarios for the 'next' pointer of
+          _p_pre.*/
+        CHECK(_p_pre->m_next.compare_exchange_strong(_p_cur, _new_sstable_node))
+            << "update merged sstable previous next pointer fail.";
+    }
+
+    /*Note :We cannot release the purged sstable objects immediately after the moment we've finished purging
+      since there may have other threads accessing them. Here we push them into a 'garbage' list and
+      releasing them later,aka next round of purging. */
+
+    LOG(INFO) << "merged sstable of " << _p_cur->m_data->GetFilename() << " and "
+        << _p_next->m_data->GetFilename() << " into " << _new_sstable_node->m_data->GetFilename();
+
+    //Insert the new nodes at garbage list's head.
+    auto *_p_cur_head = m_garbage_sstable.load();
+    _p_next->m_next.store(_p_cur_head);
+
+    while (!m_garbage_sstable.compare_exchange_weak(_p_cur_head, _p_cur))
+        _p_next->m_next.store(_p_cur_head);
+
+    return true;
+}
+
+/*Store 'key' -> 'value' under 'log_id' in the head memory table and raise 'm_last_committed'
+  to 'log_id' if it is newer. When the head table exceeds FLAGS_memory_table_max_item a fresh
+  head is installed, the full table is dumped to an sstable and then moved to the garbage list.
+  Always returns true.*/
+bool StorageMgr::Set(const LogIdentifier &log_id ,const std::string &key, const std::string &value) noexcept{
+
+    auto *_cur_head = this->m_memory_table_head.load();
+    _cur_head->m_data->Insert(key, value, log_id.m_term, log_id.m_index);
+
+    LogIdentifier _cur_id;
+    _cur_id.Set(this->m_last_committed.load());
+
+    //A failed CAS refreshes '_cur_id', so the loop ends once another thread stored a newer id.
+    while (log_id > _cur_id) {
+        if (this->m_last_committed.compare_exchange_strong(_cur_id, log_id))
+            break;
+    }
+
+    if (_cur_head->m_data->Size() <= ::RaftCore::Config::FLAGS_memory_table_max_item)
+        return true;
+
+    UnorderedSingleListNode<MemoryTable>* _new_head = new UnorderedSingleListNode<MemoryTable>();
+
+    bool _insert_succeed = false;
+    while (_cur_head->m_data->Size() > ::RaftCore::Config::FLAGS_memory_table_max_item) {
+
+        _new_head->m_next.store(_cur_head);
+
+        _insert_succeed = this->m_memory_table_head.compare_exchange_strong(_cur_head, _new_head);
+        if (!_insert_succeed)
+            continue; //Here '_cur_head' already been updated to the latest head of 'm_memory_table_head'.
+
+        this->DumpMemoryTable(_cur_head->m_data);
+
+        /*Note: 1.Since DumpMemoryTable() can be executed simultaneously, we need to ensure that all subsequent
+                nodes after '_cur_head' in the single linked list are dumped(aka,their data can be found in the sstable
+                link list now) before cutting off _cur_head's ancestor's link to it. Otherwise client cannot query
+                data in the node which are cut off too early.
+                2. Records among sstables can be not in order because of the reasons:
+                  1> records can invoke 'StorageMgr::Set' simultaneously.
+                  2> memory tables are dumped into sstables simultaneously.
+                  3> sstable file names are not in lexicographical order if more than one sstable falls into the
+                     same microsecond time windows.
+
+                  But the chance of getting unordered records among sstables is slim:
+                  1> & 2> depends on a concurrent dumping which itself is also rare.
+                  3> can be basically ignored.
+
+                  It's still acceptable even if that happened:
+                  1) No impacts on reading, you can't decide a strict order for records that very close
+                     to each other, in the first place.
+                  2) get unordered result in 'GetSlice', but it's okay for the scenario where it applies
+                     to : sync data to the lag behind followers.
+        */
+        while (_cur_head->m_next.load() != nullptr);
+
+        _new_head->m_next.store(nullptr);
+
+        //Push the dumped table onto the garbage list; it is reclaimed by PurgeMemoryTable().
+        auto *_p_garbage_head = this->m_garbage_memory_table.load();
+        _cur_head->m_next = _p_garbage_head;
+        while (!this->m_garbage_memory_table.compare_exchange_weak(_p_garbage_head, _cur_head))
+            _cur_head->m_next = _p_garbage_head;
+
+        return true;
+    }
+
+    //Another thread installed a new head first; the node prepared here was never published.
+    if (!_insert_succeed)
+        delete _new_head;
+
+    return true;
+}
+
+const LogIdentifier StorageMgr::GetLastCommitted() const noexcept {
+    return this->m_last_committed.load();
+}
+
+//Release the four in-memory lists (garbage and live memory tables, live and garbage sstables).
+void StorageMgr::ClearInMemoryData() noexcept {
+
+    //Clear in-memory data themselves.
+    this->ReleaseData(this->m_garbage_memory_table);
+    this->ReleaseData(this->m_memory_table_head);
+    this->ReleaseData(this->m_sstable_table_head);
+    this->ReleaseData(this->m_garbage_sstable);
+}
+
+/*Wipe the storage: drop the in-memory tables, delete every loaded binlog file except the first
+  one, delete and recreate the data directory, then initialize again without binlog replay.*/
+void StorageMgr::Reset() noexcept {
+
+    this->ClearInMemoryData();
+
+    //Clear on-disk data.
+    CHECK(!this->m_loaded_binlog_files.empty());
+
+    if(this->m_loaded_binlog_files.size() > 1) {
+        auto _iter = this->m_loaded_binlog_files.cbegin();
+
+        //Skip the first one since it's the default binlog and shall be delete in the BinlogMgr.
+        _iter++;
+
+        for (; _iter != this->m_loaded_binlog_files.cend(); ++_iter) {
+            const std::string &_delete_binlog_file = *_iter;
+            LOG(INFO) << "Deleting the loaded binlog files for storage Reset:" << _delete_binlog_file;
+            CHECK(std::remove(_delete_binlog_file.c_str()) == 0);
+        }
+    }
+
+    LOG(INFO) << "Deleting the whole data directory for storage Reset.";
+
+    /*fs::remove() only deletes an empty directory, so it failed whenever sstable files were still
+      present. remove_all() deletes the directory together with its content; the error_code
+      overload keeps this noexcept function from terminating on a thrown filesystem_error.*/
+    boost::system::error_code _ec;
+    fs::remove_all(this->m_path, _ec);
+    CHECK(!_ec) << "remove data directory fail:" << _ec.message();
+
+    CHECK(!fs::exists(this->m_path));
+    fs::create_directory(this->m_path);
+
+    //Reinitialization.
+    this->Initialize(this->m_role_str.c_str(), true);
+}
+
+/*Append to 'output_list' up to 'step_len' records with a log id greater than 'start_at', taken
+  from the sstables. Only sstables whose max log id exceeds 'start_at' are visited, oldest of
+  them first.*/
+void StorageMgr::GetSliceInSSTable(const LogIdentifier& start_at, int step_len, std::list<StorageItem> &output_list) const noexcept {
+
+    auto *_p_cur_sstable = this->m_sstable_table_head.load();
+    if (_p_cur_sstable == nullptr)
+        return;
+
+    std::vector<UnorderedSingleListNode<SSTAble>*> _access_list;
+
+    //Collect, from the head on, the sstables that still hold something newer than 'start_at'.
+    do {
+        auto _cur_max_id = _p_cur_sstable->m_data->GetMaxLogID();
+        if (start_at >= _cur_max_id)
+            break;
+
+        _access_list.emplace_back(_p_cur_sstable);
+        _p_cur_sstable = _p_cur_sstable->m_next.load();
+    } while (_p_cur_sstable != nullptr);
+
+    if (_access_list.empty())
+        return;
+
+    //Records' log ID in the newer sstable are all larger than those in the older sstable,
+    //so the collected sstables are read in reverse order below.
+    int _counter = 0;
+
+    //'_p_cur_sstable' is captured by reference: the loop below points it at the sstable being iterated.
+    auto _reading = [&](const SSTAble::Meta &meta,const HashableString &key) ->bool{
+
+        if (_counter >= step_len)
+            return false;
+
+        LogIdentifier _cur_id;
+        _cur_id.Set(meta.m_term,meta.m_index);
+        if (_cur_id <= start_at)
+            return true;
+
+        std::string _val = "";
+        _p_cur_sstable->m_data->Read(*key.GetStrPtr(), _val);
+
+        output_list.emplace_back(_cur_id, key.GetStrPtr(), std::make_shared<std::string>(std::move(_val)));
+        _counter++;
+
+        return true;
+    };
+
+    for (auto _iter = _access_list.crbegin(); _iter != _access_list.crend(); ++_iter) {
+        _p_cur_sstable = *_iter;
+        if (!(*_iter)->m_data->IterateByVal(_reading))
+            break;
+    }
+
+    return;
+}
+
+/*Append to 'output_list' up to 'step_len' records with a log id greater than 'start_at', taken
+  from the memory tables, visiting the tables from the tail of the list to its head.*/
+void StorageMgr::GetSliceInMemory(const LogIdentifier& start_at, int step_len,
+    std::list<StorageItem> &output_list) const noexcept {
+
+    auto *_p_cur_memory_table = this->m_memory_table_head.load();
+    if (_p_cur_memory_table == nullptr)
+        return;
+
+    /*Caveat : There is an implicit constrain that the dumping memory tables won't be reclaimed
+              during the following iterations. Since there are several seconds before next GC, we can just
+              rely on it.*/
+
+    std::vector<UnorderedSingleListNode<MemoryTable>*> _access_list;
+
+    /*TODO: Prevent from a rare case of losing dumping tables while switch from iterating sstable
+            to iterating memory table.*/
+    while (_p_cur_memory_table != nullptr) {
+        _access_list.push_back(_p_cur_memory_table);
+        _p_cur_memory_table = _p_cur_memory_table->m_next.load();
+    }
+
+    CHECK(!_access_list.empty());
+
+    int _counter = 0;
+    auto _reading = [&](const HashValue &hash_val,const HashableString &key) ->bool{
+
+        if (_counter >= step_len)
+            return false;
+
+        LogIdentifier _cur_id;
+        _cur_id.Set(hash_val.m_term, hash_val.m_index);
+        if (_cur_id <= start_at)
+            return true;
+
+        output_list.emplace_back(_cur_id, key.GetStrPtr(), hash_val.m_val);
+        _counter++;
+
+        return true;
+    };
+
+    //Once the quota is reached '_reading' rejects immediately, so the remaining tables add nothing.
+    for (auto _iter = _access_list.crbegin(); _iter != _access_list.crend(); ++_iter)
+        (*_iter)->m_data->IterateByVal(_reading);
+
+    return;
+}
+
+/*Fill 'output_list' with up to 'step_len' records newer than 'start_at': sstables first, then
+  memory tables for whatever part of the quota is left. 'output_list' is cleared first.*/
+void StorageMgr::GetSlice(const LogIdentifier& start_at,uint32_t step_len,std::list<StorageItem> &output_list) const noexcept {
+    output_list.clear();
+
+    this->GetSliceInSSTable(start_at, step_len, output_list);
+    std::size_t _got_size = output_list.size();
+    if (_got_size >= step_len)
+        return;
+
+    uint32_t _remain = (uint32_t)(step_len - _got_size);
+
+    this->GetSliceInMemory(start_at, _remain, output_list);
+}
+
+/*Collect into 'output' the names of the files in the current directory that fully match
+  <_AURORA_BINLOG_NAME_REG_>.<role> optionally followed by "-<digits>". 'output' is cleared first.*/
+void StorageMgr::FindRoleBinlogFiles(const std::string &role, std::list<std::string> &output) {
+
+    output.clear();
+
+    std::string _filename_reg_pattern = _AURORA_BINLOG_NAME_REG_ + std::string("\\.") + role
+                                        + std::string("(-[0-9]*){0,1}");
+
+    LOG(INFO) << "searching binlog file with pattern:" << _filename_reg_pattern;
+
+    std::regex _pattern(_filename_reg_pattern);
+    std::smatch _sm;
+
+    for (auto&& x : fs::directory_iterator(fs::path("."))) {
+        std::string _file_name = x.path().filename().string();
+
+        if (!std::regex_match(_file_name, _sm, _pattern))
+            continue;
+
+        output.emplace_back(_file_name);
+    }
+}
+
+
+}
\ No newline at end of file
diff --git a/src/storage/storage.h b/src/storage/storage.h
new file mode 100644
index 0000000..04bc55c
--- /dev/null
+++ b/src/storage/storage.h
@@ -0,0 +1,157 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef __AURORA_STORAGE_H__
+#define __AURORA_STORAGE_H__
+
+#include
+#include
+
+#include "boost/filesystem.hpp"
+
+#include "common/comm_defs.h"
+#include "common/log_identifier.h"
+#include "tools/lock_free_unordered_single_list.h"
+#include "storage/sstable.h"
+
+namespace RaftCore::Storage {
+
+using ::RaftCore::Common::LogIdentifier;
+using ::RaftCore::Common::WriteLock;
+using ::RaftCore::DataStructure::AtomicPtrSingleListNode;
+using ::RaftCore::DataStructure::UnorderedSingleListNode;
+using ::RaftCore::Storage::MemoryTable;;
+namespace  fs = ::boost::filesystem;
+
+class StorageMgr final{
+
+public:
+
+    struct StorageItem {
+
+        StorageItem(const LogIdentifier &log_id,const std::shared_ptr &key,
+            const std::shared_ptr &value) : m_log_id(log_id),m_key(key),m_value(value) {}
+
+        LogIdentifier   m_log_id;
+
+        //Ownership of the following two std::shared_ptr can be taken over.
+ std::shared_ptr m_key; + std::shared_ptr m_value; + }; + +public: + + StorageMgr() noexcept; + + virtual ~StorageMgr() noexcept; + + bool Initialize(const char* role, bool reset = false) noexcept; + + void UnInitialize() noexcept; + + //Delete all data both in memory & disk. + void Reset() noexcept; + + bool Get(const std::string &key, std::string &val) const noexcept; + + bool Set(const LogIdentifier &log_id, const std::string &key, const std::string &value) noexcept; + + const LogIdentifier GetLastCommitted() const noexcept; + + /*Note: return the `step_len` number of records greater than start_at.If start_at is earlier than the oldest item in the storage, + return the earliest step_len records. */ + void GetSlice(const LogIdentifier& start_at,uint32_t step_len,std::list &output_list) const noexcept; + + void PurgeGarbage() noexcept; + + static void FindRoleBinlogFiles(const std::string &role, std::list &output); + +private: + + bool ConstructFromBinlog(const LogIdentifier &from, const std::string &binlog_file_name) noexcept; + + void GetSliceInSSTable(const LogIdentifier& start_at, int step_len, std::list &output_list) const noexcept; + + void GetSliceInMemory(const LogIdentifier& start_at, int step_len, std::list &output_list) const noexcept; + + void ClearInMemoryData() noexcept; + + //return : indicating if purging can proceed or not. + bool PurgeSSTable() noexcept; + + void PurgeMemoryTable() noexcept; + + void DumpMemoryTable(const MemoryTable *src) noexcept; + + void ConstructMemoryTable(const LogIdentifier &from) noexcept; + + void RecycleLast2SStables() noexcept; + + template + void ReleaseData(AtomicPtrSingleListNode &head) noexcept { + //Releasing process should be mutual exclusion from purging process. 
+ WriteLock _w_lock(this->m_mutex); + auto *_p_cur = head.load(); + while (_p_cur != nullptr) { + auto _tmp = _p_cur; + _p_cur = _p_cur->m_next.load(); + //delete _tmp->m_data; + delete _tmp; + } + head.store(nullptr); + } + +private: + + bool m_initialized = false; + + std::string m_role_str = ""; + + fs::path m_path; + + std::atomic m_last_committed; + + std::atomic m_last_persist; + + /*There are several special operations for the followings, so use the raw version of single list + instead of the wrapped version 'LockFreeUnorderedSingleList'. */ + AtomicPtrSingleListNode m_memory_table_head; + + AtomicPtrSingleListNode m_garbage_memory_table; + + AtomicPtrSingleListNode m_sstable_table_head; + + AtomicPtrSingleListNode m_garbage_sstable; + + std::shared_timed_mutex m_mutex; + + std::list m_loaded_binlog_files; + +private: + + StorageMgr(const StorageMgr&) = delete; + + StorageMgr& operator=(const StorageMgr&) = delete; + +}; + +} //end namespace + +#endif diff --git a/src/storage/storage_singleton.cc b/src/storage/storage_singleton.cc new file mode 100644 index 0000000..c129a21 --- /dev/null +++ b/src/storage/storage_singleton.cc @@ -0,0 +1,25 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#include "storage/storage_singleton.h"
+
+namespace RaftCore::Storage {
+// Out-of-class definition of the static StorageMgr object that StorageGlobal declares.
+StorageMgr StorageGlobal::m_instance;
+
+}
\ No newline at end of file
diff --git a/src/storage/storage_singleton.h b/src/storage/storage_singleton.h
new file mode 100644
index 0000000..f702cc7
--- /dev/null
+++ b/src/storage/storage_singleton.h
@@ -0,0 +1,51 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef __AURORA_STORAGE_SINGLETON_H__
+#define __AURORA_STORAGE_SINGLETON_H__
+
+#include "storage/storage.h"
+
+namespace RaftCore::Storage {
+
+using ::RaftCore::Storage::StorageMgr;
+// Static-only holder for one StorageMgr object; every constructor, the destructor and assignment are deleted.
+class StorageGlobal final{
+
+public:
+    // The single StorageMgr object with static storage duration; defined in storage_singleton.cc.
+    static StorageMgr m_instance;
+
+private:
+    // All special members below are deleted, so this class can never be instantiated or copied.
+    StorageGlobal() = delete;
+
+    virtual ~StorageGlobal() = delete;
+
+    StorageGlobal(const StorageGlobal&) = delete;
+
+    StorageGlobal& operator=(const StorageGlobal&) = delete;
+
+};
+
+} //end namespace
+
+
+#endif
diff --git a/src/tools/data_structure_base.cc b/src/tools/data_structure_base.cc
new file mode 100644
index 0000000..b858083
--- /dev/null
+++ b/src/tools/data_structure_base.cc
@@ -0,0 +1,88 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once // NOTE(review): unusual in a .cc file; this file is textually #included by data_structure_base.h.
+
+#include "tools/data_structure_base.h"
+
+namespace RaftCore::DataStructure {
+// Out-of-line definitions of the template members declared in data_structure_base.h.
+template
+OrderedTypeBase::OrderedTypeBase() noexcept{}
+
+template
+OrderedTypeBase::~OrderedTypeBase() noexcept{}
+// Default inequality: the logical negation of the subclass-provided operator==.
+template
+bool OrderedTypeBase::operator!=(const T&_other)const noexcept {
+    return !this->operator==(_other);
+}
+// Default "less or equal": true when operator== holds, otherwise whatever operator< reports.
+template
+bool OrderedTypeBase::operator<=(const T& _other)const noexcept {
+    if (this->operator==(_other))
+        return true;
+
+    return this->operator<(_other);
+}
+// Default "greater or equal": true when operator== holds, otherwise whatever operator> reports.
+template
+bool OrderedTypeBase::operator>=(const T& _other)const noexcept {
+    if (this->operator==(_other))
+        return true;
+
+    return this->operator>(_other);
+}
+
+template
+LogicalDelete::LogicalDelete() noexcept {}
+
+template
+LogicalDelete::~LogicalDelete() noexcept {}
+// Reports whether SetDeleted() has been called on this object.
+template
+bool LogicalDelete::IsDeleted() const noexcept {
+    return this->m_deleted;
+}
+// Marks the object as logically deleted; this class offers no way to clear the mark again.
+template
+void LogicalDelete::SetDeleted() noexcept {
+    this->m_deleted = true;
+}
+// Starts with the spin-lock flag cleared, i.e. in the unlocked state.
+template
+LockableNode::LockableNode()noexcept {
+    this->m_spin_lock.clear();
+}
+
+template
+LockableNode::~LockableNode()noexcept {}
+// Busy-waits until test_and_set() returns false, i.e. until this caller is the one that set the flag.
+template
+void LockableNode::SpinLock()noexcept {
+    while (this->m_spin_lock.test_and_set()); // empty body on purpose: pure spin, no yield or back-off
+}
+// Releases the lock by clearing the flag.
+template
+void LockableNode::SpinUnLock()noexcept {
+    this->m_spin_lock.clear();
+}
+
+
+} //end namespace
+
+
diff --git a/src/tools/data_structure_base.h b/src/tools/data_structure_base.h
new file mode 100644
index 0000000..6a3109e
--- /dev/null
+++ b/src/tools/data_structure_base.h
@@ -0,0 +1,97 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#pragma once
+
+#ifndef __AURORA_DATA_STRUCTURE_COMMON_H__
+#define __AURORA_DATA_STRUCTURE_COMMON_H__
+
+namespace RaftCore::DataStructure {
+// Base for element types that must be mutually comparable: subclasses supply <, > and ==; !=, <= and >= have defaults.
+template
+class OrderedTypeBase {
+
+public:
+
+    OrderedTypeBase() noexcept;
+
+    virtual ~OrderedTypeBase() noexcept;
+
+    //The element of this list should be able to be compared with each other.
+    virtual bool operator<(const T&)const noexcept = 0;
+
+    virtual bool operator>(const T&)const noexcept = 0;
+
+    virtual bool operator==(const T&)const noexcept = 0;
+    // Default implementation returns !operator==(_other).
+    virtual bool operator!=(const T&_other)const noexcept;
+
+    //Should be non-final, providing a way for the subclass to override.
+    virtual bool operator<=(const T& _other)const noexcept;
+
+    virtual bool operator>=(const T& _other)const noexcept;
+
+    //virtual std::string PrintMe() const noexcept { return ""; }
+};
+
+//Here template is just a padding.
+template
+class LogicalDelete {
+
+public:
+
+    LogicalDelete() noexcept;
+
+    virtual ~LogicalDelete() noexcept;
+    // Both accessors are final, so subclasses cannot change how the flag is read or set.
+    virtual bool IsDeleted() const noexcept final;
+
+    virtual void SetDeleted() noexcept final;
+
+private:
+    // Plain bool, not atomic. NOTE(review): confirm callers never race IsDeleted() against SetDeleted().
+    bool m_deleted = false;
+};
+
+//Here template is just a padding.
+template +class LockableNode { + +public: + + LockableNode() noexcept; + + virtual ~LockableNode() noexcept; + +protected: + + void SpinLock() noexcept; + + void SpinUnLock() noexcept; + +private: + + std::atomic_flag m_spin_lock; +}; + + +} //end namespace + +#include "tools/data_structure_base.cc" + +#endif diff --git a/src/tools/lock_free_deque.cc b/src/tools/lock_free_deque.cc new file mode 100644 index 0000000..fdf8d46 --- /dev/null +++ b/src/tools/lock_free_deque.cc @@ -0,0 +1,215 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "tools/lock_free_deque.h" + +namespace RaftCore::DataStructure { + +template +LockFreeUnorderedSingleList> LockFreeDeque::m_garbage; + +template +DequeNode::DequeNode() noexcept{} + +template +DequeNode::DequeNode(const std::shared_ptr &p_val) noexcept{ + this->m_atomic_next.store(nullptr); + this->m_val = p_val; + + //This is an estimated value for security. + //TODO:why this can't compile. + //::RaftCore::Config::FLAGS_garbage_deque_retain_num = CommonView::m_cpu_cores; +} + +template +DequeNode::~DequeNode() noexcept {} + +template +LockFreeDeque::LockFreeDeque() noexcept{ + //The dummy node points to itself. + this->m_dummy = new DequeNode(); + this->m_dummy->m_atomic_next.store(this->m_dummy); + + //Head and tail are initially points the dummy node which indicating the entire deque is empty. 
+ this->m_head.store(this->m_dummy); + this->m_tail.store(this->m_dummy); + +#ifdef _DEQUE_TEST_ + this->m_logical_size.store(0); + this->m_physical_size.store(0); +#endif +} + +template +LockFreeDeque::~LockFreeDeque() noexcept{ + auto *_p_cur = this->m_head.load()->m_atomic_next.load(); + while (_p_cur != this->m_dummy) { + auto _p_tmp = _p_cur->m_atomic_next.load(); + delete _p_cur; + _p_cur = _p_tmp; + } + + delete this->m_dummy; +} + +template +void LockFreeDeque::Push(const std::shared_ptr &p_one, uint32_t flag) noexcept { + //Node node points to dummy. + auto* _p_new_node = new DequeNode(p_one); + _p_new_node->m_atomic_next.store(this->m_dummy); + _p_new_node->m_flag = flag; + + auto* _p_insert_after = this->m_tail.load(); + std::atomic*> *_p_insert_pos = &_p_insert_after->m_atomic_next; + + auto* _p_tmp = this->m_dummy; + /*Note: + 1. 'compare_exchange_weak' is a better approach for performance, but for now, + it is acceptable to use 'compare_exchange_strong' making code simpler and being more readable. + 2. _p_insert_after may has been freed if the free operation didn't get deferred ,so the reference to _p_insert_pos->compare_exchange_strong + will core in that scene. */ + while (!_p_insert_pos->compare_exchange_strong(_p_tmp, _p_new_node)) { + _p_insert_after = _p_tmp; + _p_insert_pos = &_p_insert_after->m_atomic_next; + + /*Insert operation must append the '_p_new_node' at the end of the deque. So '_p_tmp' must be + set to 'this->m_dummy' each time compare_exchange_strong fails. 
+ */ + _p_tmp = this->m_dummy; + } + + auto * _p_from = _p_insert_after; + while (!this->m_tail.compare_exchange_weak(_p_from, _p_new_node)) + _p_from = _p_insert_after; + +#ifdef _DEQUE_TEST_ + if (flag == 0) + this->m_logical_size.fetch_add(1); + this->m_physical_size.fetch_add(1); +#endif +} + +template +std::shared_ptr LockFreeDeque::Pop() noexcept { + + while (true) { + auto *_deque_node = this->PopNode(); + if (_deque_node == nullptr) + return std::shared_ptr(); + + //Encountering a 'fake node'. + if (_deque_node->m_flag == 1) + continue; + + auto _transfer = _deque_node->m_val; + + //Once the ownership has been copied out, the node itself cannot hold it. + _deque_node->m_val.reset(); + + return _transfer; + } +} + +template +DequeNode* LockFreeDeque::PopNode() noexcept { + + std::atomic*> *_p_head_next = &this->m_head.load()->m_atomic_next; + + //Judge if list is empty + auto *_p_cur = _p_head_next->load(); + if (_p_cur == this->m_dummy) + return _p_cur; + + auto *_p_cur_next = _p_cur->m_atomic_next.load(); + + while (true) { + + //If '_p_cur' is the last node at the moment. + if (_p_cur_next == this->m_dummy) { + //If '_p_cur' is a 'fake-node'. + if (_p_cur->m_flag == 1) + return nullptr; + + //'_p_cur' isn't a 'fake-node'. Push a 'fake-node' first. + this->Push(std::shared_ptr(), 1); + + //'_p_cur' next pointer changed, update it. + _p_cur_next = _p_cur->m_atomic_next.load(); + } + + /* _p_cur may has been freed if freeing process didn't get deferred , so the reference to + _p_cur->m_atomic_next.load() will core in that scene. */ + if (!_p_head_next->compare_exchange_strong(_p_cur, _p_cur_next)) { + + //Now, '_p_cur' is the next node about to be popped. 
+ _p_cur_next = _p_cur->m_atomic_next.load(); + continue; + } + + break; + } + + m_garbage.PushFront(_p_cur); + +#ifdef _DEQUE_TEST_ + if (_p_cur->m_flag == 0) + this->m_logical_size.fetch_sub(1); + this->m_physical_size.fetch_sub(1); +#endif + + //Always return the ptr of the node successfully popped, regardless what the role it is. + return _p_cur; +} + +template +void LockFreeDeque::GC() noexcept { + m_garbage.PurgeSingleList(::RaftCore::Config::FLAGS_garbage_deque_retain_num); +} + +#ifdef _DEQUE_TEST_ +template +std::size_t LockFreeDeque::GetLogicalSize() const noexcept { + return this->m_logical_size.load(); +} + +template +std::size_t LockFreeDeque::GetPhysicalSize() const noexcept { + return this->m_physical_size.load(); +} + +template +std::size_t LockFreeDeque::Size() const noexcept { + return this->GetLogicalSize(); +} + +template +std::size_t LockFreeDeque::GetSizeByIterating() const noexcept { + + int _counter = 0; + auto _cur = this->m_head.load()->m_atomic_next.load(); + while (_cur != this->m_dummy) { + _cur = _cur->m_atomic_next.load(); + _counter++; + } + + return _counter; +} +#endif + + +} diff --git a/src/tools/lock_free_deque.h b/src/tools/lock_free_deque.h new file mode 100644 index 0000000..692a6ad --- /dev/null +++ b/src/tools/lock_free_deque.h @@ -0,0 +1,112 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/
+
+#pragma once
+
+#ifndef __AURORA_LOCK_FREE_DEQUE_H__
+#define __AURORA_LOCK_FREE_DEQUE_H__
+
+#include
+#include
+#include
+
+#include "common/comm_defs.h"
+#include "tools/lock_free_unordered_single_list.h"
+
+namespace RaftCore::DataStructure {
+
+using ::RaftCore::DataStructure::AtomicPtrSingleListNode;
+using ::RaftCore::DataStructure::UnorderedSingleListNode;
+using ::RaftCore::DataStructure::LockFreeUnorderedSingleList;
+// One link of the deque: an atomic next pointer, the shared payload, and a normal/fake flag.
+template
+class DequeNode final{
+
+public:
+
+    DequeNode() noexcept; //For dumb nodes
+    // Stores p_val and sets the next pointer to nullptr (see lock_free_deque.cc).
+    DequeNode(const std::shared_ptr &p_val) noexcept;
+
+    virtual ~DequeNode() noexcept;
+
+    std::atomic*> m_atomic_next;
+
+    std::shared_ptr m_val;
+
+    /* 0: normal node.
+       1: fake node. */
+    uint32_t m_flag = 0;
+};
+// Singly linked queue closed into a ring by a dummy node; Push appends at the tail, Pop removes after the head.
+template
+class LockFreeDeque final{
+
+public:
+
+    LockFreeDeque() noexcept;
+
+    virtual ~LockFreeDeque() noexcept;
+    // Appends p_one; flag 1 marks a fake node (PopNode() pushes one internally), callers normally keep the default 0.
+    void Push(const std::shared_ptr &p_one, uint32_t flag = 0) noexcept;
+    // Returns the next normal node's value, or an empty shared_ptr when nothing can be popped.
+    std::shared_ptr Pop() noexcept;
+
+#ifdef _DEQUE_TEST_
+    std::size_t GetSizeByIterating() const noexcept;
+
+    std::size_t GetLogicalSize() const noexcept;
+
+    std::size_t GetPhysicalSize() const noexcept;
+
+    std::size_t Size() const noexcept;
+#endif
+    // Static: purges the garbage list, which is a static member of LockFreeDeque.
+    static void GC() noexcept;
+
+private:
+    // Unlinks the first node after the head and hands it to m_garbage; may return a fake node or nullptr.
+    DequeNode* PopNode() noexcept;
+
+private:
+
+    std::atomic*> m_head;
+
+    std::atomic*> m_tail;
+
+    DequeNode* m_dummy = nullptr;
+
+#ifdef _DEQUE_TEST_
+    std::atomic m_logical_size;
+
+    std::atomic m_physical_size;
+#endif
+    // Popped nodes are parked here and released later by GC() rather than deleted immediately.
+    static LockFreeUnorderedSingleList> m_garbage;
+
+private:
+
+    LockFreeDeque& operator=(const LockFreeDeque&) = delete;
+
+};
+
+} //end namespace
+
+#include "tools/lock_free_deque.cc"
+
+#endif
diff --git a/src/tools/lock_free_hash.cc b/src/tools/lock_free_hash.cc
new file mode 100644
index 0000000..70f4fe0
--- /dev/null
+++ b/src/tools/lock_free_hash.cc
@@ -0,0 +1,484 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public
License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/
+
+#include
+#include
+
+#include "common/comm_defs.h"
+#include "tools/utilities.h"
+#include "config/config.h"
+#include "tools/lock_free_hash.h"
+
+namespace RaftCore::DataStructure {
+// Reads the logical-deletion flag carried by the key object.
+template
+bool HashTypeBase::IsDeleted() const noexcept {
+    return this->m_deleted;
+}
+// Sets the logical-deletion flag; const because the flag is a mutable member.
+template
+void HashTypeBase::SetDeleted() const noexcept {
+    this->m_deleted = true;
+}
+// Clears the logical-deletion flag again.
+template
+void HashTypeBase::SetValid() const noexcept{
+    this->m_deleted = false;
+}
+// Creates an unlinked node that shares ownership of the given key and value.
+template
+HashNode::HashNode(const std::shared_ptr &key,const std::shared_ptr &val) noexcept{
+    this->m_shp_key = key;
+    this->m_shp_val = val;
+    this->m_next = nullptr;
+}
+
+/*
+template
+void HashNode::Update(const std::shared_ptr &val) noexcept {
+    this->m_shp_val = val;
+}*/
+
+template
+HashNode::~HashNode() noexcept {}
+
+/*
+template
+void HashNode::Update(const std::shared_ptr &val) noexcept {
+
+    //this->m_mutex.lock();
+    this->m_shp_val = val;
+    //this->m_mutex.unlock();
+}*/
+// Returns the next node in the same bucket chain, or nullptr at the end.
+template
+HashNode* HashNode::GetNext() const noexcept {
+    return this->m_next;
+}
+// Links this node in front of p_next; the const qualifier of the argument is cast away for storage.
+template
+void HashNode::SetNext(const HashNode * const p_next) noexcept{
+    this->m_next = const_cast*>(p_next);
+}
+// Runs op on this node's key pointer in place; the caller (Map) re-buckets the node if the hash changed.
+template
+void HashNode::ModifyKey(std::function&)> op) noexcept {
+    op(this->m_shp_key);
+}
+// The deletion state of a node is stored on its key object, so these three simply delegate to the key.
+template
+bool HashNode::IsDeleted() const noexcept {
+    return this->m_shp_key->IsDeleted();
+}
+
+template
+void HashNode::SetDeleted() const noexcept {
+    this->m_shp_key->SetDeleted();
+}
+
+template
+void HashNode::SetValid() const noexcept {
+    this->m_shp_key->SetValid();
+}
+// Records the tag of the Map() pass that inserted this node, so that pass can skip it.
+template
+void HashNode::SetTag(uint32_t tag) noexcept {
+    this->m_iterating_tag = tag;
+}
+
+template
+uint32_t HashNode::GetTag() const noexcept {
+    return this->m_iterating_tag;
+}
+// No-op hooks called around the callback in Iterate(); declared virtual in lock_free_hash.h.
+template
+void HashNode::LockValue() noexcept {}
+
+template
+void HashNode::UnLockValue() noexcept {}
+
+template
+std::shared_ptr HashNode::GetKey() const noexcept {
+    return this->m_shp_key;
+}
+
+template
+std::size_t HashNode::GetKeyHash() const noexcept {
+    return this->m_shp_key->Hash();
+}
+
+template
+std::shared_ptr HashNode::GetVal() const noexcept {
+    return this->m_shp_val;
+}
+// Nodes compare equal when their keys do; the values are not compared.
+template
+bool HashNode::operator==(const HashNode& one) const noexcept {
+    return (*this->m_shp_key) == *one.m_shp_key;
+}
+
+template
+bool HashNode::operator==(const T& one) const noexcept {
+    return (*this->m_shp_key) == one;
+}
+// Allocates the bucket array; slot_num == 0 means "derive the count from FLAGS_binlog_meta_hash_buf_size".
+template typename NodeType>
+LockFreeHash::LockFreeHash(uint32_t slot_num) noexcept {
+
+    static_assert(std::is_base_of,T>::value,"template parameter of LockFreeHash invalid");
+    // Estimated bytes consumed per slot, used only to turn the configured buffer size into a slot count.
+    int _one_slot_size = sizeof(void*) + sizeof(NodeType) + sizeof(std::atomic*>) +
+        sizeof(T) + ::RaftCore::Tools::SizeOfX();
+
+    if (slot_num == 0)
+        slot_num = ::RaftCore::Tools::RoundUp(::RaftCore::Config::FLAGS_binlog_meta_hash_buf_size * 1024 * 1024 / _one_slot_size);
+
+    this->m_slots_num = slot_num;
+    this->m_slots_mask = ::RaftCore::Tools::GetMask(this->m_slots_num); // buckets are indexed with hash & mask - presumably a power-of-two count; confirm RoundUp/GetMask
+    // NOTE(review): the byte count is held in an int and the malloc result is not checked - confirm both for large tables.
+    int buf_size = this->m_slots_num * sizeof(void*);
+    this->m_solts = (std::atomic*> **)std::malloc(buf_size);
+    // Every slot is a separately heap-allocated atomic head pointer, initially nullptr (empty chain).
+    for (std::size_t i = 0; i < this->m_slots_num; ++i)
+        this->m_solts[i] = new std::atomic*>(nullptr);
+
+    this->m_size.store(0);
+}
+// Releases all nodes and slot heads via Clear(true), then frees the slot array itself.
+template typename NodeType>
+LockFreeHash::~LockFreeHash() noexcept {
+    this->Clear(true);
+    std::free(this->m_solts);
+}
+// Deletes every node in every chain. destruct == true also deletes the slot heads; false resets them to nullptr for reuse.
+template typename NodeType>
+void LockFreeHash::Clear(bool destruct) noexcept {
+
+    assert(this->m_solts != nullptr);
+
+    //int _tmp_1 = 0;
+
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+        std::atomic*> *_p_atomic = this->m_solts[i] ;
+        NodeType* _p_cur = _p_atomic->load();
+
+        while (_p_cur != nullptr) {
+            auto *_tmp = _p_cur;
+            _p_cur = _p_cur->GetNext();
+            delete _tmp;
+            //_tmp_1++;
+        }
+        if (destruct)
+            delete _p_atomic;
+        else
+            _p_atomic->store(nullptr);
+    }
+
+    //VLOG(89) << "hash released " << _tmp_1 << " inserted elements";
+
+    this->m_size.store(0);
+}
+// Pushes a new node for (key, val) at the head of its bucket; an existing node with an equal key is marked deleted.
+template typename NodeType>
+void LockFreeHash::Insert(const std::shared_ptr &key, const std::shared_ptr &val, uint32_t tag) noexcept {
+    std::size_t hash_val = key->Hash();
+    std::size_t idx = hash_val & this->m_slots_mask;
+
+    std::atomic*> * p_atomic = this->m_solts[idx];
+    NodeType* p_cur = p_atomic->load();
+    // Find the first node with an equal key; p_cur is left on it. NOTE(review): IsDeleted() is not consulted here, unlike Find/Read/Delete.
+    bool _key_exist = false;
+    while (p_cur != nullptr) {
+
+        if (!p_cur->operator==(*key)) {
+            p_cur = p_cur->GetNext(); //move next
+            continue;
+        }
+
+        _key_exist = true;
+        break;
+    }
+
+    NodeType* p_old_head = p_atomic->load();
+    NodeType* p_new_node = new NodeType(key,val);
+
+    p_new_node->SetNext(p_old_head);
+    p_new_node->SetTag(tag);
+
+    /*"When a compare-and-exchange is in a loop, the weak version will yield better performance on some platforms"
+        from https://en.cppreference.com/w/cpp/atomic/atomic/compare_exchange.
+      Moreover , whether specify the following memory order or not influencing little on overall performance under my test. */
+    while (!p_atomic->compare_exchange_weak(p_old_head, p_new_node, std::memory_order_acq_rel, std::memory_order_acquire))
+        p_new_node->SetNext(p_old_head); // a failed CAS refreshed p_old_head: re-link to the current head and retry
+
+    /*Set the first existing key to be deleted , ensuring the key can be inserted at head.
+      There is a time slice during which iterating could read redundant elements.This is avoided by recording what
+      has been read when traversing. */
+    if (!_key_exist)
+        this->m_size.fetch_add(1);
+    else
+        p_cur->SetDeleted(); // NOTE(review): if the matched node was already deleted via Delete() (size decremented), the size is not re-incremented - confirm Size() accuracy
+}
+// Returns true when the bucket for key holds a node with an equal key that is not marked deleted.
+template typename NodeType>
+bool LockFreeHash::Find(const T &key) const noexcept {
+
+    std::size_t hash_val = key.Hash();
+    std::size_t idx = hash_val & this->m_slots_mask;
+
+    std::atomic*> * p_atomic = this->m_solts[idx];
+    NodeType* p_cur = p_atomic->load();
+
+    while (p_cur != nullptr) {
+        if (p_cur->operator==(key) && !p_cur->IsDeleted())
+            return true;
+
+        //move next
+        p_cur = p_cur->GetNext();
+    }
+
+    return false;
+}
+
+/*
+template typename NodeType>
+bool LockFreeHash::Upsert(const T *key, const std::shared_ptr val) noexcept {
+    std::size_t hash_val = key->Hash();
+    std::size_t idx = hash_val & this->m_slots_mask;
+
+    std::atomic*> * p_atomic = this->m_solts[idx];
+    NodeType* p_cur = p_atomic->load();
+
+    while (p_cur != nullptr) {
+        if (p_cur->operator==(*key) && !p_cur->IsDeleted()) {
+            p_cur->Update(val);
+            return false;
+        }
+
+        //move next
+        p_cur = p_cur->GetNext();
+    }
+
+    std::shared_ptr _shp_key(const_cast(key));
+    this->Insert(_shp_key, val);
+
+    return true;
+}*/
+// Returns the element counter maintained by Insert/Delete/Clear; no traversal is performed.
+template typename NodeType>
+uint32_t LockFreeHash::Size() const noexcept {
+    return this->m_size.load();
+}
+// Copies the value of the first live node matching key into val and returns true; returns false (val untouched) otherwise.
+template typename NodeType>
+bool LockFreeHash::Read(const T &key, std::shared_ptr &val) const noexcept {
+
+    std::size_t hash_val = key.Hash();
+    std::size_t idx = hash_val & this->m_slots_mask;
+
+    std::atomic*> * p_atomic = this->m_solts[idx];
+    NodeType* p_cur = p_atomic->load();
+
+    while (p_cur != nullptr) {
+
+        bool _found = p_cur->operator==(key) && !p_cur->IsDeleted();
+        if (!_found) {
+            p_cur = p_cur->GetNext();
+            continue;
+        }
+
+        val = p_cur->GetVal();
+        return true;
+    }
+
+    return false;
+}
+// Logical delete: marks the first live node matching key as deleted and decrements the size. The node stays linked.
+template typename NodeType>
+void LockFreeHash::Delete(const T &key) noexcept {
+    std::size_t hash_val = key.Hash();
+    std::size_t idx = hash_val & this->m_slots_mask;
+
+    std::atomic*> * p_atomic = this->m_solts[idx];
+    NodeType* p_cur = p_atomic->load();
+
+    while (p_cur != nullptr) {
+        if (p_cur->operator==(key) && !p_cur->IsDeleted()) {
+            p_cur->SetDeleted();
+            this->m_size.fetch_sub(1);
+            return ;
+        }
+
+        //move next
+        p_cur = p_cur->GetNext();
+    }
+}
+// Replaces _output with the keys of all live nodes, de-duplicated and ordered through a std::set using MyComparator.
+template typename NodeType>
+void LockFreeHash::GetOrderedByKey(std::list> &_output) const noexcept {
+
+    _output.clear();
+
+    std::set,MyComparator> _rb_tree;
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+        std::atomic*> *p_atomic = this->m_solts[i] ;
+        const NodeType* p_cur = p_atomic->load();
+
+        while (p_cur != nullptr) {
+            auto _p_tmp = p_cur;
+            p_cur = p_cur->GetNext();
+            if(_p_tmp->IsDeleted())
+                continue;
+
+            //Avoid reading redundant keys.
+            if (_rb_tree.find(_p_tmp->GetKey()) == _rb_tree.cend())
+                _rb_tree.emplace(_p_tmp->GetKey());
+        }
+    }
+
+    for (auto iter = _rb_tree.begin(); iter != _rb_tree.end(); iter++)
+        _output.emplace_back(*iter);
+}
+// Adds a value -> key entry to _output for each distinct live key. _output is not cleared first.
+template typename NodeType>
+void LockFreeHash::GetOrderedByValue(std::map, std::shared_ptr, ValueComparator> &_output) const noexcept {
+
+    std::unordered_set,MyHasher,MyEqualer> _traversed;
+
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+        std::atomic*> *p_atomic = this->m_solts[i] ;
+        const NodeType* p_cur = p_atomic->load();
+
+        while (p_cur != nullptr) {
+            auto _p_tmp = p_cur;
+            p_cur = p_cur->GetNext();
+            if(_p_tmp->IsDeleted())
+                continue;
+
+            auto _shp_key = _p_tmp->GetKey();
+            if (_traversed.find(_shp_key) != _traversed.cend())
+                continue;
+
+            _output.emplace(_p_tmp->GetVal(), _shp_key);
+            _traversed.emplace(_shp_key);
+        }
+    }
+}
+// Applies op to every live key in place; a node whose key hash changed is marked deleted and re-inserted under the new hash.
+template typename NodeType>
+void LockFreeHash::Map(std::function&)> op) noexcept {
+    // Random tag identifying this pass, so nodes re-inserted below are not processed a second time.
+    uint32_t _cur_tag = ::RaftCore::Tools::GenerateRandom(1, _MAX_UINT32_);
+
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+
+        std::atomic*> *p_atomic = this->m_solts[i];
+        NodeType* p_cur = p_atomic->load();
+
+        while (p_cur != nullptr) {
+
+            auto _p_tmp = p_cur;
+            p_cur = p_cur->GetNext();
+
+            //If already been iterated or deleted,jump over.
+            if (_p_tmp->GetTag() == _cur_tag || _p_tmp->IsDeleted())
+                continue;
+
+            std::size_t _old_hash_val = _p_tmp->GetKeyHash();
+
+            //Do modifying.
+            _p_tmp->ModifyKey(op);
+
+            std::size_t _new_hash_val = _p_tmp->GetKeyHash();
+            if (_old_hash_val == _new_hash_val)
+                continue;
+
+            //Copy the object out first.
+            std::shared_ptr _shp_new_key = std::make_shared(*_p_tmp->GetKey());
+
+            //Set the original object to be deleted.
+            _p_tmp->SetDeleted(); // NOTE(review): m_size is not decremented here while Insert() below may increment it - confirm Size() after re-bucketing
+
+            //Insert the new element with a new tag.
+            this->Insert(_shp_new_key,_p_tmp->GetVal(),_cur_tag);
+        }
+    }
+}
+// Returns true only if criteria holds for every distinct live key; stops at the first key that fails.
+template typename NodeType>
+bool LockFreeHash::CheckCond(std::function criteria) const noexcept {
+
+    std::unordered_set,MyHasher,MyEqualer> _traversed;
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+        std::atomic*> *p_atomic = this->m_solts[i] ;
+        const NodeType* p_cur = p_atomic->load();
+
+        while (p_cur != nullptr) {
+            auto _p_tmp = p_cur;
+            p_cur = p_cur->GetNext();
+            if(_p_tmp->IsDeleted())
+                continue;
+
+            auto _shp_key = _p_tmp->GetKey();
+            if (_traversed.find(_shp_key) != _traversed.cend())
+                continue;
+
+            if (!criteria(*_shp_key))
+                return false;
+            _traversed.emplace(_shp_key);
+        }
+    }
+
+    return true;
+}
+// Calls op(key, value) once per distinct live key, between LockValue()/UnLockValue(); stops as soon as op returns false.
+template typename NodeType>
+void LockFreeHash::Iterate(std::function &k, const std::shared_ptr &v)> op) noexcept {
+
+    std::unordered_set,MyHasher,MyEqualer> _traversed;
+    for (std::size_t i = 0; i < this->m_slots_num; ++i) {
+
+        std::atomic*> *p_atomic = this->m_solts[i];
+        NodeType* p_cur = p_atomic->load();
+
+        while (p_cur != nullptr) {
+
+            auto *_p_tmp = p_cur;
+            p_cur = p_cur->GetNext();
+
+            //If already been iterated or deleted,jump over.
+            if (_p_tmp->IsDeleted())
+                continue;
+
+            auto _shp_key = _p_tmp->GetKey();
+            if (_traversed.find(_shp_key) != _traversed.cend())
+                continue;
+
+            _p_tmp->LockValue();
+            bool _rst = op(_shp_key, _p_tmp->GetVal());
+            _p_tmp->UnLockValue();
+
+            if (!_rst)
+                return;
+
+            _traversed.emplace(_shp_key);
+        }
+    }
+}
+
+}
diff --git a/src/tools/lock_free_hash.h b/src/tools/lock_free_hash.h
new file mode 100644
index 0000000..debddeb
--- /dev/null
+++ b/src/tools/lock_free_hash.h
@@ -0,0 +1,208 @@
+/*
+*
+* Copyright (C) <2019>
+
+* This program is free software: you can redistribute it and/or modify
+* it under the terms of the GNU General Public License as published by
+* the Free Software Foundation, either version 3 of the License, or
+* (at your option) any later version.
+
+* This program is distributed in the hope that it will be useful,
+* but WITHOUT ANY WARRANTY; without even the implied warranty of
+* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+* GNU General Public License for more details.
+
+* You should have received a copy of the GNU General Public License
+* along with this program. If not, see .
+*/ + +#pragma once + +#ifndef __AURORA_LOCK_FREE_HASH_H__ +#define __AURORA_LOCK_FREE_HASH_H__ + +#include +#include +#include +#include +#include + +namespace RaftCore::DataStructure { + +template +class HashTypeBase { + +public: + + HashTypeBase() noexcept{} + + virtual bool operator<(const T&)const noexcept = 0; + + virtual bool operator==(const T&)const noexcept = 0; + + virtual const T& operator=(const T&)noexcept = 0; + + virtual std::size_t Hash() const noexcept = 0; + + virtual bool IsDeleted() const noexcept final; + + virtual void SetDeleted() const noexcept final; + + virtual void SetValid() const noexcept final; + +private: + + mutable bool m_deleted = false; +}; + +template +class HashNode { + +public: + + HashNode(const std::shared_ptr &key,const std::shared_ptr &val) noexcept; + + virtual ~HashNode() noexcept; + + //virtual void Update(const std::shared_ptr &val) noexcept; + + HashNode* GetNext() const noexcept; + + void SetNext(const HashNode * const p_next) noexcept; + + std::shared_ptr GetKey() const noexcept; + + std::size_t GetKeyHash() const noexcept; + + virtual std::shared_ptr GetVal() const noexcept; + + void ModifyKey(std::function&)> op) noexcept; + + bool IsDeleted() const noexcept; + + void SetDeleted() const noexcept; + + void SetValid() const noexcept; + + void SetTag(uint32_t tag) noexcept; + + uint32_t GetTag() const noexcept; + + bool operator==(const HashNode& one) const noexcept; + + bool operator==(const T& one) const noexcept; + + virtual void LockValue() noexcept; + + virtual void UnLockValue() noexcept; + +protected: + + //mutable std::mutex m_mutex; + + std::shared_ptr m_shp_key; + + std::shared_ptr m_shp_val; + + HashNode* m_next = nullptr; + + uint32_t m_iterating_tag = 0; + +private: + + HashNode(const HashNode&) = delete; + + HashNode& operator=(const HashNode&) = delete; +}; + + +template typename NodeType=HashNode> +class LockFreeHash { + +public: + + typedef std::function &left, const std::shared_ptr &right)> 
ValueComparator; + + LockFreeHash(uint32_t slot_num=0) noexcept; + + virtual ~LockFreeHash() noexcept; + + void Insert(const std::shared_ptr &key, const std::shared_ptr &val = nullptr, uint32_t tag = 0) noexcept; + + void Delete(const T &key) noexcept; + + bool Find(const T &key) const noexcept; + + /*Note: val pointer's ownership will be taken over. And the return value indicate whether the + key pointer's ownership has been taken. */ + //bool Upsert(const T *key, const std::shared_ptr val = nullptr) noexcept; + + bool Read(const T &key, std::shared_ptr &val) const noexcept; + + uint32_t Size() const noexcept; + + /*The GetOrderedBy* are time consuming operations when slots number is large, + be sure not to invoke it in a real-time processing scenario.*/ + void GetOrderedByKey(std::list> &_output) const noexcept; + + void GetOrderedByValue(std::map,std::shared_ptr,ValueComparator> &_output) const noexcept; + + //Map the operator op to every element in the hash. + void Map(std::function&)> op) noexcept; + + //Read only iterator. + void Iterate(std::function &k,const std::shared_ptr &v)> op) noexcept; + + bool CheckCond(std::function criteria) const noexcept; + + //Clear inserted elements but not the base structure nodes of the current hash. 
+ void Clear(bool destruct = false) noexcept; + +protected: + + uint32_t m_slots_mask = 0; + + std::atomic*> ** m_solts = nullptr; + +private: + + struct MyComparator { + bool operator()(const std::shared_ptr &a,const std::shared_ptr &b) const{ + return *a < *b; + } + }; + + struct MyEqualer { + bool operator()(const std::shared_ptr &a,const std::shared_ptr &b) const{ + return *a == *b; + } + }; + + struct MyHasher { + std::size_t operator()(const std::shared_ptr &a) const{ + return a->Hash(); + } + }; + + uint32_t m_slots_num = 0; + + std::atomic m_size; + +private: + + LockFreeHash(const LockFreeHash&) = delete; + + LockFreeHash& operator=(const LockFreeHash&) = delete; + +}; + +} //end namespace + +/*This is for separating template class member function definitions + from its .h file into a corresponding .cc file: + https://www.codeproject.com/Articles/48575/How-to-define-a-template-class-in-a-h-file-and-imp. +*/ + +#include "tools/lock_free_hash.cc" + +#endif diff --git a/src/tools/lock_free_hash_specific.cc b/src/tools/lock_free_hash_specific.cc new file mode 100644 index 0000000..46cc082 --- /dev/null +++ b/src/tools/lock_free_hash_specific.cc @@ -0,0 +1,89 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#include "tools/lock_free_hash_specific.h" + +namespace RaftCore::DataStructure { + +template +HashNodeAtomic::HashNodeAtomic(const std::shared_ptr &key, + const std::shared_ptr &val) noexcept : HashNode(key, val) {} + +template +void HashNodeAtomic::Update(R* val)noexcept { + this->SpinLock(); + + //Do not delete the managed ptr. + this->m_shp_val.reset(val, [](auto *p) {}); + this->SpinUnLock(); +} + +template +void HashNodeAtomic::LockValue() noexcept { + this->SpinLock(); +} + +template +void HashNodeAtomic::UnLockValue() noexcept { + this->SpinUnLock(); +} + +template +HashNodeAtomic* HashNodeAtomic::GetNext() const noexcept { + return dynamic_cast*>(this->m_next); +} + +template +LockFreeHashAtomic::LockFreeHashAtomic(uint32_t slot_num)noexcept : LockFreeHash(slot_num) { +} + +template +bool LockFreeHashAtomic::Upsert(const T *key, R* p_avl) noexcept { + std::size_t hash_val = key->Hash(); + std::size_t idx = hash_val & this->m_slots_mask; + + std::atomic*> * p_atomic = this->m_solts[idx]; + HashNodeAtomic* p_cur = p_atomic->load(); + + while (p_cur != nullptr) { + if (p_cur->operator==(*key) && !p_cur->IsDeleted()) { + + std::atomic _atomic(p_avl); + p_cur->Update(_atomic); + return false; + } + + //move next + p_cur = p_cur->GetNext(); + } + + std::shared_ptr _shp_key(const_cast(key)); + + //Here we need to use an empty deleter. 
+ std::shared_ptr _shp_val(p_avl, [](auto* p) {}); + + this->Insert(_shp_key, _shp_val); + + return true; +} + + +} //end namespace + diff --git a/src/tools/lock_free_hash_specific.h b/src/tools/lock_free_hash_specific.h new file mode 100644 index 0000000..14a2a4c --- /dev/null +++ b/src/tools/lock_free_hash_specific.h @@ -0,0 +1,81 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_LOCK_FREE_HASH_SPECIFIC_H__ +#define __AURORA_LOCK_FREE_HASH_SPECIFIC_H__ + +#include +#include + +#include "tools/lock_free_hash.h" + +namespace RaftCore::DataStructure { + +using ::RaftCore::DataStructure::HashNode; +using ::RaftCore::DataStructure::LockFreeHash; +using ::RaftCore::DataStructure::LockableNode; + +//Partial specification. 
+template +class HashNodeAtomic final : public HashNode, public LockableNode { + +public: + + //using SpecifiedNode = HashNode>; + + HashNodeAtomic(const std::shared_ptr &key,const std::shared_ptr &val) noexcept; + + HashNodeAtomic* GetNext() const noexcept; + + void Update(R* val) noexcept; + + virtual void LockValue() noexcept override; + + virtual void UnLockValue() noexcept override; + +private: + + HashNodeAtomic(const HashNodeAtomic&) = delete; + + HashNodeAtomic& operator=(const HashNodeAtomic&) = delete; +}; + +template +class LockFreeHashAtomic final : public LockFreeHash { + +public: + + LockFreeHashAtomic(uint32_t slot_num=0)noexcept; + + bool Upsert(const T *key, R* p_avl) noexcept; + +private: + + LockFreeHashAtomic(const LockFreeHashAtomic&) = delete; + + LockFreeHashAtomic& operator=(const LockFreeHashAtomic&) = delete; + +}; + +} //end namespace + +#include "tools/lock_free_hash_specific.cc" + +#endif diff --git a/src/tools/lock_free_priority_queue.cc b/src/tools/lock_free_priority_queue.cc new file mode 100644 index 0000000..0d87930 --- /dev/null +++ b/src/tools/lock_free_priority_queue.cc @@ -0,0 +1,159 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "common/comm_defs.h" +#include "common/error_code.h" +#include "config/config.h" +#include "tools/lock_free_priority_queue.h" + +namespace RaftCore::DataStructure { + +LockFreePriotityQueue::Task::Task(TaskType x,LockFreeQueueBase *y)noexcept{ + this->m_task_type = x; + this->m_pc_queue.reset(y); +} + +LockFreePriotityQueue::Task::Task(const Task& one) noexcept{ + this->m_task_type = one.m_task_type; + /*To avoid compile errors under darwin clang,parameter of the copy-constructor must be + const-qualified. */ + Task &_real_one = const_cast(one); + this->m_pc_queue.swap(_real_one.m_pc_queue); +} + +void LockFreePriotityQueue::Task::operator=(Task& one) noexcept{ + this->m_task_type = one.m_task_type; + this->m_pc_queue.swap(one.m_pc_queue); +} + +LockFreePriotityQueue::Task::~Task() { + m_pc_queue.reset(); +} + +bool LockFreePriotityQueue::Task::operator<(const Task& _other) { + return this->m_task_type < _other.m_task_type; +} + +LockFreePriotityQueue::LockFreePriotityQueue() noexcept{} + +LockFreePriotityQueue::~LockFreePriotityQueue() noexcept{} + +void LockFreePriotityQueue::Initialize(int consumer_threads_num) noexcept { + this->m_consumer_thread_num = consumer_threads_num; + this->m_stop = false; + this->m_running_thread_num.store(0); +} + +void LockFreePriotityQueue::UnInitialize() noexcept { + this->Stop(); + this->m_task_queue.clear(); +} + +void LockFreePriotityQueue::AddTask(TaskType _task_type, LockFreeQueueBase* _queue) noexcept { + this->m_task_queue.emplace(std::piecewise_construct, + std::forward_as_tuple((uint32_t)_task_type), + std::forward_as_tuple(_task_type, _queue)); + //auto _iter = this->m_task_queue.begin(); +} + +int LockFreePriotityQueue::Push(TaskType _task_type,void* _shp_element) noexcept { + uint32_t _task_type_uint = uint32_t(_task_type); + auto _iter = this->m_task_queue.find(_task_type_uint); + CHECK(_iter != this->m_task_queue.cend()) << ",task type:" << _task_type_uint; + int _ret = 
_iter->second.m_pc_queue->Push(_shp_element); + if (_ret == QUEUE_SUCC) + this->m_cv.notify_one(); //It's not mandatory to hold the corresponding lock. + + return _ret; +} + +void LockFreePriotityQueue::Launch() noexcept { + for (int i = 0; i < this->m_consumer_thread_num; ++i) { + std::thread* _p_thread = new std::thread(&LockFreePriotityQueue::ThreadEntrance,this); + LOG(INFO) << "MCMP queue background thread :" << _p_thread->get_id() << " started"; + _p_thread->detach(); + } +} + +void LockFreePriotityQueue::Stop() noexcept { + this->m_stop = true; + while (this->m_running_thread_num.load() != 0) + std::this_thread::sleep_for(std::chrono::microseconds(::RaftCore::Config::FLAGS_thread_stop_waiting_us)); +} + +uint32_t LockFreePriotityQueue::GetSize() const noexcept { + uint32_t _sum = 0; + for (const auto &_item : this->m_task_queue) + _sum += _item.second.m_pc_queue->GetSize(); + + return _sum; +} + +void LockFreePriotityQueue::ThreadEntrance() noexcept { + + CHECK(this->m_task_queue.size() > 0); + + this->m_running_thread_num.fetch_add(1); + + auto _wait_us = std::chrono::milliseconds(::RaftCore::Config::FLAGS_lockfree_queue_consumer_wait_ms); + + auto _cond_data_arrived = [&]()->bool{ + for (auto _it = this->m_task_queue.cbegin(); _it != this->m_task_queue.cend(); _it++) + if (!_it->second.m_pc_queue->Empty()) + return true; + + return false; + }; + + std::unique_lock _unique_wrapper(this->m_cv_mutex, std::defer_lock); + + while (true) { + //To detect somewhere else want the consuming threads to end. + if (this->m_stop) + break; + + /*Trade-off: there is a small windows during which we would lose messages, in that case, + we'll wait until timeout reach. */ + _unique_wrapper.lock(); + bool _job_comes = this->m_cv.wait_for(_unique_wrapper, _wait_us, _cond_data_arrived); + _unique_wrapper.unlock(); + + if (!_job_comes) + continue; + + //Start from begin. + auto _iter = this->m_task_queue.begin(); + + //Drain all queues. 
+ while(true){ + if (_iter->second.m_pc_queue->PopConsume() == QUEUE_SUCC) + continue; + + // Queue is empty or PopConsume fail. + _iter++; + + //If there are no more tasks, quit current loop. + if (_iter == this->m_task_queue.end()) + break; + } + } + + this->m_running_thread_num.fetch_sub(1); +} + +} diff --git a/src/tools/lock_free_priority_queue.h b/src/tools/lock_free_priority_queue.h new file mode 100644 index 0000000..d666d14 --- /dev/null +++ b/src/tools/lock_free_priority_queue.h @@ -0,0 +1,115 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_LOCK_FREE_PRIORITY_QUEUE_H__ +#define __AURORA_LOCK_FREE_PRIORITY_QUEUE_H__ + +#include +#include +#include +#include +#include + +#include "tools/lock_free_queue.h" + +namespace RaftCore::DataStructure { + +class LockFreePriotityQueue final{ + +public: + + enum class TaskType { + /*The sequence of declarations also defines the priority from highest to lowest.*/ + CLIENT_REACTING = 0, //Enum value also indicated the index in the task array + RESYNC_DATA, + RESYNC_LOG, + }; + + struct Task { + + Task(TaskType x, LockFreeQueueBase *y) noexcept; + + Task(const Task& one) noexcept; + + void operator=(Task& one) noexcept; + + virtual ~Task(); + + bool operator<(const Task& _other); + + TaskType m_task_type; + + std::unique_ptr m_pc_queue; + }; + +public: + + LockFreePriotityQueue() noexcept; + + virtual ~LockFreePriotityQueue() noexcept; + + void Initialize(int consumer_threads_num) noexcept; + + void UnInitialize() noexcept; + + /*Note: 1.AddTask is not thread safe, only invoke this method during server initialization. + 2.Order of calling AddTask should be the same with the order of _task_type parameter defined + in the 'TaskType'.This constrain is not reasonable and should be optimized off in the future. */ + void AddTask(TaskType _task_type, LockFreeQueueBase* _queue) noexcept; + + /*_shp_element: pointer of std::shared_ptr<> pointing to the element to be inserted. + The shared_ptr object's ownership is guaranteed to be increased. 
*/ + int Push(TaskType _task_type,void* _shp_element) noexcept; + + void Launch() noexcept; + + uint32_t GetSize() const noexcept; + +private: + + void Stop() noexcept; + + void ThreadEntrance() noexcept; + +private: + + std::condition_variable m_cv; + + std::mutex m_cv_mutex; + + int m_consumer_thread_num=0; + + std::atomic m_running_thread_num; + + volatile bool m_stop = false; + + std::map m_task_queue; + +private: + + LockFreePriotityQueue& operator=(const LockFreePriotityQueue&) = delete; + + LockFreePriotityQueue(const LockFreePriotityQueue&) = delete; + +}; + +} //end namespace + +#endif diff --git a/src/tools/lock_free_queue.cc b/src/tools/lock_free_queue.cc new file mode 100644 index 0000000..fed8a40 --- /dev/null +++ b/src/tools/lock_free_queue.cc @@ -0,0 +1,203 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "common/error_code.h" +#include "tools/utilities.h" +#include "tools/lock_free_queue.h" + +namespace RaftCore::DataStructure { + +template +const char* QueueNode::m_status_macro_names[] = {"SLOT_EMPTY","SLOT_PRODUCING","SLOT_PRODUCED","SLOT_CONSUMING"}; + +template +QueueNode::QueueNode() noexcept { + this->m_state.store(SlotState::SLOT_EMPTY); +} + +template +QueueNode::~QueueNode() noexcept {} + +template +LockFreeQueue::LockFreeQueue() noexcept{} + +template +void LockFreeQueue::Initilize(TypeCallBackFunc fn_cb,int queue_size) noexcept{ + + this->m_element_size = ::RaftCore::Tools::RoundUp(queue_size); + this->m_element_mask = ::RaftCore::Tools::GetMask(this->m_element_size); + + this->m_data = new QueueNode[this->m_element_size]; + + this->m_fn_cb = fn_cb; + + this->m_head.store(0); + this->m_tail.store(0); +} + +template +LockFreeQueue::~LockFreeQueue() noexcept{ + delete []this->m_data; +} + +template +uint32_t LockFreeQueue::GetSize() const noexcept { + uint32_t _cur_head = this->m_head.load(); + uint32_t _cur_tail = this->m_tail.load(); + + uint32_t _size = 0; + while (_cur_tail != _cur_head) { + _cur_tail = (_cur_tail + 1) & this->m_element_mask; + _size++; + } + + return _size; +} + +template +uint32_t LockFreeQueue::GetCapacity() const noexcept { + return this->m_element_size; +} + +template +bool LockFreeQueue::Empty() const noexcept { + return this->m_head.load() == this->m_tail.load(); +} + +template +int LockFreeQueue::PopConsume() noexcept { + + std::shared_ptr _p_item; + int n_rst = this->Pop(_p_item); + if (n_rst!=QUEUE_SUCC) { + if (n_rst!=QUEUE_EMPTY) + LOG(ERROR) << "Consumer : Pop failed,returned Val:" << n_rst; + return n_rst; + } + + if (!this->m_fn_cb(_p_item)) + LOG(ERROR) << "Consumer : Process entry failed"; + + return QUEUE_SUCC; +} + +template +int LockFreeQueue::Pop(std::shared_ptr &ptr_element) noexcept { + + uint32_t _cur_tail = this->m_tail.load(); + + //Queue empty + if (_cur_tail == this->m_head.load()) + return 
QUEUE_EMPTY; + + uint32_t next = (_cur_tail + 1) & this->m_element_mask; + + //Get the position where to consume. + /* Note:compare_exchange_weak are allowed to fail spuriously, which is, act as if *this != expected even if they are equal. + Meaning when compare_exchange_weak return false: + 1> *this != expected and they are actually not equal, no problems. + 2> *this != expected but they are actually equal: + 1) _cur_tail will be replaced with _cur_tail itself, it doesn't change. + 2) next will be re-calculated , it also doesn't change. + In a word,nothing wrong would happened under spuriously fail. */ + while (!this->m_tail.compare_exchange_weak(_cur_tail, next)) { + //Threads can go over the produced range, but it is not an error. + if (_cur_tail == this->m_head.load()) { + VLOG(89) << "consuming,found slot is not produced,at position:" << _cur_tail << ",probably due to empty" ; + return QUEUE_EMPTY; + } + next = (_cur_tail + 1) & this->m_element_mask; + } + + //Only one thread are allowed to operate on the element at index 'next' + SlotState slot_state = SlotState::SLOT_PRODUCED; + while (!this->m_data[next].m_state.compare_exchange_weak(slot_state, SlotState::SLOT_CONSUMING)) { + + bool _try_again = slot_state == SlotState::SLOT_PRODUCED || //CAS spurious fail. + slot_state == SlotState::SLOT_EMPTY || //Producer is producing ,try again. + slot_state == SlotState::SLOT_PRODUCING; //Producer is producing ,try again. 
+ + if (_try_again) { + slot_state = SlotState::SLOT_PRODUCED; + continue; + } + + CHECK(false) << "cannot update state from produced to consuming at position:" << next << ",detected state:" + << QueueNode::MacroToString(slot_state) << ",something is terribly wrong" ; + } + + //Consuming + ptr_element = this->m_data[next].m_val; + this->m_data[next].m_val.reset(); //Release the ownership + + //Update slot state + slot_state = SlotState::SLOT_CONSUMING; + CHECK(this->m_data[next].m_state.compare_exchange_strong(slot_state,SlotState::SLOT_EMPTY)) << "cannot update state from consuming to empty at position:" + << next << ",detected state:" << QueueNode::MacroToString(slot_state) << ",something is terribly wrong" ; + + return QUEUE_SUCC; +} + +template +int LockFreeQueue::Push(void* ptr_shp_element) noexcept { + uint32_t _cur_head = this->m_head.load(); + + uint32_t next = (_cur_head + 1) & this->m_element_mask; + + //Check the validity of position 'next' + if (next == this->m_tail.load()) + return QUEUE_FULL; + + //Get the position where to produce. + /*Note : spuriously fail of compare_exchange_weak is acceptable, since when it happened,_cur_head would remain the same + as what is is before calling this function, and will go to next round of execution.As explained above.*/ + while (!this->m_head.compare_exchange_weak(_cur_head, next)) { + next = (_cur_head + 1) & this->m_element_mask; + if (next == this->m_tail.load()) { + LOG(WARNING) << "producing,found slot is not empty,at position:" << next << ",probably due to full" ; + return QUEUE_FULL; + } + } + + //Which is guaranteed here is that only one thread will be allowed to operate on the element at index 'next'. + SlotState slot_state = SlotState::SLOT_EMPTY; + while(!this->m_data[next].m_state.compare_exchange_weak(slot_state, SlotState::SLOT_PRODUCING)) { + + /* + slot_state == SlotState::SLOT_EMPTY || //CAS spurious fail. + slot_state == SlotState::SLOT_PRODUCED || //Consumer is consuming this node,try again. 
+ slot_state == SlotState::SLOT_CONSUMING || //Consumer is consuming this node,try again. + slot_state == SlotState::SLOT_PRODUCING ; //An overlapping producing occurred. + */ + + slot_state = SlotState::SLOT_EMPTY; + continue; + } + + //Producing + this->m_data[next].m_val = *((std::shared_ptr*)ptr_shp_element); + + //Update slot state + slot_state = SlotState::SLOT_PRODUCING; + CHECK(this->m_data[next].m_state.compare_exchange_strong(slot_state, SlotState::SLOT_PRODUCED)) << "cannot update state from producing to produced at position:" + << next << ",detected state:" << QueueNode::MacroToString(slot_state) << ",something is terribly wrong" ; + + return QUEUE_SUCC; +} + +} diff --git a/src/tools/lock_free_queue.h b/src/tools/lock_free_queue.h new file mode 100644 index 0000000..c90ea03 --- /dev/null +++ b/src/tools/lock_free_queue.h @@ -0,0 +1,144 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_TRIVIAL_LOCK_QUEUE_H__ +#define __AURORA_TRIVIAL_LOCK_QUEUE_H__ + +#include +#include +#include + +namespace RaftCore::DataStructure { + +enum class SlotState { + /*--------------Node State--------------*/ + SLOT_EMPTY, + SLOT_PRODUCING, + SLOT_PRODUCED, + SLOT_CONSUMING +}; + + +template +class QueueNode final{ + +public: + + QueueNode() noexcept; + + virtual ~QueueNode() noexcept; + + std::shared_ptr m_val; + + std::atomic m_state; + + inline static const char* MacroToString(SlotState enum_val) { + return m_status_macro_names[int(enum_val)]; + } + +private: + + static const char* m_status_macro_names[]; + +}; + +/*Note : LockFreeQueueBase is a wrapper aimed at eliminating specifying the template parameters needed by the invokers +when they call LockFreeQueue methods. */ +class LockFreeQueueBase { + +public: + + LockFreeQueueBase(){} + + virtual ~LockFreeQueueBase(){} + + virtual int Push(void* ptr_shp_element) noexcept = 0; + + virtual int PopConsume() noexcept = 0; + + virtual uint32_t GetSize() const noexcept = 0; + + virtual uint32_t GetCapacity() const noexcept = 0; + + virtual bool Empty() const noexcept = 0; +}; + + +//The following is a ring-buf supported multi-thread producing and multi-thread consuming +template +class LockFreeQueue final : public LockFreeQueueBase { + +public: + + typedef std::function &ptr_element)> TypeCallBackFunc; + + LockFreeQueue() noexcept; + + void Initilize(TypeCallBackFunc fn_cb,int queue_size) noexcept; + + virtual ~LockFreeQueue() noexcept; + + virtual int Push(void* ptr_shp_element) noexcept override; + + virtual int PopConsume() noexcept override; + + //Get a snapshot size. + virtual uint32_t GetSize() const noexcept override; + + //For gtest usage. 
+ virtual uint32_t GetCapacity() const noexcept override; + + virtual bool Empty() const noexcept override; + +private: + + int Pop(std::shared_ptr &ptr_element) noexcept; + +private: + + //Position where holds the latest produced element. + std::atomic m_head; + + //Position which just before the earliest produced element.If empty (m_head == m_tail). + std::atomic m_tail; + + /*Note :In the current design, there will always be at least one slot empty , to simplify + the implementation. */ + QueueNode *m_data = nullptr; //Data ring buffer. + + TypeCallBackFunc m_fn_cb; + + uint32_t m_element_size = 0; + + uint32_t m_element_mask = 0; + +private: + + LockFreeQueue& operator=(const LockFreeQueue&) = delete; + + LockFreeQueue(const LockFreeQueue&) = delete; + +}; + +} //end namespace + +#include "tools/lock_free_queue.cc" + +#endif diff --git a/src/tools/lock_free_unordered_single_list.cc b/src/tools/lock_free_unordered_single_list.cc new file mode 100644 index 0000000..66802b3 --- /dev/null +++ b/src/tools/lock_free_unordered_single_list.cc @@ -0,0 +1,130 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "glog/logging.h" +#include "tools/lock_free_unordered_single_list.h" + +namespace RaftCore::DataStructure { + +template +template +UnorderedSingleListNode::UnorderedSingleListNode(Args&&... 
args)noexcept { + this->m_data = new T(std::forward(args)...); + this->m_next.store(nullptr); +} + +template +UnorderedSingleListNode::~UnorderedSingleListNode()noexcept { + if (this->m_data != nullptr) { + delete this->m_data; + this->m_data = nullptr; + } +} + +template +UnorderedSingleListNode::UnorderedSingleListNode(T* p_src)noexcept { + this->m_data = p_src; + this->m_next.store(nullptr); +} + +template +LockFreeUnorderedSingleList::LockFreeUnorderedSingleList() noexcept { + this->m_head.store(nullptr); +} + +template +LockFreeUnorderedSingleList::~LockFreeUnorderedSingleList() noexcept{} + +template +void LockFreeUnorderedSingleList::SetDeleter(std::function deleter)noexcept { + this->m_deleter = deleter; +} + +template +void LockFreeUnorderedSingleList::PushFront(T* src) noexcept { + + + auto *_p_cur_head = this->m_head.load(); + auto * _p_new_node = new UnorderedSingleListNode(src); + _p_new_node->m_next = _p_cur_head; + while (!this->m_head.compare_exchange_weak(_p_cur_head, _p_new_node)) + _p_new_node->m_next = _p_cur_head; +} + +template +void LockFreeUnorderedSingleList::PurgeSingleList(uint32_t retain_num) noexcept { + + std::size_t _cur_num = 1; + auto *_p_start_point = this->m_head.load(); + + while (_p_start_point != nullptr) { + _p_start_point = _p_start_point->m_next.load(); + _cur_num++; + if (_cur_num >= retain_num) + break; + } + + if (_p_start_point == nullptr) + return; + + auto *_p_cur = _p_start_point->m_next.load(); + _p_start_point->m_next.store(nullptr); + + std::size_t _released_num = 0; + while (_p_cur != nullptr) { + auto *_p_next = _p_cur->m_next.load(); + + //Use the customizable deleter. 
+ this->m_deleter(_p_cur->m_data); + _p_cur->m_data = nullptr; + delete _p_cur; + + _p_cur = _p_next; + _released_num++; + } + + if (_released_num > 0) + VLOG(89) << "released " << _released_num << " elements in singleList's garbage list."; +} + +#ifdef _UNORDERED_SINGLE_LIST_TEST_ +template +uint32_t LockFreeUnorderedSingleList::Size() noexcept { + uint32_t _size = 0; + auto *_p_cur = this->m_head.load(); + while (_p_cur != nullptr) { + _p_cur = _p_cur->m_next; + _size++; + } + + return _size; +} + +template +void LockFreeUnorderedSingleList::Iterate(std::function func) noexcept { + auto *_p_cur = this->m_head.load(); + while (_p_cur != nullptr) { + auto *_p_next = _p_cur->m_next.load(); + func(_p_cur->m_data); + _p_cur = _p_next; + } +} + +#endif + +} diff --git a/src/tools/lock_free_unordered_single_list.h b/src/tools/lock_free_unordered_single_list.h new file mode 100644 index 0000000..4bdaa5c --- /dev/null +++ b/src/tools/lock_free_unordered_single_list.h @@ -0,0 +1,91 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_LOCK_FREE_UNORDERED_SINGLE_LIST_H__ +#define __AURORA_LOCK_FREE_UNORDERED_SINGLE_LIST_H__ + +#include +#include + +#include "common/macro_manager.h" + +namespace RaftCore::DataStructure { + +template +class UnorderedSingleListNode final{ + +public: + + template + UnorderedSingleListNode(Args&&... 
args) noexcept; + + virtual ~UnorderedSingleListNode()noexcept; + + explicit UnorderedSingleListNode(T* p_src)noexcept; + + T* m_data; + std::atomic*> m_next; +}; + + +template +using AtomicPtrSingleListNode = std::atomic*>; + +template +class LockFreeUnorderedSingleList final{ + +public: + + LockFreeUnorderedSingleList() noexcept; + + virtual ~LockFreeUnorderedSingleList() noexcept; + + void SetDeleter(std::function deleter)noexcept; + + //Will take the ownership of 'src'. + void PushFront(T* src) noexcept; + + void PurgeSingleList(uint32_t retain_num) noexcept; + +#ifdef _UNORDERED_SINGLE_LIST_TEST_ + uint32_t Size() noexcept; + + void Iterate(std::function func) noexcept; +#endif + +private: + + std::atomic*> m_head; + + std::function m_deleter = [](T* data) { delete data; }; + +private: + + LockFreeUnorderedSingleList(const LockFreeUnorderedSingleList&) = delete; + + LockFreeUnorderedSingleList& operator=(const LockFreeUnorderedSingleList&) = delete; + +}; + +} //end namespace + +#include "tools/lock_free_unordered_single_list.cc" + +#endif diff --git a/src/tools/timer.cc b/src/tools/timer.cc new file mode 100644 index 0000000..4ed6f5b --- /dev/null +++ b/src/tools/timer.cc @@ -0,0 +1,126 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include +#include + +#include "common/comm_defs.h" +#include "config/config.h" +#include "tools/timer.h" + +namespace RaftCore::Timer { + +std::priority_queue,GlobalTimer::TaskCmp> GlobalTimer::m_heap; + +std::shared_timed_mutex GlobalTimer::m_share_timed_mutex; + +volatile GlobalTimer::ETimerThreadState GlobalTimer::m_thread_state = GlobalTimer::ETimerThreadState::INITIALIZED; + +using ::RaftCore::Common::WriteLock; +using ::RaftCore::Common::ReadLock; + +bool GlobalTimer::TaskCmp::operator()(const Task &x, const Task &y) { + return x.m_next_run > y.m_next_run; +} + +void GlobalTimer::Task::operator=(const GlobalTimer::Task &one) { + this->m_next_run = one.m_next_run; + this->m_interval_ms = one.m_interval_ms; + this->m_processor = one.m_processor; +} + +void GlobalTimer::Initialize() noexcept { + m_thread_state = ETimerThreadState::RUNNING; + std::thread _t([&]() { ThreadEntrance(); }); + _t.detach(); +} + +void GlobalTimer::UnInitialize() noexcept { + Stop(); + WriteLock _w_lock(m_share_timed_mutex); + while (!m_heap.empty()) + m_heap.pop(); +} + +void GlobalTimer::AddTask(uint32_t interval_ms, std::function processor) noexcept{ + + //interval_ms==0 mean one shot, should be supported. 
+ if (interval_ms > 0) + CHECK(interval_ms >= ::RaftCore::Config::FLAGS_timer_precision_ms) << "timer precision not enough :" + << ::RaftCore::Config::FLAGS_timer_precision_ms << "|" << interval_ms; + + uint64_t _now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + + WriteLock _w_lock(m_share_timed_mutex); + m_heap.emplace(_now + interval_ms,interval_ms, processor); +} + +void GlobalTimer::ThreadEntrance() noexcept { + + LOG(INFO) << "Global timer thread started."; + + while (true) { + std::this_thread::sleep_for(std::chrono::milliseconds(::RaftCore::Config::FLAGS_timer_precision_ms)); + if (m_thread_state == ETimerThreadState::STOPPING) + break; + + uint64_t _now = std::chrono::duration_cast(std::chrono::system_clock::now().time_since_epoch()).count(); + + WriteLock _w_lock(m_share_timed_mutex); + while (!m_heap.empty()) { + + /*Avoiding one single task takes too much time thus timing out all the subsequent tasks. + Falling into a deal loop. */ + if (m_thread_state == ETimerThreadState::STOPPING) + break; + + const auto &_task = m_heap.top(); + + if (_task.m_next_run > _now) + break; + + //Run the task.Returned value indicating whether that task wants to be executed next time. + if (_task.m_processor()) { + //Push the next round of execution of task. 
+ auto _new = _task; + _new.m_next_run = _now + _task.m_interval_ms; + + //emplace() will causes issues under MSVC 2015.details see : https://gist.github.com/ppLorins/09de033a4b0748d883c8bf8fe12b7703 + //m_heap.emplace(_now + _task.m_interval_ms, _task.m_interval_ms, _task.m_processor); + m_heap.push(_new); + } + + m_heap.pop(); + } + } + + m_thread_state = ETimerThreadState::STOPPED; + + LOG(INFO) << "Global timer thread exited."; +} + +void GlobalTimer::Stop() noexcept { + m_thread_state = ETimerThreadState::STOPPING; + while (m_thread_state != ETimerThreadState::STOPPED) + std::this_thread::sleep_for(std::chrono::microseconds(::RaftCore::Config::FLAGS_thread_stop_waiting_us)); +} + +} + + + diff --git a/src/tools/timer.h b/src/tools/timer.h new file mode 100644 index 0000000..1a88ba2 --- /dev/null +++ b/src/tools/timer.h @@ -0,0 +1,88 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#ifndef __AURORA_TIMER_H__ +#define __AURORA_TIMER_H__ + +#include +#include +#include +#include + +namespace RaftCore::Timer { + +class GlobalTimer { + + public: + + static void Initialize() noexcept; + + static void UnInitialize() noexcept; + + static void AddTask(uint32_t interval_ms,std::function processor) noexcept; + + private: + + static void Stop() noexcept; + + static void ThreadEntrance() noexcept; + + private: + + struct Task { + + Task(int x, std::function y) : m_interval_ms(x),m_processor(y) {}; + + Task(uint64_t a, int x, std::function y) : m_next_run(a), m_interval_ms(x),m_processor(y) {}; + + Task(uint64_t a, int x) : m_next_run(a), m_interval_ms(x) {}; + + void operator=(const Task &one); + + uint64_t m_next_run; + int m_interval_ms; + std::function m_processor; + }; + + struct TaskCmp { + bool operator()(const Task &x,const Task &y); + }; + + enum class ETimerThreadState {INITIALIZED,RUNNING,STOPPING,STOPPED}; + + static std::priority_queue,TaskCmp> m_heap; + + static std::shared_timed_mutex m_share_timed_mutex; + + static volatile ETimerThreadState m_thread_state; + + private: + + GlobalTimer() = delete; + + virtual ~GlobalTimer() noexcept = delete; + + GlobalTimer(const GlobalTimer&) = delete; + + GlobalTimer& operator=(const GlobalTimer&) = delete; +}; + +} + +#endif + diff --git a/src/tools/trivial_lock_double_list.cc b/src/tools/trivial_lock_double_list.cc new file mode 100644 index 0000000..4228364 --- /dev/null +++ b/src/tools/trivial_lock_double_list.cc @@ -0,0 +1,663 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. 
+ +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include "tools/trivial_lock_double_list.h" + +namespace RaftCore::DataStructure { + +template +DoubleListNode::DoubleListNode(const std::shared_ptr &p_val) noexcept{ + + static_assert(std::is_base_of,T>::value,"template parameter of TrivialLockDoubleList invalid"); + + this->m_atomic_pre.store(nullptr); + this->m_atomic_next.store(nullptr); + + this->m_val = p_val; +} + +template +DoubleListNode::~DoubleListNode() noexcept {} + +template +bool DoubleListNode::operator<(const DoubleListNode& other) const noexcept { + return *this->m_val < *other.m_val; +} + +template +bool DoubleListNode::operator>(const DoubleListNode& other) const noexcept { + return *this->m_val > *other.m_val; +} + +template +bool DoubleListNode::operator==(const DoubleListNode& other) const noexcept { + return *this->m_val == *other.m_val; +} + +/* Walks the chain that starts at phead through m_atomic_next and calls unary on every node. */ +template +void DoubleListNode::Apply(DoubleListNode* phead, std::function*)> unary) noexcept { + auto _p_cur = phead; + while (_p_cur != nullptr) { + //Caution: unary may modify _p_cur's next pointer, so the next node is read before unary runs. + auto _p_next = _p_cur->m_atomic_next.load(); + unary(_p_cur); + _p_cur = _p_next; + } +} + +/* Builds an empty list that consists of just the two sentinel nodes created from p_min and p_max. */ +template +TrivialLockDoubleList::TrivialLockDoubleList(const std::shared_ptr &p_min ,const std::shared_ptr &p_max) noexcept{ + + static_assert(std::is_base_of,T>::value,"template parameter of TrivialLockDoubleList invalid"); + + CHECK(p_min && p_max) << "TrivialLockDoubleList init fail"; + + /* There are at least two nodes in the double linked list: + 1> the HEAD node with the minimum of T + 2> the TAIL node with the maximum of T + + This is for conveniently inserting. 
*/ + + //Init dummy head node + this->m_head = new DoubleListNode(p_min); + + //Init dummy tail node + this->m_tail = new DoubleListNode(p_max); + + //Join the two together,no need to lock + this->m_head->m_atomic_next.store(this->m_tail); + this->m_tail->m_atomic_pre.store(this->m_head); +} + +template +TrivialLockDoubleList::~TrivialLockDoubleList() noexcept{ + this->Clear(); + + delete this->m_head; + delete this->m_tail; +} + +/* Frees every node between the two sentinels and re-links the sentinels to each other. */ +template +void TrivialLockDoubleList::Clear() noexcept { + + //Reserve head & tail node. + auto _p_cur = this->m_head->m_atomic_next.load(); + while (_p_cur != this->m_tail) { + auto tmp = _p_cur; + _p_cur = _p_cur->m_atomic_next.load(); + delete tmp; + } + + this->m_head->m_atomic_next.store(this->m_tail); + this->m_tail->m_atomic_pre.store(this->m_head); +} + +template +void TrivialLockDoubleList::Insert(const std::shared_ptr &p_one) noexcept { + DoubleListNode* new_node = new DoubleListNode(p_one); + this->Insert(new_node); +} + +template +void TrivialLockDoubleList::Insert(DoubleListNode* new_node) noexcept { + this->InsertTracker(new_node); +} + +/* Publishes new_node in the footprint table under the calling thread's id for the duration of the insert (WaitForListClean scans that table), and retries InsertRaw until it reports success. */ +template +void TrivialLockDoubleList::InsertTracker(DoubleListNode* new_node) noexcept { + + ThreadIDWrapper *_p_tracker = new ThreadIDWrapper(std::this_thread::get_id()); + + bool _ownership_taken = false; + if (this->m_p_insert_footprint->Upsert(_p_tracker, new_node)) + _ownership_taken = true; + + while (!this->InsertRaw(new_node)) + VLOG(89) << "-------redo InsertRaw!-----"; + + //The key for this thread already exists by now, so this Upsert is expected to return false (that is what the CHECK asserts). 
+ CHECK(!this->m_p_insert_footprint->Upsert(_p_tracker, nullptr)); + + if (!_ownership_taken) + delete _p_tracker; +} + +/* Tries once to link new_node in ascending order, scanning backward from the tail. Returns false when the scan runs off the end of a detached (cut) chain, in which case the caller has to retry; returns true otherwise. */ +template +bool TrivialLockDoubleList::InsertRaw(DoubleListNode* new_node) noexcept{ + + const auto &_p_one = new_node->m_val; + + //The pointer points to the node just after the current node being iterated + auto _p_next = this->m_tail; + + //The pointer to the current node being iterated, initially the node just before the tail + auto _p_cur = _p_next->m_atomic_pre.load(); + + /* Note : The above two pointers are not necessarily being adjacent all the time. + They may point to the same node in certain scenarios . */ + while (true) { + /*Note: Case where '_p_cur == nullptr' could happen: iterating reach the end of a cut head . + Since _p_cur already points to the cut list, it need to start all over again. */ + if (_p_cur == nullptr) + return false; + + //An equal, non-deleted value already exists: the list is left untouched (replacing the stored value is still a TODO). + /* NOTE(review): on this path new_node is neither linked nor freed here - confirm who releases it. */ + if (*_p_one == *_p_cur->m_val && !_p_cur->IsDeleted()) { + //TODO: delete then insert. + //_p_cur->m_val = _p_one; + return true; + } + + /*Note: + 1.Deleted elements will be treated like the normal(non-deleted) ones. Since maintaining order of + the list without considering the deleted elements is just equivalent to considering them,thinking + about inserting a new node after a deleted node with a value greater than it violating nothing on + the correctness , but the latter form will introduce huge complexity. + 2.For the case of inserting CAS fail and due to conflict with cutting head, the following judge will + get satisfied AS BEFORE. 
And will trigger _p_cur==nullptr eventually, so it's safe to do a recursive + insertion above. + */ + if ( *_p_one < *_p_cur->m_val ) { + //Both moving toward to the head direction + _p_next = _p_cur; + _p_cur = _p_cur->m_atomic_pre.load(); + continue; + } + + //For the deleted elements,insert the equivalent one just after it as above says. 
+ + //Always assume the new node should be inserted between _p_cur and _p_next at the moment + new_node->m_atomic_pre.store(_p_cur); + new_node->m_atomic_next.store(_p_next); + + //Start inserting.... + auto tmp_next = _p_next; // CAS against a copy: compare_exchange_strong overwrites its first argument on failure and _p_next must stay intact. + if (!_p_cur->m_atomic_next.compare_exchange_strong(tmp_next, new_node)) { + /*Collision happened. Other thread(s) have already modified the 'next' + pointer of '_p_cur',we need to redo the inserting process.There are two + scenarios where this could happen: + 1> other thread(s) are inserting new node. + 2> other thread(s) are cutting head. */ + + //Reset the conditions and start the inserting process from scratch all over again + VLOG(89) << "insert_CAS_fail," << _p_cur << " insert_next_changefrom " << _p_next << " to " << tmp_next; + _p_cur = _p_next; + continue; + } + + auto _p_tmp = _p_cur; + bool _rst = _p_next->m_atomic_pre.compare_exchange_strong(_p_tmp, new_node); + if(!_rst) + CHECK(false) << "TrivialLockDoubleList::Insert unexpected inserting status found," + << "cannot CAS node's previous pointer,something terribly wrong happened." + << " insert_CAS_fail ," << _p_next << " insert_pre_changefrom " + << _p_cur << " to " << _p_tmp << ", head:" << this->m_head; + + //this->GetSize(); + + //Here the new node should be inserted properly,stop iterating + break; + } + + return true; +} + +/* Logically deletes the first node, scanning backward from the tail, whose value equals *p_one (SetDeleted only, the node stays linked). Returns whether such a node was found. */ +template +bool TrivialLockDoubleList::Delete(const std::shared_ptr &p_one) noexcept { + + //NOTE(review): despite the earlier 'stack memory' wording this probe node is heap-allocated; it is freed below only when the footprint table did not take ownership. 
+ auto *_p_node = new DoubleListNode(p_one); + + ThreadIDWrapper *_p_tracker = new ThreadIDWrapper(std::this_thread::get_id()); + + bool _ownership_taken = false; + if (this->m_p_insert_footprint->Upsert(_p_tracker, _p_node)) + _ownership_taken = true; + + //Default to that the element to be deleted not found. + bool _ret_val = false; + + auto *_p_cur = this->m_tail->m_atomic_pre.load(); + + //Walk backward until the head of the (possibly cut off) list is reached. 
+ while (_p_cur != nullptr && _p_cur != this->m_head) { + if (*p_one != *_p_cur->m_val) { + _p_cur = _p_cur->m_atomic_pre.load(); + continue; + } + _p_cur->SetDeleted(); + _ret_val = true; + break; + } + + CHECK(!this->m_p_insert_footprint->Upsert(_p_tracker, nullptr)); + + if (!_ownership_taken) { + delete _p_tracker; + delete _p_node; + } + + return _ret_val; +} + +/* Marks every node between the two sentinels as logically deleted. */ +template +void TrivialLockDoubleList::DeleteAll() noexcept { + //Reserve head & tail node. + auto _p_cur = this->m_head->m_atomic_next.load(); + while (_p_cur != this->m_tail) { + _p_cur->SetDeleted(); + _p_cur = _p_cur->m_atomic_next.load(); + } +} + +//template +//bool TrivialLockDoubleList::MoveForward(DoubleListNode* &p_pre,DoubleListNode* &p_next) noexcept { +// +// auto _reach_tail = [&]() { +// return p_pre == this->m_tail || p_next == this->m_tail; +// }; +// +// if (_reach_tail()) +// return false; +// +// do { +// /*If reach here , no elements could be inserted between p_pre and p_next since they +// are adjacent. So the following move forward operations are safe . 
*/ +// p_pre = p_next; +// p_next = p_next->m_atomic_next.load(); +// if (_reach_tail()) +// return false; +// } while (p_pre->IsDeleted() && p_next->IsDeleted()); +// +// return true; +//} + +/* Advances the pair: p_pre takes p_next's place and p_next jumps to the next non-deleted node. Returns false once p_next lands on the tail sentinel. */ +template +bool TrivialLockDoubleList::MoveForward(DoubleListNode* &p_pre,DoubleListNode* &p_next) noexcept { + + p_pre = p_next; + p_next = this->FindNextNonDelete(p_next); + if (p_next == this->m_tail) + return false; + + return true; +} + +/* Starting at p_cur, steps forward over the following nodes that are logically deleted and compare equal to p_cur; returns the last node of that run (p_cur itself when there is none). */ +template +DoubleListNode* TrivialLockDoubleList::ExpandForward(DoubleListNode* p_cur) noexcept { + if (p_cur == this->m_tail) + return p_cur; + + auto *_p_pre = p_cur; + auto *_p_x = _p_pre->m_atomic_next.load(); + + while (_p_x != this->m_tail) { + if (!_p_x->IsDeleted()) + break; + + if (_p_x->operator!=(*p_cur)) + break; + + _p_pre = _p_x; + _p_x = _p_pre->m_atomic_next.load(); + } + + return _p_pre; +} + +/* Mirror image of ExpandForward: walks toward the head over logically deleted nodes that compare equal to p_cur and returns the last node of that run. */ +template +DoubleListNode* TrivialLockDoubleList::ExpandBackward(DoubleListNode* p_cur) noexcept { + if (p_cur == this->m_head) + return p_cur; + + auto *_p_pre = p_cur; + auto *_p_x = _p_pre->m_atomic_pre.load(); + + while (_p_x != this->m_head) { + if (!_p_x->IsDeleted()) + break; + + if (_p_x->operator!=(*p_cur)) + break; + + _p_pre = _p_x; + _p_x = _p_pre->m_atomic_pre.load(); + } + + return _p_pre; +} + +/* Returns the first node after p_cur that is not logically deleted, or the tail sentinel when there is none. */ +template +DoubleListNode* TrivialLockDoubleList::FindNextNonDelete(DoubleListNode* p_cur) noexcept { + + if (p_cur == this->m_tail) + return this->m_tail; + + auto * _p_next_non_deleted = p_cur->m_atomic_next.load(); + while (_p_next_non_deleted != this->m_tail) { + if (!_p_next_non_deleted->IsDeleted()) + break; + _p_next_non_deleted = _p_next_non_deleted->m_atomic_next.load(); + } + + return _p_next_non_deleted; +} + +/* Unlinks and frees every logically deleted node of a detached chain; output_head is moved forward when the first node itself is removed. */ +template +void TrivialLockDoubleList::SiftOutDeleted(DoubleListNode* &output_head) noexcept { + auto _to_remove = output_head; + while (_to_remove != nullptr) { + bool _deleted = false; + if (_to_remove->IsDeleted()) { + auto _p_pre = _to_remove->m_atomic_pre.load(); + auto _p_next = 
_to_remove->m_atomic_next.load(); + + if (_p_pre) + _p_pre->m_atomic_next.store(_p_next); + else + output_head = _p_next; + + if(_p_next) + _p_next->m_atomic_pre.store(_p_pre); + + _deleted = true; + } + + auto tmp = _to_remove; + _to_remove = _to_remove->m_atomic_next.load(); + + if (_deleted) + delete tmp; + } +} + +/* Binary variant: detaches a prefix of the list and returns its first node, or nullptr when the list holds no non-deleted element. The prefix keeps growing while criteria(current, next) holds for consecutive non-deleted elements; logically deleted nodes are removed from the returned chain. */ +template +DoubleListNode* TrivialLockDoubleList::CutHead(std::function criteria) noexcept { + + /*Note: In the current design , there can be only one thread invoking this method.But it + is allowed to have several other threads doing Insert in the meantime. */ + std::unique_lock _mutex_lock(this->m_recursive_mutex); + + //VLOG(89) << "list_debug pos1"; + + auto *_p_cur = this->FindNextNonDelete(this->m_head); + if (_p_cur == this->m_tail) { + //VLOG(89) << "list_debug pos1.1"; + return nullptr; + } + + //VLOG(89) << "list_debug pos2"; + + //output_head always points to the first element regardless of deleted or non-deleted. + auto* _p_immediate_first = this->m_head->m_atomic_next.load(); + auto *output_head = _p_immediate_first; + + //The first element will always be cut off. + auto _p_next = this->FindNextNonDelete(_p_cur); + + while (true) { + + //VLOG(89) << "list_debug pos2.1"; + + if (criteria(*_p_cur->m_val, *_p_next->m_val)) { + //VLOG(89) << "list_debug pos2.2"; + if (this->MoveForward(_p_cur, _p_next)) { + //VLOG(89) << "list_debug pos2.3"; + continue; + } + } + + //VLOG(89) << "list_debug pos3"; + + //To get around the deleted elements, we need to expand '_p_cur' and '_p_next'. + _p_cur = this->ExpandForward(_p_cur); + _p_next = this->ExpandBackward(_p_next); + + //-----------Start cutting head-----------// + + /*This is the tricky part,need to consider simultaneously Inserting and CutHeading : If we set _p_cur->next + to nullptr successfully, no other threads could insert new node between _p_cur and _p_next.This is the critical + safety guarantee for other operations. 
*/ + auto _p_tmp = _p_next; + if (!_p_cur->m_atomic_next.compare_exchange_strong(_p_tmp, nullptr)) { + /*Strong CAS fail ,means that other thread(s) already made _p_next to point to + the newly inserted node.What we need to do is just redo the iterating from current node. + Also _p_next need to be updated to the newly inserted node,otherwise the criteria will + be evaluated to false forever. */ + VLOG(89) << "cuthead_CAS_fail," << _p_cur << " next_changefrom " << _p_next << " to " << _p_tmp; + _p_next = _p_tmp; + continue; + } + + //Cutting done,just break out. + break; + } + + /*Note: Once goes here, _p_cur is the last item of the cut out list and + _p_next is the first item of the remaining list. */ + + //Connect the head with the first non adjacent node. + auto _p_tmp = this->m_head->m_atomic_next.load(); + while (!this->m_head->m_atomic_next.compare_exchange_strong(_p_tmp,_p_next)) { + /*If reach here , mean new nodes already been inserted between m_head and the cutting head. + To avoid cut off non adjacent nodes,the cutting head process need starting allover again. */ + + //Recover _p_cur's next pointer,should never fail. + decltype(_p_next) _p_tmp = nullptr; + CHECK(_p_cur->m_atomic_next.compare_exchange_strong(_p_tmp, _p_next)); + VLOG(89) << "-------recursive cuthead occur!-----"; + /* The retry locks m_recursive_mutex again on this same thread. */ + return this->CutHead(criteria); + } + + //VLOG(89) << "list_debug pos4"; + + _p_tmp = _p_cur; + while (!_p_next->m_atomic_pre.compare_exchange_strong(_p_tmp, this->m_head)) { + /* '_p_next->m_atomic_pre' may still point to the old place due to an incomplete inserting process. + What we need to do is just waiting for it pointing to the updated place where is exactly _p_cur. 
*/ + LOG(WARNING) << "cutting head tail->pre CAS_fail " << _p_next << " previous change from " + << _p_cur << " to " << _p_tmp << ",head:" << this->m_head << ",tail:" << this->m_tail + << ",do CAS again."; + _p_tmp = _p_cur; + continue; + } + + //Detach first node + output_head->m_atomic_pre.store(nullptr); + + //VLOG(89) << "list_debug pos5 " << output_head; + + //Waiting for all threads that are iterating in the cut off list to be finished. + this->WaitForListClean(_p_cur); + + //VLOG(89) << "list_debug pos6 " << output_head; + + //auto *_tmp = output_head; + //while (_tmp != nullptr) { + // VLOG(89) << "list_debug pos6.1 " << _tmp->IsDeleted(); + // _tmp = _tmp->m_atomic_next.load(); + //} + + //Now the list is cut off with the deleted elements. Need to erase the deleted elements + this->SiftOutDeleted(output_head); + + //VLOG(89) << "list_debug pos7 " << output_head; + + return output_head; +} + +/* Unary variant: detaches the leading run of nodes whose value satisfies criteria and returns its first node, or nullptr when the list is empty or the very first node already fails criteria. */ +template +DoubleListNode* TrivialLockDoubleList::CutHead(std::function criteria) noexcept { + + /*Note: In the current design , there can be only one thread invoking this method.But it + is allowed to have several other threads doing Insert in the meantime. 
*/ + std::unique_lock _mutex_lock(this->m_recursive_mutex); + + //VLOG(89) << "debug double list enter"; + + auto _p_cur = this->m_head->m_atomic_next.load(); + if (_p_cur == this->m_tail) + return nullptr; + + auto _p_pre = this->m_head; + auto _p_start = _p_cur; + auto output_head = _p_cur; + + while (true) { + + if (criteria(*_p_cur->m_val)) { + _p_pre = _p_cur; + _p_cur = _p_cur->m_atomic_next.load(); + if (_p_cur != this->m_tail) + continue; + } + + //No nodes are available + if (_p_cur == _p_start) + return nullptr; + + //-----------Start cutting head-----------// + + /*This is the tricky part,need to consider simultaneously Inserting and CutHeading : If we set _p_pre->next + to nullptr successfully, no other threads could insert new node between _p_pre and _p_cur.This is the critical + safety guarantee for other operations. */ + auto _p_tmp = _p_cur; + if (!_p_pre->m_atomic_next.compare_exchange_strong(_p_tmp, nullptr)) { + /*Strong CAS fail ,means that other thread(s) already made _p_cur to point to + the newly inserted node.What we need to do is just redo the iterating from current node. + Also _p_cur need to be updated to the newly inserted node,otherwise the criteria will + be evaluated to false forever. */ + _p_cur = _p_pre; + continue; + } + + //Cutting done,just break out. + break; + } + + /*Note: Once goes here, _p_pre is the last item of the cut out list and + _p_cur is the first item of the remaining list. 
*/ + //Point the head sentinel at the first remaining node; on failure undo the cut and start over. + auto _p_tmp = _p_start; + while (!this->m_head->m_atomic_next.compare_exchange_strong(_p_tmp, _p_cur)) { + decltype(_p_cur) _p_tmp = nullptr; + CHECK(_p_pre->m_atomic_next.compare_exchange_strong(_p_tmp, _p_cur)); + VLOG(89) << "-------recursive cuthead occur!-----"; + return this->CutHead(criteria); + } + + _p_tmp = _p_pre; + while (!_p_cur->m_atomic_pre.compare_exchange_strong(_p_tmp, this->m_head)) { + LOG(WARNING) << "cutting head tail->pre CAS_fail " << _p_cur << " previous change from " + << _p_pre << " to " << _p_tmp << ",head:" << this->m_head << ",tail:" << this->m_tail + << ",do CAS again."; + _p_tmp = _p_pre; + continue; + } + + output_head->m_atomic_pre.store(nullptr); + + //Waiting for all threads that are iterating in the cut off list to be finished. + this->WaitForListClean(_p_pre); + + //Now the list is cut off with the deleted elements. Need to erase the deleted elements + this->SiftOutDeleted(output_head); + + //VLOG(89) << "debug double list leave"; + + return output_head; +} + +/* Cuts off the leading nodes whose value is <= val, by delegating to the unary CutHead. */ +template +DoubleListNode* TrivialLockDoubleList::CutHeadByValue(const T &val) noexcept { + auto judge_smaller_equal = [&](const T &one) -> bool{ + return one <= val; + }; + + return this->CutHead(judge_smaller_equal); +} + +/* Frees every node of a detached chain. */ +template +void TrivialLockDoubleList::ReleaseCutHead(DoubleListNode* output_head) noexcept { + auto _p_cur = output_head; + while (_p_cur != nullptr) { + auto _p_next = _p_cur->m_atomic_next.load(); + delete _p_cur; + _p_cur = _p_next; + } +} + +/* Calls accessor on the value of every node of a detached chain. */ +template +void TrivialLockDoubleList::IterateCutHead(std::function accessor, DoubleListNode* output_head) const noexcept { + auto _p_cur = output_head; + while (_p_cur != nullptr) { + auto _p_next = _p_cur->m_atomic_next.load(); + accessor(*_p_cur->m_val); + _p_cur = _p_next; + } +} + +/* Visits the non-deleted values from head to tail and stops as soon as accessor returns false. */ +template +void TrivialLockDoubleList::Iterate(std::function accessor) const noexcept { + + auto _cur = this->m_head->m_atomic_next.load(); + while (_cur) { + + if (_cur == this->m_tail) + break; + + 
auto _p_tmp = _cur; + _cur = _cur->m_atomic_next.load(); + + if (_p_tmp->IsDeleted()) + continue; + + if (!accessor(*_p_tmp->m_val)) + break; + } +} + +/* True when nothing is linked between the two sentinels (logically deleted nodes still count as content). */ +template +bool TrivialLockDoubleList::Empty() const noexcept { + return this->m_head->m_atomic_next.load() == this->m_tail; +} + +#ifdef _TRIIAL_DOUBLE_LIST_TEST_ +template +int TrivialLockDoubleList::GetSize() const noexcept { + + int _size = 0; + auto _cur = this->m_head; + while (_cur) { + if (!_cur->IsDeleted()) + _size++; + _cur = _cur->m_atomic_next.load(); + } + + //Exclude head & tail. + return _size - 2; +} + +template +DoubleListNode* TrivialLockDoubleList::GetHead() const noexcept { + return this->m_head; +} + +#endif + +} diff --git a/src/tools/trivial_lock_double_list.h b/src/tools/trivial_lock_double_list.h new file mode 100644 index 0000000..c183236 --- /dev/null +++ b/src/tools/trivial_lock_double_list.h @@ -0,0 +1,137 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_TRIVIAL_LOCK_LIST_H__ +#define __AURORA_TRIVIAL_LOCK_LIST_H__ + +#include +#include +#include +#include +#include + +#include "glog/logging.h" +#include "tools/data_structure_base.h" +#include "tools/trivial_lock_list_base.h" + +namespace RaftCore::DataStructure { + +/* One node of TrivialLockDoubleList: the payload (m_val) plus atomic links to the previous and next node. The comparison operators compare the payloads. */ +template +class DoubleListNode final : public OrderedTypeBase>, public LogicalDelete { + +public: + + DoubleListNode(const std::shared_ptr &p_val) noexcept; + + virtual ~DoubleListNode() noexcept; + + virtual bool operator<(const DoubleListNode& other)const noexcept override; + + virtual bool operator>(const DoubleListNode& other)const noexcept override; + + virtual bool operator==(const DoubleListNode& other)const noexcept override; + + //There are several lock-free operations based on std::atomic::CAS + std::atomic*> m_atomic_pre; + + std::atomic*> m_atomic_next; + + std::shared_ptr m_val; + + /* Calls unary on every node of the chain starting at phead. */ + static void Apply(DoubleListNode* phead, std::function*)> unary) noexcept; +}; + +/* Doubly linked list bracketed by two sentinel nodes built from p_min and p_max. Insertion scans backward from the tail and links the new node with CAS operations; deletion is logical (SetDeleted) until the node is cut off by CutHead. */ +template +class TrivialLockDoubleList final : OperationTracker> { + +public: + + TrivialLockDoubleList(const std::shared_ptr &p_min, const std::shared_ptr &p_max) noexcept; + + virtual ~TrivialLockDoubleList() noexcept; + + void Insert(const std::shared_ptr &p_one) noexcept; + + void Insert(DoubleListNode* new_node) noexcept; + + /*Note : Delete & CutHead are not intended to be invoked simultaneously. */ + bool Delete(const std::shared_ptr &p_one) noexcept; + + void DeleteAll() noexcept; + + //1. Each pair of the adjacent elements satisfy criteria: cut them all. + //2. otherwise, cut the satisfied elements. + DoubleListNode* CutHead(std::function criteria) noexcept; + + DoubleListNode* CutHead(std::function criteria) noexcept; + + DoubleListNode* CutHeadByValue(const T &val) noexcept; + + /* Frees every node of a detached chain. */ + static void ReleaseCutHead(DoubleListNode* output_head) noexcept; + + //This method is not thread safe , but no way to call it simultaneously. 
+ void Clear() noexcept; + + void IterateCutHead(std::function accessor, DoubleListNode* output_head) const noexcept; + + void Iterate(std::function accessor) const noexcept; + + bool Empty() const noexcept; + +#ifdef _TRIIAL_DOUBLE_LIST_TEST_ + int GetSize() const noexcept; + + DoubleListNode* GetHead() const noexcept; +#endif + +private: + + void InsertTracker(DoubleListNode* new_node) noexcept; + + /* Self-purging redundant */ + bool InsertRaw(DoubleListNode* new_node) noexcept; + + DoubleListNode* FindNextNonDelete(DoubleListNode* p_cur) noexcept; + + DoubleListNode* ExpandForward(DoubleListNode* p_cur) noexcept; + + DoubleListNode* ExpandBackward(DoubleListNode* p_cur) noexcept; + + bool MoveForward(DoubleListNode* &p_pre,DoubleListNode* &p_next) noexcept; + + void SiftOutDeleted(DoubleListNode* &output_head) noexcept; + + /* Sentinel node holding p_min. */ + DoubleListNode* m_head = nullptr; + + /* Sentinel node holding p_max. */ + DoubleListNode* m_tail = nullptr; + + /* Locked by CutHead for its whole run; recursive because CutHead calls itself again when it has to retry. */ + std::recursive_mutex m_recursive_mutex; + +private: + + TrivialLockDoubleList& operator=(const TrivialLockDoubleList&) = delete; + +}; + +} //end namespace + +/* The template definitions are pulled in from the .cc file. */ +#include "tools/trivial_lock_double_list.cc" + +#endif diff --git a/src/tools/trivial_lock_list_base.cc b/src/tools/trivial_lock_list_base.cc new file mode 100644 index 0000000..1ad8c27 --- /dev/null +++ b/src/tools/trivial_lock_list_base.cc @@ -0,0 +1,97 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#include "config/config.h" +#include "tools/trivial_lock_list_base.h" + +namespace RaftCore::DataStructure { + +template +ThreadIDWrapper::ThreadIDWrapper(std::thread::id tid)noexcept { + this->m_tid = tid; +} + +template +ThreadIDWrapper::~ThreadIDWrapper()noexcept {} + +template +bool ThreadIDWrapper::operator<(const ThreadIDWrapper &other)const noexcept { + return this->m_tid < other.m_tid; +} + +template +bool ThreadIDWrapper::operator==(const ThreadIDWrapper &other)const noexcept { + return this->m_tid == other.m_tid; +} + +//TODO:figure out why this fails to compile under VS2015 +//template +//const TrivialLockSingleList::ThreadIDWrapper& TrivialLockSingleList::ThreadIDWrapper::operator=( +// const ThreadIDWrapper &other)noexcept { +// this->m_tid = other.m_tid; +// return *this; +//} + +/* Hash of the wrapped thread id, computed with std::hash. */ +template +std::size_t ThreadIDWrapper::Hash()const noexcept { + return std::hash{}(this->m_tid); +} + +template +std::thread::id ThreadIDWrapper::GetTid() const noexcept { + return this->m_tid; +} + +/* Allocates the footprint table with FLAGS_list_op_tracker_hash_slot_num slots. */ +template +OperationTracker::OperationTracker()noexcept { + uint32_t _slot_num = ::RaftCore::Config::FLAGS_list_op_tracker_hash_slot_num; + this->m_p_insert_footprint = new LockFreeHashAtomic, T>(_slot_num); + + static_assert(std::is_base_of,T>::value,"template parameter of OperationTracker invalid"); +} + +/* NOTE(review): m_p_insert_footprint is allocated in the constructor but not deleted here - confirm whether that is intentional. */ +template +OperationTracker::~OperationTracker()noexcept {} + +/* Blocks, yielding between rounds, until a full pass over the footprint table finds no tracked value that is <= *output_tail (entries whose value is null are ignored). */ +template +void OperationTracker::WaitForListClean(T* output_tail) noexcept { + bool _finished = true; + //Waiting for unfinished insertions inside the cut off list to be finished. 
+ auto _checker = [&](const std::shared_ptr> &k, const std::shared_ptr &v) ->bool { + if (!v) + return true; + + if (*v.get() > *output_tail) + return true; + + _finished = false; + + /* NOTE(review): returning false presumably ends the Iterate pass early - confirm against LockFreeHashAtomic::Iterate. */ + return false; + }; + + do { + std::this_thread::yield(); + _finished = true; + this->m_p_insert_footprint->Iterate(_checker); + } while (!_finished); +} + +} //end namespace + + diff --git a/src/tools/trivial_lock_list_base.h b/src/tools/trivial_lock_list_base.h new file mode 100644 index 0000000..6f49346 --- /dev/null +++ b/src/tools/trivial_lock_list_base.h @@ -0,0 +1,97 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_TRIVIAL_LOCK_LIST_BASE_H__ +#define __AURORA_TRIVIAL_LOCK_LIST_BASE_H__ + +#include +#include + +#include "tools/data_structure_base.h" +#include "tools/lock_free_hash_specific.h" + +namespace RaftCore::DataStructure { + +using ::RaftCore::DataStructure::HashTypeBase; +using ::RaftCore::DataStructure::HashNode; +using ::RaftCore::DataStructure::LockFreeHashAtomic; + +//The template is a wrapper for compile compatibility. 
+template +class ThreadIDWrapper final : public HashTypeBase> { + +public: + + ThreadIDWrapper(std::thread::id tid)noexcept; + + virtual ~ThreadIDWrapper()noexcept; + + virtual bool operator<(const ThreadIDWrapper&)const noexcept override; + + virtual bool operator==(const ThreadIDWrapper&)const noexcept override; + + virtual const ThreadIDWrapper& operator=(const ThreadIDWrapper&other)noexcept override + { + this->m_tid = other.m_tid; + return *this; + } + + virtual std::size_t Hash() const noexcept override; + + std::thread::id GetTid() const noexcept; + +private: + + std::thread::id m_tid; + +private: + + ThreadIDWrapper(const ThreadIDWrapper&) = delete; +}; + +template +class OperationTracker { + +public: + + OperationTracker()noexcept; + + virtual ~OperationTracker()noexcept; + +protected: + + void WaitForListClean(T* output_head) noexcept; + +protected: + + LockFreeHashAtomic, T> *m_p_insert_footprint = nullptr; + +private: + + OperationTracker(const OperationTracker&) = delete; + + OperationTracker& operator=(const OperationTracker&) = delete; +}; + +} //end namespace + +#include "tools/trivial_lock_list_base.cc" + +#endif diff --git a/src/tools/trivial_lock_single_list.cc b/src/tools/trivial_lock_single_list.cc new file mode 100644 index 0000000..7fdd343 --- /dev/null +++ b/src/tools/trivial_lock_single_list.cc @@ -0,0 +1,384 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. 
If not, see . +*/ + +#include "tools/trivial_lock_single_list.h" + +namespace RaftCore::DataStructure { + +template +SingleListNode::SingleListNode(const std::shared_ptr &shp_val) noexcept{ + static_assert(std::is_base_of,T>::value,"template parameter of TrivialLockSingleList invalid"); + this->m_val = shp_val; + this->m_atomic_next.store(nullptr); +} + +template +SingleListNode::~SingleListNode() noexcept {} + +template +bool SingleListNode::operator<(const SingleListNode& other) const noexcept { + return *this->m_val < *other.m_val; +} + +template +bool SingleListNode::operator>(const SingleListNode& other) const noexcept { + return *this->m_val > *other.m_val; +} + +template +bool SingleListNode::operator==(const SingleListNode& other) const noexcept { + return *this->m_val == *other.m_val; +} + +template +void SingleListNode::Apply(SingleListNode* phead, std::function*)> unary) noexcept { + auto _p_cur = phead; + while (_p_cur != nullptr) { + //Note: unary may modify _p_cur's next pointer after its execution. 
+ auto _p_next = _p_cur->m_atomic_next.load(); + unary(_p_cur); + _p_cur = _p_next; + } +} + +template +TrivialLockSingleList::TrivialLockSingleList(const std::shared_ptr &p_min, const std::shared_ptr &p_max) noexcept { + static_assert(std::is_base_of,T>::value,"template parameter of TrivialLockSingleList invalid"); + + this->m_head = new SingleListNode(p_min); + this->m_tail = new SingleListNode(p_max); + + this->m_head->m_atomic_next.store(this->m_tail); +} + +template +TrivialLockSingleList::~TrivialLockSingleList() noexcept{ + this->Clear(); +} + +template +void TrivialLockSingleList::Clear() noexcept { + auto *_p_cur = this->m_head->m_atomic_next.load(); + while (_p_cur != this->m_tail) { + auto tmp = _p_cur; + _p_cur = _p_cur->m_atomic_next.load(); + delete tmp; + } + + this->m_head->m_atomic_next.store(this->m_tail); +} + +template +SingleListNode* TrivialLockSingleList::SetEmpty() noexcept { + auto *_p_old = this->m_head->m_atomic_next.load(); + while (!this->m_head->m_atomic_next.compare_exchange_weak(_p_old, this->m_tail)) + continue; + + if (_p_old == this->m_tail) + return nullptr; + + auto *_p_cur = _p_old; + auto *_p_next = _p_cur->m_atomic_next.load(); + while (_p_next != this->m_tail) { + _p_cur = _p_next; + _p_next = _p_next->m_atomic_next.load(); + } + + _p_cur->m_atomic_next.store(nullptr); + + return _p_old; +} + +template +void TrivialLockSingleList::Insert(const std::shared_ptr &p_one) noexcept { + SingleListNode* new_node = new SingleListNode(p_one); + this->Insert(new_node); +} + +template +void TrivialLockSingleList::Insert(SingleListNode* new_node) noexcept { + this->InsertTracker(new_node); +} + +template +void TrivialLockSingleList::InsertTracker(SingleListNode* new_node) noexcept { + + ThreadIDWrapper *_p_tracker = new ThreadIDWrapper(std::this_thread::get_id()); + + bool _ownership_taken = false; + if (this->m_p_insert_footprint->Upsert(_p_tracker, new_node)) + _ownership_taken = true; + + while (!this->InsertRaw(new_node)) + VLOG(89) 
<< "-------redo InsertRaw!-----"; + + //Since _p_tracker already exist, the return value must be true. + CHECK(!this->m_p_insert_footprint->Upsert(_p_tracker, nullptr)); + + if (!_ownership_taken) + delete _p_tracker; +} + +template +bool TrivialLockSingleList::InsertRaw(SingleListNode* new_node) noexcept { + + const auto &_p_one = new_node->m_val; + + auto *_p_pre = this->m_head; + auto *_p_cur = _p_pre->m_atomic_next.load(); + + while (true) { + + //Reaching the end of the cut head list, need to start over again. + if (_p_cur == nullptr) + return false; + + if (_p_cur == this->m_tail) { + new_node->m_atomic_next.store(this->m_tail); + if (!_p_pre->m_atomic_next.compare_exchange_strong(_p_cur, new_node)) + continue; + break; + } + + if (*_p_one == *_p_cur->m_val && !_p_cur->IsDeleted()) + return true; + + if (*_p_one > *_p_cur->m_val) { + _p_pre = _p_cur; + _p_cur = _p_cur->m_atomic_next.load(); + continue; + } + + //Once get here, the new node should be inserted between _p_pre and _p_cur. + new_node->m_atomic_next.store(_p_cur); + + if (!_p_pre->m_atomic_next.compare_exchange_strong(_p_cur, new_node)) + continue; + + break; + } + + return true; +} + +template +bool TrivialLockSingleList::Delete(const std::shared_ptr &p_one) noexcept { + + auto *_p_node = new SingleListNode(p_one); + + ThreadIDWrapper *_p_tracker = new ThreadIDWrapper(std::this_thread::get_id()); + + bool _ownership_taken = false; + if (this->m_p_insert_footprint->Upsert(_p_tracker, _p_node)) + _ownership_taken = true; + + //Default to that the element to be deleted not found. 
+ bool _ret_val = false; + + auto *_p_cur = this->m_head; + while (_p_cur != this->m_tail) { + if (*p_one != *_p_cur->m_val) { + _p_cur = _p_cur->m_atomic_next.load(); + continue; + } + _p_cur->SetDeleted(); + _ret_val = true; + break; + } + + CHECK(!this->m_p_insert_footprint->Upsert(_p_tracker, nullptr)); + + if (!_ownership_taken) { + delete _p_tracker; + delete _p_node; + } + + return _ret_val; +} + +template +void TrivialLockSingleList::SiftOutDeleted(SingleListNode* &output_head) noexcept { + auto *_to_remove = output_head; + decltype(_to_remove) _p_pre = nullptr; + + while (_to_remove != nullptr) { + bool _deleted = false; + + if (_to_remove->IsDeleted()) { + + auto _p_next = _to_remove->m_atomic_next.load(); + + if (_p_pre) + _p_pre->m_atomic_next.store(_p_next); + else + output_head = _p_next; + + _deleted = true; + } + + auto _tmp = _to_remove; + _to_remove = _to_remove->m_atomic_next.load(); + + if (_deleted) + delete _tmp; + else + _p_pre = _tmp; + } +} + +template +SingleListNode* TrivialLockSingleList::CutHead(std::function criteria) noexcept { + + /*Note: In the current design , there can be only one thread invoking this method.But it + is allowed to have several other threads doing Insert at the mean time. */ + std::unique_lock _mutex_lock(this->m_recursive_mutex); + + auto *_p_pre = this->m_head; + auto *_p_cur = _p_pre->m_atomic_next.load(); + auto *_p_start = _p_cur; + + if (_p_cur == this->m_tail) + return nullptr; + + //VLOG(89) << "debug double list cutting starts from:" << _p_cur->m_val->PrintMe(); + + while (true) { + + if (criteria(*_p_cur->m_val)) { + _p_pre = _p_cur; + _p_cur = _p_cur->m_atomic_next.load(); + if (_p_cur != this->m_tail) + continue; + } + + //No nodes are available + if (_p_cur == _p_start) + return nullptr; + + //-----------Start cutting head-----------// + if (!_p_pre->m_atomic_next.compare_exchange_strong(_p_cur, nullptr)) { + _p_cur = _p_pre; + continue; + } + + //Cutting done,just break out. 
+ break; + } + + /*Note: Once goes here, _p_pre is the latest item of the cut out list and + _p_cur is the first item of the remaining list. */ + //Detach first node + auto _p_tmp = _p_start; + while (!this->m_head->m_atomic_next.compare_exchange_strong(_p_tmp, _p_cur)) { + decltype(_p_cur) _p_tmp_x = nullptr; + CHECK(_p_pre->m_atomic_next.compare_exchange_strong(_p_tmp_x, _p_cur)); + //VLOG(89) << "-------recursive cuthead occur!-----"; + return this->CutHead(criteria); + } + + auto *_output_head = _p_start; + + //VLOG(89) << "cut head waitdone"; + + this->WaitForListClean(_p_cur); + + //Now the list is cut off with the deleted elements. Need to erase the deleted elements + this->SiftOutDeleted(_output_head); + + //VLOG(89) << "debug double list leave with something"; + + return _output_head; +} + +template +SingleListNode* TrivialLockSingleList::CutHeadByValue(const T &val) noexcept { + auto judge_smaller_equal = [&](const T &one) -> bool{ return one <= val; }; + + //VLOG(89) << "start cuthead less than:" << val.PrintMe(); + + return this->CutHead(judge_smaller_equal); +} + +template +void TrivialLockSingleList::ReleaseCutHead(SingleListNode* output_head) noexcept { + auto _p_cur = output_head; + while (_p_cur != nullptr) { + auto _p_next = _p_cur->m_atomic_next.load(); + delete _p_cur; + _p_cur = _p_next; + } +} + +template +void TrivialLockSingleList::IterateCutHead(std::function &)> accessor, SingleListNode* output_head) const noexcept { + auto _p_cur = output_head; + while (_p_cur != nullptr) { + auto _p_next = _p_cur->m_atomic_next.load(); + accessor(_p_cur->m_val); + _p_cur = _p_next; + } +} + +template +void TrivialLockSingleList::Iterate(std::function &)> accessor) const noexcept { + + auto *_cur = this->m_head->m_atomic_next.load(); + while (_cur != nullptr) { + + if (_cur == this->m_tail) + break; + + auto _p_tmp = _cur; + _cur = _cur->m_atomic_next.load(); + + if (_p_tmp->IsDeleted()) + continue; + + if (!accessor(_p_tmp->m_val)) + break; + } +} + 
+template +bool TrivialLockSingleList::Empty() const noexcept { + return this->m_head->m_atomic_next.load() == this->m_tail; +} + +#ifdef _SINGLE_LIST_TEST_ +template +int TrivialLockSingleList::GetSize() const noexcept { + + int _size = 0; + auto *_cur = this->m_head->m_atomic_next.load(); + while (_cur != this->m_tail) { + if (!_cur->IsDeleted()) + _size++; + _cur = _cur->m_atomic_next.load(); + } + + return _size; +} + +template +SingleListNode* TrivialLockSingleList::GetHead() const noexcept { + return this->m_head; +} + +#endif + +} diff --git a/src/tools/trivial_lock_single_list.h b/src/tools/trivial_lock_single_list.h new file mode 100644 index 0000000..090887b --- /dev/null +++ b/src/tools/trivial_lock_single_list.h @@ -0,0 +1,124 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#pragma once + +#ifndef __AURORA_TRIVIAL_LOCK_ORDERED_SINGLE_LIST_H__ +#define __AURORA_TRIVIAL_LOCK_ORDERED_SINGLE_LIST_H__ + +#include +#include +#include +#include +#include + +#include "glog/logging.h" + +#include "common/macro_manager.h" +#include "tools/data_structure_base.h" +#include "tools/trivial_lock_list_base.h" + +namespace RaftCore::DataStructure { + +template +class SingleListNode final : public OrderedTypeBase>, public LogicalDelete { + +public: + + SingleListNode(const std::shared_ptr &shp_val) noexcept; + + virtual ~SingleListNode() noexcept; + + virtual bool operator<(const SingleListNode& other)const noexcept override; + + virtual bool operator>(const SingleListNode& other)const noexcept override; + + virtual bool operator==(const SingleListNode& other)const noexcept override; + + static void Apply(SingleListNode* phead, std::function*)> unary) noexcept; + + std::shared_ptr m_val; + + std::atomic*> m_atomic_next; +}; + +template +class TrivialLockSingleList final : public OperationTracker> { + +public: + + TrivialLockSingleList(const std::shared_ptr &p_min, const std::shared_ptr &p_max) noexcept; + + virtual ~TrivialLockSingleList() noexcept; + + void Insert(const std::shared_ptr &p_one) noexcept; + + void Insert(SingleListNode* new_node) noexcept; + + /*Note : Delete & CutHead are not intended to be invoked simultaneously. */ + bool Delete(const std::shared_ptr &p_one) noexcept; + + SingleListNode* CutHead(std::function criteria) noexcept; + + SingleListNode* CutHeadByValue(const T &val) noexcept; + + static void ReleaseCutHead(SingleListNode* output_head) noexcept; + + //This method is not thread safe , but no way to call it simultaneously. 
+ void Clear() noexcept; + + SingleListNode* SetEmpty() noexcept; + + void IterateCutHead(std::function &)> accessor, SingleListNode* output_head) const noexcept; + + void Iterate(std::function &)> accessor) const noexcept; + + bool Empty() const noexcept; + +#ifdef _SINGLE_LIST_TEST_ + int GetSize() const noexcept; + + SingleListNode* GetHead() const noexcept; +#endif + +private: + + void InsertTracker(SingleListNode* new_node) noexcept; + + bool InsertRaw(SingleListNode* new_node) noexcept; + + void SiftOutDeleted(SingleListNode* &output_head) noexcept; + + SingleListNode* m_head; + + //Used for indicating a cut head list. + SingleListNode* m_tail; + + std::recursive_mutex m_recursive_mutex; + +private: + + TrivialLockSingleList& operator=(const TrivialLockSingleList&) = delete; + +}; + +} //end namespace + +#include "tools/trivial_lock_single_list.cc" + +#endif diff --git a/src/tools/utilities.cc b/src/tools/utilities.cc new file mode 100644 index 0000000..32b13d9 --- /dev/null +++ b/src/tools/utilities.cc @@ -0,0 +1,150 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . 
+*/ + +#include "glog/logging.h" +#include "boost/asio.hpp" + +#include "tools/utilities.h" + +#define _FOUR_BYTES_BITS_ (32) + +namespace RaftCore::Tools { + +void GetLocalIPs(std::list& ips){ + + boost::asio::io_service io_service; + boost::asio::ip::tcp::resolver resolver(io_service); + + std::string h = boost::asio::ip::host_name(); + + std::for_each(resolver.resolve({ h, "" }), {}, [&](const auto& re) { + ips.emplace_back(re.endpoint().address().to_string()); + }); + + //Forcibly add loop back address due to it may not appear under certain platforms. + const static std::string _loop_back_addr = "127.0.0.1"; + if (std::find(ips.cbegin(), ips.cend(), _loop_back_addr) == ips.cend()) + ips.emplace_back(_loop_back_addr); +} + +uint32_t RoundUp(uint32_t num) { + + const static uint32_t mask = 0x80000000; + + uint32_t tmp = num; + uint32_t ret = 0x80000000; + + for (int i=0; i < _FOUR_BYTES_BITS_; ++i) { + if (tmp & mask) + break; + tmp <<= 1; + ret >>= 1; + } + + int _shift = (tmp & ~mask)? 
1 : 0 ; + return ret << _shift; +} + +uint32_t GetMask(uint32_t num) { + + uint32_t tmp = num; + uint32_t mask = 0x80000000; + + int _counter = 1; + for (int i=0; i < _FOUR_BYTES_BITS_; ++i) { + if (tmp & mask) + break; + tmp <<= 1; + _counter++; + } + + uint32_t ret = 0x1; + for (int i = 0; i < _FOUR_BYTES_BITS_ - _counter - 1; ++i) { + ret <<= 1; + ret++; + } + + return ret; +} + +TypeTimePoint StartTimeing() { + return std::chrono::steady_clock::now(); +} + +void EndTiming(const TypeTimePoint &tp_start, const char* operation_name, const LogIdentifier *p_cur_id) { + auto _now = std::chrono::steady_clock::now(); + std::chrono::microseconds _us = std::chrono::duration_cast(_now - tp_start); + + if (p_cur_id == nullptr) + VLOG(88) << operation_name << " cost us:" << _us.count(); + else + VLOG(88) << operation_name << " cost us:" << _us.count() << " ,idx:" << *p_cur_id; +} + +void StringSplit(const std::string &input, char delimiter, std::set &output) { + std::list _output; + StringSplit(input, delimiter, _output); + for (const auto &_item : _output) + output.emplace(_item); +} + +void StringSplit(const std::string &input, char delimiter, std::list &output) { + + std::size_t _pos = 0, _cur_pos=0; + + while ((_cur_pos = input.find(delimiter, _pos)) != std::string::npos) { + std::size_t _len = _cur_pos - _pos ; + if (_len > 0) + output.emplace_back(input.substr(_pos, _len)); + + _pos = _cur_pos; + if (++_pos >= input.length()) + break; + } + + if (_pos < input.length()) + output.emplace_back(input.substr(_pos)); +} + +std::string TimePointToString(const TypeSysTimePoint &tp){ + + //Get seconds. + char _buf[128]; + std::time_t _t = std::chrono::system_clock::to_time_t(tp); + std::tm * _ptm = std::localtime(&_t); + std::strftime(_buf, 32, "%Y.%m.%d %a, %H:%M:%S", _ptm); + + //Get milliseconds. 
+ char _result[128]; + std::chrono::milliseconds ms = std::chrono::duration_cast(tp.time_since_epoch()); + std::snprintf(_result,sizeof(_result),"%s.%llu",_buf,ms.count() % 1000); + + return std::string(_result); +} + +uint32_t GenerateRandom(uint32_t from, uint32_t to) { + std::random_device rd; + std::mt19937 gen(rd()); + std::uniform_int_distribution dis(from,to); + return dis(gen); +} + +} + + + diff --git a/src/tools/utilities.h b/src/tools/utilities.h new file mode 100644 index 0000000..2ef7606 --- /dev/null +++ b/src/tools/utilities.h @@ -0,0 +1,125 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#ifndef __AURORA_UTILITIES_H__ +#define __AURORA_UTILITIES_H__ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common/log_identifier.h" +#include "boost/crc.hpp" + +namespace RaftCore::Tools { + +using ::RaftCore::Common::LogIdentifier; + +typedef std::chrono::time_point TypeTimePoint; +typedef std::chrono::time_point TypeSysTimePoint; + +//Note: The suffix letter of 'x' is to avoid name conflict under darwin. 
+enum class LocalEndian{ UKNOWN_X, BIG_ENDIAN_X, LITTLE_ENDIAN_X }; + +inline bool LocalBigEndian() { + + static LocalEndian g_is_local_big_endian = LocalEndian::UKNOWN_X; + + if (g_is_local_big_endian != LocalEndian::UKNOWN_X) + return (g_is_local_big_endian == LocalEndian::BIG_ENDIAN_X); + + uint32_t uTest = 0x12345678; + + unsigned char* pTest = (unsigned char*)&uTest; + + g_is_local_big_endian = LocalEndian::LITTLE_ENDIAN_X; + if ((*pTest) == 0x12) { + g_is_local_big_endian = LocalEndian::BIG_ENDIAN_X; + return true; + } + + return false; +} + +template +inline void ConvertToBigEndian(_type input, _type *output) { + + //Note:"input" is a copied. + assert(output != nullptr && output != &input); + + if (LocalBigEndian()) { + *output = input; + return; + } + + unsigned char* pCur = (unsigned char*)&input; + unsigned char* pTarget = (unsigned char*)output; + + int iter_cnt = sizeof(_type) - 1; + for (int i = 0; i <= iter_cnt; ++i) + pTarget[iter_cnt-i] = pCur[i]; +} + +template +inline void ConvertBigEndianToLocal(_type input, _type *output) { + ConvertToBigEndian(input, output); +} + +void GetLocalIPs(std::list& ips); + +uint32_t RoundUp(uint32_t num); + +uint32_t GetMask(uint32_t num); + +inline uint32_t CalculateCRC32(const void* data, unsigned int len) { + boost::crc_32_type crc_result; + crc_result.process_bytes(data,len); + return crc_result.checksum(); +} + +TypeTimePoint StartTimeing(); + +void EndTiming(const TypeTimePoint &tp_start, const char* operation_name, const LogIdentifier *p_cur_id = nullptr); + +void StringSplit(const std::string &input, char delimiter, std::set &output); + +void StringSplit(const std::string &input, char delimiter, std::list &output); + +std::string TimePointToString(const TypeSysTimePoint &tp); + +uint32_t GenerateRandom(uint32_t from, uint32_t to); + +template +inline uint32_t SizeOfX() noexcept { + return sizeof(T); +} + +template<> +inline uint32_t SizeOfX() noexcept { + return 0; +} + +} + +#endif + diff --git 
a/src/topology/topology_mgr.cc b/src/topology/topology_mgr.cc new file mode 100644 index 0000000..2169234 --- /dev/null +++ b/src/topology/topology_mgr.cc @@ -0,0 +1,212 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#include + +#include "common/comm_defs.h" +#include "topology/topology_mgr.h" + +#define _AURORA_TOPOLOGY_LEADER_INDICATOR_ "leader" +#define _AURORA_TOPOLOGY_FOLLOWER_INDICATOR_ "followers" +#define _AURORA_TOPOLOGY_CANDIDATE_INDICATOR_ "candidates" +#define _AURORA_TOPOLOGY_MYADDR_INDICATOR_ "my_addr" + +namespace RaftCore { + +std::fstream CTopologyMgr::m_file_stream; + +Topology CTopologyMgr::m_ins; + +std::shared_timed_mutex CTopologyMgr::m_mutex; + +using ::RaftCore::Common::ReadLock; +using ::RaftCore::Common::WriteLock; + +std::ostream& operator<<(std::ostream& os, const Topology& obj) { + os << "------ Topology ------" << std::endl; + os << "leader:" << std::endl << obj.m_leader << std::endl; + + os << "followers:" << std::endl; + for (const auto &_item : obj.m_followers) { + os << _item << std::endl; + } + + os << "candidates:" << std::endl; + for (const auto &_item : obj.m_candidates) { + os << _item << std::endl; + } + + os << "my_addr:" << obj.m_my_addr << std::endl; + + return os; +} + +Topology::Topology()noexcept { + this->Reset(); +} + +void Topology::Reset() noexcept{ + this->m_leader = ""; + this->m_followers.clear(); + 
this->m_candidates.clear(); + this->m_my_addr = ""; +} + +uint32_t Topology::GetClusterSize() const noexcept { + return (uint32_t)this->m_followers.size() + (uint32_t)this->m_candidates.size() + 1; +} + +bool Topology::InCurrentCluster(const std::string &node) noexcept{ + if (node == this->m_leader) + return true; + + if (this->m_candidates.find(node) != this->m_candidates.cend()) + return true; + + if (this->m_followers.find(node) != this->m_followers.cend()) + return true; + + return false; +} + +void CTopologyMgr::Initialize() noexcept { + Load(); +} + +void CTopologyMgr::UnInitialize() noexcept { + m_file_stream.close(); +} + +bool CTopologyMgr::Load() noexcept{ + + m_file_stream.open(_AURORA_TOPOLOGY_CONFFIG_FILE_); + if (!m_file_stream.is_open()) { + LOG(ERROR) << "open topology config file " << _AURORA_TOPOLOGY_CONFFIG_FILE_ << " fail."; + return false; + } + + std::regex _pattern("(#*)\\d{1,3}\.\\d{1,3}\.\\d{1,3}\.\\d{1,3}:\\d+"); + std::smatch _sm; + + WriteLock _w_lock(m_mutex); + + m_ins.Reset(); + + int _section_flg = 0; // 1: leader , 2: follower,3:candidate, 4:my_addr + + for (std::string _ori_line; std::getline(m_file_stream, _ori_line);) { + + std::string _line = ""; + //_line.reserve(_ori_line.length()); + std::copy_if(_ori_line.begin(), _ori_line.end(), std::back_inserter(_line), [](char c) { return c != '\r' && c != '\n'; }); + + if (_line == _AURORA_TOPOLOGY_LEADER_INDICATOR_) { + _section_flg = 1; + continue; + } + + if (_line == _AURORA_TOPOLOGY_FOLLOWER_INDICATOR_) { + _section_flg = 2; + continue; + } + + if (_line == _AURORA_TOPOLOGY_CANDIDATE_INDICATOR_) { + _section_flg = 3; + continue; + } + + if (_line == _AURORA_TOPOLOGY_MYADDR_INDICATOR_) { + _section_flg = 4; + continue; + } + + if (!std::regex_match(_line, _sm, _pattern)) { + LOG(ERROR) << "unrecognized line found when parsing topology config file, ignore it:" << _line; + continue; + } + + //Support comment. 
+ if (_sm[1] == "#") + continue; + + if (_section_flg == 1) + m_ins.m_leader = _line; + else if (_section_flg == 2) + m_ins.m_followers.emplace(_line); + else if (_section_flg == 3) + m_ins.m_candidates.emplace(_line); + else if (_section_flg == 4) + m_ins.m_my_addr = _line; + else + CHECK(false) << "unknown section flag : " << _section_flg; + } + + //'m_my_addr' must be in the cluster. + CHECK(m_ins.InCurrentCluster(m_ins.m_my_addr)); + + return true; +} + +void CTopologyMgr::Read(Topology *p_output) noexcept{ + + if (p_output == nullptr) { + LOG(ERROR) << "input data is null,invalid."; + return ; + } + + ReadLock _r_lock(m_mutex); + p_output->m_leader = m_ins.m_leader; + p_output->m_followers = m_ins.m_followers; + p_output->m_candidates = m_ins.m_candidates; + p_output->m_my_addr = m_ins.m_my_addr; +} + +void CTopologyMgr::Update(const Topology &input) noexcept{ + + WriteLock _w_lock(m_mutex); + m_ins.m_leader = input.m_leader; + m_ins.m_followers = input.m_followers; + m_ins.m_candidates = input.m_candidates; + m_ins.m_my_addr = input.m_my_addr; + + m_file_stream.close(); + + //Reopen file for re-writing and truncate the old contents + m_file_stream.open(_AURORA_TOPOLOGY_CONFFIG_FILE_,std::ios_base::in | std::ios_base::out | std::ios_base::trunc); + + std::string content = ""; + content.append(std::string(_AURORA_TOPOLOGY_LEADER_INDICATOR_) + "\n"); + content.append(m_ins.m_leader + "\n"); + + content.append(std::string(_AURORA_TOPOLOGY_FOLLOWER_INDICATOR_) + "\n"); + for (const auto &item : m_ins.m_followers) + content.append((item + "\n")); + + content.append(std::string(_AURORA_TOPOLOGY_CANDIDATE_INDICATOR_) + "\n"); + for (const auto &item : m_ins.m_candidates) + content.append((item + "\n")); + + content.append(std::string(_AURORA_TOPOLOGY_MYADDR_INDICATOR_) + "\n"); + content.append(m_ins.m_my_addr + "\n"); + + m_file_stream.write(content.c_str(),content.length()); + m_file_stream.flush(); +} + + +} \ No newline at end of file diff --git 
a/src/topology/topology_mgr.h b/src/topology/topology_mgr.h new file mode 100644 index 0000000..331a8cc --- /dev/null +++ b/src/topology/topology_mgr.h @@ -0,0 +1,91 @@ +/* +* +* Copyright (C) <2019> + +* This program is free software: you can redistribute it and/or modify +* it under the terms of the GNU General Public License as published by +* the Free Software Foundation, either version 3 of the License, or +* (at your option) any later version. + +* This program is distributed in the hope that it will be useful, +* but WITHOUT ANY WARRANTY; without even the implied warranty of +* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +* GNU General Public License for more details. + +* You should have received a copy of the GNU General Public License +* along with this program. If not, see . +*/ + +#pragma once + +#ifndef __AURORA_TOPOLOGY_H__ +#define __AURORA_TOPOLOGY_H__ + +#include +#include +#include +#include +#include + +#define _AURORA_TOPOLOGY_CONFFIG_FILE_ "topology.config" + +namespace RaftCore{ + +struct Topology { + std::string m_leader = ""; + std::set m_followers; + std::set m_candidates; + std::string m_my_addr = ""; + + Topology()noexcept; + + void Reset()noexcept; + + uint32_t GetClusterSize() const noexcept; + + bool InCurrentCluster(const std::string &node) noexcept; +}; + + +std::ostream& operator<<(std::ostream& os, const Topology& obj); + +class CTopologyMgr final{ + +public: + + static void Initialize() noexcept; + + static void UnInitialize() noexcept; + + static void Update(const Topology &input) noexcept; + + static void Read(Topology *p_output=nullptr) noexcept; + +private: + + static bool Load() noexcept; + +private: + + static Topology m_ins; + + static std::fstream m_file_stream; + + static std::shared_timed_mutex m_mutex; + +private: + + CTopologyMgr() = delete; + + virtual ~CTopologyMgr() noexcept = delete; + + CTopologyMgr(const CTopologyMgr&) = delete; + + CTopologyMgr& operator=(const CTopologyMgr&) = delete; + +}; + 
+}
+
+
+#endif
\ No newline at end of file
diff --git a/work/.test.sh.un~ b/work/.test.sh.un~
new file mode 100644
index 0000000..d1c21b7
Binary files /dev/null and b/work/.test.sh.un~ differ
diff --git a/work/backend_svr/election.config b/work/backend_svr/election.config
new file mode 100644
index 0000000..a00feb3
--- /dev/null
+++ b/work/backend_svr/election.config
@@ -0,0 +1,3 @@
+current term:0
+highest vote for:
+known voting terms:
diff --git a/work/backend_svr/membership-change.config b/work/backend_svr/membership-change.config
new file mode 100644
index 0000000..61082a4
--- /dev/null
+++ b/work/backend_svr/membership-change.config
@@ -0,0 +1,3 @@
+cluster status:STABLE
+new cluster:
+version:110
diff --git a/work/backend_svr/restart_leader.sh b/work/backend_svr/restart_leader.sh
new file mode 100644
index 0000000..7a7b0fb
--- /dev/null
+++ b/work/backend_svr/restart_leader.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+#
+# Kills any running server process, removes its logs and raft.binlog.leader, then
+# starts it again in the background on port 10010 (stdout/stderr go to std.txt).
+
+log_pattern=*aurora.VM_16_7_centos.root*
+bin=../../bin/release/aurora
+glog_para=''
+process_name=$(basename "$bin")
+
+if [[ "$OSTYPE" == "cygwin" ]]; then
+    log_pattern=*raft_svr.exe.ARTHUR-PC.arthur.log*
+    bin=../../../Release/raft_svr.exe
+    process_name=raft_svr
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    log_pattern=*aurora.bogon.arthur*
+    glog_para='--log_dir=. --logbuflevel=-1'
+fi
+
+pid=$(ps -ef | grep "$process_name" | grep -v grep | awk '{print $2}')
+# Nothing may be running yet; `kill -9` without a pid would only print a usage error.
+if [[ -n "$pid" ]]; then
+    # $pid stays unquoted on purpose: it may hold several whitespace-separated pids.
+    kill -9 $pid
+fi
+
+# $log_pattern is intentionally unquoted so the glob expands here.
+rm -f $log_pattern
+rm -f raft.binlog.leader
+
+# $glog_para is intentionally unquoted so it splits into separate flags.
+"$bin" --do_heartbeat=false --iterating_wait_timeo_us=2000000 --port=10010 --leader_append_entries_rpc_timeo_ms=5000 --leader_commit_entries_rpc_timeo_ms=5000 --client_cq_num=2 --client_thread_num=2 --notify_cq_num=2 --notify_cq_threads=4 --call_cq_num=2 --call_cq_threads=2 --iterating_threads=2 --client_pool_size=50000 $glog_para > std.txt 2>&1 &
diff --git a/work/backend_svr/topology.config b/work/backend_svr/topology.config
new file mode 100644
index 0000000..825c51e
--- /dev/null
+++ b/work/backend_svr/topology.config
@@ -0,0 +1,9 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+candidates
+my_addr
+127.0.0.1:10010
diff --git a/work/benchmark.sh b/work/benchmark.sh
new file mode 100644
index 0000000..b940bb3
--- /dev/null
+++ b/work/benchmark.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Sweeps the server settings cq_pair x thread_pair x pool and prints the client's
+# 'final throughput' for each combination. Pass "release" as $1 to use the Release server build.
+
+working_dir='C:/Users/95/Documents/Visual Studio 2015/Projects/apollo/raft/work'
+
+client_exe='C:/Users/95/Documents/Visual Studio 2015/Projects/apollo/Release/grpc_1.8.x_client.exe'
+svr_exe='C:/Users/95/Documents/Visual Studio 2015/Projects/apollo/Debug/grpc_1.8.x_svr.exe'
+
+if [ "$1" == "release" ];then
+    svr_exe="C:/Users/95/Documents/Visual Studio 2015/Projects/apollo/Release/grpc_1.8.x_svr.exe"
+fi
+
+# Stop rather than benchmark from whatever directory we happen to be in.
+cd "${working_dir}" || exit 1
+
+for cq_pair in {1,2,4}
+do
+    for thread_pair in {1,2,4,8,16}
+    do
+        for pool in {100,200,400}
+        do
+            # Start the server with the combination under test so the printed labels match what was measured.
+            "${svr_exe}" --thread_pair=${thread_pair} --cq_pair=${cq_pair} --pool=${pool} > /dev/null 2>&1 &
+            pid=$!
+            output=`"${client_exe}" --count_per_thread=20000 --thread_per_cq=2 --cq=2 --addr=localhost:50051 --conn=1`
+            tp=`echo "${output}" | grep 'final throughput' | awk -F: '{print $2}'`
+            echo "cq_pair:${cq_pair},thread_pair:${thread_pair},pool:${pool} tp:${tp}"
+            kill -9 ${pid}
+            # Reap the killed server before the next iteration starts a new one.
+            wait ${pid} 2>/dev/null
+        done
+    done
+done
diff --git a/work/election.config b/work/election.config
new file mode 100644
index 0000000..a00feb3
--- /dev/null
+++ b/work/election.config
@@ -0,0 +1,3 @@
+current term:0
+highest vote for:
+known voting terms:
diff --git a/work/follower_0/debug.sh b/work/follower_0/debug.sh
new file mode 100644
index 0000000..726e22a
--- /dev/null
+++ b/work/follower_0/debug.sh
@@ -0,0 +1,35 @@
+#!/bin/bash
+#
+# Checks that every key taken from an "insert a disorder msg" log line has a matching
+# "process disorder done" line: after sorting, keys must show up in equal pairs.
+
+grep "insert a disorder msg" raft_svr.exe.ARTHUR-PC.arthur.log.INFO.* | awk '{print $12}' | awk -F: '{print $3}' > tmp.txt
+grep "process disorder done" raft_svr.exe.ARTHUR-PC.arthur.log.INFO.* | awk '{print $15}' | awk -F: '{print $3}' >> tmp.txt
+sort tmp.txt > tmp2.txt
+
+cur_base=''
+start=0
+
+# start=1 means cur_base is still waiting for its partner line.
+while read -r line;do
+    if [ $start -eq 0 ];then
+        cur_base="$line"
+        start=1
+        continue
+    fi
+
+    if [ "$cur_base" == "$line" ];then
+        start=0
+        continue
+    fi
+
+    echo "error line:$cur_base"
+    cur_base="$line"
+    start=1
+
+done < tmp2.txt
+
+# The last key has no following line to expose it as unpaired, so report it here.
+if [ $start -eq 1 ];then
+    echo "error line:$cur_base"
+fi
diff --git a/work/follower_0/election.config b/work/follower_0/election.config
new file mode 100644
index 0000000..a00feb3
--- /dev/null
+++ b/work/follower_0/election.config
@@ -0,0 +1,3 @@
+current term:0
+highest vote for:
+known voting terms:
diff --git a/work/follower_0/membership-change.config b/work/follower_0/membership-change.config
new file mode 100644
index 0000000..61082a4
--- /dev/null
+++ b/work/follower_0/membership-change.config
@@ -0,0 +1,3 @@
+cluster status:STABLE
+new cluster:
+version:110
diff --git a/work/follower_0/statistic.sh b/work/follower_0/statistic.sh
new file mode 100644
index 0000000..da49621
--- /dev/null
+++ b/work/follower_0/statistic.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+#
+# Averages, over all "process disorder done" log lines, the second ":"-separated
+# piece of column 13 divided by 1000. Prints nothing when no line matched (avoids dividing by NR=0).
+
+grep "process disorder done" raft_svr.exe.ARTHUR-PC.arthur.log.INFO.* | awk '{print $13}' | awk -F\: 'BEGIN{sum=0;}{x=($2/1000);sum+=x;}END{if(NR>0){printf "avg diroder latency(ms):%d",sum/NR;}}'
diff --git a/work/follower_0/topology.config b/work/follower_0/topology.config
new file mode 100644
index 0000000..3e74a8c
--- /dev/null
+++ b/work/follower_0/topology.config
@@ -0,0 +1,9 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+candidates
+my_addr
+127.0.0.1:10020
\ No newline at end of file
diff --git a/work/follower_1/election.config b/work/follower_1/election.config
new file mode 100644
index 0000000..a00feb3
--- /dev/null
+++ b/work/follower_1/election.config
@@ -0,0 +1,3 @@
+current term:0
+highest vote for:
+known voting terms:
diff --git a/work/follower_1/membership-change.config b/work/follower_1/membership-change.config
new file mode 100644
index 0000000..61082a4
--- /dev/null
+++ b/work/follower_1/membership-change.config
@@ -0,0 +1,3 @@
+cluster status:STABLE
+new cluster:
+version:110
diff --git a/work/follower_1/topology.config b/work/follower_1/topology.config
new file mode 100644
index 0000000..cfad9cb
--- /dev/null
+++ b/work/follower_1/topology.config
@@ -0,0 +1,9 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+candidates
+my_addr
+127.0.0.1:10021
\ No newline at end of file
diff --git a/work/follower_10/election.config b/work/follower_10/election.config
new file mode 100644
index 0000000..263c13c
--- /dev/null
+++ b/work/follower_10/election.config
@@ -0,0 +1,3 @@
+current term:8
+highest vote for:
+known voting terms:
diff --git a/work/follower_10/guid.config b/work/follower_10/guid.config
new file mode 100644
index 0000000..e87c089
--- /dev/null
+++ b/work/follower_10/guid.config
@@ -0,0 +1 @@
+522802
\ No newline at end of file
diff --git a/work/follower_10/membership-change.config b/work/follower_10/membership-change.config
new file mode 100644
index 0000000..c907289
---
/dev/null +++ b/work/follower_10/membership-change.config @@ -0,0 +1,3 @@ +cluster status:START_CHANGING +new cluster:127.0.0.1:10031,127.0.0.1:10030,127.0.0.1:10022,127.0.0.1:10010, +version:111 diff --git a/work/follower_10/topology.config b/work/follower_10/topology.config new file mode 100644 index 0000000..709ad5b --- /dev/null +++ b/work/follower_10/topology.config @@ -0,0 +1,11 @@ +leader +127.0.0.1:10022 +followers +127.0.0.1:10010 +127.0.0.1:10020 +127.0.0.1:10021 +127.0.0.1:10030 +127.0.0.1:10031 +candidates +my_addr +127.0.0.1:10030 \ No newline at end of file diff --git a/work/follower_11/election.config b/work/follower_11/election.config new file mode 100644 index 0000000..263c13c --- /dev/null +++ b/work/follower_11/election.config @@ -0,0 +1,3 @@ +current term:8 +highest vote for: +known voting terms: diff --git a/work/follower_11/guid.config b/work/follower_11/guid.config new file mode 100644 index 0000000..e87c089 --- /dev/null +++ b/work/follower_11/guid.config @@ -0,0 +1 @@ +522802 \ No newline at end of file diff --git a/work/follower_11/membership-change.config b/work/follower_11/membership-change.config new file mode 100644 index 0000000..61082a4 --- /dev/null +++ b/work/follower_11/membership-change.config @@ -0,0 +1,3 @@ +cluster status:STABLE +new cluster: +version:110 diff --git a/work/follower_11/topology.config b/work/follower_11/topology.config new file mode 100644 index 0000000..132657d --- /dev/null +++ b/work/follower_11/topology.config @@ -0,0 +1,11 @@ +leader +127.0.0.1:10022 +followers +127.0.0.1:10010 +127.0.0.1:10020 +127.0.0.1:10021 +127.0.0.1:10030 +127.0.0.1:10031 +candidates +my_addr +127.0.0.1:10031 \ No newline at end of file diff --git a/work/follower_2/election.config b/work/follower_2/election.config new file mode 100644 index 0000000..a00feb3 --- /dev/null +++ b/work/follower_2/election.config @@ -0,0 +1,3 @@ +current term:0 +highest vote for: +known voting terms: diff --git a/work/follower_2/membership-change.config 
b/work/follower_2/membership-change.config new file mode 100644 index 0000000..61082a4 --- /dev/null +++ b/work/follower_2/membership-change.config @@ -0,0 +1,3 @@ +cluster status:STABLE +new cluster: +version:110 diff --git a/work/follower_2/topology.config b/work/follower_2/topology.config new file mode 100644 index 0000000..958d3c8 --- /dev/null +++ b/work/follower_2/topology.config @@ -0,0 +1,9 @@ +leader +127.0.0.1:10010 +followers +127.0.0.1:10020 +127.0.0.1:10021 +127.0.0.1:10022 +candidates +my_addr +127.0.0.1:10022 diff --git a/work/membership-change.config b/work/membership-change.config new file mode 100644 index 0000000..61082a4 --- /dev/null +++ b/work/membership-change.config @@ -0,0 +1,3 @@ +cluster status:STABLE +new cluster: +version:110 diff --git a/work/raft.binlog.candidate b/work/raft.binlog.candidate new file mode 100644 index 0000000..e69de29 diff --git a/work/raft.binlog.election-0 b/work/raft.binlog.election-0 new file mode 100644 index 0000000..eaf3bc9 Binary files /dev/null and b/work/raft.binlog.election-0 differ diff --git a/work/raft.binlog.election-1 b/work/raft.binlog.election-1 new file mode 100644 index 0000000..9836452 Binary files /dev/null and b/work/raft.binlog.election-1 differ diff --git a/work/raft.binlog.election-2 b/work/raft.binlog.election-2 new file mode 100644 index 0000000..7fe79e5 Binary files /dev/null and b/work/raft.binlog.election-2 differ diff --git a/work/raft.binlog.follower b/work/raft.binlog.follower new file mode 100644 index 0000000..5da309b Binary files /dev/null and b/work/raft.binlog.follower differ diff --git a/work/raft.binlog.follower.reserve b/work/raft.binlog.follower.reserve new file mode 100644 index 0000000..5da309b Binary files /dev/null and b/work/raft.binlog.follower.reserve differ diff --git a/work/raft.binlog.leader b/work/raft.binlog.leader new file mode 100644 index 0000000..957cfbb Binary files /dev/null and b/work/raft.binlog.leader differ diff --git a/work/raft.binlog.leader.reserve 
b/work/raft.binlog.leader.reserve
new file mode 100644
index 0000000..5da309b
Binary files /dev/null and b/work/raft.binlog.leader.reserve differ
diff --git a/work/raft.binlog.setHead b/work/raft.binlog.setHead
new file mode 100644
index 0000000..d13e5be
Binary files /dev/null and b/work/raft.binlog.setHead differ
diff --git a/work/raft.binlog.test b/work/raft.binlog.test
new file mode 100644
index 0000000..cc2bc55
Binary files /dev/null and b/work/raft.binlog.test differ
diff --git a/work/restart_followers.sh b/work/restart_followers.sh
new file mode 100644
index 0000000..2a5ebef
--- /dev/null
+++ b/work/restart_followers.sh
@@ -0,0 +1,49 @@
+#!/bin/bash
+#
+# Restarts followers 0, 1 and 2: for each follower_<idx> directory, kill the old
+# process, remove its binlog and logs, and start a fresh one on port 1002<idx>.
+
+log_pattern=*aurora.VM_16_7_centos.root*
+bin=../../bin/release/aurora
+glog_para=''
+process_name=$(basename "$bin")
+
+if [[ "$OSTYPE" == "cygwin" ]]; then
+    log_pattern=*raft_svr.exe.ARTHUR-PC.arthur.log*
+    bin=../../../Release/raft_svr.exe
+    process_name=raft_svr
+
+    #kill them in batch under windows
+    ps -ef | grep $process_name | grep -v grep | awk '{print $2}' | xargs kill -9
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    log_pattern=*aurora.bogon.arthur*
+    glog_para='--log_dir=. --logbuflevel=-1'
+fi
+
+# Restart the follower that lives in ./follower_<idx>; $1 is the index.
+restart_one(){
+    local idx=$1
+    local port=1002${idx}
+    local pid
+
+    # Bail out if the directory is missing: the rm and `cd ..` below must not run from the wrong place.
+    cd "follower_${idx}" || return 1
+
+    if [[ "$OSTYPE" != "cygwin" ]]; then
+        pid=$(ps -ef | grep "$process_name" | grep -v grep | grep "$port" | awk '{print $2}')
+        # Skip the kill when no old process exists; $pid stays unquoted as it may list several pids.
+        if [[ -n "$pid" ]]; then
+            kill -9 $pid
+        fi
+    fi
+
+    rm -f raft.binlog.follower
+    rm -f $log_pattern
+    "$bin" --checking_heartbeat=false --iterating_wait_timeo_us=50000 --disorder_msg_timeo_ms=100000 --port=${port} --notify_cq_num=1 --notify_cq_threads=4 --call_cq_num=1 --call_cq_threads=4 --iterating_threads=2 $glog_para > std.txt 2>&1 &
+    cd ..
+}
+
+
+restart_one 0
+restart_one 1
+restart_one 2
diff --git a/work/test.sh b/work/test.sh
new file mode 100644
index 0000000..a5c1710
--- /dev/null
+++ b/work/test.sh
@@ -0,0 +1,8 @@
+#!/bin/bash
+# For every distinct value in column 3 of analysis.txt, print the last line that mentions it.
+thread_ids=$(awk '{print $3}' analysis.txt | sort -n | uniq)
+for id in $thread_ids
+do
+    # -F/-w: match the id literally and as a whole word, so 12 does not also select lines for 123.
+    grep -Fw -- "$id" analysis.txt | tail -n1
+done
diff --git a/work/topology.config b/work/topology.config
new file mode 100644
index 0000000..7cc31e1
--- /dev/null
+++ b/work/topology.config
@@ -0,0 +1,9 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+candidates
+my_addr
+127.0.0.1:10010
\ No newline at end of file
diff --git a/work/topology.config.memchg b/work/topology.config.memchg
new file mode 100644
index 0000000..bd1bc01
--- /dev/null
+++ b/work/topology.config.memchg
@@ -0,0 +1,9 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+127.0.0.1:10030
+127.0.0.1:10031
+candidates
diff --git a/work/topology.config.reserve b/work/topology.config.reserve
new file mode 100644
index 0000000..f06907b
--- /dev/null
+++ b/work/topology.config.reserve
@@ -0,0 +1,7 @@
+leader
+127.0.0.1:10010
+followers
+127.0.0.1:10020
+127.0.0.1:10021
+127.0.0.1:10022
+candidates