From 88dac0b2fb674f48f26f0053517a915fb11921ce Mon Sep 17 00:00:00 2001 From: Peter Molnar Date: Tue, 23 May 2017 11:14:47 +0100 Subject: [PATCH] version 2.0 --- LICENSE | 674 +++++++++++++++++++ README.md | 8 + config.ini.dist | 68 ++ nasg.py | 1405 +++++++++++++++++++++++++++++++++++----- nasg/__init__.py | 0 nasg/cmdline.py | 115 ---- nasg/func.py | 21 - nasg/img.py | 297 --------- nasg/img_test.py | 0 nasg/jinjaenv.py | 29 - nasg/searchindex.py | 76 --- nasg/singular.py | 580 ----------------- nasg/taxonomy.py | 319 --------- nasg/tests/cmdline.py | 26 - nasg/tests/func.py | 60 -- nasg/tests/jinjaenv.py | 36 - nasg/tests/singular.py | 10 - nasg/tests/taxonomy.py | 10 - new.py | 132 ++++ requirements.txt | 28 + shared.py | 76 +++ 21 files changed, 2222 insertions(+), 1748 deletions(-) create mode 100644 LICENSE create mode 100644 README.md create mode 100644 config.ini.dist delete mode 100644 nasg/__init__.py delete mode 100644 nasg/cmdline.py delete mode 100644 nasg/func.py delete mode 100644 nasg/img.py delete mode 100644 nasg/img_test.py delete mode 100644 nasg/jinjaenv.py delete mode 100644 nasg/searchindex.py delete mode 100644 nasg/singular.py delete mode 100644 nasg/taxonomy.py delete mode 100644 nasg/tests/cmdline.py delete mode 100644 nasg/tests/func.py delete mode 100644 nasg/tests/jinjaenv.py delete mode 100644 nasg/tests/singular.py delete mode 100644 nasg/tests/taxonomy.py create mode 100644 new.py create mode 100644 requirements.txt create mode 100644 shared.py diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..9cecc1d --- /dev/null +++ b/LICENSE @@ -0,0 +1,674 @@ + GNU GENERAL PUBLIC LICENSE + Version 3, 29 June 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU General Public License is a free, copyleft license for +software and other kinds of works. 
+ + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +the GNU General Public License is intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. We, the Free Software Foundation, use the +GNU General Public License for most of our software; it applies also to +any other work released this way by its authors. You can apply it to +your programs, too. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + To protect your rights, we need to prevent others from denying you +these rights or asking you to surrender the rights. Therefore, you have +certain responsibilities if you distribute copies of the software, or if +you modify it: responsibilities to respect the freedom of others. + + For example, if you distribute copies of such a program, whether +gratis or for a fee, you must pass on to the recipients the same +freedoms that you received. You must make sure that they, too, receive +or can get the source code. And you must show them these terms so they +know their rights. + + Developers that use the GNU GPL protect your rights with two steps: +(1) assert copyright on the software, and (2) offer you this License +giving you legal permission to copy, distribute and/or modify it. + + For the developers' and authors' protection, the GPL clearly explains +that there is no warranty for this free software. 
For both users' and +authors' sake, the GPL requires that modified versions be marked as +changed, so that their problems will not be attributed erroneously to +authors of previous versions. + + Some devices are designed to deny users access to install or run +modified versions of the software inside them, although the manufacturer +can do so. This is fundamentally incompatible with the aim of +protecting users' freedom to change the software. The systematic +pattern of such abuse occurs in the area of products for individuals to +use, which is precisely where it is most unacceptable. Therefore, we +have designed this version of the GPL to prohibit the practice for those +products. If such problems arise substantially in other domains, we +stand ready to extend this provision to those domains in future versions +of the GPL, as needed to protect the freedom of users. + + Finally, every program is threatened constantly by software patents. +States should not allow patents to restrict development and use of +software on general-purpose computers, but in those that do, we wish to +avoid the special danger that patents applied to a free program could +make it effectively proprietary. To prevent this, the GPL assures that +patents cannot be used to render the program non-free. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. 
The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. + + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Use with the GNU Affero General Public License. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU Affero General Public License into a single +combined work, and to convey the resulting work. 
The terms of this +License will continue to apply to the part which is the covered work, +but the special requirements of the GNU Affero General Public License, +section 13, concerning interaction through a network will apply to the +combination as such. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU General Public License from time to time. Such new versions will +be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU General Public License, you may choose any version ever published +by the Free Software Foundation. + + If the Program specifies that a proxy can decide which future +versions of the GNU General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. 
THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. + + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. 
+ + {one line to give the program's name and a brief idea of what it does.} + Copyright (C) {year} {name of author} + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If the program does terminal interaction, make it output a short +notice like this when it starts in an interactive mode: + + {project} Copyright (C) {year} {fullname} + This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'. + This is free software, and you are welcome to redistribute it + under certain conditions; type `show c' for details. + +The hypothetical commands `show w' and `show c' should show the appropriate +parts of the General Public License. Of course, your program's commands +might be different; for a GUI interface, you would use an "about box". + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU GPL, see +<http://www.gnu.org/licenses/>. + + The GNU General Public License does not permit incorporating your program +into proprietary programs. If your program is a subroutine library, you +may consider it more useful to permit linking proprietary applications with +the library. If this is what you want to do, use the GNU Lesser General +Public License instead of this License. But first, please read +<http://www.gnu.org/philosophy/why-not-lgpl.html>.
diff --git a/README.md b/README.md new file mode 100644 index 0000000..025a491 --- /dev/null +++ b/README.md @@ -0,0 +1,8 @@ +# NASG: Not Another Static Generator... + +So I ended up writing my static generator and this is (most of) the code for it. + +Don't expect anything fancy and please be aware that my Python Fu has much to learn and earn. + +I've written about the generic ideas and approaches here in my +[Going Static](https://petermolnar.net/going-static/) entry. diff --git a/config.ini.dist b/config.ini.dist new file mode 100644 index 0000000..b1e51f9 --- /dev/null +++ b/config.ini.dist @@ -0,0 +1,68 @@ +[common] +base = ~/your_base_directory +domain = your.domain.com +; this should be in the base directory specified above +watermark = watermark_file.png +; size of the default image shown; pick one from the list in [downsize] +fallbackimg = 720 +; items per page, also applies to RSS feed +pagination = 8 + +[source] +; directories for various content; all within "base" from "common" +content = content +files = files +templates = templates +tags = tag +photos = photos +offlinecopies = offlinecopies +; static is a folder whose contents will be copied to the destination +static = copy + +[target] +; directories for various targets; all within "base" from "common" +; these are created automatically +build = build + +[var] +; directories for various targets; all within "build" from "target" +; these are created automatically +searchdb = s + +[site] +charset = UTF-8 +title = additional title text | ${common:domain} +url = https://${common:domain} +default_lang = en +domains = ${common:domain} extra.domain + +[author] +; author details for h-card +name = Your Name +email = your@email.address +url = ${site:url} +; you may want to put this into the "static" dir from "source" +avatar = name_of_avatar_file.jpg +; GPG long key +gpg = XXXX XXX XXXX XXXX + +[socials] +; list of social network name = social network account +flickr =
https://www.flickr.com/people/yourhandle +github = https://github.com/yourhandle +; feel free to add more + +[photo] +; regex to be matched in EXIF data against EXIF:Author +regex = "this should be a tricky regex to match your usual author line in EXIF" + +[downsize] +; size = suffix +90 = s +360 = m +720 = z +1280 = b + +[crop] +; size = crop to square boolean, defaults to False +90 = true diff --git a/nasg.py b/nasg.py index ebac014..385f4ad 100644 --- a/nasg.py +++ b/nasg.py @@ -1,203 +1,1270 @@ -import argparse -import logging +#!/usr/bin/env python3 + import os import re -import arrow +import configparser +import argparse +import shutil +import logging +import json +import glob +import subprocess +import tempfile import atexit -from concurrent.futures import ProcessPoolExecutor -from multiprocessing import cpu_count +import re +import hashlib +import math +import asyncio +import magic + +import arrow +import wand.image +import similar_text +import frontmatter from slugify import slugify +import langdetect +import requests +from breadability.readable import Article +from whoosh import index +import jinja2 -import nasg.config as config -import nasg.singular as singular -import nasg.searchindex as searchindex -import nasg.taxonomy as taxonomy +import shared -from pprint import pprint +def splitpath(path): + parts = [] + (path, tail) = os.path.split(path) + while path and tail: + parts.insert(0,tail) + (path,tail) = os.path.split(path) + return parts -parser = argparse.ArgumentParser(description='Parameters for NASG') -parser.add_argument( - '--regenerate', '-f', - dest='regenerate', - action='store_true', - default=False, - help='force regeneration of all HTML outputs' -) -parser.add_argument( - '--downsize', '-c', - action='store_true', - dest='downsize', - default=False, - help='force re-downsizing of all suitable images' -) -parser.add_argument( - '--debug', '-d', - action='store_true', - dest='debug', - default=False, - help='turn on debug log' -) +class 
Indexer(object): -class Engine(object): def __init__(self): - self._initdirs() - self._lock() - atexit.register(self._lock, action='clear') + self.tmp = tempfile.mkdtemp( + 'whooshdb_', + dir=tempfile.gettempdir() + ) + atexit.register( + shutil.rmtree, + os.path.abspath(self.tmp) + ) + self.ix = index.create_in(self.tmp, shared.schema) + self.target = os.path.abspath(os.path.join( + shared.config.get('target', 'builddir'), + shared.config.get('var', 'searchdb') + )) + self.writer = self.ix.writer() + + + async def append(self, singular): + logging.info("appending search index with %s", singular.fname) + + content_real = [ + singular.fname, + singular.summary, + singular.content, + ] + + content_remote = [] + for url, offlinecopy in singular.offlinecopies.items(): + content_remote.append("%s" % offlinecopy) + + self.writer.add_document( + title=singular.title, + url=singular.url, + content=" ".join(list(map(str,[*content_real, *content_remote]))), + date=singular.published.datetime, + tags=",".join(list(map(str, singular.tags))), + weight=1, + img="%s" % singular.photo + ) + + def finish(self): + self.writer.commit() + if os.path.isdir(self.target): + shutil.rmtree(self.target) + shutil.copytree(self.tmp, self.target) + +class OfflineCopy(object): + def __init__(self, url): + self.url = url + h = url.encode('utf-8') + self.fname = hashlib.sha1(h).hexdigest() + self.targetdir = os.path.abspath( + shared.config.get('source', 'offlinecopiesdir') + ) + self.target = os.path.join( + self.targetdir, + self.fname + ) + self.fm = frontmatter.loads('') + self.fm.metadata = { + 'url': self.url, + 'date': arrow.utcnow().format("YYYY-MM-DDTHH:mm:ssZ"), + } + + def __repr__(self): + return self.fm.content + + def write(self): + logging.info( + "savig offline copy of\n\t%s to:\n\t%s", + self.url, + self.target + ) + with open(self.target, 'wt') as f: + f.write(frontmatter.dumps(self.fm)) + + def run(self): + if os.path.isfile(self.target): + with open(self.target) as f: + 
self.fm = frontmatter.loads(f.read()) + return + + logging.info("prepairing offline copy of %s", self.url) + headers = requests.utils.default_headers() + headers.update({ + 'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64; rv:52.0) Gecko/20100101 Firefox/52.0', + 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8' + }) + + try: + r = requests.get( + self.url, + allow_redirects=True, + timeout=60, + headers=headers + ) + except Exception as e: + logging.error("%s failed:\n%s", self.url, e) + self.write() + return + + if r.status_code != requests.codes.ok: + logging.warning("%s returned %s", self.url, r.status_code) + self.write() + return + + if not len(r.text): + logging.warning("%s was empty", self.url) + self.write() + return + + doc = Article(r.text, url=self.url) + self.fm.metadata['title'] = doc._original_document.title + self.fm.metadata['realurl'] = r.url + self.fm.content = Pandoc(False).convert(doc.readable) + self.write() + + +class Renderer(object): + def __init__(self): + self.sitevars = dict(shared.config.items('site')) + self.sitevars['author'] = dict(shared.config.items('author')) + self.sitevars['author']['socials'] = dict(shared.config.items('socials')) + + self.jinjaldr = jinja2.FileSystemLoader( + searchpath=shared.config.get('source', 'templatesdir') + ) + self.j2 = jinja2.Environment(loader=self.jinjaldr) + self.j2.filters['date'] = Renderer.jinja_filter_date + self.j2.filters['search'] = Renderer.jinja_filter_search + self.j2.filters['slugify'] = Renderer.jinja_filter_slugify + + @staticmethod + def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'): + if d == 'now': + return arrow.now().strftime(form) + if form == 'c': + form = '%Y-%m-%dT%H:%M:%S%z' + return d.strftime(form) + + @staticmethod + def jinja_filter_slugify(s): + return slugify(s, only_ascii=True, lower=True) + + @staticmethod + def jinja_filter_search(s, r): + if r in s: + return True + return False + + #def rendersingular(self, singular): + 
#logging.debug("rendering and saving %s", singular.fname) + #targetdir = os.path.abspath(os.path.join( + #shared.config.get('target', 'builddir'), + #singular.fname + #)) + #target = os.path.join(targetdir, 'index.html') + + #if not shared.config.get('params', 'force') and os.path.isfile(target): + #ttime = int(os.path.getmtime(target)) + #if ttime == singular.mtime: + #logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime) + #return + + #if not os.path.isdir(targetdir): + #os.mkdir(targetdir) + + #tmpl = self.j2.get_template(singular.tmplfile) + #tmplvars = { + #'post': singular.tmplvars, + #'site': self.sitevars, + #'taxonomy': {}, + #} + #r = tmpl.render(tmplvars) + #with open(target, "w") as html: + #html.write(r) + #html.close() + #os.utime(target, (singular.mtime, singular.mtime)) + + +class BaseIter(object): + def __init__(self): + self.data = {} + + def append(self, key, value): + if key in self.data: + logging.error("duplicate key: %s", key) + return + self.data[key] = value + + def __getitem__(self, key): + return self.data.get(key, {}) + + def __repr__(self): + return json.dumps(list(self.data.values())) + + def __next__(self): + try: + r = self.data.next() + except: + raise StopIteration() + return r + + def __iter__(self): + for k, v in self.data.items(): + yield (k, v) + return + +class CMDLine(object): + def __init__(self, executable): + self.executable = self._which(executable) + if self.executable is None: + raise OSError('No %s found in PATH!' 
% executable) + return + + @staticmethod + def _which(name): + for d in os.environ['PATH'].split(':'): + which = glob.glob(os.path.join(d, name), recursive=True) + if which: + return which.pop() + return None + + def __enter__(self): + self.process = subprocess.Popen( + [self.executable, "-stay_open", "True", "-@", "-"], + universal_newlines=True, + stdin=subprocess.PIPE, stdout=subprocess.PIPE) + return self + + def __exit__(self, exc_type, exc_value, traceback): + self.process.stdin.write("-stay_open\nFalse\n") + self.process.stdin.flush() + + def execute(self, *args): + args = args + ("-execute\n",) + self.process.stdin.write(str.join("\n", args)) + self.process.stdin.flush() + output = "" + fd = self.process.stdout.fileno() + while not output.endswith(self.sentinel): + output += os.read(fd, 4096).decode('utf-8', errors='ignore') + return output[:-len(self.sentinel)] + + +class Pandoc(CMDLine): + """ Handles calling external binary `exiftool` in an efficient way """ + def __init__(self, md2html=True): + super().__init__('pandoc') + if md2html: + self.i = "markdown+" + "+".join([ + 'backtick_code_blocks', + 'auto_identifiers', + 'fenced_code_attributes', + 'definition_lists', + 'grid_tables', + 'pipe_tables', + 'strikeout', + 'superscript', + 'subscript', + 'markdown_in_html_blocks', + 'shortcut_reference_links', + 'autolink_bare_uris', + 'raw_html', + 'link_attributes', + 'header_attributes', + 'footnotes', + ]) + self.o = 'html5' + else: + self.o = "markdown-" + "-".join([ + 'raw_html', + 'native_divs', + 'native_spans', + ]) + self.i = 'html' + + def convert(self, text): + cmd = ( + self.executable, + '-o-', + '--from=%s' % self.i, + '--to=%s' % self.o + ) + logging.debug('converting content with Pandoc') + p = subprocess.Popen( + cmd, + stdin=subprocess.PIPE, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + ) + + stdout, stderr = p.communicate(input=text.encode()) + if stderr: + logging.error("Error during pandoc covert:\n\t%s\n\t%s", cmd, stderr) + 
return stdout.decode('utf-8').strip() + +# based on http://stackoverflow.com/a/10075210 +class ExifTool(CMDLine): + """ Handles calling external binary `exiftool` in an efficient way """ + sentinel = "{ready}\n" + + def __init__(self): + super().__init__('exiftool') + + def get_metadata(self, *filenames): + return json.loads(self.execute('-sort', '-json', '-MIMEType', '-FileType', '-FileName', '-ModifyDate', '-CreateDate', '-DateTimeOriginal', '-ImageHeight', '-ImageWidth', '-Aperture', '-FOV', '-ISO', '-FocalLength', '-FNumber', '-FocalLengthIn35mmFormat', '-ExposureTime', '-Copyright', '-Artist', '-Model', '-GPSLongitude#', '-GPSLatitude#', '-LensID', *filenames)) + +class Images(BaseIter): + def __init__(self, extensions=['jpg', 'gif', 'png']): + super(Images, self).__init__() + logging.info( + "initiating images with extensions: %s", + extensions + ) self.files = [] - self.categories = {} - self.tags = {} - self.allposts = taxonomy.TaxonomyHandler('') - self.frontposts = taxonomy.TaxonomyHandler('') - self.allowedpattern = re.compile(config.accept_sourcefiles) - self.counter = {} - - def _parse_results(self, futures): - for future in futures: - try: - future.result() - except Exception as e: - logging.error("processing failed: %s", e) + self.data = {} + # if anyone knows how to do this in a more pythonic way, please tell me + paths = [ + shared.config.get('source', 'filesdir'), + shared.config.get('source', 'photosdir') + ] + for p in paths: + for ext in extensions: + self.files += glob.glob(os.path.join(p, "*.%s" % ext)) - def collect(self): - self._setup_categories() - self._setup_singulars() + def populate(self): + with ExifTool() as e: + _meta = e.get_metadata(*self.files) + # parsing the returned meta into a dict of [filename]={meta} + for e in _meta: + if 'FileName' not in e: + logging.error("missing 'FileName' in element %s", e) + continue + fname = os.path.basename(e['FileName']) + del(e['FileName']) + # duplicate files are going to be a problem, so 
don't send it + # away with a simple error log entry + if fname in self.data: + raise ValueError('filename collision: %s', fname) + # convert dates + for k, v in e.items(): + e[k] = self.exifdate(v) + self.data[fname] = WebImage(fname, e) - def render(self): - self._render_singulars() - #self._render_taxonomy() + def exifdate(self, value): + """ converts and EXIF date string to ISO 8601 format + :param value: EXIF date (2016:05:01 00:08:24) + :type arg1: str + :return: ISO 8601 string with UTC timezone 2016-05-01T00:08:24+0000 + :rtype: str + """ + if not isinstance(value, str): + return value + match = shared.EXIFREXEG.match(value) + if not match: + return value + return "%s-%s-%sT%s+0000" % ( + match.group('year'), + match.group('month'), + match.group('day'), + match.group('time') + ) - def _render_singulars(self): - logging.warning("rendering singulars") - pprint(self.allposts) - #futures = [] - #with ProcessPoolExecutor(max_workers=cpu_count()) as executor: - for p in self.allposts: - #futures.append(executor.submit(p.write)) - p.write() - #for future in futures: - #try: - #future.result() - #except Exception as e: - #logging.error("processing failed: %s", e) +class WebImage(object): + def __init__(self, fname, meta): + logging.info( + "parsing image: %s", + fname + ) + self.meta = meta + self.fpath = os.path.abspath(meta.get('SourceFile', fname)) + self.fname, self.ext = os.path.splitext(fname) + self.alttext = '' + self.sizes = [] + self.fallbacksize = int(shared.config.get('common','fallbackimg', fallback='720')) + for size in shared.config.options('downsize'): + sizeext = shared.config.get('downsize', size) + fname = "%s_%s%s" % (self.fname, sizeext, self.ext) + self.sizes.append(( + int(size), + { + 'fpath': os.path.join( + shared.config.get('target', 'filesdir'), + fname + ), + 'url': "%s/%s/%s" % ( + shared.config.get('site', 'url'), + shared.config.get('source', 'files'), + fname + ), + 'crop': shared.config.getboolean('crop', size, fallback=False), + 
} + )) - def _render_taxonomy(self): - futures = [] - with ProcessPoolExecutor(max_workers=cpu_count()) as executor: - for tslug, t in self.tags.items(): - #t.write() - futures.append(executor.submit(t.write)) - for cslug, c in self.categories.items(): - #c.write() - futures.append(executor.submit(c.write)) - #self.frontposts.write() - futures.append(executor.submit(self.frontposts.write)) - self._parse_results(futures) + self.sizes = sorted(self.sizes, reverse=False) - - def _setup_categories(self): - for cat, meta in config.categories.items(): - cpath = os.path.join(config.CONTENT, cat) - if not os.path.isdir(cpath): - logging.error("category %s not found at: %s", cat, cpath) - continue - - self.categories[cat] = taxonomy.TaxonomyHandler( - meta.get('name', cat), - taxonomy=meta.get('type', 'category'), - slug=cat, - render=meta.get('render', True) + self.target = False + if self.is_downsizeable: + self.fallback = [e for e in self.sizes if e[0] == self.fallbacksize][0][1]['url'] + self.target = self.sizes[-1][1]['url'] + else: + self.fallback = "%s/%s/%s" % ( + shared.config.get('site', 'url'), + shared.config.get('source', 'files'), + "%s%s" % (self.fname, self.ext) ) - - def _setup_singulars(self): - futures = [] - with ProcessPoolExecutor(max_workers=cpu_count()) as executor: - for slug, tax in self.categories.items(): - cpath = os.path.join(config.CONTENT, slug) - for f in os.listdir(cpath): - fpath = os.path.join(cpath,f) - if not self.allowedpattern.fullmatch(f): - logging.warning("unexpected file at: %s" % fpath) - continue - #self._posttype(fpath, slug) - futures.append(executor.submit(self._posttype, fpath, slug)) - self._parse_results(futures) - - def _posttype(self, fpath, cat): - c = self.categories[cat] - - if re.match('.*\.jpg', fpath): - p = singular.PhotoHandler(fpath) - elif 'page' == c.taxonomy: - p = singular.PageHandler(fpath) + def __str__(self): + if self.is_downsizeable: + return '\n
%s
%s%s
\n' % ( + self.target, + self.fallback, + self.alttext, + self.fname, + self.ext + ) else: - p = singular.ArticleHandler(fpath) + return '\n
%s
%s%s
\n' % ( + self.fallback, + self.alttext, + self.fname, + self.ext + ) - c.append(p) - self.allposts.append(p) + @property + def rssenclosure(self): + """ Returns the largest available image for RSS to add as attachment """ + target = self.sizes[-1][1] + return { + 'mime': magic.Magic(mime=True).from_file(target['fpath']), + 'url': target['url'], + 'bytes': os.path.getsize(target['fpath']) + } - front = config.categories[cat].get('front', True) - if front: - self.frontposts.append(p) + @property + def is_photo(self): + """ Match image meta against config artist regex to see if the file is + a photo or just a regular image """ + pattern = shared.config.get('photo', 'regex', fallback=None) + if not pattern or not isinstance(pattern, str): + return False + pattern = re.compile(pattern) - ptags = p.vars.get('tags', []) - for tag in ptags: - tslug = slugify(tag, only_ascii=True, lower=True) - if tslug not in self.tags: - self.tags[tslug] = taxonomy.TaxonomyHandler( - tag, - taxonomy='tag', - slug=tslug + cpr = self.meta.get('Copyright', '') + art = self.meta.get('Artist', '') + if not cpr and not art: + return False + + if pattern.search(cpr) \ + or pattern.search(art): + return True + + return False + + @property + def is_downsizeable(self): + """ Check if the image is large enough and jpeg or png in order to + downsize it """ + fb = self.sizes[-1][0] + ftype = self.meta.get('FileType', None) + if not ftype: + return False + if ftype.lower() == 'jpeg' or ftype.lower() == 'png': + width = int(self.meta.get('ImageWidth', 0)) + height = int(self.meta.get('ImageHeight', 0)) + if width > fb or height > fb: + return True + return False + + def _copy(self): + target = os.path.join( + shared.config.get('target', 'filesdir'), + "%s%s" % (self.fname, self.ext) + ) + if not os.path.isfile(target): + logging.debug("can't downsize %s, copying instead" % self.fname) + shutil.copy(self.fpath, target) + + def _watermark(self, img): + """ Composite image by adding watermark file over it 
""" + wmarkfile = os.path.join( + shared.config.get('common', 'basedir'), + shared.config.get('common', 'watermark') + ) + if not os.path.isfile(wmarkfile): + return img + + with wand.image.Image(filename=wmarkfile) as wmark: + if img.width > img.height: + w = img.width * 0.16 + h = wmark.height * (w / wmark.width) + x = img.width - w - (img.width * 0.01) + y = img.height - h - (img.height * 0.01) + else: + w = img.height * 0.16 + h = wmark.height * (w / wmark.width) + x = img.width - h - (img.width * 0.01) + y = img.height - w - (img.height * 0.01) + + w = round(w) + h = round(h) + x = round(x) + y = round(y) + + wmark.resize(w, h) + if img.width <= img.height: + wmark.rotate(-90) + img.composite(image=wmark, left=x, top=y) + return img + + + def _intermediate_dimensions(self, size, width, height, crop = False): + size = int(size) + w = width + h = height + if (width > height and not crop) \ + or (width < height and crop): + w = size + h = int(float(size / width) * height) + else: + h = size + w = int(float(size / height) * width) + return (w, h) + + + def _intermediate(self, img, size, meta, existing = []): + if img.width <= size and img.height <= size: + return False + + crop = meta.get('crop', False) + with img.clone() as thumb: + width, height = self._intermediate_dimensions( + size, + img.width, + img.height, + crop + ) + thumb.resize(width, height) + + if crop: + thumb.liquid_rescale(size, size, 1, 1) + + if self.meta.get('FileType', 'jpeg').lower() == 'jpeg': + thumb.compression_quality = 86 + thumb.unsharp_mask( + radius=0, + sigma=0.5, + amount=1, + threshold=0.03 ) - self.tags[tslug].append(p) + thumb.format = 'pjpeg' + + # this is to make sure pjpeg happens + with open(meta['fpath'], 'wb') as f: + thumb.save(file=f) + + return True - def _initdirs(self): - for d in [ - config.TARGET, - config.TTHEME, - config.TFILES, - config.VAR, - config.SEARCHDB, - config.TSDB, - config.LOGDIR - ]: - if not os.path.exists(d): - os.mkdir(d) + async def downsize(self, 
existing = []): + if not self.is_downsizeable: + self._copy() + return + logging.info("checking downsizing for %s", self.fname) + needed = shared.config.getboolean('params', 'regenerate', fallback=False) - def _lock(self, action='set'): - if 'set' == action: - if os.path.exists(config.LOCKFILE): - raise ValueError("lockfile %s present" % config.LOCKFILE) - with open(config.LOCKFILE, "wt") as l: - l.write("%s" % arrow.utcnow()) - l.close() - elif 'clear' == action: - if os.path.exists(config.LOCKFILE): - os.unlink(config.LOCKFILE) + if not needed: + for (size, meta) in self.sizes: + if meta['fpath'] not in existing: + needed = True + + if not needed: + logging.debug("downsizing not needed for %s", self.fname) + return + + with wand.image.Image(filename=self.fpath) as img: + img.auto_orient() + + if self.is_photo: + logging.info("%s is a photo", self.fpath) + img = self._watermark(img) + + for (size, meta) in self.sizes: + self._intermediate(img, size, meta, existing) + +class Taxonomy(BaseIter): + def __init__(self, name = None, taxonomy = None, slug = None): + super(Taxonomy, self).__init__() + self.name = name + if name and not slug: + self.slug = slugify(name, only_ascii=True, lower=True) else: - return os.path.exists(config.LOCKFILE) + self.slug = slug + self.taxonomy = taxonomy + + @property + def pages(self): + return math.ceil(len(self.data) / shared.config.getint('common', 'pagination')) + + def __repr__(self): + return "taxonomy %s with %d items" % (self.taxonomy, len(self.data)) + + @property + def basep(self): + p = shared.config.get('target', 'builddir') + if self.taxonomy: + p = os.path.join(p, self.taxonomy) + return p + + @property + def myp(self): + p = self.basep + if self.slug: + return os.path.join(p,self.slug) + return p + + @property + def feedp(self): + return os.path.join(self.myp, 'feed') + + @property + def pagep(self): + return os.path.join(self.myp, 'page') + + @property + def baseurl(self): + if self.taxonomy and self.slug: + return 
"/%s/%s/" % (self.taxonomy, self.slug) + else: + return '/' + + @property + def mtime(self): + return int(list(sorted(self.data.keys(), reverse=True))[0]) + + def __mkdirs(self): + check = [self.basep, self.myp, self.feedp] + + if self.pages > 1: + check.append(self.pagep) + for i in range(2, self.pages+1): + subpagep = os.path.abspath(os.path.join( + self.pagep, + '%d' % i + )) + check.append(subpagep) + + for p in check: + if not os.path.isdir(p): + logging.debug("creating dir %s", p) + os.mkdir(p) + + def tpath(self, page): + if page == 1: + return "%s/index.html" % (self.myp) + else: + return "%s/%d/index.html" % (self.pagep, page) + + + async def render(self, renderer): + self.__mkdirs() + page = 1 + testpath = self.tpath(page) + if not shared.config.getboolean('params', 'force') and os.path.isfile(testpath): + ttime = int(os.path.getmtime(testpath)) + if ttime == self.mtime: + logging.info('taxonomy index for "%s" exists and up-to-date (lastmod: %d)', self.slug, ttime) + return + + while page <= self.pages: + self.renderpage(renderer, page) + page = page+1 + + def renderpage(self, renderer, page): + pagination = int(shared.config.get('common', 'pagination')) + start = int((page-1) * pagination) + end = int(start + pagination) + + posttmpls = [self.data[k].tmplvars for k in list(sorted( + self.data.keys(), reverse=True))[start:end]] + + target = self.tpath(page) + logging.info("rendering taxonomy page %d to %s", page, target) + tmplvars = { + 'taxonomy': { + 'url': self.baseurl, + 'name': self.name, + 'taxonomy': self.taxonomy, + 'paged': page, + 'total': self.pages, + 'perpage': pagination + }, + 'site': renderer.sitevars, + 'posts': posttmpls, + } + + r = renderer.j2.get_template('archive.html').render(tmplvars) + with open(target, "wt") as html: + html.write(r) + os.utime(target, (self.mtime, self.mtime)) + + if 1 == page: + target = os.path.join(self.feedp, 'index.xml') + logging.info("rendering RSS feed to %s", target) + r = 
renderer.j2.get_template('rss.html').render(tmplvars) + with open(target, "wt") as html: + html.write(r) + os.utime(target, (self.mtime, self.mtime)) + +class Content(BaseIter): + def __init__(self, images, extensions=['md']): + super(Content, self).__init__() + self.images = images + basepath = shared.config.get('source', 'contentdir') + self.files = [] + for ext in extensions: + self.files += glob.glob(os.path.join(basepath, "*", "*.%s" % ext)) + self.tags = {} + self.categories = {} + self.front = Taxonomy() + + def populate(self): + for fpath in self.files: + item = Singular(fpath, self.images) + self.append(item.pubtime, item) + + if item.isonfront: + self.front.append(item.pubtime, item) + + if item.iscategorised: + if item.category not in self.categories: + self.categories[item.category] = Taxonomy(item.category, 'category') + self.categories[item.category].append(item.pubtime, item) + + for tag in item.tags: + tslug = slugify(tag, only_ascii=True, lower=True) + if tslug not in self.tags: + self.tags[tslug] = Taxonomy(tag, 'tag', tslug) + self.tags[tslug].append(item.pubtime, item) + self.symlinktag(tslug, item.path) + + def symlinktag(self, tslug, fpath): + fdir, fname = os.path.split(fpath) + tagpath = os.path.join(shared.config.get('source', 'tagsdir'), tslug) + if not os.path.isdir(tagpath): + os.mkdir(tagpath) + sympath = os.path.relpath(fdir, tagpath) + dst = os.path.join(tagpath, fname) + src = os.path.join(sympath, fname) + if not os.path.islink(dst): + os.symlink(src, dst) + + def sitemap(self): + target = os.path.join( + shared.config.get('target', 'builddir'), + 'sitemap.txt' + ) + urls = [] + for t, item in self.data.items(): + urls.append( "%s/%s/" % ( + shared.config.get('site', 'url'), + item.fname + )) + + with open(target, "wt") as f: + logging.info("writing sitemap to %s" % (target)) + f.write("\n".join(urls)) + +class Singular(object): + def __init__(self, path, images): + logging.debug("initiating singular object from %s", path) + 
self.path = path + self.images = images + self.category = splitpath(path)[-2] + self.mtime = int(os.path.getmtime(self.path)) + self.fname, self.ext = os.path.splitext(os.path.basename(self.path)) + self.meta = {} + self.content = '' + self.photo = self.images.data.get("%s.jpg" % self.fname, None) + self.__parse() + + def __repr__(self): + return "%s (lastmod: %s)" % (self.fname, self.published) + + def __parse(self): + with open(self.path, mode='rt') as f: + self.meta, self.content = frontmatter.parse(f.read()) + self.__filter_images() + if self.isphoto: + #self.photo.alttext = self.content + self.content = "%s\n%s" % ( + self.content, + self.photo + ) + + #@property + #def isrepost(self): + #isrepost = False + + #if len(self.reactions.keys()): + #isrepost = list(self.reactions.keys())[0] + + #if isrepost: + #if len(self.reactions[isrepost]) == 1: + #linkto = self.reactions[isrepost][0] + + + def __filter_images(self): + linkto = False + isrepost = None + + if len(self.reactions.keys()): + isrepost = list(self.reactions.keys())[0] + if isrepost and \ + len(self.reactions[isrepost]) == 1: + linkto = self.reactions[isrepost][0] + + m = shared.MDIMGREGEX.findall(self.content) + if not m: + logging.debug("no images found") + return + + for shortcode, alt, fname, title, cl in m: + image = self.images.data.get(fname, None) + if not image: + logging.debug("%s not found in images", fname) + continue + + logging.debug( + "replacing %s in content with %s", + shortcode, + "%s" % image + ) + self.content = self.content.replace( + shortcode, + "%s" % image + ) + + @property + def reactions(self): + # getting rid of '-' to avoid css trouble and similar + convert = { + 'bookmark-of': 'bookmark', + 'repost-of': 'repost', + 'in-reply-to': 'reply', + } + reactions = {} + + for k, v in convert.items(): + x = self.meta.get(k, None) + if not x: + continue + if isinstance(x, str): + x = [x] + reactions[v] = x + + return reactions + + @property + def lang(self): + lang = 'en' + try: + 
lang = langdetect.detect("\n".join([ + self.title, + self.content + ])) + except: + pass + return lang + + @property + def tags(self): + return list(self.meta.get('tags', [])) + + @property + def published(self): + return arrow.get( + self.meta.get('published', self.mtime) + ) + + @property + def updated(self): + return arrow.get( + self.meta.get('updated', + self.meta.get('published', self.mtime) + ) + ) + + @property + def pubtime(self): + return int(self.published.timestamp) + + @property + def isphoto(self): + if not self.photo: + return False + return self.photo.is_photo + + @property + def isbookmark(self): + return self.meta.get('bookmark-of', False) + + @property + def ispage(self): + if not self.meta: + return True + return False + + @property + def isonfront(self): + if self.ispage or self.isbookmark: + return False + return True + + @property + def iscategorised(self): + if self.ispage: + return False + return True + + @property + def summary(self): + return self.meta.get('summary', '') + + @property + def title(self): + for maybe in ['title', 'bookmark-of', 'in-reply-to', 'repost-of']: + maybe = self.meta.get(maybe, False) + if maybe: + return maybe + return self.fname + + @property + def url(self): + return "%s/%s/" % (shared.config.get('site', 'url'), self.fname) + + @property + def tmplfile(self): + if self.ispage: + return 'page.html' + else: + return 'singular.html' + + @property + def html(self): + return Pandoc().convert(self.content) + + @property + def offlinecopies(self): + # stupidly simple property caching + if hasattr(self, 'copies'): + return self.copies + + copies = {} + for maybe in ['bookmark-of', 'in-reply-to', 'repost-of']: + maybe = self.meta.get(maybe, False) + if not maybe: + continue + if not isinstance(maybe, list): + maybe = [maybe] + for url in maybe: + copies[url] = OfflineCopy(url) + copies[url].run() + + self.copies = copies + return copies + + @property + def exif(self): + if not self.isphoto: + return None + + exif = {} + 
mapping = { + 'camera': [ + 'Model' + ], + 'aperture': [ + 'FNumber', + 'Aperture' + ], + 'shutter_speed': [ + 'ExposureTime' + ], + 'focallength35mm': [ + 'FocalLengthIn35mmFormat', + ], + 'focallength': [ + 'FocalLength', + ], + 'iso': [ + 'ISO' + ], + 'lens': [ + 'LensID', + ], + 'date': [ + 'CreateDate', + 'DateTimeOriginal', + ], + 'geo_latitude': [ + 'GPSLatitude' + ], + 'geo_longitude': [ + 'GPSLongitude' + ], + } + + for ekey, candidates in mapping.items(): + for candidate in candidates: + maybe = self.photo.meta.get(candidate, None) + if maybe: + if 'geo_' in ekey: + exif[ekey] = round(float(maybe), 5) + else: + exif[ekey] = maybe + break + + return exif + + @property + def rssenclosure(self): + if not self.isphoto: + return {} + return self.photo.rssenclosure + + @property + def tmplvars(self): + return { + 'title': self.title, + 'published': self.published.datetime, + 'tags': self.tags, + 'author': dict(shared.config.items('author')), + 'content': self.content, + 'html': self.html, + 'category': self.category, + 'reactions': self.reactions, + 'updated': self.updated.datetime, + 'summary': self.meta.get('summary', ''), + 'exif': self.exif, + 'lang': self.lang, + 'syndicate': '', + 'slug': self.fname, + 'shortslug': self.shortslug, + 'rssenclosure': self.rssenclosure, + } + + @property + def shortslug(self): + return self.baseN(self.pubtime) + + @staticmethod + def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"): + """ Used to create short, lowecase slug for a number (an epoch) passed """ + num = int(num) + return ((num == 0) and numerals[0]) or ( + Singular.baseN( + num // b, + b, + numerals + ).lstrip(numerals[0]) + numerals[num % b] + ) + + async def render(self, renderer): + logging.info("rendering and saving %s", self.fname) + targetdir = os.path.abspath(os.path.join( + shared.config.get('target', 'builddir'), + self.fname + )) + target = os.path.join(targetdir, 'index.html') + + if not shared.config.getboolean('params', 'force') and 
os.path.isfile(target): + ttime = int(os.path.getmtime(target)) + logging.debug('ttime is %d mtime is %d', ttime, self.mtime) + if ttime == self.mtime: + logging.debug('%s exists and up-to-date (lastmod: %d)', target, ttime) + return + + if not os.path.isdir(targetdir): + os.mkdir(targetdir) + + tmplvars = { + 'post': self.tmplvars, + 'site': renderer.sitevars, + 'taxonomy': {}, + } + r = renderer.j2.get_template(self.tmplfile).render(tmplvars) + with open(target, "w") as html: + logging.debug('writing %s', target) + html.write(r) + html.close() + os.utime(target, (self.mtime, self.mtime)) + +class NASG(object): + def __init__(self): + # --- set params + parser = argparse.ArgumentParser(description='Parameters for NASG') + parser.add_argument( + '--regenerate', + action='store_true', + default=False, + help='force downsizing images' + ) + parser.add_argument( + '--force', + action='store_true', + default=False, + help='force rendering HTML' + ) + parser.add_argument( + '--loglevel', + default='info', + help='change loglevel' + ) + parser.add_argument( + '--nodownsize', + action='store_true', + default=False, + help='skip image downsizing' + ) + parser.add_argument( + '--norender', + action='store_true', + default=False, + help='skip rendering' + ) + + params = vars(parser.parse_args()) + shared.config.add_section('params') + for k, v in params.items(): + shared.config.set('params', k, str(v)) + + + # remove the rest of the potential loggers + while len(logging.root.handlers) > 0: + logging.root.removeHandler(logging.root.handlers[-1]) + + # --- set loglevel + llevel = { + 'critical': 50, + 'error': 40, + 'warning': 30, + 'info': 20, + 'debug': 10 + } + logging.basicConfig( + level=llevel[shared.config.get('params', 'loglevel')], + format='%(asctime)s - %(levelname)s - %(message)s' + ) + + async def __adownsize(self, images, existing): + for fname, img in images: + await img.downsize(existing) + + async def __acrender(self, content, renderer): + for (pubtime, 
singular) in content: + await singular.render(renderer) + + async def __atrender(self, taxonomies, renderer): + for e in taxonomies: + for name, t in e.items(): + await t.render(renderer) + + async def __afrender(self, front, renderer): + await front.render(renderer) + + async def __aindex(self, content, searchdb): + for (pubtime, singular) in content: + await searchdb.append(singular) + + def run(self): + loop = asyncio.get_event_loop() + + for d in shared.config.options('target'): + if 'dir' in d and not os.path.isdir(shared.config.get('target', d)): + os.mkdir(shared.config.get('target', d)) + + logging.info("discovering images") + images = Images() + images.populate() + existing = glob.glob(os.path.join(shared.config.get('target', 'filesdir'), "*")) + if not shared.config.getboolean('params', 'nodownsize'): + logging.info("downsizing images") + loop.run_until_complete(self.__adownsize(images, existing)) + + logging.info("discovering content") + content = Content(images) + content.populate() + + if not shared.config.getboolean('params', 'norender'): + renderer = Renderer() + logging.info("rendering content") + loop.run_until_complete(self.__acrender(content, renderer)) + + logging.info("rendering categories and tags") + loop.run_until_complete(self.__atrender([content.categories, content.tags], renderer)) + + logging.info("rendering the front page elements") + loop.run_until_complete(self.__afrender(content.front, renderer)) + + logging.info("rendering sitemap") + content.sitemap() + + logging.info("copy the static bits") + src = shared.config.get('source', 'staticdir') + for item in os.listdir(src): + s = os.path.join(src, item) + d = os.path.join(shared.config.get('target', 'builddir'), item) + logging.debug("copying %s to %s", s, d) + shutil.copy2(s, d) + + logging.info("pouplating searchdb") + searchdb = Indexer() + loop.run_until_complete(self.__aindex(content, searchdb)) + searchdb.finish() + + loop.close() if __name__ == '__main__': - 
config.options.update(vars(parser.parse_args())) - loglevel = 30 - if config.options['debug']: - loglevel = 10 - - while len(logging.root.handlers) > 0: - logging.root.removeHandler(logging.root.handlers[-1]) - - logging.basicConfig( - level=loglevel, - format='%(asctime)s - %(levelname)s - %(message)s' - ) - - engine = Engine() - engine.collect() - engine.render() \ No newline at end of file + worker = NASG() + worker.run() diff --git a/nasg/__init__.py b/nasg/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/nasg/cmdline.py b/nasg/cmdline.py deleted file mode 100644 index 595b713..0000000 --- a/nasg/cmdline.py +++ /dev/null @@ -1,115 +0,0 @@ -import subprocess -import os -import json -import logging - - -class CommandLine(object): - def __init__(self, cmd, stdin=''): - self.cmd = cmd.split(' ') - self.stdin = stdin - self.stdout = '' - self.binary = None - self._which() - - if not self.binary: - raise ValueError('%s binary was not found in PATH' % self.cmd[0]) - - # based on: http://stackoverflow.com/a/377028/673576 - def _which(self): - if self._is_exe(self.cmd[0]): - self.binary = self.cmd[0] - return - - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - fpath = os.path.join(path, self.cmd[0]) - if self._is_exe(fpath): - self.binary = self.cmd[0] = fpath - return - - def _is_exe(self, fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - def run(self): - p = subprocess.Popen( - self.cmd, - stdin=subprocess.PIPE, - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - env=os.environ.copy() - ) - stdout, stderr = p.communicate(self.stdin.encode('utf-8')) - self.stdout = stdout.decode('utf-8').strip() - return self - - -class Exiftool(CommandLine): - def __init__(self, fpath = ''): - self.fpath = fpath - cmd ="/usr/local/bin/exiftool -json -sort -groupNames %s" % (fpath) - super(Exiftool, self).__init__(cmd) - - def get(self): - self.run() - exif = {} - try: - exif = json.loads(self.stdout)[0] - 
except json.JSONDecodeError as e: - logging.error("Error when decoding JSON returned from exiftool: %s" % e) - pass - - return exif - - -class Pandoc(CommandLine): - """ Use: Pandoc.[formatter function].get() - available formatter functions: - - md2html: from markdown extra to html5 - - html2md: from html5 to simple markdown - - The default is plain markdown to html5 (if no formatter function added) - """ - - def __init__(self, text): - self.stdin = text - self.format_in = 'markdown' - self.format_out = 'html5' - self.stdout = '' - - def md2html(self): - self.format_in = "markdown+" + "+".join([ - 'backtick_code_blocks', - 'auto_identifiers', - 'fenced_code_attributes', - 'definition_lists', - 'grid_tables', - 'pipe_tables', - 'strikeout', - 'superscript', - 'subscript', - 'markdown_in_html_blocks', - 'shortcut_reference_links', - 'autolink_bare_uris', - 'raw_html', - 'link_attributes', - 'header_attributes', - 'footnotes', - ]) - return self - - - def html2md(self): - self.format_out = "markdown-" + "-".join([ - 'raw_html', - 'native_divs', - 'native_spans', - ]) - return self - - - def get(self): - cmd = "/usr/bin/pandoc -o- --from=%s --to=%s" % (self.format_in, self.format_out) - super(Pandoc, self).__init__(cmd, stdin=self.stdin) - self.run() - return self.stdout \ No newline at end of file diff --git a/nasg/func.py b/nasg/func.py deleted file mode 100644 index f0f5009..0000000 --- a/nasg/func.py +++ /dev/null @@ -1,21 +0,0 @@ -import re - -def gps2dec(exifgps, ref=None): - pattern = re.compile(r"(?P[0-9.]+)\s+deg\s+(?P[0-9.]+)'\s+(?P[0-9.]+)\"(?:\s+(?P[NEWS]))?") - v = pattern.match(exifgps).groupdict() - - dd = float(v['deg']) + (((float(v['min']) * 60) + (float(v['sec']))) / 3600) - if ref == 'West' or ref == 'South' or v['dir'] == "S" or v['dir'] == "W": - dd = dd * -1 - return round(dd, 6) - -def baseN(num, b=36, numerals="0123456789abcdefghijklmnopqrstuvwxyz"): - """ Used to create short, lowecase slug for a number (an epoch) passed """ - num = int(num) - 
return ((num == 0) and numerals[0]) or ( - baseN( - num // b, - b, - numerals - ).lstrip(numerals[0]) + numerals[num % b] - ) \ No newline at end of file diff --git a/nasg/img.py b/nasg/img.py deleted file mode 100644 index 5e5ce8e..0000000 --- a/nasg/img.py +++ /dev/null @@ -1,297 +0,0 @@ -import os -import re -import shutil -import logging -import imghdr -from similar_text import similar_text -import wand.api -import wand.image -import wand.drawing -import wand.color - -import nasg.config as config -from nasg.cmdline import Exiftool - - -class ImageHandler(object): - - sizes = { - 90: { - 'ext': 's', - 'crop': True, - }, - 360: { - 'ext': 'm', - }, - 720: { - 'ext': 'z', - 'fallback': True - }, - 1280: { - 'ext': 'b', - } - } - - def __init__(self, fpath, alttext='', title='', imgcl='', linkto=False): - logging.info("parsing image: %s" % fpath) - self.fpath = os.path.abspath(fpath) - self.fname, self.ext = os.path.splitext(os.path.basename(fpath)) - - self.linkto = linkto - self.alttext = alttext - self.title = title - self.imgcl = imgcl - self.what = imghdr.what(self.fpath) - self.mime = "image/%s" % (self.what) - self.exif = {} - self.is_photo = False - if self.what == 'jpeg': - self._setexif() - self._is_photo() - self.is_downsizeable = False - if not self.imgcl: - if self.what == 'jpeg' or self.what == 'png': - self.is_downsizeable = True - self.sizes = sorted(self.sizes.items()) - for size, meta in self.sizes: - meta['fname'] = "%s_%s%s" % ( - self.fname, - meta['ext'], - self.ext - ) - meta['fpath'] = os.path.join( - config.TFILES, - meta['fname'] - ) - meta['url'] = "%s/%s/%s" % ( - config.site['url'], - config.UFILES, - meta['fname'] - ) - if 'fallback' in meta: - self.fallback = meta['url'] - self.targeturl = meta['url'] - - - def featured(self): - # sizes elements are tuples: size, meta - return { - 'mime': self.mime, - 'url': self.sizes[-1][1]['url'], - 'bytes': os.path.getsize(self.sizes[-1][1]['fpath']) - } - - - def _setexif(self): - self.exif = 
Exiftool(self.fpath).get() - - - def _is_photo(self): - model = self.exif.get('EXIF:Model', None) - if hasattr(config, 'cameras') and \ - model in config.cameras: - self.is_photo = True - return - - cprght = self.exif.get('IPTC:CopyrightNotice', '') - if hasattr(config, 'copyr'): - for s in config.copyr: - pattern = re.compile(r'%s' % s) - if pattern.match(cprght): - self.is_photo = True - return - - - def _watermark(self, img): - if 'watermark' not in config.options: - return img - if not os.path.isfile(config.options['watermark']): - return img - - wmark = wand.image.Image(filename=config.options['watermark']) - - if img.width > img.height: - w = img.width * 0.16 - h = wmark.height * (w / wmark.width) - x = img.width - w - (img.width * 0.01) - y = img.height - h - (img.height * 0.01) - else: - w = img.height * 0.16 - h = wmark.height * (w / wmark.width) - x = img.width - h - (img.width * 0.01) - y = img.height - w - (img.height * 0.01) - - w = round(w) - h = round(h) - x = round(x) - y = round(y) - - wmark.resize(w, h) - if img.width < img.height: - wmark.rotate(-90) - img.composite(image=wmark, left=x, top=y) - return img - - - def _sourceurlmark(self, img): - with wand.drawing.Drawing() as draw: - draw.fill_color = wand.color.Color('#fff') - draw.fill_opacity = 0.8 - draw.stroke_color = wand.color.Color('#fff') - draw.stroke_opacity = 0.8 - r_h = round(img.height * 0.3) - r_top = round((img.height/2) - (r_h/2)) - - draw.rectangle( - left=0, - top=r_top, - width=img.width, - height=r_h - ) - - draw(img) - - with wand.drawing.Drawing() as draw: - draw.font = config.FONT - draw.font_size = round((img.width)/len(self.linkto)*1.5) - draw.gravity = 'center' - draw.text( - 0, - 0, - self.linkto - ) - draw(img) - return img - - def downsize(self): - if not self.is_downsizeable: - return self._copy() - if not self._isneeded(): - logging.debug("downsizing not needed for %s", self.fpath) - return - - logging.debug("downsizing %s", self.fpath) - try: - img = 
wand.image.Image(filename=self.fpath) - img.auto_orient() - except ValueError as e: - logging.error("opening %s with wand failed: %s", self.fpath, e) - return - - if self.is_photo: - img = self._watermark(img) - elif self.linkto: - img = self._sourceurlmark(img) - - for size, meta in self.sizes: - self._intermediate(img, size, meta) - - #self._setmeta() - - - def _copy(self): - target = os.path.join( - config.TFILES, - "%s%s" % (self.fname, self.ext) - ) - if os.path.isfile(target) and \ - not config.options['downsize']: - return - - logging.debug("copying %s to %s", self.fpath, target) - shutil.copy(self.fpath, target) - - - def _isneeded(self): - if config.options['downsize']: - return True - for size, meta in self.sizes: - if not os.path.isfile(meta['fpath']): - return True - - - def _intermediate_dimensions(self, img, size, meta): - if (img.width > img.height and 'crop' not in meta) \ - or (img.width < img.height and 'crop' in meta): - width = size - height = int(float(size / img.width) * img.height) - else: - height = size - width = int(float(size / img.height) * img.width) - - return (width, height) - - - def _intermediate(self, img, size, meta): - if os.path.isfile(meta['fpath']) and \ - not config.options['downsize']: - return - - try: - thumb = img.clone() - width, height = self._intermediate_dimensions(img, size, meta) - thumb.resize(width, height) - - if 'crop' in meta: - if 'liquidcrop' in config.options and \ - config.options['liquidcrop']: - thumb.liquid_rescale(size, size, 1, 1) - else: - l = t = 0 - if width > size: - l = int((width - size) / 2) - if height > size: - t = int((height - size) / 2) - thumb.crop(left=l, top=t, width=size, height=size) - - if img.format == "JPEG": - thumb.compression_quality = 86 - thumb.unsharp_mask( - radius=0, - sigma=0.5, - amount=1, - threshold=0.03 - ) - thumb.format = 'pjpeg' - - - # this is to make sure pjpeg happens - with open(meta['fpath'], 'wb') as f: - thumb.save(file=f) - - except ValueError as e: - 
logging.error("error while downsizing %s: %s", self.fpath, e) - return - - - def srcset(self, generate_caption=True, uphoto=False): - if not self.is_downsizeable: - return False - - uphotoclass='' - if uphoto: - uphotoclass=' u-photo' - - cl = '' - if self.imgcl: - cl = self.imgcl - - caption = '' - if self.alttext \ - and similar_text(self.alttext, self.fname) < 90 \ - and similar_text(self.alttext, self.fname + '.' + self.ext) < 90 \ - and generate_caption: - caption = '
%s
' % (self.alttext) - - if self.linkto: - target = self.linkto - - # don't put linebreaks in this: Pandoc tends to evaluate them - return '
%s%s
' % ( - uphotoclass, - self.targeturl, - self.fallback, - self.imgcl, - self.alttext, - caption - ) \ No newline at end of file diff --git a/nasg/img_test.py b/nasg/img_test.py deleted file mode 100644 index e69de29..0000000 diff --git a/nasg/jinjaenv.py b/nasg/jinjaenv.py deleted file mode 100644 index 53880dc..0000000 --- a/nasg/jinjaenv.py +++ /dev/null @@ -1,29 +0,0 @@ -import arrow -import jinja2 -from slugify import slugify -import nasg.config as config - -JINJA2ENV = jinja2.Environment( - loader=jinja2.FileSystemLoader( - searchpath=config.TEMPLATES - ) -) - -def jinja_filter_date(d, form='%Y-%m-%d %H:%m:%S'): - if d == 'now': - return arrow.now().datetime.strftime(form) - if form == 'c': - form = '%Y-%m-%dT%H:%M:%S%z' - return d.strftime(form) - -def jinja_filter_slugify(s): - return slugify(s, only_ascii=True, lower=True) - -def jinja_filter_search(s, r): - if r in s: - return True - return False - -JINJA2ENV.filters['date'] = jinja_filter_date -JINJA2ENV.filters['search'] = jinja_filter_search -JINJA2ENV.filters['slugify'] = jinja_filter_slugify \ No newline at end of file diff --git a/nasg/searchindex.py b/nasg/searchindex.py deleted file mode 100644 index 82cd7ed..0000000 --- a/nasg/searchindex.py +++ /dev/null @@ -1,76 +0,0 @@ -from whoosh import fields -from whoosh import analysis -from whoosh import index -import tempfile -import atexit -import shutil -import nasg.config as config - -class SearchIndex(object): - schema = fields.Schema( - url=fields.ID( - stored=True, - ), - title=fields.TEXT( - stored=True, - analyzer=analysis.FancyAnalyzer( - ) - ), - date=fields.DATETIME( - stored=True, - sortable=True - ), - content=fields.TEXT( - stored=True, - analyzer=analysis.FancyAnalyzer( - ) - ), - tags=fields.TEXT( - stored=True, - analyzer=analysis.KeywordAnalyzer( - lowercase=True, - commas=True - ) - ), - weight=fields.NUMERIC( - sortable=True - ), - img=fields.TEXT( - stored=True - ) - ) - - - def __init__(self): - self.tmp = 
tempfile.mkdtemp('whooshdb_', dir=tempfile.gettempdir()) - self.ix = index.create_in(self.tmp, self.schema) - atexit.register(self.cleanup) - - - def add(self, vars): - ix = self.ix.writer() - ix.add_document( - title=vars['title'], - url=vars['url'], - content=vars['content'], - date=vars['published'], - tags=vars['tags'], - weight=1, - img=vars['img'] - ) - ix.commit() - - - def cleanup(self): - if not os.path.exists(self.tmp): - return - - logging.warning("cleaning up tmp whoosh") - shutil.rmtree(self.tmp) - - - def save(self): - logging.info("deleting old searchdb") - shutil.rmtree(config.SEARCHDB) - logging.info("moving new searchdb") - shutil.move(self.tmp, config.SEARCHDB) \ No newline at end of file diff --git a/nasg/singular.py b/nasg/singular.py deleted file mode 100644 index ac0b283..0000000 --- a/nasg/singular.py +++ /dev/null @@ -1,580 +0,0 @@ -import os -import re -import logging -import arrow -import frontmatter -import langdetect -from slugify import slugify - -import nasg.config as config -import nasg.func as func -import nasg.cmdline as cmdline -from nasg.img import ImageHandler -import nasg.jinjaenv as jinjaenv - -class SingularHandler(object): - def __init__(self, fpath): - logging.info("setting up singular from %s", fpath) - self.fpath= os.path.abspath(fpath) - self.fname, self.ext = os.path.splitext(os.path.basename(self.fpath)) - self.target = os.path.join( - config.TARGET, "%s" % (self.fname), "index.html" - ) - - slug = slugify(self.fname, only_ascii=True, lower=True) - self.modtime = int(os.path.getmtime(self.fpath)) - self.category = os.path.dirname(self.fpath).replace(config.CONTENT, '').strip('/') - - self.vars = { - 'category': self.category, - 'tags': [], - 'published': arrow.get(self.modtime), - 'updated': arrow.get(0), - 'author': config.author, - 'title': '', - 'raw_summary': '', - 'raw_content': '', - 'content': '', - 'summary': '', - 'reactions': {}, - 'exif': {}, - 'lang': config.site['lang'], - #'syndicate': [], - 'slug': slug, 
- 'shortslug': slug, - 'srcset': '', - 'url': "%s/%s/" % (config.site['url'], slug), - } - - self.redirects = {} - self.pings = {} - self.template = 'singular.html' - self.img = None - self.rendered = '' - - - def __repr__(self): - return "Post '%s' (%s @ %s)" % ( - self.vars['title'], - self.fname, - self.fpath - ) - - - def _modtime(self): - """ Set file mtime in case it doesn't match the in-file publish or updated time """ - - use = 'published' - if self.vars['updated'].timestamp > self.vars['published'].timestamp: - use = 'updated' - - self.modtime = int(self.vars[use].timestamp) - stattime = int(os.path.getmtime(self.fpath)) - if stattime != self.modtime: - os.utime(self.fpath, (self.modtime, self.modtime)) - - - def _detect_lang(self): - # try to detect language, ignore failures - try: - self.vars['lang'] = langdetect.detect( - "%s %s" % ( - self.vars['title'], - self.vars['raw_content'] - ) - ) - except: - pass - - - def _redirects(self): - if self.category in config.categories and \ - 'nocollection' in config.categories[self.category] and \ - config.categories[self.category]['nocollection']: - return - - self.redirects[self.vars['shortslug']] = 1 - - - def _shortslug(self): - shortslug = func.baseN(self.vars['published'].timestamp) - self.vars['shortslug'] = shortslug - - - def _prerender(self): - for s in ['content', 'summary']: - self.vars[s] = cmdline.Pandoc(self.vars[s]).md2html().get() - - - def _postsetup(self): - for s in ['content', 'summary']: - if not self.vars[s]: - self.vars[s] = self.vars['raw_%s' % s] - - self._modtime() - self._shortslug() - self._detect_lang() - self._redirects() - self._pings() - - - def _render(self): - self._prerender() - tmpl = jinjaenv.JINJA2ENV.get_template(self.template) - logging.info("rendering %s", self.fname) - tmplvars = { - 'post': self.vars, - 'site': config.site, - 'taxonomy': {}, - } - self.rendered = tmpl.render(tmplvars) - - - def _exists(self): - """ check if target exists and up to date """ - - if 
config.options['regenerate']: - logging.debug('REGENERATE active') - return False - - if not os.path.isfile(self.target): - logging.debug('%s missing', self.target) - return False - - ttime = os.stat(self.target) - if self.modtime == ttime.st_mtime: - logging.debug('%s exist and up to date', self.target) - return True - - return False - - - def write(self): - """ Write HTML file """ - - if self._exists(): - logging.info("skipping existing %s", self.target) - return - - self._render() - d = os.path.dirname(self.target) - if not os.path.isdir(d): - os.mkdir(d) - - with open(self.target, "wt") as html: - logging.info("writing %s", self.target) - html.write(self.rendered) - html.close() - os.utime(self.target, (self.modtime, self.modtime)) - - - def indexvars(self): - """ Return values formatter for search index """ - - c = "%s %s %s %s %s" % ( - self.vars['slug'], - self.vars['raw_summary'], - self.vars['raw_content'], - self.vars['reactions'], - self.vars['exif'] - ) - - #c = "%s %s" % (c, self._localcopy_include()) - - imgstr = '' - if self.img: - imgstr = self.img.mksrcset(generate_caption=False) - - ivars = { - 'title': self.vars['title'], - 'url': self.vars['url'], - 'content': c, - 'date': self.vars['published'].datetime, - 'tags': ",".join(self.vars['tags']), - 'img': imgstr - } - - return ivars - - def _pings(self): - """ Extract all URLs that needs pinging """ - - urlregex = re.compile( - r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+' - r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*' - ) - urls = re.findall(urlregex, self.vars['raw_content']) - - for r in self.vars['reactions'].items(): - reactiontype, reactions = r - if isinstance(reactions, str): - urls.append(reactions) - elif isinstance(reactions, list): - urls = [*reactions, *urls] - - #for s in self.syndicate.keys(): - #matches.append('https://brid.gy/publish/%s' % (s)) - - urlredux = {} - for url in urls: - # exclude local matches - if config.site['domain'] in url: - continue - urlredux[url] = 1 - - self.pings = 
urlredux - - - def _c_adaptify_altfpath(self, fname): - for c, cmeta in config.categories.items(): - tpath = os.path.join(config.CONTENT, c, fname) - if os.path.isfile(tpath): - return tpath - return None - - - def _c_adaptify(self): - """ Generate srcset for all suitable images """ - - linkto = False - isrepost = None - - if len(self.vars['reactions'].keys()): - isrepost = list(self.vars['reactions'].keys())[0] - if isrepost and \ - len(self.vars['reactions'][isrepost]) == 1: - linkto = self.vars['reactions'][isrepost][0] - - p = re.compile( - r'(!\[(.*)\]\((?:\/(?:files|cache)' - r'(?:\/[0-9]{4}\/[0-9]{2})?\/(.*\.(?:jpe?g|png|gif)))' - r'(?:\s+[\'\"]?(.*?)[\'\"]?)?\)(?:\{(.*?)\})?)' - , re.IGNORECASE) - - m = p.findall(self.vars['content']) - if not m: - return - - for shortcode, alt, fname, title, cl in m: - fpath = os.path.join(config.SFILES, fname) - if not os.path.isfile(fpath): - fpath = self._c_adaptify_altfpath(fname) - if not fpath: - logging.error("missing image in %s: %s", self.fpath, fname) - continue - - im = ImageHandler( - fpath, - alttext=alt, - title=title, - imgcl=cl, - linkto=linkto - ) - - im.downsize() - srcset = im.srcset() - if srcset: - self.vars['content'] = self.vars['content'].replace( - shortcode, srcset - ) - - del(im) - - - def _c_video(self): - """ [video] shortcode extractor """ - - p = re.compile( - r'(\[video mp4=\"(?:/(?:files|cache)\/(?P.*?))\"\]' - r'(?:\[/video\])?)' - ) - - videos = p.findall(self.vars['content']) - if not videos: - return - - for shortcode, vidf in videos: - video = '' % ( - config.site['url'], - vidf - ) - self.vars['content'] = self.vars['content'].replace(shortcode, video) - - - def _c_snippets(self): - """ Replaces [git:(repo)/(file.ext)] with corresponding code snippet """ - - p = re.compile(r'(\[git:([^\/]+)\/([^\]]+\.([^\]]+))\])') - snippets = p.findall(self.vars['content']) - if not snippets: - return - - for shortcode, d, f, ext in snippets: - fpath = os.path.join(config.SOURCE, d, f) - if not 
os.path.isfile(fpath): - logging.error("missing blogsnippet: %s", self.fpath) - continue - - if re.compile(r'conf', re.IGNORECASE).match(ext): - lang = 'apache' - else: - lang = ext - - with open(fpath, "rt") as snip: - c = snip.read() - snip.close - - c = "\n\n```%s\n%s\n```\n" % (lang, c) - logging.debug("replacing blogsnippet %s", self.fpath) - self.vars['content'] = self.vars['content'].replace( - shortcode, c - ) - - - #def _c_files(self): - #""" Copy misc files referenced """ - - #match = re.compile( - #r'\s(?:%s)?/(?:files|cache)' - #r'/.*\.(?:(?!jpe?g|png|gif).*)\s' % (glob.conf['site']['domain']) - #) - #split = re.compile( - #r'\s(?:%s)?/((?:files|cache)' - #r'/(.*\.(?:(?!jpe?g|png|gif).*)))\s' % (glob.conf['site']['domain']) - #) - ##files = re.findall(match, self.content) - ##print(files) - - -class ArticleHandler(SingularHandler): - def __init__(self, *args, **kwargs): - super(ArticleHandler, self).__init__(*args, **kwargs) - self._setup() - - def _setup(self): - post = frontmatter.load(self.fpath) - self.vars['raw_content'] = "%s" % post.content - self.vars['content'] = "%s" % post.content - - if 'tags' in post.metadata: - self.vars['tags'] = post.metadata['tags'] - - if 'title' in post.metadata: - self.vars['title'] = post.metadata['title'] - - if 'published' in post.metadata: - self.vars['published'] = arrow.get(post.metadata['published']) - - if 'updated' in post.metadata: - self.vars['updated'] = arrow.get(post.metadata['updated']) - - if 'summary' in post.metadata: - self.vars['raw_summary'] = post.metadata['summary'] - self.vars['summary'] = "%s" % post.metadata['summary'] - - if 'redirect' in post.metadata and \ - isinstance(post.metadata['redirect'], list): - for r in post.metadata['redirect']: - self.redirects[r.strip().strip('/')] = 1 - - #if 'syndicate' in post.metadata: - #z = post.metadata['syndicate'] - #if isinstance(z, str): - #self.syndicate[z] = '' - #elif isinstance(z, dict): - #for s, c in z.items(): - #self.syndicate[s] = c - 
#elif isinstance(z, list): - #for s in z: - #self.syndicate[s] = '' - - self.vars['reactions'] = {} - # getting rid of '-' to avoid css trouble and similar - rmap = { - 'bookmark-of': 'bookmark', - 'repost-of': 'repost', - 'in-reply-to': 'reply', - } - - for x in rmap.items(): - key, replace = x - if key in post.metadata: - if isinstance(post.metadata[key], str): - self.vars['reactions'][replace] = [post.metadata[key]] - elif isinstance(post.metadata[key], list): - self.vars['reactions'][replace] = post.metadata[key] - - self._c_adaptify() - self._c_snippets() - self._c_video() - #self._files() - super(ArticleHandler, self)._postsetup() - - -class PhotoHandler(SingularHandler): - def __init__(self, *args, **kwargs): - super(PhotoHandler, self).__init__(*args, **kwargs) - self.img = ImageHandler(self.fpath) - self._setup() - - def _setvars(self): - mapping = { - 'camera': [ - 'EXIF:Model' - ], - 'aperture': [ - 'EXIF:FNumber', - 'Composite:Aperture' - ], - 'shutter_speed': [ - 'EXIF:ExposureTime' - ], - 'focallength': [ - 'EXIF:FocalLength', - 'Composite:FocalLength35efl', - ], - 'iso': [ - 'EXIF:ISO' - ], - 'lens': [ - 'Composite:LensID', - 'MakerNotes:Lens', - 'Composite:LensSpec' - ] - } - - for ekey, candidates in mapping.items(): - for candidate in candidates: - val = self.img.exif.get(candidate, None) - if val: - self.vars['exif'][ekey] = val - break - - gps = ['Latitude', 'Longitude'] - for g in gps: - gk = 'EXIF:GPS%s' % (g) - if gk not in self.img.exif: - continue - - r = 'EXIF:GPS%sRef' % (g) - ref = None - if r in self.img.exif: - ref = self.img.exif[r] - - self.vars['exif']['geo_%s' % (g.lower())] = func.gps2dec( - self.img.exif[gk], - ref - ) - - - def _setfromexif_str(self, varkey, exifkeys): - for key in exifkeys: - val = self.img.exif.get(key, None) - if not val: - continue - self.vars[varkey] = val.strip() - return - - - def _setfromexif_lst(self, varkey, exifkeys): - collected = {} - for key in exifkeys: - val = self.img.exif.get(key, None) - if 
not val: - continue - if isinstance(val, str): - self.img.exif[key] = val.split(",") - # not elif: the previous one converts all string to list - # we rely on that - if isinstance(val, list): - for v in val: - collected[slugify(str(v).strip())] = str(v).strip() - - self.vars[varkey] = collected.values() - return - - - def _setfromexif_date(self, varkey, exifkeys): - pattern = re.compile( - "(?P[0-9]{4}):(?P[0-9]{2}):(?P[0-9]{2})\s+" - "(?P[0-9]{2}:[0-9]{2}:[0-9]{2})Z?" - ) - - for key in exifkeys: - if key not in self.img.exif: - continue - - if not self.img.exif[key]: - continue - - date = None - v = pattern.match(self.img.exif[key]).groupdict() - if not v: - continue - - try: - date = arrow.get('%s-%s-%s %s' % (v['Y'], v['M'], v['D'], v['T'])) - except: - continue - - if not date: - continue - - - self.vars['published'] = date - logging.debug("'published' set to %s from key %s", self.vars['published'], key) - return - - - def _setup(self): - self._setfromexif_str('title', [ - 'XMP:Title', - 'XMP:Headline', - 'IPTC:Headline' - ]) - - self._setfromexif_str('raw_content', [ - 'XMP:Description', - 'IPTC:Caption-Abstract' - ]) - - self._setfromexif_lst('tags', [ - 'XMP:Keywords', - 'IPTC:Keywords' - ]) - - self._setfromexif_date('published', [ - 'XMP:DateTimeDigitized', - 'XMP:CreateDate', - 'EXIF:CreateDate', - 'EXIF:ModifyDate' - ]) - - self._setvars() - self.img.title = self.vars['title'] - self.img.alttext = self.vars['title'] - - self.vars['content'] = "%s\n\n%s" % ( - self.vars['raw_content'], - self.img.srcset(generate_caption=False, uphoto=True) - ) - - self.img.downsize() - self.vars['img'] = self.img.featured() - super(PhotoHandler, self)._postsetup() - - -class PageHandler(SingularHandler): - def __init__(self, *args, **kwargs): - super(PageHandler, self).__init__(*args, **kwargs) - self.template = 'page.html' - self._setup() - - - def _setup(self): - with open(self.fpath) as c: - self.vars['raw_content'] = c.read() - c.close() - - self._c_adaptify() - 
super(PageHandler, self)._postsetup() \ No newline at end of file diff --git a/nasg/taxonomy.py b/nasg/taxonomy.py deleted file mode 100644 index 5db2506..0000000 --- a/nasg/taxonomy.py +++ /dev/null @@ -1,319 +0,0 @@ -import math -import logging -import os -import collections -from slugify import slugify -import nasg.config as config -import nasg.jinjaenv as jinjaenv -import arrow - -class TaxonomyHandler(object): - def __init__(self, name, taxonomy='category', slug='', description='', render=True): - logging.info("setting up taxonomy: %s", name) - self.name = name - self.taxonomy = taxonomy - self.description = description - self.render = render - if slug: - self.slug = slug - else: - self.slug = slugify(self.name, only_ascii=True, lower=True) - - self.posts = collections.OrderedDict() - #self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug) - - if len(self.taxonomy) and len(self.name): - self.basedir = os.path.join(config.TARGET, self.taxonomy, self.slug) - self.baseurl = "/%s/%s/" % (self.taxonomy, self.slug) - else: - self.baseurl = '/' - self.basedir = os.path.join(config.TARGET) - - self.modtime = 0 - - - def __getitem__(self, key): - return self.posts[key] - - - def __repr__(self): - return 'Taxonomy %s (name: %s, slug: %s) with %i posts' % ( - self.taxonomy, - self.name, - self.slug, - len(self.posts) - ) - - - def __next__(self): - try: - r = self.posts.next() - except: - raise StopIteration() - return r - - - def __iter__(self): - for ix, post in self.posts.items(): - yield post - return - - - def append(self, post): - k = int(post.vars['published'].timestamp) - if k in self.posts: - logging.error("colliding post timestamps: %s vs %s", self.posts[k].fpath, post.fpath) - inc = 1 - while k in self.posts: - k = int(k+1) - - self.posts[k] = post - self.posts = collections.OrderedDict(sorted(self.posts.items(), reverse=True)) - - - def write(self): - if not self.render: - return - l = list(self.posts.keys()) - if len(l): - self.modtime = 
max(list(self.posts.keys())) - else: - self.modtime = arrow.utcnow().timestamp - self._write_pages() - self._write_rss() - - - def _page_vars(self, page, pages, start, end): - return { - 'taxonomy': { - 'url': self.baseurl, - 'name': self.name, - 'taxonomy': self.taxonomy, - 'description': self.description, - 'paged': page, - 'total': pages, - 'perpage': int(config.site['pagination']), - }, - 'site': config.site, - 'posts': [self.posts[k].vars for k in list(sorted( - self.posts.keys(), reverse=True))[start:end]], - } - - - def _write_file(self, fpath, template, tvars): - tmpl = jinjaenv.JINJA2ENV.get_template(template) - logging.info("writing %s" % (fpath)) - with open(fpath, "wt") as f: - r = tmpl.render(tvars) - f.write(r) - f.close() - os.utime(fpath, (self.modtime, self.modtime)) - - - def _write_rss(self): - rssdir = os.path.join(self.basedir, 'feed') - if not os.path.isdir(rssdir): - os.makedirs(rssdir) - fpath = os.path.join(rssdir, 'index.xml') - tvars = self._page_vars(1, 1, 0, int(config.site['rsspagination'])) - self._write_file(fpath, 'rss.html', tvars) - - - def _write_page(self, page, pages, start, end): - if 1 == page: - pagedir = self.basedir - else: - pagedir = os.path.join(self.basedir, 'page', "%i" % page) - - if not os.path.isdir(pagedir): - os.makedirs(pagedir) - - fpath = os.path.join(pagedir, 'index.html') - tvars = self._page_vars(page, pages, start, end) - self._write_file(fpath, 'archive.html', tvars) - - - def _write_pages(self): - perpage = int(config.site['pagination']) - pages = math.ceil(len(self.posts)/perpage) - page = 1 - - while page <= pages: - start = int((page-1) * perpage) - end = int(start+perpage) - self._write_page(page, pages, start, end) - page += 1 - - - #def _test_freshness(self): - #t, lp = list(self.posts.items())[0] - #self.lptime = lp.ftime.st_mtime - - #if os.path.isfile(self.indexpath): - #p = self.indexpath - #elif os.path.isfile(self.simplepath): - #p = self.simplepath - #else: - #return False - - #itime = 
os.stat(p) - #if itime.st_mtime == self.lptime and not glob.FORCEWRITE: - #logging.debug( - #'Taxonomy tree is fresh for %s' % (self.name) - #) - #return True - - #return False - - - #def _test_dirs(self): - #if not os.path.isdir(self.taxp): - #os.mkdir(self.taxp) - #if not os.path.isdir(self.basep): - #os.mkdir(self.basep) - - - #def write_paginated(self): - - #if self._test_freshness(): - #return - - #self._test_dirs() - - #taxp = os.path.join(glob.TARGET, self.taxonomy) - #basep = os.path.join(glob.TARGET, self.taxonomy, self.slug) - - #if not os.path.isdir(taxp): - #os.mkdir(taxp) - #if not os.path.isdir(basep): - #os.mkdir(basep) - - - #pages = math.ceil(len(self.posts) / glob.conf['perpage']) - #page = 1 - - - #if len(self.taxonomy) and len(self.slug): - #base_url = "/%s/%s/" % (self.taxonomy, self.slug) - #else: - #base_url = '/' - - - #while page <= pages: - #start = int((page-1) * int(glob.conf['perpage'])) - #end = int(start + int(glob.conf['perpage'])) - #dorss = False - #posttmpls = [self.posts[k].tmpl() for k in list(sorted( - #self.posts.keys(), reverse=True))[start:end]] - - #if page == 1: - #tpath = self.indexpath - #do_rss = True - ## RSS - - #else: - #do_rss = False - #if not os.path.isdir(self.pagedp): - #os.mkdir(self.pagedp) - - #tdir = os.path.join(self.pagedp, "%d" % page) - - #if not os.path.isdir(tdir): - #os.mkdir(tdir) - #tpath = os.path.join(tdir, "index.html") - - #tvars = { - #'taxonomy': { - #'url': base_url, - #'name': self.name, - #'taxonomy': self.taxonomy, - #'description': self.description, - #'paged': page, - #'total': pages, - #'perpage': glob.conf['perpage'], - #}, - #'site': glob.conf['site'], - #'posts': posttmpls, - #} - - - #tmpl = glob.jinja2env.get_template('archive.html') - #logging.info("rendering %s" % (tpath)) - #with open(tpath, "w") as html: - #r = tmpl.render(tvars) - #soup = BeautifulSoup(r, "html5lib") - #r = soup.prettify() - #logging.info("writing %s" % (tpath)) - #html.write(r) - #html.close() - 
#os.utime(tpath, (self.lptime, self.lptime)) - - #if do_rss: - #feeddir = os.path.join(self.basep, 'feed') - #if not os.path.isdir(feeddir): - #os.mkdir(feeddir) - #feedpath = os.path.join(feeddir, "index.xml") - #tmpl = glob.jinja2env.get_template('rss.html') - #logging.info("rendering %s" % (feedpath)) - #with open(feedpath, "w") as html: - #r = tmpl.render(tvars) - #logging.info("writing %s" % (feedpath)) - #html.write(r) - #html.close() - #os.utime(feedpath, (self.lptime, self.lptime)) - - #page = page+1 - - #def write_simple(self, template='archive.html'): - - #if self._test_freshness(): - #return - - #self._test_dirs() - - #base_url = "/%s/" % (self.slug) - - #posttmpls = [self.posts[k].tmpl() for k in list(sorted( - #self.posts.keys(), reverse=True))] - - #tvars = { - #'taxonomy': { - #'url': base_url, - #'name': self.name, - #'taxonomy': self.taxonomy, - #'description': self.description, - #'paged': 0, - #'total': 0, - #'perpage': glob.conf['perpage'], - #}, - #'site': glob.conf['site'], - #'posts': posttmpls, - #} - - #with open(os.path.join(self.simplepath), "w") as html: - #html.write(json.dumps(tvars, indent=4, sort_keys=True, default=str)) - #html.close() - - ##tmpl = glob.jinja2env.get_template('gallery.html') - ##logging.info("rendering %s" % (indexpath)) - ##with open(indexpath, "w") as html: - ##r = tmpl.render(tvars) - ##soup = BeautifulSoup(r, "html5lib") - ##r = soup.prettify() - ##logging.info("writing %s" % (indexpath)) - ##html.write(r) - ##html.close() - ##os.utime(indexpath, (lptime, lptime)) - - - #def writesitemap(self): - #sitemap = "%s/sitemap.txt" % (glob.TARGET) - #urls = [] - #for p in self.posts.items(): - #t, data = p - #urls.append( "%s/%s" % ( glob.conf['site']['url'], data.slug ) ) - - #with open(sitemap, "w") as f: - #logging.info("writing %s" % (sitemap)) - #f.write("\n".join(urls)) - #f.close() \ No newline at end of file diff --git a/nasg/tests/cmdline.py b/nasg/tests/cmdline.py deleted file mode 100644 index 
bcee844..0000000 --- a/nasg/tests/cmdline.py +++ /dev/null @@ -1,26 +0,0 @@ -import unittest -import nasg.cmdline as cmdline - -class Test(unittest.TestCase): - - def testException(self): - self.assertRaises( - ValueError, - cmdline.CommandLine, - '12345678' - ) - - def testOK(self): - self.assertEqual( - cmdline.CommandLine('ls ./test_cmdline.py').run().stdout, - './test_cmdline.py' - ) - - def testExiftool(self): - self.assertEqual( - cmdline.Exiftool().get(), - {} - ) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/nasg/tests/func.py b/nasg/tests/func.py deleted file mode 100644 index 13c1666..0000000 --- a/nasg/tests/func.py +++ /dev/null @@ -1,60 +0,0 @@ -import unittest -import nasg.func as func - - -class Test(unittest.TestCase): - - def test_baseN_zero(self): - self.assertEqual( - func.baseN(0), - '0' - ) - - def test_baseN(self): - self.assertEqual( - func.baseN(1489437846), - 'omrtli' - ) - - def test_gps2dec_W(self): - self.assertEqual( - func.gps2dec( - '103 deg 52\' 32.79" W' - ), - -103.875775 - ) - - def test_gps2dec_E(self): - self.assertEqual( - func.gps2dec( - '103 deg 52\' 32.79" E' - ), - 103.875775 - ) - - def test_gps2dec_N(self): - self.assertEqual( - func.gps2dec( - '33 deg 9\' 34.93" N' - ), - 33.159703 - ) - - def test_gps2dec_S(self): - self.assertEqual( - func.gps2dec( - '33 deg 9\' 34.93" S' - ), - -33.159703 - ) - - def test_gps2dec(self): - self.assertEqual( - func.gps2dec( - '33 deg 9\' 34.93"' - ), - 33.159703 - ) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/nasg/tests/jinjaenv.py b/nasg/tests/jinjaenv.py deleted file mode 100644 index e043476..0000000 --- a/nasg/tests/jinjaenv.py +++ /dev/null @@ -1,36 +0,0 @@ -import unittest -import nasg.jinjaenv as jinjaenv -import arrow - -class CommandLineTest(unittest.TestCase): - - def test_jinja_filter_date(self): - t = arrow.utcnow() - self.assertEqual( - jinjaenv.jinja_filter_date(t.datetime, 'c'), - 
t.format('YYYY-MM-DDTHH:mm:ssZ') - ) - - def test_jinja_filter_slugify(self): - self.assertEqual( - jinjaenv.jinja_filter_slugify('Árvíztűrő Tükörfúrógép'), - 'arvizturo-tukorfurogep' - ) - - def test_jinja_filter_search1(self): - self.assertTrue( - jinjaenv.jinja_filter_search('almafa', 'alma') - ) - - def test_jinja_filter_search3(self): - self.assertTrue( - jinjaenv.jinja_filter_search( ['almafa' ], 'almafa') - ) - - def test_jinja_filter_search2(self): - self.assertFalse( - jinjaenv.jinja_filter_search('almafa', 'eszeveszett') - ) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/nasg/tests/singular.py b/nasg/tests/singular.py deleted file mode 100644 index 345c510..0000000 --- a/nasg/tests/singular.py +++ /dev/null @@ -1,10 +0,0 @@ -import unittest -import nasg.singular as singular - -class Test(unittest.TestCase): - - def test(self): - self.assertEqual('','') - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/nasg/tests/taxonomy.py b/nasg/tests/taxonomy.py deleted file mode 100644 index 282341e..0000000 --- a/nasg/tests/taxonomy.py +++ /dev/null @@ -1,10 +0,0 @@ -import unittest -import nasg.taxonomy as taxonomy - -class Test(unittest.TestCase): - - def test(self): - self.assertEqual('','') - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/new.py b/new.py new file mode 100644 index 0000000..0686f6b --- /dev/null +++ b/new.py @@ -0,0 +1,132 @@ +#!/usr/bin/env python3 + +import os +import sys +import arrow +import argparse +import frontmatter +import glob +import sys +import tempfile +from slugify import slugify + +import nasg +import shared + +if __name__ == '__main__': + # --- set params + slugs = [os.path.splitext(i)[0] for i in list(map( + os.path.basename, glob.glob( + os.path.join( + shared.config.get('source', 'contentdir'), + "*", + "*.md" + ) + ) + ))] + + categories = list(map( + os.path.basename, glob.glob( + os.path.join( + 
shared.config.get('source', 'contentdir'), + "*", + ) + ) + )) + now = arrow.utcnow() + parser = argparse.ArgumentParser(description='create doc and print it to stdout') + parser.add_argument('--tags', '-t', help='; separated, quoted list of tags') + parser.add_argument('--date', '-d', help=' YYYY-mm-ddTHH:MM:SS+TZTZ formatted date, if not now') + parser.add_argument('--slug', '-s', help='slug (normally autogenerated from title or pubdate)') + parser.add_argument('--title', '-l', help='title of new entry') + parser.add_argument('--bookmark', '-b', help='URL to bookmark') + parser.add_argument('--reply', '-r', help='URL to reply to') + parser.add_argument('--repost', '-p', help='URL to repost') + parser.add_argument('--content', '-c', help='content of entry') + parser.add_argument('--summary', '-u', help='summary of entry') + parser.add_argument('--redirect', '-i', help='; separated, quoted list of redirects') + args = vars(parser.parse_args()) + + if not args['date']: + d = now.format("YYYY-MM-DDTHH:mm:ssZ") + args['date'] = input('Date [%s]: ' % (d)) or d + + if not args['title']: + args['title'] = input('Title []: ') or '' + + if not args['tags']: + args['tags'] = input('Tags (separated by ;) []: ') or None + if args['tags']: + args['tags'] = args['tags'].split(';') + + if not args['bookmark']: + args['bookmark'] = input('Bookmark of URL []: ') or '' + + if not args['reply']: + args['reply'] = input('Reply to URL []: ') or '' + + if not args['repost']: + args['repost'] = input('Repost of URL []: ') or '' + + if not args['slug']: + if args['title']: + slug = slugify(args['title'], only_ascii=True, lower=True) + elif args['bookmark']: + slug = slugify("re: %s" % (args['bookmark']), only_ascii=True, lower=True) + elif args['reply']: + slug = slugify("re: %s" % (args['reply']), only_ascii=True, lower=True) + elif args['repost']: + slug = slugify("re: %s" % (args['repost']), only_ascii=True, lower=True) + else: + slug = nasg.Singular.baseN(now.timestamp) + 
args['slug'] = input('Slug [%s]: ' % (slug)) or slug + + if args['slug'] in slugs: + print("This slug already exists: %s", args['slug']) + slugbase = args['slug'] + inc = 1 + while args['slug'] in slugs: + args['slug'] = "%s-%d" % (slugbase, inc) + inc = inc+1 + print("Using %s as slug", args['slug']) + + if not args['summary']: + args['summary'] = input('Summary []: ') or '' + + if not args['content']: + args['content'] = input('Content []: ') or '' + + if not args['redirect']: + args['redirect'] = input('Additional slugs (separated by ;) []: ') or None + if args['redirect']: + args['redirect'] = args['redirect'].split(';') + + doc = frontmatter.loads('') + slug = args['slug'] + del(args['slug']) + content = args['content'] + del(args['content']) + + repl = { + 'repost': 'repost-of', + 'bookmark': 'bookmark-of', + 'reply': 'in-reply-to', + 'date': 'published', + } + for orig, new in repl.items(): + args[new] = args[orig] + del(args[orig]) + + doc.metadata = dict((k, v) for k, v in args.items() if v) + doc.content = content + + tmpsave = os.path.join(tempfile.gettempdir(), "%s.md" % slug) + saveto = input('Save to: [%s]: ' % categories) or tmpsave + + if tmpsave != saveto: + saveto = os.path.join(shared.config.get('source', 'contentdir'), saveto, "%s.md" % slug) + + with open(saveto, 'wt') as f: + f.write(frontmatter.dumps(doc)) + + print("wrote file to:\n%s" % saveto) diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..3acb881 --- /dev/null +++ b/requirements.txt @@ -0,0 +1,28 @@ +aiofiles==0.3.1 +appdirs==1.4.3 +arrow==0.10.0 +breadability==0.1.20 +chardet==3.0.3 +docopt==0.6.2 +httptools==0.0.9 +Jinja2==2.9.6 +langdetect==1.0.7 +lxml==3.7.3 +MarkupSafe==1.0 +packaging==16.8 +pyparsing==2.2.0 +python-dateutil==2.6.0 +python-frontmatter==0.4.2 +python-magic==0.4.13 +PyYAML==3.12 +requests==2.14.2 +sanic==0.5.4 +similar-text==0.2.0 +six==1.10.0 +ujson==1.35 +unicode-slugify==0.1.3 +Unidecode==0.4.20 +uvloop==0.8.0 +Wand==0.4.4 
+websockets==3.3 +Whoosh==2.7.4 diff --git a/shared.py b/shared.py new file mode 100644 index 0000000..0a37ea2 --- /dev/null +++ b/shared.py @@ -0,0 +1,76 @@ +import configparser +import os +from whoosh import fields +from whoosh import analysis +import re + +def __expandconfig(config): + """ add the dirs to the config automatically """ + basepath = os.path.expanduser(config.get('common','base')) + config.set('common', 'basedir', basepath) + for section in ['source', 'target']: + for option in config.options(section): + opt = config.get(section, option) + config.set(section, "%sdir" % option, os.path.join(basepath,opt)) + config.set('target', 'filesdir', os.path.join( + config.get('target', 'builddir'), + config.get('source', 'files'), + )) + return config + +URLREGEX = re.compile( + r'\s+https?\:\/\/?[a-zA-Z0-9\.\/\?\:@\-_=#]+' + r'\.[a-zA-Z0-9\.\/\?\:@\-_=#]*' +) + +EXIFREXEG = re.compile( + r'^(?P[0-9]{4}):(?P[0-9]{2}):(?P[0-9]{2})\s+' + r'(?P