blogit.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538
  1. # ============================================================================
  2. # Blogit.py is free software; you can redistribute it and/or modify
  3. # it under the terms of the GNU General Public License, version 3
  4. # as published by the Free Software Foundation;
  5. #
  6. # Blogit.py is distributed in the hope that it will be useful,
  7. # but WITHOUT ANY WARRANTY; without even the implied warranty of
  8. # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  9. # GNU General Public License for more details.
  10. #
  11. # You should have received a copy of the GNU General Public License
  12. # along with Blogit.py; if not, write to the Free Software
  13. # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
  14. # ============================================================================
  15. # Copyright (C) 2013-2016 Oz Nahum Tiram <nahumoz@gmail.com>
  16. # ============================================================================
  17. from __future__ import print_function
  18. import os
  19. import re
  20. import datetime
  21. import argparse
  22. import sys
  23. import operator
  24. import shutil
  25. from StringIO import StringIO
  26. import codecs
  27. import subprocess as sp
  28. import SimpleHTTPServer
  29. import BaseHTTPServer
  30. import socket
  31. import SocketServer
  32. from jinja2 import Environment, FileSystemLoader
  33. import markdown2
  34. import tinydb
  35. from tinydb import Query, where
# Put the current working directory first on the import path so that the
# site's own ``conf`` module is the one imported on the next line.
sys.path.insert(0, os.getcwd())
from conf import CONFIG, ARCHIVE_SIZE, GLOBAL_TEMPLATE_CONTEXT, KINDS

# Shared Jinja2 environment; templates are loaded from CONFIG['templates'].
jinja_env = Environment(lstrip_blocks=True, trim_blocks=True,
                        loader=FileSystemLoader(CONFIG['templates']))
  40. class DataBase(object): # pragma: no coverage
  41. """A thin wrapper around TinyDB instance"""
  42. def __init__(self, path):
  43. _db = tinydb.TinyDB(path)
  44. self.posts = _db.table('posts')
  45. self.tags = _db.table('tags')
  46. self.pages = _db.table('pages')
  47. self.templates = _db.table('templates')
  48. self._db = _db
# Module-wide database handle, stored as ``blogit.db`` under the content root.
DB = DataBase(os.path.join(CONFIG['content_root'], 'blogit.db'))
  50. class Tag(object):
  51. table = DB.tags
  52. db = DB
  53. def __init__(self, name):
  54. self .name = name
  55. self.permalink = GLOBAL_TEMPLATE_CONTEXT["site_url"]
  56. Tags = Query()
  57. tag = self.table.get(Tags.name == self.name)
  58. if not tag:
  59. self.table.insert({'name': self.name, 'post_ids': []})
  60. def __str__(self):
  61. return self.name
  62. def __repr__(self): # pragma: no coverage
  63. return self.name
  64. @property
  65. def slug(self):
  66. _slug = self.name.lower()
  67. _slug = re.sub(r'[;:,. ]+', '-', _slug)
  68. return _slug
  69. @property
  70. def posts(self):
  71. """
  72. return a list of post ids tagged with Tag
  73. """
  74. Tags = Query()
  75. tag = self.table.get(Tags.name == self.name)
  76. return tag['post_ids']
  77. @posts.setter
  78. def posts(self, post_ids):
  79. if not isinstance(post_ids, list):
  80. raise ValueError("post_ids must be of type list")
  81. Tags = Query()
  82. tag = self.table.get(Tags.name == self.name)
  83. # if not tag: # pragma: no coverage
  84. # raise ValueError("Tag %s not found" % self.name)
  85. # else:
  86. new = set(post_ids) - set(tag['post_ids'])
  87. tag['post_ids'].extend(list(new))
  88. self.table.update({'post_ids': tag['post_ids']}, eids=[tag.eid])
  89. @property
  90. def entries(self):
  91. """return the actual lists of entries tagged with"""
  92. Posts = Query()
  93. for id in self.posts:
  94. post = self.db.posts.get(eid=id)
  95. if not post:
  96. raise ValueError("no post found for eid %s" % id)
  97. yield Entry(os.path.join(CONFIG['content_root'], post['filename']))
  98. def render(self):
  99. """Render html page and atom feed"""
  100. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  101. context['tag'] = self
  102. context['entries'] = _sort_entries(self.entries)
  103. # render html page
  104. render_to = os.path.join(CONFIG['output_to'], 'tags', self.slug)
  105. if not os.path.exists(render_to): # pragma: no coverage
  106. os.makedirs(render_to)
  107. _render(context, 'tag_index.html', os.path.join(render_to, 'index.html'))
  108. # render atom.xml
  109. context['entries'] = context['entries'][:10]
  110. _render(context, 'atom.xml', os.path.join(render_to, 'atom.xml'))
  111. return True
  112. class Entry(object):
  113. """This is the base class for creating an HTML page from a Markdown
  114. based page.
  115. The file has the following structure for a page:
  116. .. code:
  117. ---
  118. title: example page
  119. public: yes
  120. kind: page
  121. template: about.html
  122. ---
  123. # some heading
  124. content paragraph
  125. ## heading 2
  126. some more content
  127. The file has the following structure for a blog entry:
  128. .. code:
  129. ---
  130. title: Blog post 1
  131. author: Famous author
  132. published: 2015-01-11
  133. tags: [python, git, bash, linux]
  134. public: yes
  135. chronological: yes
  136. kind: writing
  137. summary: This is a summry of post 1. Donec id elit non mi porta
  138. ---
  139. This is the body of post 1. Donec id elit non mi porta gravida
  140. """
  141. db = DB
  142. @classmethod
  143. def entry_from_db(kls, filename):
  144. f = os.path.join(filename)
  145. return kls(f)
  146. def __init__(self, path):
  147. self._path = path
  148. self.path = path.split(CONFIG['content_root'])[-1].lstrip('/')
  149. self.id = None # this is set inside prepare()
  150. self.prepare()
  151. def __str__(self):
  152. return self.path
  153. def __repr__(self): # pragma: no coverage
  154. return self.path
  155. @property
  156. def name(self):
  157. return os.path.splitext(os.path.basename(self.path))[0]
  158. @property
  159. def abspath(self):
  160. return self._path
  161. @property
  162. def destination(self):
  163. return os.path.join(CONFIG['output_to'], self.permalink)
  164. @property
  165. def title(self):
  166. return self.header['title']
  167. @property
  168. def summary_html(self):
  169. return "%s" % markdown2.markdown(self.header.get('summary', "").strip())
  170. @property
  171. def summary_atom(self):
  172. summarya = markdown2.markdown(self.header.get('summary', "").strip())
  173. summarya = re.sub("<p>|</p>", "", summarya)
  174. more = '<a href="%s"> continue reading...</a>' % (self.permalink)
  175. return summarya+more
  176. @property
  177. def publish_date(self):
  178. return self.header.get('published',
  179. datetime.date.today().strftime("%Y-%m-%d"))
  180. @property
  181. def permalink(self):
  182. if self.kind == 'page':
  183. dest = '%s.html' % self.title.replace('/', "-")
  184. else:
  185. dest = "%s/%s/index.html" % (KINDS[self.kind]['name_plural'], self.name)
  186. dest = dest.lstrip('/')
  187. return dest
  188. @property
  189. def tags(self):
  190. """this property is always called after prepare"""
  191. if 'tags' in self.header:
  192. tags = [Tag(t) for t in self.header['tags']]
  193. map(lambda t: setattr(t, 'posts', [self.id]), tags)
  194. return tags
  195. else:
  196. return []
  197. def prepare(self):
  198. self.body_html = markdown2.markdown(
  199. codecs.open(self.abspath, 'r').read(),
  200. extras=['fenced-code-blocks', 'hilite', 'tables', 'metadata'])
  201. self.header = self.body_html.metadata
  202. if 'tags' in self.header: # pages can lack tags
  203. self.header['tags'] = self.header['tags'].split(',')
  204. self.date = self.header.get('published', datetime.date.today())
  205. if isinstance(self.date, unicode):
  206. self.date = datetime.datetime.strptime(self.date, "%Y-%m-%d")
  207. for k, v in self.header.items():
  208. try:
  209. setattr(self, k, v)
  210. except AttributeError:
  211. pass
  212. if self.header['kind'] == 'writing':
  213. rv = Entry.db.posts.search(where('filename') == self.path)
  214. if not rv:
  215. _id = Entry.db.posts.insert({'filename': self.path})
  216. else:
  217. _id = rv[0].eid
  218. elif self.header['kind'] == 'page':
  219. rv = Entry.db.pages.search(where('filename') == self.path)
  220. if not rv:
  221. _id = Entry.db.pages.insert({'filename': self.path})
  222. else:
  223. _id = rv[0].eid
  224. self.id = _id
  225. def render(self):
  226. if self.header.get('public', '').lower() in ['true', 'yes']:
  227. try:
  228. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  229. context['entry'] = self
  230. _render(context, self.header.get('template', 'entry.html'),
  231. self.destination)
  232. return True
  233. except Exception as e: # pragma: no cover
  234. print(context)
  235. print(self.path)
  236. print(e)
  237. sys.exit(1)
  238. def _sort_entries(entries, reversed=True):
  239. """Sort all entries by date and reverse the list"""
  240. return list(sorted(entries, key=operator.attrgetter('date'), reverse=reversed))
  241. def _render(context, template_path, output_path, encoding='utf-8'):
  242. template = jinja_env.get_template(template_path)
  243. rendered = template.render(context)
  244. html = template.render(context)
  245. try:
  246. os.makedirs(os.path.dirname(output_path))
  247. except OSError:
  248. pass
  249. destination = codecs.open(output_path, 'w', encoding)
  250. destination.write(html)
  251. destination.close()
  252. def render_archive(entries):
  253. """
  254. This function creates the archive page
  255. """
  256. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  257. context['entries'] = entries[ARCHIVE_SIZE:10]
  258. _render(context, 'archive_index.html',
  259. os.path.join(CONFIG['output_to'],'archive/index.html')),
  260. def find_new_posts_and_pages(db):
  261. """Walk content dir, put each post and page in the database"""
  262. Q = Query()
  263. for root, dirs, files in os.walk(CONFIG['content_root']):
  264. for filename in files:
  265. if filename.endswith(('md', 'markdown')):
  266. fullpath = os.path.join(root, filename)
  267. if not db.posts.contains(Q.filename == fullpath) and \
  268. not db.pages.contains(Q.filename == fullpath):
  269. e = Entry(fullpath)
  270. yield e, e.id
  271. def _get_last_entries(db):
  272. eids = [post.eid for post in db.posts.all()]
  273. eids = sorted(eids)[-10:][::-1]
  274. entries = [Entry(os.path.join(CONFIG['content_root'],
  275. db.posts.get(eid=eid)['filename'])) for eid in eids]
  276. return entries
  277. def update_index(entries):
  278. """find the last 10 entries in the database and create the main
  279. page.
  280. Each entry in has an eid, so we only get the last 10 eids.
  281. This method also updates the ATOM feed.
  282. """
  283. context = GLOBAL_TEMPLATE_CONTEXT.copy()
  284. context['entries'] = entries
  285. map(lambda x: _render(
  286. context, x[0], os.path.join(CONFIG['output_to'], x[1])),
  287. (('entry_index.html', 'index.html'), ('atom.xml', 'atom.xml')))
  288. def build():
  289. """Incremental build of the website"""
  290. print("\nRendering website now...\n")
  291. print("entries:")
  292. tags = dict()
  293. root = CONFIG['content_root']
  294. for post_id, post in find_new_posts_and_pages(DB):
  295. # entry = post
  296. # this method will also parse the post's tags and
  297. # update the db collection containing the tags.
  298. if post.render():
  299. if post.header['kind'] in ['writing', 'link']:
  300. for tag in post.tags:
  301. tag.posts = [post_id]
  302. tags[tag.name] = tag
  303. print("%s" % post.path)
  304. for name, to in tags.iteritems():
  305. print("updating tag %s" % name)
  306. to.render()
  307. # update index
  308. print("updating index")
  309. update_index(_get_last_entries(DB))
  310. # update archive
  311. print("updating archive")
  312. render_archive(_sort_entries([Entry(p['filename'])
  313. for p in db.posts.all()]))
  314. class StoppableHTTPServer(BaseHTTPServer.HTTPServer): # pragma: no coverage
  315. def server_bind(self):
  316. BaseHTTPServer.HTTPServer.server_bind(self)
  317. self.socket.settimeout(1)
  318. self.run = True
  319. def get_request(self):
  320. while self.run:
  321. try:
  322. sock, addr = self.socket.accept()
  323. sock.settimeout(None)
  324. return (sock, addr)
  325. except socket.timeout:
  326. pass
  327. def stop(self):
  328. self.run = False
  329. def serve(self):
  330. while self.run:
  331. self.handle_request()
  332. def preview(): # pragma: no coverage
  333. """launch an HTTP to preview the website"""
  334. Handler = SimpleHTTPServer.SimpleHTTPRequestHandler
  335. SocketServer.TCPServer.allow_reuse_address = True
  336. port = CONFIG['http_port']
  337. httpd = SocketServer.TCPServer(("", port), Handler)
  338. os.chdir(CONFIG['output_to'])
  339. print("and ready to test at http://127.0.0.1:%d" % CONFIG['http_port'])
  340. print("Hit Ctrl+C to exit")
  341. try:
  342. httpd.serve_forever()
  343. except KeyboardInterrupt:
  344. httpd.shutdown()
  345. def publish(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  346. sp.call('git push', cwd=GITDIRECTORY, shell=True)
  347. def new_post(GITDIRECTORY=CONFIG['output_to'],
  348. kind=KINDS['writing']): # pragma: no coverage
  349. """
  350. This function should create a template for a new post with a title
  351. read from the user input.
  352. Most other fields should be defaults.
  353. TODO: update this function
  354. """
  355. title = raw_input("Give the title of the post: ")
  356. while ':' in title:
  357. title = raw_input("Give the title of the post (':' not allowed): ")
  358. author = CONFIG['author']
  359. date = datetime.datetime.strftime(datetime.datetime.now(), '%Y-%m-%d')
  360. tags = raw_input("Give the tags, separated by ', ':")
  361. published = 'yes'
  362. chronological = 'yes'
  363. summary = ("summary: Type your summary here.")
  364. # make file name
  365. fname = os.path.join(os.getcwd(), 'content', kind['name_plural'],
  366. datetime.datetime.strftime(datetime.datetime.now(),
  367. '%Y'),
  368. date+'-'+title.replace(' ', '-')+'.markdown')
  369. with open(fname, 'w') as npost:
  370. npost.write('---\n')
  371. npost.write('title: %s\n' % title)
  372. npost.write('author: %s\n' % author)
  373. npost.write('published: %s\n' % date)
  374. npost.write('tags: %s\n' % tags)
  375. npost.write('public: %s\n' % published)
  376. npost.write('chronological: %s\n' % chronological)
  377. npost.write('kind: %s\n' % kind['name'])
  378. npost.write('%s' % summary)
  379. npost.write('---\n')
  380. print('%s %s' % (CONFIG['editor'], repr(fname)))
  381. os.system('%s %s' % (CONFIG['editor'], fname))
  382. def clean(GITDIRECTORY=CONFIG['output_to']): # pragma: no coverage
  383. directoriestoclean = ["writings", "notes", "links", "tags", "archive"]
  384. os.chdir(GITDIRECTORY)
  385. for directory in directoriestoclean:
  386. shutil.rmtree(directory)
  387. def dist(SOURCEDIR=os.getcwd()+"/content/",
  388. DESTDIR=CONFIG['raw_content']): # pragma: no coverage
  389. """
  390. sync raw files from SOURCE to DEST
  391. """
  392. sp.call(["rsync", "-avP", SOURCEDIR, DESTDIR], shell=False,
  393. cwd=os.getcwd())
  394. def main(): # pragma: no coverage
  395. parser = argparse.ArgumentParser(
  396. description='blogit - a tool to blog on github.')
  397. parser.add_argument('-b', '--build', action="store_true",
  398. help='convert the markdown files to HTML')
  399. parser.add_argument('-p', '--preview', action="store_true",
  400. help='Launch HTTP server to preview the website')
  401. parser.add_argument('-c', '--clean', action="store_true",
  402. help='clean output files')
  403. parser.add_argument('-n', '--new', action="store_true",
  404. help='create new post')
  405. parser.add_argument('-d', '--dist', action="store_true",
  406. help='sync raw files from SOURCE to DEST')
  407. parser.add_argument('--publish', action="store_true",
  408. help='push built HTML to git upstream')
  409. args = parser.parse_args()
  410. if len(sys.argv) < 2:
  411. parser.print_help()
  412. sys.exit()
  413. if args.clean:
  414. clean()
  415. if args.build:
  416. build()
  417. if args.dist:
  418. dist()
  419. if args.preview:
  420. preview()
  421. if args.new:
  422. new_post()
  423. if args.publish:
  424. publish()
# Run the command line interface only when executed as a script.
if __name__ == '__main__':  # pragma: no coverage
    main()