1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
|
import sys import MySQLdb from datetime import *
# Path of the article file, taken from the first command-line argument.
filepath = sys.argv[1]
# The path is split on "/": the second segment is used as the category and
# the fourth as the file name. A path with fewer than four segments raises
# IndexError here.
array = filepath.split("/")
category = array[1]
filename = array[3]

print "filepath: " + filepath

# Record describing the article. 'name' and 'category' come from the path;
# parse_html() writes 'body' and passes this record to getIntro(), which
# fills the 'id', 'tag', 'title', 'img' and 'intro' fields.
# NOTE(review): this name shadows the builtin `dict`; parse_html() reads it
# as a global, so renaming it requires touching that function as well.
dict = {'id': '', 'name': filename, 'category': category, 'tag': '', 'title': '', 'img': '', 'intro': '', 'tag_id': '', 'tag_name': '', 'body': ''}
def getIntro(line, bodyDict, markDict):
    """Extract ``<key>value</key>`` metadata fields from one line of text.

    For every key in ``markDict`` that is not finished yet, look for the
    opening tag ``<key>`` and the closing tag ``</key>`` in ``line`` and
    store the text between them in ``bodyDict[key]``. A value may span
    several lines: the text after the opening tag, every intermediate line,
    and the text before the closing tag are concatenated.

    The text is sliced at the positions where the tags are actually found,
    so leading indentation, trailing whitespace, CRLF line endings and a
    final line without a newline do not corrupt the extracted value.

    Args:
        line: The current line, including its line terminator if any.
        bodyDict: Mapping that receives the extracted values; it must
            already hold a string for every key of ``markDict``. Mutated
            in place.
        markDict: Per-key state, mutated in place: 0 = not started,
            1 = opening tag seen and value still open, 2 = finished.

    Returns:
        None.
    """
    for k in markDict:
        if markDict[k] >= 2:
            # This field is already complete; never overwrite it.
            continue

        se = "<" + k + ">"
        ee = "</" + k + ">"

        start = line.find(se)
        if start >= 0:
            content_from = start + len(se)
            # Only a closing tag located after the opening tag ends the value.
            end = line.find(ee, content_from)
            if end >= 0:
                bodyDict[k] = line[content_from:end]
                markDict[k] = 2
            else:
                bodyDict[k] = bodyDict[k] + line[content_from:]
                markDict[k] = 1
            continue

        end = line.find(ee)
        if end >= 0:
            # Closing line of a multi-line value: keep the text before the tag.
            bodyDict[k] = bodyDict[k] + line[:end]
            markDict[k] = 2
        elif markDict[k] == 1:
            # Intermediate line of a multi-line value.
            bodyDict[k] = bodyDict[k] + line
def parse_html():
    """Read the article HTML file and fill the module-level ``dict`` from it.

    The file is opened at ``"/opt/apps/nginx-apps/blog/" + filepath``.
    Lines before the first line containing ``<body>`` are ignored, as is
    any line that contains ``<body>``. Every other line after it is
    collected into ``dict['body']``. While inside a block that starts on a
    line containing ``<!--intro`` and ends on a line containing
    ``intro-->``, each line (including the opening one, excluding the
    closing one) is also passed to ``getIntro`` to fill the ``id``, ``tag``,
    ``title``, ``img`` and ``intro`` fields.

    Returns:
        The module-level ``dict`` record, mutated in place.
    """
    body_lines = []
    flag_body = False
    flag_note = False
    markDict = {"id": 0, "tag": 0, "title": 0, "img": 0, "intro": 0}

    # The context manager closes the file even if parsing raises.
    with open("/opt/apps/nginx-apps/blog/" + filepath) as file_html:
        for line in file_html:
            if line.find("<body>") >= 0:
                flag_body = True
            elif flag_body:
                if line.find("<!--intro") >= 0:
                    flag_note = True
                elif line.find("intro-->") >= 0:
                    flag_note = False

                if flag_note:
                    getIntro(line, dict, markDict)

                body_lines.append(line)

    # Join once instead of concatenating on every line.
    dict['body'] = ''.join(body_lines)

    return dict
def update_data(dict):
    """Insert or update one article and its tag links in the ``blog`` database.

    Steps, all inside a single transaction that is committed at the end:

    1. For every non-blank, comma-separated tag name in ``dict['tag']``,
       look up its id in ``t_tag`` (case-insensitive) and insert the tag if
       it does not exist yet.
    2. Insert the article into ``t_article`` if no row with
       ``dict['id']`` exists, otherwise update that row.
    3. Rebuild the article's rows in ``t_article_tag``.

    Args:
        dict: Article record with the keys ``id``, ``title``, ``intro``,
            ``body``, ``tag`` and ``img``. (The parameter name shadows the
            builtin; it is kept so existing callers keep working.)

    Raises:
        ValueError: If ``dict['id']`` is not an integer string. This is
            checked before any statement is sent to the database.
        Exception: Any database error propagates unchanged, with its
            original traceback; nothing is committed in that case.
    """
    # NOTE(review): the database credentials are hard-coded in source;
    # consider loading them from configuration or the environment.
    conn = MySQLdb.connect(host='127.0.0.1', user='mysql_blog', passwd='mysqBlogPWD2016', db='blog', charset='utf8')
    try:
        cur = conn.cursor()
        try:
            article_id = int(dict['id'])

            # Skip blank names so an empty tag field or a trailing comma
            # does not create an empty row in t_tag.
            tag_names = [name.strip() for name in dict['tag'].split(",")]
            tag_names = [name for name in tag_names if name]

            tag_id = []
            for tag in tag_names:
                sql = "SELECT id FROM t_tag WHERE lower(tag) = %s"
                print(sql)
                cur.execute(sql, (tag.lower(),))
                results = cur.fetchone()
                if not results:
                    sql = "INSERT INTO t_tag(tag, type, status) VALUES(%s, %s, %s)"
                    print(sql)
                    cur.execute(sql, (tag, "2", "1"))
                    tag_id.append(str(conn.insert_id()))
                else:
                    tag_id.append(str(results[0]))

            sql = "SELECT COUNT(1) FROM t_article WHERE id = %s"
            print(sql)
            cur.execute(sql, (article_id,))
            results = cur.fetchone()
            now_time = datetime.now().strftime('%Y-%m-%d %H:%M:%S')

            tag_ids = ",".join(tag_id)
            intro = dict['intro'].replace('\n', '').replace('\r', '')
            if results[0] <= 0:
                sql = ("INSERT INTO t_article(id, title, content, html, tag_id, tag_name, img_path, "
                       "status, create_time) VALUES(%s, %s, %s, %s, %s, %s, %s, %s, %s)")
                print(sql)
                cur.execute(sql, (article_id, dict['title'], intro, dict['body'], tag_ids,
                                  dict['tag'], dict['img'], '1', now_time))
            else:
                sql = ("UPDATE t_article SET title=%s, content=%s, html=%s, tag_id=%s, tag_name=%s, "
                       "img_path=%s, update_time=%s WHERE id = %s")
                print(sql)
                cur.execute(sql, (dict['title'], intro, dict['body'], tag_ids, dict['tag'],
                                  dict['img'], now_time, article_id))

                # Drop the old links before re-creating them below.
                sql = "DELETE FROM t_article_tag WHERE aid = %s"
                print(sql)
                cur.execute(sql, (article_id,))

            # Without tags there is nothing to link, and create_sql() has no
            # valid statement to build for an empty list.
            if tag_id:
                cur.execute(create_sql(tag_id, article_id))

            conn.commit()
        finally:
            cur.close()
    finally:
        # Errors propagate on their own; no except/re-raise is needed, and
        # the connection is closed on every path.
        conn.close()
def create_sql(tag_id, aid):
    """Build the multi-row INSERT that links an article to its tags.

    The statement is assembled as text, so every id is passed through
    ``int()`` first; a value that is not an integer can never end up in
    the SQL string.

    Args:
        tag_id: Iterable of tag ids (integers or integer strings).
        aid: Article id (integer or integer string).

    Returns:
        The SQL text, e.g.
        ``INSERT INTO t_article_tag(aid, tagid) VALUES(7,1),(7,2)``.
        The statement is also printed.

    Raises:
        ValueError: If ``tag_id`` is empty (the statement would have no
            values) or if any id is not an integer.
    """
    article = str(int(aid))
    values = ['(' + article + ',' + str(int(tag)) + ')' for tag in tag_id]
    if not values:
        raise ValueError("create_sql needs at least one tag id")

    sql = "INSERT INTO t_article_tag(aid, tagid) VALUES" + ",".join(values)
    print(sql)
    return sql
# Script entry: parse the article file named on the command line, then write
# the resulting record to the database.
data = parse_html()
update_data(data)
|