python代码命令大全 python代码大全和用法( 二 )


# Detect the operating system, pick a matching CJK-capable font file, prepare
# the output directory, then run the analysis and report the elapsed time.
# NOTE(review): `main()` and `start_time` are defined elsewhere in this file.
systemp_type = platform.system()
if systemp_type == 'Windows':
    font_path = 'simfang.ttf'
elif systemp_type == 'Linux':
    font_path = 'Arial Unicode MS.ttf'
else:
    # Unsupported platform: nothing sensible to do, stop here.
    quit()
try:
    current_path = os.getcwd()
except OSError:
    # getcwd() can fail (e.g. cwd deleted); fall back to the script's own dir.
    current_path = os.path.dirname(__file__)
# BUG FIX: the original computed the path only on the Windows branch, so the
# Linux branch crashed with NameError at the os.path.exists() check below.
current_file = os.path.join(current_path, 'docs')
if not os.path.exists(current_file):
    os.mkdir(current_file)
    print('目录中部存在docs文件夹,完成新文件夹创建过程 。')
print('当前操作系统:%s,文件存储路径为:%s' % (systemp_type, current_file))
main()
end_time = datetime.datetime.now()
# BUG FIX: original line was garbled ("end_time – start_timepython").
tt = end_time - start_time
# BUG FIX: original passed end_time as a second print() argument instead of
# formatting it into the string.
print('ending time:%s' % end_time)
print('this analysis total spend time:%s' % tt.seconds)
# Module 5: download PPT image assets referenced by an article page.
_author_ = 'xisuo'
import urllib.request
import requests
from bs4 import BeautifulSoup
from lxml import etree
import os

# NOTE(review): `url` must be defined before this point — it is not visible
# in this fragment; confirm where it is assigned.
response = requests.get(url).text
html = etree.HTML(response)
# Every <img> source inside the article body.
src_list = html.xpath('//div/article/p/img/@src')
current_path = os.path.dirname(__file__)
save_path = os.path.join(current_path, 'ppt_img')
# BUG FIX: the original tested `if os.path.exists(...)` and therefore only
# tried to create the folder when it already existed (then crashed).
if not os.path.exists(save_path):
    os.mkdir(save_path)
    print('img folder create successful')
i = 0  # BUG FIX: sequential file counter was never initialized
try:
    for src in src_list:
        save_img_path = os.path.join(save_path, '%d.jpg' % i)
        # `with` closes the file automatically; no explicit close() needed.
        with open(save_img_path, 'wb') as f:
            f.write(urllib.request.urlopen(src).read())
        i = i + 1
    print('save true')
except Exception as e:
    # BUG FIX: the original `except` had no matching `try`; restored here
    # around the download loop, matching the original best-effort intent.
    print('save img fail')
模块6:模型存储和读取
import joblib  # NOTE: joblib is a standalone package; "from sklearn import joblib" was never a valid import
from sklearn import svm
from sklearn2pmml import PMMLPipeline, sklearn2pmml
import pickle
def save_model(train_X, train_y):
    """Train an SVM classifier and persist it in three formats.

    Writes ``train_model.m`` (joblib), ``train_model.pmml`` (PMML) and
    ``train_model.pickle`` (pickle) to the working directory.

    :param train_X: training feature matrix
    :param train_y: training target labels
    :return: True on success
    """
    # BUG FIX: the original fitted on undefined names X, y instead of the
    # parameters, and passed a bare estimator to sklearn2pmml, which
    # requires a PMMLPipeline.
    pipeline = PMMLPipeline([("classifier", svm.SVC())])
    pipeline.fit(train_X, train_y)
    clf = pipeline.named_steps["classifier"]
    joblib.dump(clf, "train_model.m")
    sklearn2pmml(pipeline, "train_model.pmml")
    with open('train_model.pickle', 'wb') as f:
        pickle.dump(clf, f)
    return True
def load_model():
    """Load the models previously written by save_model().

    :return: tuple of (joblib-loaded model, pickle-loaded model)
    """
    clf_joblib = joblib.load('train_model.m')
    # BUG FIX: the original used '==' (a comparison, result discarded) instead
    # of '=', read an unrelated file ('linearregression.pickle') instead of
    # the 'train_model.pickle' that save_model() writes, and never closed
    # the file handle.
    with open('train_model.pickle', 'rb') as f:
        clf_pickle = pickle.load(f)
    return clf_joblib, clf_pickle
# Module 7: TF-IDF scoring of keyword tags grouped by income level.
import time
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfVectorizer

print('开始读取KeyTag标签…')
read_data_path = 'D:/untitled/incomelevel_kwtag_20190801.txt'
# Tab-separated file; each row carries an income level and its keyword tags.
load_data = pd.read_csv(read_data_path, sep='\t', encoding='utf-8')
data = pd.DataFrame(load_data, columns=['income_level', 'kw_tag'])
print('…读取KeyTag标签完成')
print('开始分组处理KeyTag标签…')
# Concatenate all tags of each income level into one space-separated
# "document" so each level becomes one row for the vectorizers below.
incomelevel_top = data[data['income_level'] == '高']
kw_tag_top = ' '.join(incomelevel_top['kw_tag'])
print('kw_tag_top : \n', kw_tag_top)
incomelevel_mid = data[data['income_level'] == '中']
kw_tag_mid = ' '.join(incomelevel_mid['kw_tag'])
print('kw_tag_mid : \n', kw_tag_mid)
incomelevel_low = data[data['income_level'] == '低']
kw_tag_low = ' '.join(incomelevel_low['kw_tag'])
print('kw_tag_low : \n', kw_tag_low)
print('…分组处理KeyTag标签完成')
vectorizer = CountVectorizer()
result = vectorizer.fit_transform([kw_tag_top, kw_tag_mid, kw_tag_low])
transformer = TfidfVectorizer()
kw_tag_score = transformer.fit_transform([kw_tag_top, kw_tag_mid, kw_tag_low])
print('…KeyTag分词结束')
# get_feature_names() was removed in scikit-learn 1.2; use its replacement.
kw_tag_value = transformer.get_feature_names_out()
result_target = pd.DataFrame(kw_tag_value, columns=['kw_tag'])
print('result_target : \n', result_target)
tf_score = kw_tag_score.toarray()
print('tf_score : \n', tf_score)
# BUG FIX: kw_tag_score_top was printed below but never defined; row 0 of
# tf_score corresponds to the '高' (top) document.
kw_tag_score_top = pd.DataFrame(tf_score[0], columns=['kw_tag_score_top'])
kw_tag_score_mid = pd.DataFrame(tf_score[1], columns=['kw_tag_score_mid'])
kw_tag_score_low = pd.DataFrame(tf_score[2], columns=['kw_tag_score_low'])
print(len(kw_tag_score_top))

推荐阅读