import 구문
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.model_selection import cross_val_score, train_test_split
|
jupyter notebook에서 파일에 바로 내용을 써 넣어서 채우고 싶을 때
%%writefile test2
1,2,3,4,5
2,3,4,5,6
7,8,9,0,1
|
그래프 한글폰트 설정
# Select a Korean-capable font for matplotlib based on the operating system.
import platform

import matplotlib
import matplotlib.font_manager

system_name = platform.system()
if system_name == 'Windows':
    # Raw string: the backslashes are path separators, not escape sequences.
    path = r"c:\Windows\Fonts\malgun.ttf"
    # Ask the font file itself for the family name matplotlib should use.
    font_name = matplotlib.font_manager.FontProperties(fname=path).get_name()
    matplotlib.rc('font', family=font_name)
elif system_name == 'Darwin':
    matplotlib.rc('font', family='AppleGothic')
elif system_name == 'Linux':
    # NOTE(review): assumes the NanumBarunGothic font is installed — confirm.
    matplotlib.rc('font', family='NanumBarunGothic')
|
datetime 데이터 분해하는 코드
# Split the "datetime" column of `train` into one column per component
# ("d-year", "d-month", ..., "d-second").
# NOTE(review): the .dt accessor needs a datetime-like column — confirm that
# train["datetime"] was parsed as datetimes before running this.
datetime_parts = ["year", "month", "day", "hour", "minute", "second"]
for part in datetime_parts:
    train["d-" + part] = getattr(train["datetime"].dt, part)

# Show the source column next to the derived ones for a quick sanity check.
train[["datetime"] + ["d-" + part for part in datetime_parts]].head()
|
zip파일 압축풀기
# Extract the archive at `local_zip` into ./data.
import zipfile

local_zip = './data/cats_and_dogs_filtered.zip'
# The context manager closes the archive even if extraction raises.
with zipfile.ZipFile(local_zip, 'r') as zip_ref:
    zip_ref.extractall('./data')
|
폴더 없으면 폴더 만들기. 그리고 wget
1 2 3 4 5 6 | if not os.path.exists( "./data" ):
os.makedirs( "./data" )
if not os.path.exists( "./data/cats_and_dogs_filtered.zip" ):
!wget - - no - check - certificate \
https: / / storage.googleapis.com / mledu - datasets / cats_and_dogs_filtered. zip \
- O . / data / cats_and_dogs_filtered. zip
|
csv 파일 읽기
# Load the tab-separated train/test rating files into DataFrames.
import pandas as pd

# keep_default_na=False: do not convert strings such as "NA" or empty
# fields to NaN; they are kept as-is.
df_train = pd.read_csv('data/ratings_train.txt', delimiter='\t', keep_default_na=False)
df_test = pd.read_csv('data/ratings_test.txt', delimiter='\t', keep_default_na=False)

# Preview the first rows of the training data.
df_train.head()
|
학습 데이터, 테스트 데이터로 분리하기
# Two usages of train_test_split on a toy dataset.
import numpy as np
from sklearn.model_selection import train_test_split

X = [[0, 1], [2, 3], [4, 5], [6, 7], [8, 9]]
Y = [0, 1, 2, 3, 4]

# Usage 1: split a single sequence into (train, test).
X_train, X_test = train_test_split(X, test_size=0.2, random_state=123)

# Usage 2: split features and labels together.
# This rebinds X_train / X_test from the call above.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.33, random_state=321)
|
|