PCA and VCD (Could delete since there were no matches)

PCA and VCD (Could delete since there were no matches)

# Compute PCA and VCD features to feed into the model

import pandas as pd

# Enantiomer dataset, read from the current working directory
half_enantiomer_data = pd.read_csv(filepath_or_buffer="half_enantiomer_data.csv")

# PCA/VCD table whose values are used as features
pca_vcd_values = pd.read_csv(filepath_or_buffer="../data/vcd/pca_vcd_values.csv")

pca_vcd_values

	Molecule Name	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14
0	(R)-(-)-Carvone	7.464317	-43.591126	-8.046428	-0.574730	-3.580254	5.872903	-2.689295	0.910235	3.263888	1.241722	-1.817992	-10.064818	-5.517172	6.956873	0.003187
1	(1R,2S,5R)-(-)-Menthol	-15.984788	-48.409755	11.599679	-6.459996	-2.895890	-21.817187	7.987366	30.168379	-4.855940	-27.966750	-15.593791	18.881794	6.680355	-3.869645	-18.299473
2	(-)-Menthyl chloride	-5.850617	-48.612997	25.383460	-1.972609	-9.623228	-12.079316	-5.014842	9.790833	-11.125811	-10.948924	12.772888	-4.068723	4.196934	0.326200	0.769741
3	(-)-Borneol	0.756580	-25.884643	-4.987043	6.148153	-18.167909	7.294051	-16.954397	9.166877	10.160046	8.512541	22.090822	-60.188603	5.246429	9.022766	-17.823573
4	(-)-Isoplegol	-12.599590	-33.717238	-12.966523	-18.479083	5.945713	-6.208222	-4.337392	10.783442	-12.640204	1.773643	1.927400	6.935122	5.856485	8.257695	-9.983220
...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...	...
156	β-Cholestanol	6.415996	-18.840982	20.303654	-29.238431	-26.058052	8.328641	-8.520251	19.753263	24.650594	37.600273	-1.058662	-16.200175	37.116864	-1.951506	-10.766619
157	Cholesterol Ethyl Carbonate	230.771312	139.215929	-45.562654	-28.445263	7.019739	-30.857068	18.819408	-6.542138	7.706800	-5.896378	7.642116	-5.188079	5.985704	-34.834249	-7.737224
158	Cholesterol n-Caprate	181.677425	33.151223	72.048835	-49.147260	19.664893	20.812246	-3.977717	5.351038	14.182154	-2.607771	6.333818	-0.251176	-7.470138	19.545668	-8.373291
159	Cholesterol Laurate	-130.152846	164.035229	61.091725	-37.136634	-13.847154	14.540341	-4.944992	-3.105062	2.508800	1.700032	5.700678	8.617406	-4.496284	22.150449	2.389079
160	Cholesterol Myristate	-132.817778	151.835799	66.528593	-50.454738	7.112731	21.576747	6.387919	-5.878909	-6.011154	7.648669	4.893929	7.866558	1.410726	11.221058	0.188386

161 rows × 16 columns

# Use the 'Molecule Name' column as the row index of the PCA/VCD table
pca_vcd_values = pca_vcd_values.set_index(keys="Molecule Name")

# Preview the first five rows
pca_vcd_values.head(n=5)

	0	1	2	3	4	5	6	7	8	9	10	11	12	13	14
Molecule Name
(R)-(-)-Carvone	7.464317	-43.591126	-8.046428	-0.574730	-3.580254	5.872903	-2.689295	0.910235	3.263888	1.241722	-1.817992	-10.064818	-5.517172	6.956873	0.003187
(1R,2S,5R)-(-)-Menthol	-15.984788	-48.409755	11.599679	-6.459996	-2.895890	-21.817187	7.987366	30.168379	-4.855940	-27.966750	-15.593791	18.881794	6.680355	-3.869645	-18.299473
(-)-Menthyl chloride	-5.850617	-48.612997	25.383460	-1.972609	-9.623228	-12.079316	-5.014842	9.790833	-11.125811	-10.948924	12.772888	-4.068723	4.196934	0.326200	0.769741
(-)-Borneol	0.756580	-25.884643	-4.987043	6.148153	-18.167909	7.294051	-16.954397	9.166877	10.160046	8.512541	22.090822	-60.188603	5.246429	9.022766	-17.823573
(-)-Isoplegol	-12.599590	-33.717238	-12.966523	-18.479083	5.945713	-6.208222	-4.337392	10.783442	-12.640204	1.773643	1.927400	6.935122	5.856485	8.257695	-9.983220

# Copy original dataframe. `.copy()` is required here: plain assignment would
# only bind a second name to the same object instead of creating a copy.
half_enantiomer_data_copy = half_enantiomer_data.copy()

# Keep only the molecules that appear in both tables. The match has to be made
# on the 'Molecule Name' column: half_enantiomer_data still carries the default
# integer row index from read_csv, whereas pca_vcd_values is indexed by molecule
# name, so the two indexes can never share a label.
# NOTE: names are compared exactly, so differences in spelling, case or
# surrounding whitespace between the two files still count as "no match".
common_index = pd.Index(half_enantiomer_data_copy['Molecule Name']).intersection(
    pca_vcd_values.index
)
half_enantiomer_data_copy = half_enantiomer_data_copy[
    half_enantiomer_data_copy['Molecule Name'].isin(common_index)
]
pca_vcd_features = pca_vcd_values.loc[common_index]

# Combine original dataset with pca_vcd: each row's 'Molecule Name' value is
# looked up in the index of pca_vcd_features.
pca_vcd = half_enantiomer_data_copy.join(pca_vcd_features, on='Molecule Name', how="inner")

pca_vcd

	Unnamed: 0	Unnamed: 0.1	Molecule Name	Pubchem ID #	Note	SMILES String	Other SMILES	Method	Contributor	Detection Threshold	...	5	6	7	8	9	10	11	12	13	14

0 rows × 32 columns