

--drop table forbes cascade;
-- Staging table for the Forbes 2022 list loaded from forbes2022.csv.
-- Columns are declared in the same order as the CSV fields, because the
-- load does not name columns. Only seq is constrained; every other column
-- is nullable so rows with missing cells can still be loaded.
CREATE TABLE forbes (
    seq             INTEGER,   -- row key; the data-fix notes refer to rows by seq
    rank            INTEGER,
    name            VARCHAR,
    industry        VARCHAR,
    headquarters    VARCHAR,
    country         VARCHAR,
    ceo             VARCHAR,
    founded         INTEGER,   -- presumably the founding year; confirm against the sheet
    employees       INTEGER,
    revenue         INTEGER,   -- NOTE(review): units of the money columns are not recorded here
    profits         INTEGER,
    assets          INTEGER,
    mkt_value       INTEGER,
    forbes_webpage  VARCHAR,
    PRIMARY KEY (seq)
);

-- Bulk-load the CSV into forbes with server-side COPY: the path below is
-- opened by the database server, not by the client session.
-- No column list is given, so CSV fields are assigned to the table's columns
-- in declaration order. The first line of the file is a header and is skipped.
COPY forbes
FROM '/home/john/webjohn/lab/scrape/forbes2022.csv'
WITH (FORMAT csv, DELIMITER ',', HEADER true);


psql -U voyccom_jhagstrand -d voyccom_movie -c "\copy forbes FROM '/home/voyccom/webjohn/lab/lab/scrape/forbes2022.csv' delimiter ',' csv header;"


In our google sheet
missing values in a varchar column are 'na'
missing values in an integer column are empty
Using Google Sheets Find and Replace, the regex for an empty cell is
^\s*$

fix data

seq=1044, name='TD SYNNEX', profits and assets got combined into one cell
seq=1033, name=nasdaq change to nasdaq, Inc.

missing industry
   12 | Microsoft
   92 | Cisco Systems
   98 | IBM
  115 | Oracle
  169 | SAP
  172 | Lockheed Martin
  184 | Accenture
  267 | Mastercard
  278 | McKesson
  310 | Blackstone
  386 | Tata Consultancy Services
  411 | BAE Systems
  423 | Lennar
  431 | Boeing
  445 | D.R. Horton
  554 | Aon
  558 | WTW
  569 | Cognizant
  698 | S&P Global
  821 | Wipro
 1029 | American Financial Group
 1033 | NASDAQ
 1080 | CGI
 1479 | Gartner
 1588 | DXC Technology
 1892 | PennyMac Financial Services
 1917 | Ares Management
 1930 | Mr. Cooper Group

--drop table industry cascade;
-- Industry lookup table: one row per distinct industry name (duplicates are
-- rejected by the UNIQUE constraint), keyed by an auto-assigned id.
CREATE TABLE industry (
    id        SERIAL,
    industry  VARCHAR,
    sector    VARCHAR,   -- presumably the sector the industry is grouped under; confirm
    PRIMARY KEY (id),
    UNIQUE (industry)
);

