comparing_parquet_files

Created Diff never expires
17 removals
Lines
Total
Removed
Words
Total
Removed
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
155 lines
18 additions
Lines
Total
Added
Words
Total
Added
To continue using this feature, upgrade to
Diffchecker logo
Diffchecker Pro
156 lines
nic@xps-15:~/arrow$ parquet-tools inspect ../Downloads/papers.parquet
nic@xps-15:~/arrow$ parquet-tools inspect "/tmp/RtmpfoyxmB/file18fa6b312836/part-0.parquet"


############ file meta data ############
############ file meta data ############
created_by: parquet-go version 18.0.0-SNAPSHOT
created_by: parquet-cpp-arrow version 20.0.0-SNAPSHOT
num_columns: 13
num_columns: 13
num_rows: 64141
num_rows: 64141
num_row_groups: 1
num_row_groups: 1
format_version: 2.6
format_version: 2.6
serialized_size: 1819
serialized_size: 3124




############ Columns ############
############ Columns ############
paper_id
paper_id
softcite_id
softcite_id
title
title
published_year
published_year
published_date
published_date
publication_venue
publication_venue
publisher_name
publisher_name
doi
doi
pmcid
pmcid
pmid
pmid
genre
genre
license_type
license_type
has_mentions
has_mentions


############ Column(paper_id) ############
############ Column(paper_id) ############
name: paper_id
name: paper_id
path: paper_id
path: paper_id
max_definition_level: 0
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: INT32
physical_type: INT32
logical_type: Int(bitWidth=32, isSigned=false)
logical_type: Int(bitWidth=32, isSigned=false)
converted_type (legacy): UINT_32
converted_type (legacy): UINT_32
compression: GZIP (space_saved: 13%)
compression: GZIP (space_saved: 22%)


############ Column(softcite_id) ############
############ Column(softcite_id) ############
name: softcite_id
name: softcite_id
path: softcite_id
path: softcite_id
max_definition_level: 0
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 50%)
compression: GZIP (space_saved: 47%)


############ Column(title) ############
############ Column(title) ############
name: title
name: title
path: title
path: title
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 56%)
compression: GZIP (space_saved: 55%)


############ Column(published_year) ############
############ Column(published_year) ############
name: published_year
name: published_year
path: published_year
path: published_year
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: INT32
physical_type: INT32
logical_type: Int(bitWidth=16, isSigned=false)
logical_type: Int(bitWidth=16, isSigned=false)
converted_type (legacy): UINT_16
converted_type (legacy): UINT_16
compression: GZIP (space_saved: 18%)
compression: GZIP (space_saved: 18%)


############ Column(published_date) ############
############ Column(published_date) ############
name: published_date
name: published_date
path: published_date
path: published_date
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: INT32
physical_type: INT32
logical_type: Date
logical_type: Date
converted_type (legacy): DATE
converted_type (legacy): DATE
compression: GZIP (space_saved: 10%)
compression: GZIP (space_saved: 10%)


############ Column(publication_venue) ############
############ Column(publication_venue) ############
name: publication_venue
name: publication_venue
path: publication_venue
path: publication_venue
max_definition_level: 0
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 59%)
compression: GZIP (space_saved: 59%)


############ Column(publisher_name) ############
############ Column(publisher_name) ############
name: publisher_name
name: publisher_name
path: publisher_name
path: publisher_name
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 49%)
compression: GZIP (space_saved: 48%)


############ Column(doi) ############
############ Column(doi) ############
name: doi
name: doi
path: doi
path: doi
max_definition_level: 0
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 61%)
compression: GZIP (space_saved: 59%)


############ Column(pmcid) ############
############ Column(pmcid) ############
name: pmcid
name: pmcid
path: pmcid
path: pmcid
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 63%)
compression: GZIP (space_saved: 63%)


############ Column(pmid) ############
############ Column(pmid) ############
name: pmid
name: pmid
path: pmid
path: pmid
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 57%)
compression: GZIP (space_saved: 56%)


############ Column(genre) ############
############ Column(genre) ############
name: genre
name: genre
path: genre
path: genre
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 60%)
compression: GZIP (space_saved: 56%)


############ Column(license_type) ############
############ Column(license_type) ############
name: license_type
name: license_type
path: license_type
path: license_type
max_definition_level: 1
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BYTE_ARRAY
physical_type: BYTE_ARRAY
logical_type: String
logical_type: String
converted_type (legacy): UTF8
converted_type (legacy): UTF8
compression: GZIP (space_saved: 49%)
compression: GZIP (space_saved: 45%)


############ Column(has_mentions) ############
############ Column(has_mentions) ############
name: has_mentions
name: has_mentions
path: has_mentions
path: has_mentions
max_definition_level: 0
max_definition_level: 1
max_repetition_level: 0
max_repetition_level: 0
physical_type: BOOLEAN
physical_type: BOOLEAN
logical_type: None
logical_type: None
converted_type (legacy): NONE
converted_type (legacy): NONE
compression: GZIP (space_saved: 99%)
compression: GZIP (space_saved: 99%)